1 /* Global, SSA-based optimizations using mathematical identities.
2 Copyright (C) 2005-2023 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* Currently, the only mini-pass in this file tries to CSE reciprocal
21 operations. These are common in sequences such as this one:
23 modulus = sqrt(x*x + y*y + z*z);
24 x = x / modulus;
25 y = y / modulus;
26 z = z / modulus;
28 that can be optimized to
30 modulus = sqrt(x*x + y*y + z*z);
31 rmodulus = 1.0 / modulus;
32 x = x * rmodulus;
33 y = y * rmodulus;
34 z = z * rmodulus;
36 We do this for loop invariant divisors, and with this pass whenever
37 we notice that a division has the same divisor multiple times.
39 Of course, like in PRE, we don't insert a division if a dominator
40 already has one. However, this cannot be done as an extension of
41 PRE for several reasons.
43 First of all, experiments showed that the
44 transformation is not always useful if there are only two divisions
45 by the same divisor. This is probably because modern processors
46 can pipeline the divisions; on older, in-order processors it should
47 still be effective to optimize two divisions by the same number.
48 We make this a param, and it shall be called N in the remainder of
49 this comment.
51 Second, if trapping math is active, we have less freedom on where
52 to insert divisions: we can only do so in basic blocks that already
53 contain one. (If divisions don't trap, we can instead insert
54 divisions elsewhere, which will be in blocks that are common dominators
55 of those that have the division).
57 We really don't want to compute the reciprocal unless a division will
58 be found. To do this, we won't insert the division in a basic block
59 that has fewer than N divisions *post-dominating* it.
61 The algorithm constructs a subset of the dominator tree, holding the
62 blocks containing the divisions and the common dominators to them,
63 and walks it twice. The first walk is in post-order, and it annotates
64 each block with the number of divisions that post-dominate it: this
65 gives information on where divisions can be inserted profitably.
66 The second walk is in pre-order, and it inserts divisions as explained
67 above, and replaces divisions by multiplications.
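As an illustration (restating the example above in these terms), the
three divisions by "modulus" all sit in one basic block, so the subset
of the dominator tree is a single node that the first walk annotates
with three post-dominating divisions; provided N is at most 3, the
second walk inserts the reciprocal right before the first division and
all three divisions become multiplications.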
69 In the best case, the cost of the pass is O(n_statements). In the
70 worst-case, the cost is due to creating the dominator tree subset,
71 with a cost of O(n_basic_blocks ^ 2); however this can only happen
72 for n_statements / n_basic_blocks statements. So, the amortized cost
73 of creating the dominator tree subset is O(n_basic_blocks) and the
74 worst-case cost of the pass is O(n_statements * n_basic_blocks).
76 More practically, the cost will be small because there are few
77 divisions, and they tend to be in the same basic block, so insert_bb
78 is called very few times.
80 If we did this using domwalk.cc, an efficient implementation would have
81 to work on all the variables in a single pass, because we could not
82 work on just a subset of the dominator tree, as we do now, and the
83 cost would also be something like O(n_statements * n_basic_blocks).
84 The data structures would be more complex in order to work on all the
85 variables in a single pass. */
87 #include "config.h"
88 #include "system.h"
89 #include "coretypes.h"
90 #include "backend.h"
91 #include "target.h"
92 #include "rtl.h"
93 #include "tree.h"
94 #include "gimple.h"
95 #include "predict.h"
96 #include "alloc-pool.h"
97 #include "tree-pass.h"
98 #include "ssa.h"
99 #include "optabs-tree.h"
100 #include "gimple-pretty-print.h"
101 #include "alias.h"
102 #include "fold-const.h"
103 #include "gimple-iterator.h"
104 #include "gimple-fold.h"
105 #include "gimplify.h"
106 #include "gimplify-me.h"
107 #include "stor-layout.h"
108 #include "tree-cfg.h"
109 #include "tree-dfa.h"
110 #include "tree-ssa.h"
111 #include "builtins.h"
112 #include "internal-fn.h"
113 #include "case-cfn-macros.h"
114 #include "optabs-libfuncs.h"
115 #include "tree-eh.h"
116 #include "targhooks.h"
117 #include "domwalk.h"
118 #include "tree-ssa-math-opts.h"
120 /* This structure represents one basic block that either computes a
121 division, or is a common dominator for basic blocks that compute a
122 division. */
123 struct occurrence {
124 /* The basic block represented by this structure. */
125 basic_block bb = basic_block();
127 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
128 inserted in BB. */
129 tree recip_def = tree();
131 /* If non-NULL, the SSA_NAME holding the definition for a squared
132 reciprocal inserted in BB. */
133 tree square_recip_def = tree();
135 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
136 was inserted in BB. */
137 gimple *recip_def_stmt = nullptr;
139 /* Pointer to a list of "struct occurrence"s for blocks dominated
140 by BB. */
141 struct occurrence *children = nullptr;
143 /* Pointer to the next "struct occurrence" in the list of blocks
144 sharing a common dominator. */
145 struct occurrence *next = nullptr;
147 /* The number of divisions that are in BB before compute_merit. The
148 number of divisions that are in BB or post-dominate it after
149 compute_merit. */
150 int num_divisions = 0;
152 /* True if the basic block has a division, false if it is a common
153 dominator for basic blocks that do. If it is false and trapping
154 math is active, BB is not a candidate for inserting a reciprocal. */
155 bool bb_has_division = false;
157 /* Construct a struct occurrence for basic block BB, and whose
158 children list is headed by CHILDREN. */
159 occurrence (basic_block bb, struct occurrence *children)
160 : bb (bb), children (children)
162 bb->aux = this;
165 /* Destroy a struct occurrence and remove it from its basic block. */
166 ~occurrence ()
168 bb->aux = nullptr;
171 /* Allocate memory for a struct occurrence from OCC_POOL. */
172 static void* operator new (size_t);
174 /* Return memory for a struct occurrence to OCC_POOL. */
175 static void operator delete (void*, size_t);
178 static struct
180 /* Number of 1.0/X ops inserted. */
181 int rdivs_inserted;
183 /* Number of 1.0/FUNC ops inserted. */
184 int rfuncs_inserted;
185 } reciprocal_stats;
187 static struct
189 /* Number of cexpi calls inserted. */
190 int inserted;
192 /* Number of conversions removed. */
193 int conv_removed;
195 } sincos_stats;
197 static struct
199 /* Number of widening multiplication ops inserted. */
200 int widen_mults_inserted;
202 /* Number of integer multiply-and-accumulate ops inserted. */
203 int maccs_inserted;
205 /* Number of fp fused multiply-add ops inserted. */
206 int fmas_inserted;
208 /* Number of divmod calls inserted. */
209 int divmod_calls_inserted;
211 /* Number of highpart multiplication ops inserted. */
212 int highpart_mults_inserted;
213 } widen_mul_stats;
215 /* The instance of "struct occurrence" representing the highest
216 interesting block in the dominator tree. */
217 static struct occurrence *occ_head;
219 /* Allocation pool for getting instances of "struct occurrence". */
220 static object_allocator<occurrence> *occ_pool;
222 void* occurrence::operator new (size_t n)
224 gcc_assert (n == sizeof(occurrence));
225 return occ_pool->allocate_raw ();
228 void occurrence::operator delete (void *occ, size_t n)
230 gcc_assert (n == sizeof(occurrence));
231 occ_pool->remove_raw (occ);
234 /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
235 list of "struct occurrence"s, one per basic block, having IDOM as
236 their common dominator.
238 We try to insert NEW_OCC as deep as possible in the tree, and we also
239 insert any other block that is a common dominator for BB and one
240 block already in the tree. */
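/* For example (illustrative only): if the tree currently holds an
   occurrence for a block B1 and we insert one for a block B2 such that
   neither block dominates the other but both are strictly dominated by
   some block D other than IDOM, a new occurrence for D is created on
   the fly and the occurrences for B1 and B2 both become its children.  */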
242 static void
243 insert_bb (struct occurrence *new_occ, basic_block idom,
244 struct occurrence **p_head)
246 struct occurrence *occ, **p_occ;
248 for (p_occ = p_head; (occ = *p_occ) != NULL; )
250 basic_block bb = new_occ->bb, occ_bb = occ->bb;
251 basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
252 if (dom == bb)
254 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
255 from its list. */
256 *p_occ = occ->next;
257 occ->next = new_occ->children;
258 new_occ->children = occ;
260 /* Try the next block (it may as well be dominated by BB). */
263 else if (dom == occ_bb)
265 /* OCC_BB dominates BB. Tail recurse to look deeper. */
266 insert_bb (new_occ, dom, &occ->children);
267 return;
270 else if (dom != idom)
272 gcc_assert (!dom->aux);
274 /* There is a dominator between IDOM and BB, add it and make
275 two children out of NEW_OCC and OCC. First, remove OCC from
276 its list. */
277 *p_occ = occ->next;
278 new_occ->next = occ;
279 occ->next = NULL;
281 /* None of the previous blocks has DOM as a dominator: if we tail
282 recursed, we would reexamine them uselessly. Just switch BB with
283 DOM, and go on looking for blocks dominated by DOM. */
284 new_occ = new occurrence (dom, new_occ);
287 else
289 /* Nothing special, go on with the next element. */
290 p_occ = &occ->next;
294 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
295 new_occ->next = *p_head;
296 *p_head = new_occ;
299 /* Register that we found a division in BB.
300 IMPORTANCE is a measure of how much weighting to give
301 that division. Use IMPORTANCE = 2 to register a single
302 division. If the division is going to be found multiple
303 times use 1 (as it is with squares). */
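/* For example, a use "a / x" is registered once with IMPORTANCE 2, whereas
   a use "a / (x * x)" is encountered twice while walking the uses of x and
   is therefore registered twice with IMPORTANCE 1, so both kinds of
   division end up with the same total weight.  */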
305 static inline void
306 register_division_in (basic_block bb, int importance)
308 struct occurrence *occ;
310 occ = (struct occurrence *) bb->aux;
311 if (!occ)
313 occ = new occurrence (bb, NULL);
314 insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
317 occ->bb_has_division = true;
318 occ->num_divisions += importance;
322 /* Compute the number of divisions that postdominate each block in OCC and
323 its children. */
325 static void
326 compute_merit (struct occurrence *occ)
328 struct occurrence *occ_child;
329 basic_block dom = occ->bb;
331 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
333 basic_block bb;
334 if (occ_child->children)
335 compute_merit (occ_child);
337 if (flag_exceptions)
338 bb = single_noncomplex_succ (dom);
339 else
340 bb = dom;
342 if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
343 occ->num_divisions += occ_child->num_divisions;
348 /* Return whether USE_STMT is a floating-point division by DEF. */
349 static inline bool
350 is_division_by (gimple *use_stmt, tree def)
352 return is_gimple_assign (use_stmt)
353 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
354 && gimple_assign_rhs2 (use_stmt) == def
355 /* Do not recognize x / x as valid division, as we are getting
356 confused later by replacing all immediate uses x in such
357 a stmt. */
358 && gimple_assign_rhs1 (use_stmt) != def
359 && !stmt_can_throw_internal (cfun, use_stmt);
362 /* Return TRUE if USE_STMT is a multiplication of DEF by A. */
363 static inline bool
364 is_mult_by (gimple *use_stmt, tree def, tree a)
366 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
367 && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
369 tree op0 = gimple_assign_rhs1 (use_stmt);
370 tree op1 = gimple_assign_rhs2 (use_stmt);
372 return (op0 == def && op1 == a)
373 || (op0 == a && op1 == def);
375 return 0;
378 /* Return whether USE_STMT is DEF * DEF. */
379 static inline bool
380 is_square_of (gimple *use_stmt, tree def)
382 return is_mult_by (use_stmt, def, def);
385 /* Return whether USE_STMT is a floating-point division by
386 DEF * DEF. */
387 static inline bool
388 is_division_by_square (gimple *use_stmt, tree def)
390 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
391 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
392 && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
393 && !stmt_can_throw_internal (cfun, use_stmt))
395 tree denominator = gimple_assign_rhs2 (use_stmt);
396 if (TREE_CODE (denominator) == SSA_NAME)
397 return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
399 return 0;
402 /* Walk the subset of the dominator tree rooted at OCC, setting the
403 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
404 the given basic block. The field may be left NULL, of course,
405 if it is not possible or profitable to do the optimization.
407 DEF_BSI is an iterator pointing at the statement defining DEF.
408 If RECIP_DEF is set, a dominator already has a computation that can
409 be used.
411 If should_insert_square_recip is set, then this also inserts
412 the square of the reciprocal immediately after the definition
413 of the reciprocal. */
415 static void
416 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
417 tree def, tree recip_def, tree square_recip_def,
418 int should_insert_square_recip, int threshold)
420 tree type;
421 gassign *new_stmt, *new_square_stmt;
422 gimple_stmt_iterator gsi;
423 struct occurrence *occ_child;
425 if (!recip_def
426 && (occ->bb_has_division || !flag_trapping_math)
427 /* Divide by two as all divisions are counted twice in
428 the costing loop. */
429 && occ->num_divisions / 2 >= threshold)
431 /* Make a variable with the replacement and substitute it. */
432 type = TREE_TYPE (def);
433 recip_def = create_tmp_reg (type, "reciptmp");
434 new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
435 build_one_cst (type), def);
437 if (should_insert_square_recip)
439 square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
440 new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
441 recip_def, recip_def);
444 if (occ->bb_has_division)
446 /* Case 1: insert before an existing division. */
447 gsi = gsi_after_labels (occ->bb);
448 while (!gsi_end_p (gsi)
449 && (!is_division_by (gsi_stmt (gsi), def))
450 && (!is_division_by_square (gsi_stmt (gsi), def)))
451 gsi_next (&gsi);
453 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
454 if (should_insert_square_recip)
455 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
457 else if (def_gsi && occ->bb == gsi_bb (*def_gsi))
459 /* Case 2: insert right after the definition. Note that this will
460 never happen if the definition statement can throw, because in
461 that case the sole successor of the statement's basic block will
462 dominate all the uses as well. */
463 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
464 if (should_insert_square_recip)
465 gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
467 else
469 /* Case 3: insert in a basic block not containing defs/uses. */
470 gsi = gsi_after_labels (occ->bb);
471 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
472 if (should_insert_square_recip)
473 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
476 reciprocal_stats.rdivs_inserted++;
478 occ->recip_def_stmt = new_stmt;
481 occ->recip_def = recip_def;
482 occ->square_recip_def = square_recip_def;
483 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
484 insert_reciprocals (def_gsi, occ_child, def, recip_def,
485 square_recip_def, should_insert_square_recip,
486 threshold);
489 /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
490 Take as argument the use for (x * x). */
491 static inline void
492 replace_reciprocal_squares (use_operand_p use_p)
494 gimple *use_stmt = USE_STMT (use_p);
495 basic_block bb = gimple_bb (use_stmt);
496 struct occurrence *occ = (struct occurrence *) bb->aux;
498 if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
499 && occ->recip_def)
501 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
502 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
503 gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
504 SET_USE (use_p, occ->square_recip_def);
505 fold_stmt_inplace (&gsi);
506 update_stmt (use_stmt);
511 /* Replace the division at USE_P with a multiplication by the reciprocal, if
512 possible. */
514 static inline void
515 replace_reciprocal (use_operand_p use_p)
517 gimple *use_stmt = USE_STMT (use_p);
518 basic_block bb = gimple_bb (use_stmt);
519 struct occurrence *occ = (struct occurrence *) bb->aux;
521 if (optimize_bb_for_speed_p (bb)
522 && occ->recip_def && use_stmt != occ->recip_def_stmt)
524 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
525 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
526 SET_USE (use_p, occ->recip_def);
527 fold_stmt_inplace (&gsi);
528 update_stmt (use_stmt);
533 /* Free OCC and return one more "struct occurrence" to be freed. */
535 static struct occurrence *
536 free_bb (struct occurrence *occ)
538 struct occurrence *child, *next;
540 /* First get the two pointers hanging off OCC. */
541 next = occ->next;
542 child = occ->children;
543 delete occ;
545 /* Now ensure that we don't recurse unless it is necessary. */
546 if (!child)
547 return next;
548 else
550 while (next)
551 next = free_bb (next);
553 return child;
557 /* Transform sequences like
558 t = sqrt (a)
559 x = 1.0 / t;
560 r1 = x * x;
561 r2 = a * x;
562 into:
563 t = sqrt (a)
564 r1 = 1.0 / a;
565 r2 = t;
566 x = r1 * r2;
567 depending on the uses of x, r1, r2. This removes one multiplication and
568 allows the sqrt and division operations to execute in parallel.
569 DEF_GSI is the gsi of the initial division by sqrt that defines
570 DEF (x in the example above). */
572 static void
573 optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
575 gimple *use_stmt;
576 imm_use_iterator use_iter;
577 gimple *stmt = gsi_stmt (*def_gsi);
578 tree x = def;
579 tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
580 tree div_rhs1 = gimple_assign_rhs1 (stmt);
582 if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
583 || TREE_CODE (div_rhs1) != REAL_CST
584 || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
585 return;
587 gcall *sqrt_stmt
588 = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
590 if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
591 return;
593 switch (gimple_call_combined_fn (sqrt_stmt))
595 CASE_CFN_SQRT:
596 CASE_CFN_SQRT_FN:
597 break;
599 default:
600 return;
602 tree a = gimple_call_arg (sqrt_stmt, 0);
604 /* We have 'a' and 'x'. Now analyze the uses of 'x'. */
606 /* Statements that use x in x * x. */
607 auto_vec<gimple *> sqr_stmts;
608 /* Statements that use x in a * x. */
609 auto_vec<gimple *> mult_stmts;
610 bool has_other_use = false;
611 bool mult_on_main_path = false;
613 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
615 if (is_gimple_debug (use_stmt))
616 continue;
617 if (is_square_of (use_stmt, x))
619 sqr_stmts.safe_push (use_stmt);
620 if (gimple_bb (use_stmt) == gimple_bb (stmt))
621 mult_on_main_path = true;
623 else if (is_mult_by (use_stmt, x, a))
625 mult_stmts.safe_push (use_stmt);
626 if (gimple_bb (use_stmt) == gimple_bb (stmt))
627 mult_on_main_path = true;
629 else
630 has_other_use = true;
633 /* In the x * x and a * x cases we just rewire stmt operands or
634 remove multiplications. In the has_other_use case we introduce
635 a multiplication so make sure we don't introduce a multiplication
636 on a path where there was none. */
637 if (has_other_use && !mult_on_main_path)
638 return;
640 if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
641 return;
643 /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
644 to be able to compose it from the sqr and mult cases. */
645 if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
646 return;
648 if (dump_file)
650 fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
651 print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
652 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
653 fprintf (dump_file, "\n");
656 bool delete_div = !has_other_use;
657 tree sqr_ssa_name = NULL_TREE;
658 if (!sqr_stmts.is_empty ())
660 /* r1 = x * x. Transform the original
661 x = 1.0 / t
662 into
663 tmp1 = 1.0 / a
664 r1 = tmp1. */
666 sqr_ssa_name
667 = make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");
669 if (dump_file)
671 fprintf (dump_file, "Replacing original division\n");
672 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
673 fprintf (dump_file, "with new division\n");
675 stmt
676 = gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
677 gimple_assign_rhs1 (stmt), a);
678 gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
679 gsi_remove (def_gsi, true);
680 *def_gsi = gsi_for_stmt (stmt);
681 fold_stmt_inplace (def_gsi);
682 update_stmt (stmt);
684 if (dump_file)
685 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
687 delete_div = false;
688 gimple *sqr_stmt;
689 unsigned int i;
690 FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
692 gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
693 gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
694 update_stmt (sqr_stmt);
697 if (!mult_stmts.is_empty ())
699 /* r2 = a * x. Transform this into:
700 r2 = t (The original sqrt (a)). */
701 unsigned int i;
702 gimple *mult_stmt = NULL;
703 FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
705 gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);
707 if (dump_file)
709 fprintf (dump_file, "Replacing squaring multiplication\n");
710 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
711 fprintf (dump_file, "with assignment\n");
713 gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
714 fold_stmt_inplace (&gsi2);
715 update_stmt (mult_stmt);
716 if (dump_file)
717 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
721 if (has_other_use)
723 /* Using the two temporaries tmp1, tmp2 from above
724 the original x is now:
725 x = tmp1 * tmp2. */
726 gcc_assert (orig_sqrt_ssa_name);
727 gcc_assert (sqr_ssa_name);
729 gimple *new_stmt
730 = gimple_build_assign (x, MULT_EXPR,
731 orig_sqrt_ssa_name, sqr_ssa_name);
732 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
733 update_stmt (stmt);
735 else if (delete_div)
737 /* Remove the original division. */
738 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
739 gsi_remove (&gsi2, true);
740 release_defs (stmt);
742 else
743 release_ssa_name (x);
746 /* Look for floating-point divisions among DEF's uses, and try to
747 replace them by multiplications with the reciprocal. Add
748 as many statements computing the reciprocal as needed.
750 DEF must be a GIMPLE register of a floating-point type. */
752 static void
753 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
755 use_operand_p use_p, square_use_p;
756 imm_use_iterator use_iter, square_use_iter;
757 tree square_def;
758 struct occurrence *occ;
759 int count = 0;
760 int threshold;
761 int square_recip_count = 0;
762 int sqrt_recip_count = 0;
764 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
765 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
767 /* If DEF is a square (x * x), count the number of divisions by x.
768 If there are more divisions by x than by (DEF * DEF), prefer to optimize
769 the reciprocal of x instead of DEF. This improves cases like:
770 def = x * x
771 t0 = a / def
772 t1 = b / def
773 t2 = c / x
774 Reciprocal optimization of x results in 1 division rather than 2 or 3. */
775 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
777 if (is_gimple_assign (def_stmt)
778 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
779 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
780 && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
782 tree op0 = gimple_assign_rhs1 (def_stmt);
784 FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
786 gimple *use_stmt = USE_STMT (use_p);
787 if (is_division_by (use_stmt, op0))
788 sqrt_recip_count++;
792 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
794 gimple *use_stmt = USE_STMT (use_p);
795 if (is_division_by (use_stmt, def))
797 register_division_in (gimple_bb (use_stmt), 2);
798 count++;
801 if (is_square_of (use_stmt, def))
803 square_def = gimple_assign_lhs (use_stmt);
804 FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
806 gimple *square_use_stmt = USE_STMT (square_use_p);
807 if (is_division_by (square_use_stmt, square_def))
809 /* This is executed twice for each division by a square. */
810 register_division_in (gimple_bb (square_use_stmt), 1);
811 square_recip_count++;
817 /* Square reciprocals were counted twice above. */
818 square_recip_count /= 2;
820 /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x). */
821 if (sqrt_recip_count > square_recip_count)
822 goto out;
824 /* Do the expensive part only if we can hope to optimize something. */
825 if (count + square_recip_count >= threshold && count >= 1)
827 gimple *use_stmt;
828 for (occ = occ_head; occ; occ = occ->next)
830 compute_merit (occ);
831 insert_reciprocals (def_gsi, occ, def, NULL, NULL,
832 square_recip_count, threshold);
835 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
837 if (is_division_by (use_stmt, def))
839 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
840 replace_reciprocal (use_p);
842 else if (square_recip_count > 0 && is_square_of (use_stmt, def))
844 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
846 /* Find all uses of the square that are divisions and
847 * replace them by multiplications with the inverse. */
848 imm_use_iterator square_iterator;
849 gimple *powmult_use_stmt = USE_STMT (use_p);
850 tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
852 FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
853 square_iterator, powmult_def_name)
854 FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
856 gimple *powmult_use_stmt = USE_STMT (square_use_p);
857 if (is_division_by (powmult_use_stmt, powmult_def_name))
858 replace_reciprocal_squares (square_use_p);
865 out:
866 for (occ = occ_head; occ; )
867 occ = free_bb (occ);
869 occ_head = NULL;
872 /* Return an internal function that implements the reciprocal of CALL,
873 or IFN_LAST if there is no such function that the target supports. */
875 internal_fn
876 internal_fn_reciprocal (gcall *call)
878 internal_fn ifn;
880 switch (gimple_call_combined_fn (call))
882 CASE_CFN_SQRT:
883 CASE_CFN_SQRT_FN:
884 ifn = IFN_RSQRT;
885 break;
887 default:
888 return IFN_LAST;
891 tree_pair types = direct_internal_fn_types (ifn, call);
892 if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
893 return IFN_LAST;
895 return ifn;
898 /* Go through all the floating-point SSA_NAMEs, and call
899 execute_cse_reciprocals_1 on each of them. */
900 namespace {
902 const pass_data pass_data_cse_reciprocals =
904 GIMPLE_PASS, /* type */
905 "recip", /* name */
906 OPTGROUP_NONE, /* optinfo_flags */
907 TV_TREE_RECIP, /* tv_id */
908 PROP_ssa, /* properties_required */
909 0, /* properties_provided */
910 0, /* properties_destroyed */
911 0, /* todo_flags_start */
912 TODO_update_ssa, /* todo_flags_finish */
915 class pass_cse_reciprocals : public gimple_opt_pass
917 public:
918 pass_cse_reciprocals (gcc::context *ctxt)
919 : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
922 /* opt_pass methods: */
923 bool gate (function *) final override
925 return optimize && flag_reciprocal_math;
927 unsigned int execute (function *) final override;
929 }; // class pass_cse_reciprocals
931 unsigned int
932 pass_cse_reciprocals::execute (function *fun)
934 basic_block bb;
935 tree arg;
937 occ_pool = new object_allocator<occurrence> ("dominators for recip");
939 memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
940 calculate_dominance_info (CDI_DOMINATORS);
941 calculate_dominance_info (CDI_POST_DOMINATORS);
943 if (flag_checking)
944 FOR_EACH_BB_FN (bb, fun)
945 gcc_assert (!bb->aux);
947 for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
948 if (FLOAT_TYPE_P (TREE_TYPE (arg))
949 && is_gimple_reg (arg))
951 tree name = ssa_default_def (fun, arg);
952 if (name)
953 execute_cse_reciprocals_1 (NULL, name);
956 FOR_EACH_BB_FN (bb, fun)
958 tree def;
960 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
961 gsi_next (&gsi))
963 gphi *phi = gsi.phi ();
964 def = PHI_RESULT (phi);
965 if (! virtual_operand_p (def)
966 && FLOAT_TYPE_P (TREE_TYPE (def)))
967 execute_cse_reciprocals_1 (NULL, def);
970 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
971 gsi_next (&gsi))
973 gimple *stmt = gsi_stmt (gsi);
975 if (gimple_has_lhs (stmt)
976 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
977 && FLOAT_TYPE_P (TREE_TYPE (def))
978 && TREE_CODE (def) == SSA_NAME)
980 execute_cse_reciprocals_1 (&gsi, def);
981 stmt = gsi_stmt (gsi);
982 if (flag_unsafe_math_optimizations
983 && is_gimple_assign (stmt)
984 && gimple_assign_lhs (stmt) == def
985 && !stmt_can_throw_internal (cfun, stmt)
986 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
987 optimize_recip_sqrt (&gsi, def);
991 if (optimize_bb_for_size_p (bb))
992 continue;
994 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
995 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
996 gsi_next (&gsi))
998 gimple *stmt = gsi_stmt (gsi);
1000 if (is_gimple_assign (stmt)
1001 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
1003 tree arg1 = gimple_assign_rhs2 (stmt);
1004 gimple *stmt1;
1006 if (TREE_CODE (arg1) != SSA_NAME)
1007 continue;
1009 stmt1 = SSA_NAME_DEF_STMT (arg1);
1011 if (is_gimple_call (stmt1)
1012 && gimple_call_lhs (stmt1))
1014 bool fail;
1015 imm_use_iterator ui;
1016 use_operand_p use_p;
1017 tree fndecl = NULL_TREE;
1019 gcall *call = as_a <gcall *> (stmt1);
1020 internal_fn ifn = internal_fn_reciprocal (call);
1021 if (ifn == IFN_LAST)
1023 fndecl = gimple_call_fndecl (call);
1024 if (!fndecl
1025 || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
1026 continue;
1027 fndecl = targetm.builtin_reciprocal (fndecl);
1028 if (!fndecl)
1029 continue;
1032 /* Check that all uses of the SSA name are divisions,
1033 otherwise replacing the defining statement will do
1034 the wrong thing. */
1035 fail = false;
1036 FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
1038 gimple *stmt2 = USE_STMT (use_p);
1039 if (is_gimple_debug (stmt2))
1040 continue;
1041 if (!is_gimple_assign (stmt2)
1042 || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
1043 || gimple_assign_rhs1 (stmt2) == arg1
1044 || gimple_assign_rhs2 (stmt2) != arg1)
1046 fail = true;
1047 break;
1050 if (fail)
1051 continue;
1053 gimple_replace_ssa_lhs (call, arg1);
1054 if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
1056 auto_vec<tree, 4> args;
1057 for (unsigned int i = 0;
1058 i < gimple_call_num_args (call); i++)
1059 args.safe_push (gimple_call_arg (call, i));
1060 gcall *stmt2;
1061 if (ifn == IFN_LAST)
1062 stmt2 = gimple_build_call_vec (fndecl, args);
1063 else
1064 stmt2 = gimple_build_call_internal_vec (ifn, args);
1065 gimple_call_set_lhs (stmt2, arg1);
1066 gimple_move_vops (stmt2, call);
1067 gimple_call_set_nothrow (stmt2,
1068 gimple_call_nothrow_p (call));
1069 gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
1070 gsi_replace (&gsi2, stmt2, true);
1072 else
1074 if (ifn == IFN_LAST)
1075 gimple_call_set_fndecl (call, fndecl);
1076 else
1077 gimple_call_set_internal_fn (call, ifn);
1078 update_stmt (call);
1080 reciprocal_stats.rfuncs_inserted++;
1082 FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
1084 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1085 gimple_assign_set_rhs_code (stmt, MULT_EXPR);
1086 fold_stmt_inplace (&gsi);
1087 update_stmt (stmt);
1094 statistics_counter_event (fun, "reciprocal divs inserted",
1095 reciprocal_stats.rdivs_inserted);
1096 statistics_counter_event (fun, "reciprocal functions inserted",
1097 reciprocal_stats.rfuncs_inserted);
1099 free_dominance_info (CDI_DOMINATORS);
1100 free_dominance_info (CDI_POST_DOMINATORS);
1101 delete occ_pool;
1102 return 0;
1105 } // anon namespace
1107 gimple_opt_pass *
1108 make_pass_cse_reciprocals (gcc::context *ctxt)
1110 return new pass_cse_reciprocals (ctxt);
1113 /* If NAME is the result of a type conversion, look for other
1114 equivalent dominating or dominated conversions, and replace all
1115 uses with the earliest dominating name, removing the redundant
1116 conversions. Return the prevailing name. */
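/* For example (illustrative only), given

     a_1 = (double) i_3;
     ...
     b_2 = (double) i_3;

   where the definition of a_1 dominates that of b_2, every use of b_2 is
   replaced by a_1 and the redundant second conversion is removed.  */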
1118 static tree
1119 execute_cse_conv_1 (tree name, bool *cfg_changed)
1121 if (SSA_NAME_IS_DEFAULT_DEF (name)
1122 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1123 return name;
1125 gimple *def_stmt = SSA_NAME_DEF_STMT (name);
1127 if (!gimple_assign_cast_p (def_stmt))
1128 return name;
1130 tree src = gimple_assign_rhs1 (def_stmt);
1132 if (TREE_CODE (src) != SSA_NAME)
1133 return name;
1135 imm_use_iterator use_iter;
1136 gimple *use_stmt;
1138 /* Find the earliest dominating def. */
1139 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1141 if (use_stmt == def_stmt
1142 || !gimple_assign_cast_p (use_stmt))
1143 continue;
1145 tree lhs = gimple_assign_lhs (use_stmt);
1147 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1148 || (gimple_assign_rhs1 (use_stmt)
1149 != gimple_assign_rhs1 (def_stmt))
1150 || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1151 continue;
1153 bool use_dominates;
1154 if (gimple_bb (def_stmt) == gimple_bb (use_stmt))
1156 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1157 while (!gsi_end_p (gsi) && gsi_stmt (gsi) != def_stmt)
1158 gsi_next (&gsi);
1159 use_dominates = !gsi_end_p (gsi);
1161 else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
1162 gimple_bb (def_stmt)))
1163 use_dominates = false;
1164 else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (def_stmt),
1165 gimple_bb (use_stmt)))
1166 use_dominates = true;
1167 else
1168 continue;
1170 if (use_dominates)
1172 std::swap (name, lhs);
1173 std::swap (def_stmt, use_stmt);
1177 /* Now go through all uses of SRC again, replacing the equivalent
1178 dominated conversions. We may replace defs that were not
1179 dominated by the then-prevailing defs when we first visited
1180 them. */
1181 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1183 if (use_stmt == def_stmt
1184 || !gimple_assign_cast_p (use_stmt))
1185 continue;
1187 tree lhs = gimple_assign_lhs (use_stmt);
1189 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1190 || (gimple_assign_rhs1 (use_stmt)
1191 != gimple_assign_rhs1 (def_stmt))
1192 || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1193 continue;
1195 basic_block use_bb = gimple_bb (use_stmt);
1196 if (gimple_bb (def_stmt) == use_bb
1197 || dominated_by_p (CDI_DOMINATORS, use_bb, gimple_bb (def_stmt)))
1199 sincos_stats.conv_removed++;
1201 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1202 replace_uses_by (lhs, name);
1203 if (gsi_remove (&gsi, true)
1204 && gimple_purge_dead_eh_edges (use_bb))
1205 *cfg_changed = true;
1206 release_defs (use_stmt);
1210 return name;
1213 /* Records an occurrence at statement USE_STMT in the vector of statements
1214 STMTS if it is dominated by *TOP_BB, dominates it, or if *TOP_BB is
1215 not yet initialized. Returns true if the occurrence was pushed on
1216 the vector. Adjusts *TOP_BB to be the basic block dominating all
1217 statements in the vector. */
1219 static bool
1220 maybe_record_sincos (vec<gimple *> *stmts,
1221 basic_block *top_bb, gimple *use_stmt)
1223 basic_block use_bb = gimple_bb (use_stmt);
1224 if (*top_bb
1225 && (*top_bb == use_bb
1226 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
1227 stmts->safe_push (use_stmt);
1228 else if (!*top_bb
1229 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
1231 stmts->safe_push (use_stmt);
1232 *top_bb = use_bb;
1234 else
1235 return false;
1237 return true;
1240 /* Look for sin, cos and cexpi calls with the same argument NAME and
1241 create a single call to cexpi CSEing the result in this case.
1242 We first walk over all immediate uses of the argument collecting
1243 statements that we can CSE in a vector and in a second pass replace
1244 the statement rhs with a REALPART or IMAGPART expression on the
1245 result of the cexpi call we insert before the use statement that
1246 dominates all other candidates. */
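/* For example (illustrative only):

     s_1 = __builtin_sin (x_2);
     c_3 = __builtin_cos (x_2);

   becomes

     sincostmp_4 = __builtin_cexpi (x_2);
     s_1 = IMAGPART_EXPR <sincostmp_4>;
     c_3 = REALPART_EXPR <sincostmp_4>;  */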
1248 static bool
1249 execute_cse_sincos_1 (tree name)
1251 gimple_stmt_iterator gsi;
1252 imm_use_iterator use_iter;
1253 tree fndecl, res, type = NULL_TREE;
1254 gimple *def_stmt, *use_stmt, *stmt;
1255 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
1256 auto_vec<gimple *> stmts;
1257 basic_block top_bb = NULL;
1258 int i;
1259 bool cfg_changed = false;
1261 name = execute_cse_conv_1 (name, &cfg_changed);
1263 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
1265 if (gimple_code (use_stmt) != GIMPLE_CALL
1266 || !gimple_call_lhs (use_stmt))
1267 continue;
1269 switch (gimple_call_combined_fn (use_stmt))
1271 CASE_CFN_COS:
1272 seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1273 break;
1275 CASE_CFN_SIN:
1276 seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1277 break;
1279 CASE_CFN_CEXPI:
1280 seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1281 break;
1283 default:;
1284 continue;
1287 tree t = mathfn_built_in_type (gimple_call_combined_fn (use_stmt));
1288 if (!type)
1290 type = t;
1291 t = TREE_TYPE (name);
1293 /* This checks that NAME has the right type in the first round,
1294 and, in subsequent rounds, that the built_in type is the same
1295 type, or a compatible type. */
1296 if (type != t && !types_compatible_p (type, t))
1297 return false;
1299 if (seen_cos + seen_sin + seen_cexpi <= 1)
1300 return false;
1302 /* Simply insert cexpi at the beginning of top_bb but not earlier than
1303 the name def statement. */
1304 fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
1305 if (!fndecl)
1306 return false;
1307 stmt = gimple_build_call (fndecl, 1, name);
1308 res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
1309 gimple_call_set_lhs (stmt, res);
1311 def_stmt = SSA_NAME_DEF_STMT (name);
1312 if (!SSA_NAME_IS_DEFAULT_DEF (name)
1313 && gimple_code (def_stmt) != GIMPLE_PHI
1314 && gimple_bb (def_stmt) == top_bb)
1316 gsi = gsi_for_stmt (def_stmt);
1317 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1319 else
1321 gsi = gsi_after_labels (top_bb);
1322 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1324 sincos_stats.inserted++;
1326 /* And adjust the recorded old call sites. */
1327 for (i = 0; stmts.iterate (i, &use_stmt); ++i)
1329 tree rhs = NULL;
1331 switch (gimple_call_combined_fn (use_stmt))
1333 CASE_CFN_COS:
1334 rhs = fold_build1 (REALPART_EXPR, type, res);
1335 break;
1337 CASE_CFN_SIN:
1338 rhs = fold_build1 (IMAGPART_EXPR, type, res);
1339 break;
1341 CASE_CFN_CEXPI:
1342 rhs = res;
1343 break;
1345 default:;
1346 gcc_unreachable ();
1349 /* Replace call with a copy. */
1350 stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
1352 gsi = gsi_for_stmt (use_stmt);
1353 gsi_replace (&gsi, stmt, true);
1354 if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
1355 cfg_changed = true;
1358 return cfg_changed;
1361 /* To evaluate powi(x,n), the floating point value x raised to the
1362 constant integer exponent n, we use a hybrid algorithm that
1363 combines the "window method" with look-up tables. For an
1364 introduction to exponentiation algorithms and "addition chains",
1365 see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1366 "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1367 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1368 Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
1370 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
1371 multiplications to inline before calling the system library's pow
1372 function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1373 so this default never requires calling pow, powf or powl. */
1375 #ifndef POWI_MAX_MULTS
1376 #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
1377 #endif
1379 /* The size of the "optimal power tree" lookup table. All
1380 exponents less than this value are simply looked up in the
1381 powi_table below. This threshold is also used to size the
1382 cache of pseudo registers that hold intermediate results. */
1383 #define POWI_TABLE_SIZE 256
1385 /* The size, in bits of the window, used in the "window method"
1386 exponentiation algorithm. This is equivalent to a radix of
1387 (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
1388 #define POWI_WINDOW_SIZE 3
1390 /* The following table is an efficient representation of an
1391 "optimal power tree". For each value, i, the corresponding
1392 value, j, in the table states that an optimal evaluation
1393 sequence for calculating pow(x,i) can be found by evaluating
1394 pow(x,j)*pow(x,i-j). An optimal power tree for the first
1395 100 integers is given in Knuth's "Seminumerical algorithms". */
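/* As a worked example (illustrative only): powi_table[7] is 4, so x**7 is
   evaluated as x**4 * x**3; recursively x**4 = x**2 * x**2 and
   x**3 = x**2 * x, for a total of four multiplications.  For exponents of
   POWI_TABLE_SIZE or more, the window method first peels off the low
   POWI_WINDOW_SIZE bits, e.g. x**259 = x**256 * x**3 and
   x**256 = x**128 * x**128, after which the table takes over.  */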
1397 static const unsigned char powi_table[POWI_TABLE_SIZE] =
1399 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
1400 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
1401 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
1402 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
1403 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
1404 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
1405 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
1406 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
1407 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
1408 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
1409 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
1410 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
1411 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
1412 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
1413 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
1414 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
1415 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
1416 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
1417 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
1418 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
1419 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
1420 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
1421 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
1422 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
1423 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
1424 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
1425 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
1426 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
1427 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
1428 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
1429 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
1430 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
1434 /* Return the number of multiplications required to calculate
1435 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
1436 subroutine of powi_cost. CACHE is an array indicating
1437 which exponents have already been calculated. */
1439 static int
1440 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1442 /* If we've already calculated this exponent, then this evaluation
1443 doesn't require any additional multiplications. */
1444 if (cache[n])
1445 return 0;
1447 cache[n] = true;
1448 return powi_lookup_cost (n - powi_table[n], cache)
1449 + powi_lookup_cost (powi_table[n], cache) + 1;
1452 /* Return the number of multiplications required to calculate
1453 powi(x,n) for an arbitrary x, given the exponent N. This
1454 function needs to be kept in sync with powi_as_mults below. */
1456 static int
1457 powi_cost (HOST_WIDE_INT n)
1459 bool cache[POWI_TABLE_SIZE];
1460 unsigned HOST_WIDE_INT digit;
1461 unsigned HOST_WIDE_INT val;
1462 int result;
1464 if (n == 0)
1465 return 0;
1467 /* Ignore the reciprocal when calculating the cost. */
1468 val = absu_hwi (n);
1470 /* Initialize the exponent cache. */
1471 memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
1472 cache[1] = true;
1474 result = 0;
1476 while (val >= POWI_TABLE_SIZE)
1478 if (val & 1)
1480 digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
1481 result += powi_lookup_cost (digit, cache)
1482 + POWI_WINDOW_SIZE + 1;
1483 val >>= POWI_WINDOW_SIZE;
1485 else
1487 val >>= 1;
1488 result++;
1492 return result + powi_lookup_cost (val, cache);
1495 /* Recursive subroutine of powi_as_mults. This function takes the
1496 array, CACHE, of already calculated exponents and an exponent N and
1497 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1499 static tree
1500 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1501 unsigned HOST_WIDE_INT n, tree *cache)
1503 tree op0, op1, ssa_target;
1504 unsigned HOST_WIDE_INT digit;
1505 gassign *mult_stmt;
1507 if (n < POWI_TABLE_SIZE && cache[n])
1508 return cache[n];
1510 ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1512 if (n < POWI_TABLE_SIZE)
1514 cache[n] = ssa_target;
1515 op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1516 op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1518 else if (n & 1)
1520 digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1521 op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1522 op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1524 else
1526 op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1527 op1 = op0;
1530 mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1531 gimple_set_location (mult_stmt, loc);
1532 gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1534 return ssa_target;
1537 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1538 This function needs to be kept in sync with powi_cost above. */
1540 tree
1541 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1542 tree arg0, HOST_WIDE_INT n)
1544 tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1545 gassign *div_stmt;
1546 tree target;
1548 if (n == 0)
1549 return build_one_cst (type);
1551 memset (cache, 0, sizeof (cache));
1552 cache[1] = arg0;
1554 result = powi_as_mults_1 (gsi, loc, type, absu_hwi (n), cache);
1555 if (n >= 0)
1556 return result;
1558 /* If the original exponent was negative, reciprocate the result. */
1559 target = make_temp_ssa_name (type, NULL, "powmult");
1560 div_stmt = gimple_build_assign (target, RDIV_EXPR,
1561 build_real (type, dconst1), result);
1562 gimple_set_location (div_stmt, loc);
1563 gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1565 return target;
1568 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1569 location info LOC. If the arguments are appropriate, create an
1570 equivalent sequence of statements prior to GSI using an optimal
1571 number of multiplications, and return an expression holding the
1572 result. */
1574 static tree
1575 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1576 tree arg0, HOST_WIDE_INT n)
1578 if ((n >= -1 && n <= 2)
1579 || (optimize_function_for_speed_p (cfun)
1580 && powi_cost (n) <= POWI_MAX_MULTS))
1581 return powi_as_mults (gsi, loc, arg0, n);
1583 return NULL_TREE;
1586 /* Build a gimple call statement that calls FN with argument ARG.
1587 Set the lhs of the call statement to a fresh SSA name. Insert the
1588 statement prior to GSI's current position, and return the fresh
1589 SSA name. */
1591 static tree
1592 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1593 tree fn, tree arg)
1595 gcall *call_stmt;
1596 tree ssa_target;
1598 call_stmt = gimple_build_call (fn, 1, arg);
1599 ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1600 gimple_set_lhs (call_stmt, ssa_target);
1601 gimple_set_location (call_stmt, loc);
1602 gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1604 return ssa_target;
1607 /* Build a gimple binary operation with the given CODE and arguments
1608 ARG0, ARG1, assigning the result to a new SSA name for variable
1609 TARGET. Insert the statement prior to GSI's current position, and
1610 return the fresh SSA name. */
1612 static tree
1613 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1614 const char *name, enum tree_code code,
1615 tree arg0, tree arg1)
1617 tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1618 gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1619 gimple_set_location (stmt, loc);
1620 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1621 return result;
1624 /* Build a gimple reference operation with the given CODE and argument
1625 ARG, assigning the result to a new SSA name of TYPE with NAME.
1626 Insert the statement prior to GSI's current position, and return
1627 the fresh SSA name. */
1629 static inline tree
1630 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1631 const char *name, enum tree_code code, tree arg0)
1633 tree result = make_temp_ssa_name (type, NULL, name);
1634 gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1635 gimple_set_location (stmt, loc);
1636 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1637 return result;
1640 /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1641 prior to GSI's current position, and return the fresh SSA name. */
1643 static tree
1644 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1645 tree type, tree val)
1647 tree result = make_ssa_name (type);
1648 gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1649 gimple_set_location (stmt, loc);
1650 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1651 return result;
1654 struct pow_synth_sqrt_info
1656 bool *factors;
1657 unsigned int deepest;
1658 unsigned int num_mults;
1661 /* Return true iff the real value C can be represented as a
1662 sum of powers of 0.5 up to N. That is:
1663 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1664 Record in INFO the various parameters of the synthesis algorithm such
1665 as the factors a[i], the maximum 0.5 power and the number of
1666 multiplications that will be required. */
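/* For example (illustrative only), C == 0.625 is representable for N >= 3:
   0.625 == 0.5**1 + 0.5**3, i.e. a[1] == a[3] == 1 and a[2] == 0, so INFO
   records deepest == 3 and num_mults == 1, and pow (x, 0.625) can be
   synthesized as sqrt (x) * sqrt (sqrt (sqrt (x))).  */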
1668 bool
1669 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1670 struct pow_synth_sqrt_info *info)
1672 REAL_VALUE_TYPE factor = dconsthalf;
1673 REAL_VALUE_TYPE remainder = c;
1675 info->deepest = 0;
1676 info->num_mults = 0;
1677 memset (info->factors, 0, n * sizeof (bool));
1679 for (unsigned i = 0; i < n; i++)
1681 REAL_VALUE_TYPE res;
1683 /* If something inexact happened bail out now. */
1684 if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1685 return false;
1687 /* We have hit zero. The number is representable as a sum
1688 of powers of 0.5. */
1689 if (real_equal (&res, &dconst0))
1691 info->factors[i] = true;
1692 info->deepest = i + 1;
1693 return true;
1695 else if (!REAL_VALUE_NEGATIVE (res))
1697 remainder = res;
1698 info->factors[i] = true;
1699 info->num_mults++;
1701 else
1702 info->factors[i] = false;
1704 real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1706 return false;
1709 /* Return the tree corresponding to FN being applied
1710 to ARG N times at GSI and LOC.
1711 Look up previous results from CACHE if need be.
1712 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1714 static tree
1715 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1716 tree fn, location_t loc, tree *cache)
1718 tree res = cache[n];
1719 if (!res)
1721 tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1722 res = build_and_insert_call (gsi, loc, fn, prev);
1723 cache[n] = res;
1726 return res;
1729 /* Print to STREAM the repeated application of function FNAME to ARG
1730 N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1731 "foo (foo (x))". */
1733 static void
1734 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1735 unsigned int n)
1737 if (n == 0)
1738 fprintf (stream, "%s", arg);
1739 else
1741 fprintf (stream, "%s (", fname);
1742 print_nested_fn (stream, fname, arg, n - 1);
1743 fprintf (stream, ")");
1747 /* Print to STREAM the fractional sequence of sqrt chains
1748 applied to ARG, described by INFO. Used for the dump file. */
1750 static void
1751 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1752 struct pow_synth_sqrt_info *info)
1754 for (unsigned int i = 0; i < info->deepest; i++)
1756 bool is_set = info->factors[i];
1757 if (is_set)
1759 print_nested_fn (stream, "sqrt", arg, i + 1);
1760 if (i != info->deepest - 1)
1761 fprintf (stream, " * ");
1766 /* Print to STREAM a representation of raising ARG to an integer
1767 power N. Used for the dump file. */
1769 static void
1770 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1772 if (n > 1)
1773 fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1774 else if (n == 1)
1775 fprintf (stream, "%s", arg);
1778 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1779 square roots. Place at GSI and LOC. Limit the maximum depth
1780 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1781 result of the expanded sequence or NULL_TREE if the expansion failed.
1783 This routine assumes that ARG1 is a real number with a fractional part
1784 (the integer exponent case will have been handled earlier in
1785 gimple_expand_builtin_pow).
1787 For ARG1 > 0.0:
1788 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1789 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1790 FRAC_PART == ARG1 - WHOLE_PART:
1791 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1792 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1793 if it can be expressed as such, that is if FRAC_PART satisfies:
1794 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1795 where integer a[i] is either 0 or 1.
1797 Example:
1798 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1799 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1801 For ARG1 < 0.0 there are two approaches:
1802 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1803 is calculated as above.
1805 Example:
1806 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1807 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1809 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1810 FRAC_PART := ARG1 - WHOLE_PART
1811 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1812 Example:
1813 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1814 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1816 For ARG1 < 0.0 we choose between (A) and (B) depending on
1817 how many multiplications we'd have to do.
1818 So, for the example in (B): POW (x, -5.875), if we were to
1819 follow algorithm (A) we would produce:
1820 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1821 which contains more multiplications than approach (B).
1823 Hopefully, this approach will eliminate potentially expensive POW library
1824 calls when unsafe floating point math is enabled and allow the compiler to
1825 further optimise the multiplies, square roots and divides produced by this
1826 function. */
1828 static tree
1829 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1830 tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1832 tree type = TREE_TYPE (arg0);
1833 machine_mode mode = TYPE_MODE (type);
1834 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1835 bool one_over = true;
1837 if (!sqrtfn)
1838 return NULL_TREE;
1840 if (TREE_CODE (arg1) != REAL_CST)
1841 return NULL_TREE;
1843 REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1845 gcc_assert (max_depth > 0);
1846 tree *cache = XALLOCAVEC (tree, max_depth + 1);
1848 struct pow_synth_sqrt_info synth_info;
1849 synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1850 synth_info.deepest = 0;
1851 synth_info.num_mults = 0;
1853 bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1854 REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1856 /* The whole and fractional parts of exp. */
1857 REAL_VALUE_TYPE whole_part;
1858 REAL_VALUE_TYPE frac_part;
1860 real_floor (&whole_part, mode, &exp);
1861 real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1864 REAL_VALUE_TYPE ceil_whole = dconst0;
1865 REAL_VALUE_TYPE ceil_fract = dconst0;
1867 if (neg_exp)
1869 real_ceil (&ceil_whole, mode, &exp);
1870 real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1873 if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1874 return NULL_TREE;
1876 /* Check whether it's more profitable to not use 1.0 / ... */
1877 if (neg_exp)
1879 struct pow_synth_sqrt_info alt_synth_info;
1880 alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1881 alt_synth_info.deepest = 0;
1882 alt_synth_info.num_mults = 0;
1884 if (representable_as_half_series_p (ceil_fract, max_depth,
1885 &alt_synth_info)
1886 && alt_synth_info.deepest <= synth_info.deepest
1887 && alt_synth_info.num_mults < synth_info.num_mults)
1889 whole_part = ceil_whole;
1890 frac_part = ceil_fract;
1891 synth_info.deepest = alt_synth_info.deepest;
1892 synth_info.num_mults = alt_synth_info.num_mults;
1893 memcpy (synth_info.factors, alt_synth_info.factors,
1894 (max_depth + 1) * sizeof (bool));
1895 one_over = false;
1899 HOST_WIDE_INT n = real_to_integer (&whole_part);
1900 REAL_VALUE_TYPE cint;
1901 real_from_integer (&cint, VOIDmode, n, SIGNED);
1903 if (!real_identical (&whole_part, &cint))
1904 return NULL_TREE;
1906 if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1907 return NULL_TREE;
1909 memset (cache, 0, (max_depth + 1) * sizeof (tree));
1911 tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1913 /* Calculate the integer part of the exponent. */
1914 if (n > 1)
1916 integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1917 if (!integer_res)
1918 return NULL_TREE;
1921 if (dump_file)
1923 char string[64];
1925 real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1926 fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1928 if (neg_exp)
1930 if (one_over)
1932 fprintf (dump_file, "1.0 / (");
1933 dump_integer_part (dump_file, "x", n);
1934 if (n > 0)
1935 fprintf (dump_file, " * ");
1936 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1937 fprintf (dump_file, ")");
1939 else
1941 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1942 fprintf (dump_file, " / (");
1943 dump_integer_part (dump_file, "x", n);
1944 fprintf (dump_file, ")");
1947 else
1949 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1950 if (n > 0)
1951 fprintf (dump_file, " * ");
1952 dump_integer_part (dump_file, "x", n);
1955 fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1959 tree fract_res = NULL_TREE;
1960 cache[0] = arg0;
1962 /* Calculate the fractional part of the exponent. */
1963 for (unsigned i = 0; i < synth_info.deepest; i++)
1965 if (synth_info.factors[i])
1967 tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1969 if (!fract_res)
1970 fract_res = sqrt_chain;
1972 else
1973 fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1974 fract_res, sqrt_chain);
1978 tree res = NULL_TREE;
1980 if (neg_exp)
1982 if (one_over)
1984 if (n > 0)
1985 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1986 fract_res, integer_res);
1987 else
1988 res = fract_res;
1990 res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1991 build_real (type, dconst1), res);
1993 else
1995 res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1996 fract_res, integer_res);
1999 else
2000 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2001 fract_res, integer_res);
2002 return res;
2005 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
2006 with location info LOC. If possible, create an equivalent and
2007 less expensive sequence of statements prior to GSI, and return an
2008    expression holding the result. */
2010 static tree
2011 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
2012 tree arg0, tree arg1)
2014 REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
2015 REAL_VALUE_TYPE c2, dconst3;
2016 HOST_WIDE_INT n;
2017 tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
2018 machine_mode mode;
2019 bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
2020 bool hw_sqrt_exists, c_is_int, c2_is_int;
2022 dconst1_4 = dconst1;
2023 SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
2025 /* If the exponent isn't a constant, there's nothing of interest
2026 to be done. */
2027 if (TREE_CODE (arg1) != REAL_CST)
2028 return NULL_TREE;
2030 /* Don't perform the operation if flag_signaling_nans is on
2031 and the operand is a signaling NaN. */
2032 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
2033 && ((TREE_CODE (arg0) == REAL_CST
2034 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
2035 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
2036 return NULL_TREE;
2038 /* If the exponent is equivalent to an integer, expand to an optimal
2039 multiplication sequence when profitable. */
2040 c = TREE_REAL_CST (arg1);
2041 n = real_to_integer (&c);
2042 real_from_integer (&cint, VOIDmode, n, SIGNED);
2043 c_is_int = real_identical (&c, &cint);
2045 if (c_is_int
2046 && ((n >= -1 && n <= 2)
2047 || (flag_unsafe_math_optimizations
2048 && speed_p
2049 && powi_cost (n) <= POWI_MAX_MULTS)))
2050 return gimple_expand_builtin_powi (gsi, loc, arg0, n);
2052 /* Attempt various optimizations using sqrt and cbrt. */
2053 type = TREE_TYPE (arg0);
2054 mode = TYPE_MODE (type);
2055 sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2057 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
2058 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
2059 sqrt(-0) = -0. */
2060 if (sqrtfn
2061 && real_equal (&c, &dconsthalf)
2062 && !HONOR_SIGNED_ZEROS (mode))
2063 return build_and_insert_call (gsi, loc, sqrtfn, arg0);
2065 hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
2067 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
2068 optimizations since 1./3. is not exactly representable. If x
2069 is negative and finite, the correct value of pow(x,1./3.) is
2070 a NaN with the "invalid" exception raised, because the value
2071 of 1./3. actually has an even denominator. The correct value
2072 of cbrt(x) is a negative real value. */
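   /* For instance, pow (-8.0, 1./3.) is a NaN because the rounded exponent
      is not exactly one third, whereas cbrt (-8.0) == -2.0; this is why the
      replacement below also requires that NaNs need not be honored or that
      ARG0 is known to be nonnegative.  */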
2073 cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
2074 dconst1_3 = real_value_truncate (mode, dconst_third ());
2076 if (flag_unsafe_math_optimizations
2077 && cbrtfn
2078 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2079 && real_equal (&c, &dconst1_3))
2080 return build_and_insert_call (gsi, loc, cbrtfn, arg0);
2082 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
2083 if we don't have a hardware sqrt insn. */
2084 dconst1_6 = dconst1_3;
2085 SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
2087 if (flag_unsafe_math_optimizations
2088 && sqrtfn
2089 && cbrtfn
2090 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2091 && speed_p
2092 && hw_sqrt_exists
2093 && real_equal (&c, &dconst1_6))
2095 /* sqrt(x) */
2096 sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
2098 /* cbrt(sqrt(x)) */
2099 return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
2103 /* Attempt to expand the POW as a product of square root chains.
2104     Expand the 0.25 case even when optimising for size. */
2105 if (flag_unsafe_math_optimizations
2106 && sqrtfn
2107 && hw_sqrt_exists
2108 && (speed_p || real_equal (&c, &dconst1_4))
2109 && !HONOR_SIGNED_ZEROS (mode))
2111 unsigned int max_depth = speed_p
2112 ? param_max_pow_sqrt_depth
2113 : 2;
2115 tree expand_with_sqrts
2116 = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
2118 if (expand_with_sqrts)
2119 return expand_with_sqrts;
2122 real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
2123 n = real_to_integer (&c2);
2124 real_from_integer (&cint, VOIDmode, n, SIGNED);
2125 c2_is_int = real_identical (&c2, &cint);
2127 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2129 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
2130 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
2132 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
2133 different from pow(x, 1./3.) due to rounding and behavior with
2134 negative x, we need to constrain this transformation to unsafe
2135 math and positive x or finite math. */
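   /* When this transformation applies, e.g. pow (x, 4./3.) has n == 4
      and becomes powi (x, 4/3) * powi (cbrt (x), 4%3), i.e. x * cbrt (x).  */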
2136 real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
2137 real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
2138 real_round (&c2, mode, &c2);
2139 n = real_to_integer (&c2);
2140 real_from_integer (&cint, VOIDmode, n, SIGNED);
2141 real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
2142 real_convert (&c2, mode, &c2);
2144 if (flag_unsafe_math_optimizations
2145 && cbrtfn
2146 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2147 && real_identical (&c2, &c)
2148 && !c2_is_int
2149 && optimize_function_for_speed_p (cfun)
2150 && powi_cost (n / 3) <= POWI_MAX_MULTS)
2152 tree powi_x_ndiv3 = NULL_TREE;
2154 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
2155 possible or profitable, give up. Skip the degenerate case when
2156 abs(n) < 3, where the result is always 1. */
2157 if (absu_hwi (n) >= 3)
2159 powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
2160 abs_hwi (n / 3));
2161 if (!powi_x_ndiv3)
2162 return NULL_TREE;
2165 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
2166 as that creates an unnecessary variable. Instead, just produce
2167 either cbrt(x) or cbrt(x) * cbrt(x). */
2168 cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
2170 if (absu_hwi (n) % 3 == 1)
2171 powi_cbrt_x = cbrt_x;
2172 else
2173 powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2174 cbrt_x, cbrt_x);
2176 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
2177 if (absu_hwi (n) < 3)
2178 result = powi_cbrt_x;
2179 else
2180 result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2181 powi_x_ndiv3, powi_cbrt_x);
2183 /* If n is negative, reciprocate the result. */
2184 if (n < 0)
2185 result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
2186 build_real (type, dconst1), result);
2188 return result;
2191 /* No optimizations succeeded. */
2192 return NULL_TREE;
2195 /* ARG is the argument to a cabs builtin call in GSI with location info
2196 LOC. Create a sequence of statements prior to GSI that calculates
2197 sqrt(R*R + I*I), where R and I are the real and imaginary components
2198 of ARG, respectively. Return an expression holding the result. */
2200 static tree
2201 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
2203 tree real_part, imag_part, addend1, addend2, sum, result;
2204 tree type = TREE_TYPE (TREE_TYPE (arg));
2205 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2206 machine_mode mode = TYPE_MODE (type);
2208 if (!flag_unsafe_math_optimizations
2209 || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
2210 || !sqrtfn
2211 || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
2212 return NULL_TREE;
2214 real_part = build_and_insert_ref (gsi, loc, type, "cabs",
2215 REALPART_EXPR, arg);
2216 addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2217 real_part, real_part);
2218 imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
2219 IMAGPART_EXPR, arg);
2220 addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2221 imag_part, imag_part);
2222 sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
2223 result = build_and_insert_call (gsi, loc, sqrtfn, sum);
2225 return result;
2228 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2229 on the SSA_NAME argument of each of them. */
2231 namespace {
2233 const pass_data pass_data_cse_sincos =
2235 GIMPLE_PASS, /* type */
2236 "sincos", /* name */
2237 OPTGROUP_NONE, /* optinfo_flags */
2238 TV_TREE_SINCOS, /* tv_id */
2239 PROP_ssa, /* properties_required */
2240 0, /* properties_provided */
2241 0, /* properties_destroyed */
2242 0, /* todo_flags_start */
2243 TODO_update_ssa, /* todo_flags_finish */
2246 class pass_cse_sincos : public gimple_opt_pass
2248 public:
2249 pass_cse_sincos (gcc::context *ctxt)
2250 : gimple_opt_pass (pass_data_cse_sincos, ctxt)
2253 /* opt_pass methods: */
2254 bool gate (function *) final override
2256 return optimize;
2259 unsigned int execute (function *) final override;
2261 }; // class pass_cse_sincos
2263 unsigned int
2264 pass_cse_sincos::execute (function *fun)
2266 basic_block bb;
2267 bool cfg_changed = false;
2269 calculate_dominance_info (CDI_DOMINATORS);
2270 memset (&sincos_stats, 0, sizeof (sincos_stats));
2272 FOR_EACH_BB_FN (bb, fun)
2274 gimple_stmt_iterator gsi;
2276 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2278 gimple *stmt = gsi_stmt (gsi);
2280 if (is_gimple_call (stmt)
2281 && gimple_call_lhs (stmt))
2283 tree arg;
2284 switch (gimple_call_combined_fn (stmt))
2286 CASE_CFN_COS:
2287 CASE_CFN_SIN:
2288 CASE_CFN_CEXPI:
2289 arg = gimple_call_arg (stmt, 0);
2290 /* Make sure we have either sincos or cexp. */
2291 if (!targetm.libc_has_function (function_c99_math_complex,
2292 TREE_TYPE (arg))
2293 && !targetm.libc_has_function (function_sincos,
2294 TREE_TYPE (arg)))
2295 break;
2297 if (TREE_CODE (arg) == SSA_NAME)
2298 cfg_changed |= execute_cse_sincos_1 (arg);
2299 break;
2300 default:
2301 break;
2307 statistics_counter_event (fun, "sincos statements inserted",
2308 sincos_stats.inserted);
2309 statistics_counter_event (fun, "conv statements removed",
2310 sincos_stats.conv_removed);
2312 return cfg_changed ? TODO_cleanup_cfg : 0;
2315 } // anon namespace
2317 gimple_opt_pass *
2318 make_pass_cse_sincos (gcc::context *ctxt)
2320 return new pass_cse_sincos (ctxt);
2323 /* Expand powi(x,n) into an optimal number of multiplies, when n is a constant.
2324 Also expand CABS. */
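/* (For instance, powi (x, 5) needs only three multiplications,
    e.g. x2 = x*x; x4 = x2*x2; x4*x, rather than four.)  */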
2325 namespace {
2327 const pass_data pass_data_expand_powcabs =
2329 GIMPLE_PASS, /* type */
2330 "powcabs", /* name */
2331 OPTGROUP_NONE, /* optinfo_flags */
2332 TV_TREE_POWCABS, /* tv_id */
2333 PROP_ssa, /* properties_required */
2334 PROP_gimple_opt_math, /* properties_provided */
2335 0, /* properties_destroyed */
2336 0, /* todo_flags_start */
2337 TODO_update_ssa, /* todo_flags_finish */
2340 class pass_expand_powcabs : public gimple_opt_pass
2342 public:
2343 pass_expand_powcabs (gcc::context *ctxt)
2344 : gimple_opt_pass (pass_data_expand_powcabs, ctxt)
2347 /* opt_pass methods: */
2348 bool gate (function *) final override
2350 return optimize;
2353 unsigned int execute (function *) final override;
2355 }; // class pass_expand_powcabs
2357 unsigned int
2358 pass_expand_powcabs::execute (function *fun)
2360 basic_block bb;
2361 bool cfg_changed = false;
2363 calculate_dominance_info (CDI_DOMINATORS);
2365 FOR_EACH_BB_FN (bb, fun)
2367 gimple_stmt_iterator gsi;
2368 bool cleanup_eh = false;
2370 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2372 gimple *stmt = gsi_stmt (gsi);
2374 	  /* Only the last stmt in a bb could throw; no need to call
2375 gimple_purge_dead_eh_edges if we change something in the middle
2376 of a basic block. */
2377 cleanup_eh = false;
2379 if (is_gimple_call (stmt)
2380 && gimple_call_lhs (stmt))
2382 tree arg0, arg1, result;
2383 HOST_WIDE_INT n;
2384 location_t loc;
2386 switch (gimple_call_combined_fn (stmt))
2388 CASE_CFN_POW:
2389 arg0 = gimple_call_arg (stmt, 0);
2390 arg1 = gimple_call_arg (stmt, 1);
2392 loc = gimple_location (stmt);
2393 result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
2395 if (result)
2397 tree lhs = gimple_get_lhs (stmt);
2398 gassign *new_stmt = gimple_build_assign (lhs, result);
2399 gimple_set_location (new_stmt, loc);
2400 unlink_stmt_vdef (stmt);
2401 gsi_replace (&gsi, new_stmt, true);
2402 cleanup_eh = true;
2403 if (gimple_vdef (stmt))
2404 release_ssa_name (gimple_vdef (stmt));
2406 break;
2408 CASE_CFN_POWI:
2409 arg0 = gimple_call_arg (stmt, 0);
2410 arg1 = gimple_call_arg (stmt, 1);
2411 loc = gimple_location (stmt);
2413 if (real_minus_onep (arg0))
2415 tree t0, t1, cond, one, minus_one;
2416 gassign *stmt;
2418 t0 = TREE_TYPE (arg0);
2419 t1 = TREE_TYPE (arg1);
2420 one = build_real (t0, dconst1);
2421 minus_one = build_real (t0, dconstm1);
2423 cond = make_temp_ssa_name (t1, NULL, "powi_cond");
2424 stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2425 arg1, build_int_cst (t1, 1));
2426 gimple_set_location (stmt, loc);
2427 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2429 result = make_temp_ssa_name (t0, NULL, "powi");
2430 stmt = gimple_build_assign (result, COND_EXPR, cond,
2431 minus_one, one);
2432 gimple_set_location (stmt, loc);
2433 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2435 else
2437 if (!tree_fits_shwi_p (arg1))
2438 break;
2440 n = tree_to_shwi (arg1);
2441 result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
2444 if (result)
2446 tree lhs = gimple_get_lhs (stmt);
2447 gassign *new_stmt = gimple_build_assign (lhs, result);
2448 gimple_set_location (new_stmt, loc);
2449 unlink_stmt_vdef (stmt);
2450 gsi_replace (&gsi, new_stmt, true);
2451 cleanup_eh = true;
2452 if (gimple_vdef (stmt))
2453 release_ssa_name (gimple_vdef (stmt));
2455 break;
2457 CASE_CFN_CABS:
2458 arg0 = gimple_call_arg (stmt, 0);
2459 loc = gimple_location (stmt);
2460 result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
2462 if (result)
2464 tree lhs = gimple_get_lhs (stmt);
2465 gassign *new_stmt = gimple_build_assign (lhs, result);
2466 gimple_set_location (new_stmt, loc);
2467 unlink_stmt_vdef (stmt);
2468 gsi_replace (&gsi, new_stmt, true);
2469 cleanup_eh = true;
2470 if (gimple_vdef (stmt))
2471 release_ssa_name (gimple_vdef (stmt));
2473 break;
2475 default:;
2479 if (cleanup_eh)
2480 cfg_changed |= gimple_purge_dead_eh_edges (bb);
2483 return cfg_changed ? TODO_cleanup_cfg : 0;
2486 } // anon namespace
2488 gimple_opt_pass *
2489 make_pass_expand_powcabs (gcc::context *ctxt)
2491 return new pass_expand_powcabs (ctxt);
2494 /* Return true if stmt is a type conversion operation that can be stripped
2495 when used in a widening multiply operation. */
2496 static bool
2497 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2499 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2501 if (TREE_CODE (result_type) == INTEGER_TYPE)
2503 tree op_type;
2504 tree inner_op_type;
2506 if (!CONVERT_EXPR_CODE_P (rhs_code))
2507 return false;
2509 op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2511 /* If the type of OP has the same precision as the result, then
2512 we can strip this conversion. The multiply operation will be
2513 selected to create the correct extension as a by-product. */
2514 if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2515 return true;
2517 /* We can also strip a conversion if it preserves the signed-ness of
2518 the operation and doesn't narrow the range. */
2519 inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2521 /* If the inner-most type is unsigned, then we can strip any
2522 intermediate widening operation. If it's signed, then the
2523 intermediate widening operation must also be signed. */
2524 if ((TYPE_UNSIGNED (inner_op_type)
2525 || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2526 && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2527 return true;
2529 return false;
2532 return rhs_code == FIXED_CONVERT_EXPR;
2535 /* Return true if RHS is a suitable operand for a widening multiplication,
2536 assuming a target type of TYPE.
2537 There are two cases:
2539 - RHS makes some value at least twice as wide. Store that value
2540 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2542 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2543 but leave *TYPE_OUT untouched. */
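/* For example, if TYPE is a 32-bit int and RHS is defined by widening a
   16-bit short to int, the first case applies: the unwidened short value
   is stored in *NEW_RHS_OUT and its 16-bit type in *TYPE_OUT.  */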
2545 static bool
2546 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2547 tree *new_rhs_out)
2549 gimple *stmt;
2550 tree type1, rhs1;
2552 if (TREE_CODE (rhs) == SSA_NAME)
2554 stmt = SSA_NAME_DEF_STMT (rhs);
2555 if (is_gimple_assign (stmt))
2557 if (! widening_mult_conversion_strippable_p (type, stmt))
2558 rhs1 = rhs;
2559 else
2561 rhs1 = gimple_assign_rhs1 (stmt);
2563 if (TREE_CODE (rhs1) == INTEGER_CST)
2565 *new_rhs_out = rhs1;
2566 *type_out = NULL;
2567 return true;
2571 else
2572 rhs1 = rhs;
2574 type1 = TREE_TYPE (rhs1);
2576 if (TREE_CODE (type1) != TREE_CODE (type)
2577 || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2578 return false;
2580 *new_rhs_out = rhs1;
2581 *type_out = type1;
2582 return true;
2585 if (TREE_CODE (rhs) == INTEGER_CST)
2587 *new_rhs_out = rhs;
2588 *type_out = NULL;
2589 return true;
2592 return false;
2595 /* Return true if STMT performs a widening multiplication, assuming the
2596 output type is TYPE. If so, store the unwidened types of the operands
2597 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2598 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2599 and *TYPE2_OUT would give the operands of the multiplication. */
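/* For example, for
     res = (long long) a * (long long) b;
   with 32-bit int operands a and b and a 64-bit result, *TYPE1_OUT and
   *TYPE2_OUT are both the 32-bit int type and *RHS1_OUT and *RHS2_OUT
   are a and b.  */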
2601 static bool
2602 is_widening_mult_p (gimple *stmt,
2603 tree *type1_out, tree *rhs1_out,
2604 tree *type2_out, tree *rhs2_out)
2606 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2608 if (TREE_CODE (type) == INTEGER_TYPE)
2610 if (TYPE_OVERFLOW_TRAPS (type))
2611 return false;
2613 else if (TREE_CODE (type) != FIXED_POINT_TYPE)
2614 return false;
2616 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2617 rhs1_out))
2618 return false;
2620 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2621 rhs2_out))
2622 return false;
2624 if (*type1_out == NULL)
2626 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2627 return false;
2628 *type1_out = *type2_out;
2631 if (*type2_out == NULL)
2633 if (!int_fits_type_p (*rhs2_out, *type1_out))
2634 return false;
2635 *type2_out = *type1_out;
2638 /* Ensure that the larger of the two operands comes first. */
2639 if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2641 std::swap (*type1_out, *type2_out);
2642 std::swap (*rhs1_out, *rhs2_out);
2645 return true;
2648 /* Check to see if the CALL statement is an invocation of copysign
2649 with 1. being the first argument. */
2650 static bool
2651 is_copysign_call_with_1 (gimple *call)
2653 gcall *c = dyn_cast <gcall *> (call);
2654 if (! c)
2655 return false;
2657 enum combined_fn code = gimple_call_combined_fn (c);
2659 if (code == CFN_LAST)
2660 return false;
2662 if (builtin_fn_p (code))
2664 switch (as_builtin_fn (code))
2666 CASE_FLT_FN (BUILT_IN_COPYSIGN):
2667 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2668 return real_onep (gimple_call_arg (c, 0));
2669 default:
2670 return false;
2674 if (internal_fn_p (code))
2676 switch (as_internal_fn (code))
2678 case IFN_COPYSIGN:
2679 return real_onep (gimple_call_arg (c, 0));
2680 default:
2681 return false;
2685 return false;
2688 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2689    This only happens when the xorsign optab is defined; if the
2690    pattern is not a xorsign pattern or if expansion fails, FALSE is
2691 returned, otherwise TRUE is returned. */
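/* For example,
     t = __builtin_copysign (1.0, y);
     z = x * t;
   becomes z = .XORSIGN (x, y) when the target provides a xorsign optab
   for the mode of the operands.  */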
2692 static bool
2693 convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2695 tree treeop0, treeop1, lhs, type;
2696 location_t loc = gimple_location (stmt);
2697 lhs = gimple_assign_lhs (stmt);
2698 treeop0 = gimple_assign_rhs1 (stmt);
2699 treeop1 = gimple_assign_rhs2 (stmt);
2700 type = TREE_TYPE (lhs);
2701 machine_mode mode = TYPE_MODE (type);
2703 if (HONOR_SNANS (type))
2704 return false;
2706 if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2708 gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2709 if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
2711 call0 = SSA_NAME_DEF_STMT (treeop1);
2712 if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2713 return false;
2715 treeop1 = treeop0;
2717 if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2718 return false;
2720 gcall *c = as_a<gcall*> (call0);
2721 treeop0 = gimple_call_arg (c, 1);
2723 gcall *call_stmt
2724 = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2725 gimple_set_lhs (call_stmt, lhs);
2726 gimple_set_location (call_stmt, loc);
2727 gsi_replace (gsi, call_stmt, true);
2728 return true;
2731 return false;
2734 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2735 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2736 value is true iff we converted the statement. */
2738 static bool
2739 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2741 tree lhs, rhs1, rhs2, type, type1, type2;
2742 enum insn_code handler;
2743 scalar_int_mode to_mode, from_mode, actual_mode;
2744 optab op;
2745 int actual_precision;
2746 location_t loc = gimple_location (stmt);
2747 bool from_unsigned1, from_unsigned2;
2749 lhs = gimple_assign_lhs (stmt);
2750 type = TREE_TYPE (lhs);
2751 if (TREE_CODE (type) != INTEGER_TYPE)
2752 return false;
2754 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2755 return false;
2757 to_mode = SCALAR_INT_TYPE_MODE (type);
2758 from_mode = SCALAR_INT_TYPE_MODE (type1);
2759 if (to_mode == from_mode)
2760 return false;
2762 from_unsigned1 = TYPE_UNSIGNED (type1);
2763 from_unsigned2 = TYPE_UNSIGNED (type2);
2765 if (from_unsigned1 && from_unsigned2)
2766 op = umul_widen_optab;
2767 else if (!from_unsigned1 && !from_unsigned2)
2768 op = smul_widen_optab;
2769 else
2770 op = usmul_widen_optab;
2772 handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2773 &actual_mode);
2775 if (handler == CODE_FOR_nothing)
2777 if (op != smul_widen_optab)
2779 /* We can use a signed multiply with unsigned types as long as
2780 there is a wider mode to use, or it is the smaller of the two
2781 types that is unsigned. Note that type1 >= type2, always. */
2782 if ((TYPE_UNSIGNED (type1)
2783 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2784 || (TYPE_UNSIGNED (type2)
2785 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2787 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2788 || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2789 return false;
2792 op = smul_widen_optab;
2793 handler = find_widening_optab_handler_and_mode (op, to_mode,
2794 from_mode,
2795 &actual_mode);
2797 if (handler == CODE_FOR_nothing)
2798 return false;
2800 from_unsigned1 = from_unsigned2 = false;
2802 else
2804 /* Expand can synthesize smul_widen_optab if the target
2805 supports umul_widen_optab. */
2806 op = umul_widen_optab;
2807 handler = find_widening_optab_handler_and_mode (op, to_mode,
2808 from_mode,
2809 &actual_mode);
2810 if (handler == CODE_FOR_nothing)
2811 return false;
2815   /* Ensure that the inputs to the handler are in the correct precision
2816 for the opcode. This will be the full mode size. */
2817 actual_precision = GET_MODE_PRECISION (actual_mode);
2818 if (2 * actual_precision > TYPE_PRECISION (type))
2819 return false;
2820 if (actual_precision != TYPE_PRECISION (type1)
2821 || from_unsigned1 != TYPE_UNSIGNED (type1))
2822 rhs1 = build_and_insert_cast (gsi, loc,
2823 build_nonstandard_integer_type
2824 (actual_precision, from_unsigned1), rhs1);
2825 if (actual_precision != TYPE_PRECISION (type2)
2826 || from_unsigned2 != TYPE_UNSIGNED (type2))
2827 rhs2 = build_and_insert_cast (gsi, loc,
2828 build_nonstandard_integer_type
2829 (actual_precision, from_unsigned2), rhs2);
2831 /* Handle constants. */
2832 if (TREE_CODE (rhs1) == INTEGER_CST)
2833 rhs1 = fold_convert (type1, rhs1);
2834 if (TREE_CODE (rhs2) == INTEGER_CST)
2835 rhs2 = fold_convert (type2, rhs2);
2837 gimple_assign_set_rhs1 (stmt, rhs1);
2838 gimple_assign_set_rhs2 (stmt, rhs2);
2839 gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2840 update_stmt (stmt);
2841 widen_mul_stats.widen_mults_inserted++;
2842 return true;
2845 /* Process a single gimple statement STMT, which is found at the
2846    iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
2847 rhs (given by CODE), and try to convert it into a
2848 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
2849 is true iff we converted the statement. */
2851 static bool
2852 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2853 enum tree_code code)
2855 gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2856 gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2857 tree type, type1, type2, optype;
2858 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2859 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2860 optab this_optab;
2861 enum tree_code wmult_code;
2862 enum insn_code handler;
2863 scalar_mode to_mode, from_mode, actual_mode;
2864 location_t loc = gimple_location (stmt);
2865 int actual_precision;
2866 bool from_unsigned1, from_unsigned2;
2868 lhs = gimple_assign_lhs (stmt);
2869 type = TREE_TYPE (lhs);
2870 if (TREE_CODE (type) != INTEGER_TYPE
2871 && TREE_CODE (type) != FIXED_POINT_TYPE)
2872 return false;
2874 if (code == MINUS_EXPR)
2875 wmult_code = WIDEN_MULT_MINUS_EXPR;
2876 else
2877 wmult_code = WIDEN_MULT_PLUS_EXPR;
2879 rhs1 = gimple_assign_rhs1 (stmt);
2880 rhs2 = gimple_assign_rhs2 (stmt);
2882 if (TREE_CODE (rhs1) == SSA_NAME)
2884 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2885 if (is_gimple_assign (rhs1_stmt))
2886 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2889 if (TREE_CODE (rhs2) == SSA_NAME)
2891 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2892 if (is_gimple_assign (rhs2_stmt))
2893 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2896 /* Allow for one conversion statement between the multiply
2897      and addition/subtraction statement.  If there are multiple
2898      conversions then we assume they would invalidate this
2899 transformation. If that's not the case then they should have
2900 been folded before now. */
2901 if (CONVERT_EXPR_CODE_P (rhs1_code))
2903 conv1_stmt = rhs1_stmt;
2904 rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2905 if (TREE_CODE (rhs1) == SSA_NAME)
2907 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2908 if (is_gimple_assign (rhs1_stmt))
2909 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2911 else
2912 return false;
2914 if (CONVERT_EXPR_CODE_P (rhs2_code))
2916 conv2_stmt = rhs2_stmt;
2917 rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2918 if (TREE_CODE (rhs2) == SSA_NAME)
2920 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2921 if (is_gimple_assign (rhs2_stmt))
2922 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2924 else
2925 return false;
2928 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2929      is_widening_mult_p, but we still need the rhs values it returns.
2931 It might also appear that it would be sufficient to use the existing
2932 operands of the widening multiply, but that would limit the choice of
2933 multiply-and-accumulate instructions.
2935      If the widened-multiplication result has more than one use, it is
2936      probably wiser not to do the conversion.  Also restrict this operation
2937      to a single basic block to avoid moving the multiply to a different block
2938 with a higher execution frequency. */
2939 if (code == PLUS_EXPR
2940 && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2942 if (!has_single_use (rhs1)
2943 || gimple_bb (rhs1_stmt) != gimple_bb (stmt)
2944 || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2945 &type2, &mult_rhs2))
2946 return false;
2947 add_rhs = rhs2;
2948 conv_stmt = conv1_stmt;
2950 else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2952 if (!has_single_use (rhs2)
2953 || gimple_bb (rhs2_stmt) != gimple_bb (stmt)
2954 || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2955 &type2, &mult_rhs2))
2956 return false;
2957 add_rhs = rhs1;
2958 conv_stmt = conv2_stmt;
2960 else
2961 return false;
2963 to_mode = SCALAR_TYPE_MODE (type);
2964 from_mode = SCALAR_TYPE_MODE (type1);
2965 if (to_mode == from_mode)
2966 return false;
2968 from_unsigned1 = TYPE_UNSIGNED (type1);
2969 from_unsigned2 = TYPE_UNSIGNED (type2);
2970 optype = type1;
2972 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
2973 if (from_unsigned1 != from_unsigned2)
2975 if (!INTEGRAL_TYPE_P (type))
2976 return false;
2977 /* We can use a signed multiply with unsigned types as long as
2978 there is a wider mode to use, or it is the smaller of the two
2979 types that is unsigned. Note that type1 >= type2, always. */
2980 if ((from_unsigned1
2981 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2982 || (from_unsigned2
2983 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2985 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2986 || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2987 return false;
2990 from_unsigned1 = from_unsigned2 = false;
2991 optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2992 false);
2995 /* If there was a conversion between the multiply and addition
2996 then we need to make sure it fits a multiply-and-accumulate.
2997      There should be a single mode change which does not change the
2998 value. */
2999 if (conv_stmt)
3001 /* We use the original, unmodified data types for this. */
3002 tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
3003 tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
3004 int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
3005 bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
3007 if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
3009 /* Conversion is a truncate. */
3010 if (TYPE_PRECISION (to_type) < data_size)
3011 return false;
3013 else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
3015 /* Conversion is an extend. Check it's the right sort. */
3016 if (TYPE_UNSIGNED (from_type) != is_unsigned
3017 && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
3018 return false;
3020 /* else convert is a no-op for our purposes. */
3023 /* Verify that the machine can perform a widening multiply
3024 accumulate in this mode/signedness combination, otherwise
3025 this transformation is likely to pessimize code. */
3026 this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
3027 handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
3028 from_mode, &actual_mode);
3030 if (handler == CODE_FOR_nothing)
3031 return false;
3033   /* Ensure that the inputs to the handler are in the correct precision
3034 for the opcode. This will be the full mode size. */
3035 actual_precision = GET_MODE_PRECISION (actual_mode);
3036 if (actual_precision != TYPE_PRECISION (type1)
3037 || from_unsigned1 != TYPE_UNSIGNED (type1))
3038 mult_rhs1 = build_and_insert_cast (gsi, loc,
3039 build_nonstandard_integer_type
3040 (actual_precision, from_unsigned1),
3041 mult_rhs1);
3042 if (actual_precision != TYPE_PRECISION (type2)
3043 || from_unsigned2 != TYPE_UNSIGNED (type2))
3044 mult_rhs2 = build_and_insert_cast (gsi, loc,
3045 build_nonstandard_integer_type
3046 (actual_precision, from_unsigned2),
3047 mult_rhs2);
3049 if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
3050 add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
3052 /* Handle constants. */
3053 if (TREE_CODE (mult_rhs1) == INTEGER_CST)
3054 mult_rhs1 = fold_convert (type1, mult_rhs1);
3055 if (TREE_CODE (mult_rhs2) == INTEGER_CST)
3056 mult_rhs2 = fold_convert (type2, mult_rhs2);
3058 gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
3059 add_rhs);
3060 update_stmt (gsi_stmt (*gsi));
3061 widen_mul_stats.maccs_inserted++;
3062 return true;
3065 /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
3066 OP2 and which we know is used in statements that can be, together with the
3067 multiplication, converted to FMAs, perform the transformation. */
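/* For example, an unconditional use such as
     t = mul_result + c;
   is replaced here by t = .FMA (op1, op2, c); negations of the
   multiplication or of the addend are subsequently folded into .FMS,
   .FNMA or .FNMS.  */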
3069 static void
3070 convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
3072 tree type = TREE_TYPE (mul_result);
3073 gimple *use_stmt;
3074 imm_use_iterator imm_iter;
3075 gcall *fma_stmt;
3077 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3079 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3080 tree addop, mulop1 = op1, result = mul_result;
3081 bool negate_p = false;
3082 gimple_seq seq = NULL;
3084 if (is_gimple_debug (use_stmt))
3085 continue;
3087 if (is_gimple_assign (use_stmt)
3088 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3090 result = gimple_assign_lhs (use_stmt);
3091 use_operand_p use_p;
3092 gimple *neguse_stmt;
3093 single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3094 gsi_remove (&gsi, true);
3095 release_defs (use_stmt);
3097 use_stmt = neguse_stmt;
3098 gsi = gsi_for_stmt (use_stmt);
3099 negate_p = true;
3102 tree cond, else_value, ops[3], len, bias;
3103 tree_code code;
3104 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
3105 ops, &else_value,
3106 &len, &bias))
3107 gcc_unreachable ();
3108 addop = ops[0] == result ? ops[1] : ops[0];
3110 if (code == MINUS_EXPR)
3112 if (ops[0] == result)
3113 /* a * b - c -> a * b + (-c) */
3114 addop = gimple_build (&seq, NEGATE_EXPR, type, addop);
3115 else
3116 /* a - b * c -> (-b) * c + a */
3117 negate_p = !negate_p;
3120 if (negate_p)
3121 mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1);
3123 if (seq)
3124 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
3126 if (len)
3127 fma_stmt
3128 = gimple_build_call_internal (IFN_COND_LEN_FMA, 7, cond, mulop1, op2,
3129 addop, else_value, len, bias);
3130 else if (cond)
3131 fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
3132 op2, addop, else_value);
3133 else
3134 fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
3135 gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
3136 gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun,
3137 use_stmt));
3138 gsi_replace (&gsi, fma_stmt, true);
3139 /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
3140 regardless of where the negation occurs. */
3141 gimple *orig_stmt = gsi_stmt (gsi);
3142 if (fold_stmt (&gsi, follow_all_ssa_edges))
3144 if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
3145 gcc_unreachable ();
3146 update_stmt (gsi_stmt (gsi));
3149 if (dump_file && (dump_flags & TDF_DETAILS))
3151 fprintf (dump_file, "Generated FMA ");
3152 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3153 fprintf (dump_file, "\n");
3156 /* If the FMA result is negated in a single use, fold the negation
3157 too. */
3158 orig_stmt = gsi_stmt (gsi);
3159 use_operand_p use_p;
3160 gimple *neg_stmt;
3161 if (is_gimple_call (orig_stmt)
3162 && gimple_call_internal_p (orig_stmt)
3163 && gimple_call_lhs (orig_stmt)
3164 && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME
3165 && single_imm_use (gimple_call_lhs (orig_stmt), &use_p, &neg_stmt)
3166 && is_gimple_assign (neg_stmt)
3167 && gimple_assign_rhs_code (neg_stmt) == NEGATE_EXPR
3168 && !stmt_could_throw_p (cfun, neg_stmt))
3170 gsi = gsi_for_stmt (neg_stmt);
3171 if (fold_stmt (&gsi, follow_all_ssa_edges))
3173 if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (gsi)))
3174 gcc_unreachable ();
3175 update_stmt (gsi_stmt (gsi));
3176 if (dump_file && (dump_flags & TDF_DETAILS))
3178 fprintf (dump_file, "Folded FMA negation ");
3179 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3180 fprintf (dump_file, "\n");
3185 widen_mul_stats.fmas_inserted++;
3189 /* Data necessary to perform the actual transformation from a multiplication
3190    and an addition to an FMA once it has been decided to do so, and to
3191 then delete the multiplication statement from the function IL. */
3193 struct fma_transformation_info
3195 gimple *mul_stmt;
3196 tree mul_result;
3197 tree op1;
3198 tree op2;
3201 /* Structure containing the current state of FMA deferring, i.e. whether we are
3202 deferring, whether to continue deferring, and all data necessary to come
3203 back and perform all deferred transformations. */
3205 class fma_deferring_state
3207 public:
3208 /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually
3209 do any deferring. */
3211 fma_deferring_state (bool perform_deferring)
3212 : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
3213 m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
3215   /* List of FMA candidates for which the transformation has been determined
3216      to be possible but which, at this point in the BB analysis, we do not
3217      consider beneficial.  */
3218 auto_vec<fma_transformation_info, 8> m_candidates;
3220   /* Set of results of multiplication that are part of already deferred FMA
3221 candidates. */
3222 hash_set<tree> m_mul_result_set;
3224   /* The PHI that supposedly feeds back the result of one FMA to another over
3225      the loop boundary.  */
3226 gphi *m_initial_phi;
3228 /* Result of the last produced FMA candidate or NULL if there has not been
3229 one. */
3230 tree m_last_result;
3232 /* If true, deferring might still be profitable. If false, transform all
3233 candidates and no longer defer. */
3234 bool m_deferring_p;
3237 /* Transform all deferred FMA candidates and mark STATE as no longer
3238 deferring. */
3240 static void
3241 cancel_fma_deferring (fma_deferring_state *state)
3243 if (!state->m_deferring_p)
3244 return;
3246 for (unsigned i = 0; i < state->m_candidates.length (); i++)
3248 if (dump_file && (dump_flags & TDF_DETAILS))
3249 fprintf (dump_file, "Generating deferred FMA\n");
3251 const fma_transformation_info &fti = state->m_candidates[i];
3252 convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
3254 gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
3255 gsi_remove (&gsi, true);
3256 release_defs (fti.mul_stmt);
3258 state->m_deferring_p = false;
3261 /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3262 Otherwise return NULL. */
3264 static gphi *
3265 result_of_phi (tree op)
3267 if (TREE_CODE (op) != SSA_NAME)
3268 return NULL;
3270 return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3273 /* After processing statements of a BB and recording STATE, return true if the
3274    initial phi is fed by the last FMA candidate result or one such result from
3275 previously processed BBs marked in LAST_RESULT_SET. */
3277 static bool
3278 last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3279 hash_set<tree> *last_result_set)
3281 ssa_op_iter iter;
3282 use_operand_p use;
3283 FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3285 tree t = USE_FROM_PTR (use);
3286 if (t == state->m_last_result
3287 || last_result_set->contains (t))
3288 return true;
3291 return false;
3294 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3295 with uses in additions and subtractions to form fused multiply-add
3296 operations. Returns true if successful and MUL_STMT should be removed.
3297 If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3298 on MUL_COND, otherwise it is unconditional.
3300 If STATE indicates that we are deferring FMA transformation, that means
3301 that we do not produce FMAs for basic blocks which look like:
3303 <bb 6>
3304 # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3305 _65 = _14 * _16;
3306 accumulator_66 = _65 + accumulator_111;
3308    or its unrolled version, i.e. with several FMA candidates that feed the result
3309 of one into the addend of another. Instead, we add them to a list in STATE
3310 and if we later discover an FMA candidate that is not part of such a chain,
3311 we go back and perform all deferred past candidates. */
3313 static bool
3314 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3315 fma_deferring_state *state, tree mul_cond = NULL_TREE,
3316 tree mul_len = NULL_TREE, tree mul_bias = NULL_TREE)
3318 tree mul_result = gimple_get_lhs (mul_stmt);
3319 /* If there isn't a LHS then this can't be an FMA. There can be no LHS
3320 if the statement was left just for the side-effects. */
3321 if (!mul_result)
3322 return false;
3323 tree type = TREE_TYPE (mul_result);
3324 gimple *use_stmt, *neguse_stmt;
3325 use_operand_p use_p;
3326 imm_use_iterator imm_iter;
3328 if (FLOAT_TYPE_P (type)
3329 && flag_fp_contract_mode != FP_CONTRACT_FAST)
3330 return false;
3332 /* We don't want to do bitfield reduction ops. */
3333 if (INTEGRAL_TYPE_P (type)
3334 && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
3335 return false;
3337 /* If the target doesn't support it, don't generate it. We assume that
3338 if fma isn't available then fms, fnma or fnms are not either. */
3339 optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt));
3340 if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
3341 return false;
3343 /* If the multiplication has zero uses, it is kept around probably because
3344 of -fnon-call-exceptions. Don't optimize it away in that case,
3345      it is DCE's job.  */
3346 if (has_zero_uses (mul_result))
3347 return false;
3349 bool check_defer
3350 = (state->m_deferring_p
3351 && maybe_le (tree_to_poly_int64 (TYPE_SIZE (type)),
3352 param_avoid_fma_max_bits));
3353 bool defer = check_defer;
3354 bool seen_negate_p = false;
3356 /* There is no numerical difference between fused and unfused integer FMAs,
3357 and the assumption below that FMA is as cheap as addition is unlikely
3358 to be true, especially if the multiplication occurs multiple times on
3359 the same chain. E.g., for something like:
3361 (((a * b) + c) >> 1) + (a * b)
3363 we do not want to duplicate the a * b into two additions, not least
3364 because the result is not a natural FMA chain. */
3365 if (ANY_INTEGRAL_TYPE_P (type)
3366 && !has_single_use (mul_result))
3367 return false;
3369 /* Make sure that the multiplication statement becomes dead after
3370 the transformation, thus that all uses are transformed to FMAs.
3371 This means we assume that an FMA operation has the same cost
3372 as an addition. */
3373 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3375 tree result = mul_result;
3376 bool negate_p = false;
3378 use_stmt = USE_STMT (use_p);
3380 if (is_gimple_debug (use_stmt))
3381 continue;
3383       /* For now restrict this operation to single basic blocks.  In theory
3384 we would want to support sinking the multiplication in
3385 m = a*b;
3386 if ()
3387 ma = m + c;
3388 else
3389 d = m;
3390 to form a fma in the then block and sink the multiplication to the
3391 else block. */
3392 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3393 return false;
3395 /* A negate on the multiplication leads to FNMA. */
3396 if (is_gimple_assign (use_stmt)
3397 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3399 ssa_op_iter iter;
3400 use_operand_p usep;
3402 /* If (due to earlier missed optimizations) we have two
3403 negates of the same value, treat them as equivalent
3404 to a single negate with multiple uses. */
3405 if (seen_negate_p)
3406 return false;
3408 result = gimple_assign_lhs (use_stmt);
3410 /* Make sure the negate statement becomes dead with this
3411 single transformation. */
3412 if (!single_imm_use (gimple_assign_lhs (use_stmt),
3413 &use_p, &neguse_stmt))
3414 return false;
3416 /* Make sure the multiplication isn't also used on that stmt. */
3417 FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3418 if (USE_FROM_PTR (usep) == mul_result)
3419 return false;
3421 /* Re-validate. */
3422 use_stmt = neguse_stmt;
3423 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3424 return false;
3426 negate_p = seen_negate_p = true;
3429 tree cond, else_value, ops[3], len, bias;
3430 tree_code code;
3431 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3432 &else_value, &len, &bias))
3433 return false;
3435 switch (code)
3437 case MINUS_EXPR:
3438 if (ops[1] == result)
3439 negate_p = !negate_p;
3440 break;
3441 case PLUS_EXPR:
3442 break;
3443 default:
3444 /* FMA can only be formed from PLUS and MINUS. */
3445 return false;
3448 if (len)
3450 	  /* For COND_LEN_* operations, we may have a dummy mask which is
3451 	     the all-true mask.  In that case mul_cond and cond may be
3452 	     different trees, but we still consider them equal.  */
3453 if (mul_cond && cond != mul_cond
3454 && !(integer_truep (mul_cond) && integer_truep (cond)))
3455 return false;
3457 if (else_value == result)
3458 return false;
3460 if (!direct_internal_fn_supported_p (IFN_COND_LEN_FMA, type,
3461 opt_type))
3462 return false;
3464 if (mul_len)
3466 poly_int64 mul_value, value;
3467 if (poly_int_tree_p (mul_len, &mul_value)
3468 && poly_int_tree_p (len, &value)
3469 && maybe_ne (mul_value, value))
3470 return false;
3471 else if (mul_len != len)
3472 return false;
3474 if (wi::to_widest (mul_bias) != wi::to_widest (bias))
3475 return false;
3478 else
3480 if (mul_cond && cond != mul_cond)
3481 return false;
3483 if (cond)
3485 if (cond == result || else_value == result)
3486 return false;
3487 if (!direct_internal_fn_supported_p (IFN_COND_FMA, type,
3488 opt_type))
3489 return false;
3493 /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3494 we'll visit later, we might be able to get a more profitable
3495 match with fnma.
3496 OTOH, if we don't, a negate / fma pair has likely lower latency
3497 	 than a mult / subtract pair.  */
3498 if (code == MINUS_EXPR
3499 && !negate_p
3500 && ops[0] == result
3501 && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
3502 && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3503 && TREE_CODE (ops[1]) == SSA_NAME
3504 && has_single_use (ops[1]))
3506 gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3507 if (is_gimple_assign (stmt2)
3508 && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3509 return false;
3512 /* We can't handle a * b + a * b. */
3513 if (ops[0] == ops[1])
3514 return false;
3515 /* If deferring, make sure we are not looking at an instruction that
3516 wouldn't have existed if we were not. */
3517 if (state->m_deferring_p
3518 && (state->m_mul_result_set.contains (ops[0])
3519 || state->m_mul_result_set.contains (ops[1])))
3520 return false;
3522 if (check_defer)
3524 tree use_lhs = gimple_get_lhs (use_stmt);
3525 if (state->m_last_result)
3527 if (ops[1] == state->m_last_result
3528 || ops[0] == state->m_last_result)
3529 defer = true;
3530 else
3531 defer = false;
3533 else
3535 gcc_checking_assert (!state->m_initial_phi);
3536 gphi *phi;
3537 if (ops[0] == result)
3538 phi = result_of_phi (ops[1]);
3539 else
3541 gcc_assert (ops[1] == result);
3542 phi = result_of_phi (ops[0]);
3545 if (phi)
3547 state->m_initial_phi = phi;
3548 defer = true;
3550 else
3551 defer = false;
3554 state->m_last_result = use_lhs;
3555 check_defer = false;
3557 else
3558 defer = false;
3560 /* While it is possible to validate whether or not the exact form that
3561 we've recognized is available in the backend, the assumption is that
3562 if the deferring logic above did not trigger, the transformation is
3563 never a loss. For instance, suppose the target only has the plain FMA
3564 pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged
3565 MUL+SUB for FMA+NEG, which is still two operations. Consider
3566 -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3567 form the two NEGs are independent and could be run in parallel. */
3570 if (defer)
3572 fma_transformation_info fti;
3573 fti.mul_stmt = mul_stmt;
3574 fti.mul_result = mul_result;
3575 fti.op1 = op1;
3576 fti.op2 = op2;
3577 state->m_candidates.safe_push (fti);
3578 state->m_mul_result_set.add (mul_result);
3580 if (dump_file && (dump_flags & TDF_DETAILS))
3582 fprintf (dump_file, "Deferred generating FMA for multiplication ");
3583 print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
3584 fprintf (dump_file, "\n");
3587 return false;
3589 else
3591 if (state->m_deferring_p)
3592 cancel_fma_deferring (state);
3593 convert_mult_to_fma_1 (mul_result, op1, op2);
3594 return true;
3599 /* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have
3600 a check for non-zero like:
3601 _1 = x_4(D) * y_5(D);
3602 *res_7(D) = _1;
3603 if (x_4(D) != 0)
3604 goto <bb 3>; [50.00%]
3605 else
3606 goto <bb 4>; [50.00%]
3608 <bb 3> [local count: 536870913]:
3609 _2 = _1 / x_4(D);
3610 _9 = _2 != y_5(D);
3611 _10 = (int) _9;
3613 <bb 4> [local count: 1073741824]:
3614 # iftmp.0_3 = PHI <_10(3), 0(2)>
3615 then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
3616 optimize the x_4(D) != 0 condition to 1. */
3618 static void
3619 maybe_optimize_guarding_check (vec<gimple *> &mul_stmts, gimple *cond_stmt,
3620 gimple *div_stmt, bool *cfg_changed)
3622 basic_block bb = gimple_bb (cond_stmt);
3623 if (gimple_bb (div_stmt) != bb || !single_pred_p (bb))
3624 return;
3625 edge pred_edge = single_pred_edge (bb);
3626 basic_block pred_bb = pred_edge->src;
3627 if (EDGE_COUNT (pred_bb->succs) != 2)
3628 return;
3629 edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge);
3630 edge other_succ_edge = NULL;
3631 if (gimple_code (cond_stmt) == GIMPLE_COND)
3633 if (EDGE_COUNT (bb->succs) != 2)
3634 return;
3635 other_succ_edge = EDGE_SUCC (bb, 0);
3636 if (gimple_cond_code (cond_stmt) == NE_EXPR)
3638 if (other_succ_edge->flags & EDGE_TRUE_VALUE)
3639 other_succ_edge = EDGE_SUCC (bb, 1);
3641 else if (other_succ_edge->flags & EDGE_FALSE_VALUE)
3642 other_succ_edge = EDGE_SUCC (bb, 0);
3643 if (other_edge->dest != other_succ_edge->dest)
3644 return;
3646 else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb))
3647 return;
3648 gcond *zero_cond = safe_dyn_cast <gcond *> (*gsi_last_bb (pred_bb));
3649 if (zero_cond == NULL
3650 || (gimple_cond_code (zero_cond)
3651 != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR))
3652 || !integer_zerop (gimple_cond_rhs (zero_cond)))
3653 return;
3654 tree zero_cond_lhs = gimple_cond_lhs (zero_cond);
3655 if (TREE_CODE (zero_cond_lhs) != SSA_NAME)
3656 return;
3657 if (gimple_assign_rhs2 (div_stmt) != zero_cond_lhs)
3659       /* Allow the divisor to be the result of a same-precision cast
3660 from zero_cond_lhs. */
3661 tree rhs2 = gimple_assign_rhs2 (div_stmt);
3662 if (TREE_CODE (rhs2) != SSA_NAME)
3663 return;
3664 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3665 if (!gimple_assign_cast_p (g)
3666 || gimple_assign_rhs1 (g) != gimple_cond_lhs (zero_cond)
3667 || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs))
3668 || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs))
3669 != TYPE_PRECISION (TREE_TYPE (rhs2))))
3670 return;
3672 gimple_stmt_iterator gsi = gsi_after_labels (bb);
3673 mul_stmts.quick_push (div_stmt);
3674 if (is_gimple_debug (gsi_stmt (gsi)))
3675 gsi_next_nondebug (&gsi);
3676 unsigned cast_count = 0;
3677 while (gsi_stmt (gsi) != cond_stmt)
3679 /* If original mul_stmt has a single use, allow it in the same bb,
3680 we are looking then just at __builtin_mul_overflow_p.
3681 Though, in that case the original mul_stmt will be replaced
3682 by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts. */
3683 gimple *mul_stmt;
3684 unsigned int i;
3685 bool ok = false;
3686 FOR_EACH_VEC_ELT (mul_stmts, i, mul_stmt)
3688 if (gsi_stmt (gsi) == mul_stmt)
3690 ok = true;
3691 break;
3694 if (!ok && gimple_assign_cast_p (gsi_stmt (gsi)) && ++cast_count < 4)
3695 ok = true;
3696 if (!ok)
3697 return;
3698 gsi_next_nondebug (&gsi);
3700 if (gimple_code (cond_stmt) == GIMPLE_COND)
3702 basic_block succ_bb = other_edge->dest;
3703 for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (gpi);
3704 gsi_next (&gpi))
3706 gphi *phi = gpi.phi ();
3707 tree v1 = gimple_phi_arg_def (phi, other_edge->dest_idx);
3708 tree v2 = gimple_phi_arg_def (phi, other_succ_edge->dest_idx);
3709 if (!operand_equal_p (v1, v2, 0))
3710 return;
3713 else
3715 tree lhs = gimple_assign_lhs (cond_stmt);
3716 if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
3717 return;
3718 gsi_next_nondebug (&gsi);
3719 if (!gsi_end_p (gsi))
3721 if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3722 return;
3723 gimple *cast_stmt = gsi_stmt (gsi);
3724 if (!gimple_assign_cast_p (cast_stmt))
3725 return;
3726 tree new_lhs = gimple_assign_lhs (cast_stmt);
3727 gsi_next_nondebug (&gsi);
3728 if (!gsi_end_p (gsi)
3729 || !new_lhs
3730 || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs))
3731 || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1)
3732 return;
3733 lhs = new_lhs;
3735 edge succ_edge = single_succ_edge (bb);
3736 basic_block succ_bb = succ_edge->dest;
3737 gsi = gsi_start_phis (succ_bb);
3738 if (gsi_end_p (gsi))
3739 return;
3740 gphi *phi = as_a <gphi *> (gsi_stmt (gsi));
3741 gsi_next (&gsi);
3742 if (!gsi_end_p (gsi))
3743 return;
3744 if (gimple_phi_arg_def (phi, succ_edge->dest_idx) != lhs)
3745 return;
3746 tree other_val = gimple_phi_arg_def (phi, other_edge->dest_idx);
3747 if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3749 tree cond = gimple_assign_rhs1 (cond_stmt);
3750 if (TREE_CODE (cond) == NE_EXPR)
3752 if (!operand_equal_p (other_val,
3753 gimple_assign_rhs3 (cond_stmt), 0))
3754 return;
3756 else if (!operand_equal_p (other_val,
3757 gimple_assign_rhs2 (cond_stmt), 0))
3758 return;
3760 else if (gimple_assign_rhs_code (cond_stmt) == NE_EXPR)
3762 if (!integer_zerop (other_val))
3763 return;
3765 else if (!integer_onep (other_val))
3766 return;
3768 if (pred_edge->flags & EDGE_TRUE_VALUE)
3769 gimple_cond_make_true (zero_cond);
3770 else
3771 gimple_cond_make_false (zero_cond);
3772 update_stmt (zero_cond);
3773 *cfg_changed = true;
3776 /* Helper function for arith_overflow_check_p. Return true
3777 if VAL1 is equal to VAL2 cast to corresponding integral type
3778 with other signedness or vice versa. */
3780 static bool
3781 arith_cast_equal_p (tree val1, tree val2)
3783 if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
3784 return wi::eq_p (wi::to_wide (val1), wi::to_wide (val2));
3785 else if (TREE_CODE (val1) != SSA_NAME || TREE_CODE (val2) != SSA_NAME)
3786 return false;
3787 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1))
3788 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1)) == val2)
3789 return true;
3790 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2))
3791 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2)) == val1)
3792 return true;
3793 return false;
3796 /* Helper function of match_arith_overflow. Return 1
3797    if USE_STMT is an unsigned overflow check ovf != 0 for
3798    STMT, -1 if USE_STMT is an unsigned overflow check ovf == 0,
3799 and 0 otherwise. */
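     For example, for STMT  x_1 = y_2 - z_3;  a use like  if (x_1 > y_2)
     is recognized as the overflow check and yields 1, while  if (x_1 <= y_2)
     yields -1; any unrelated use yields 0.  (Illustrative SSA names only.)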
3801 static int
3802 arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
3803 tree maxval, tree *other)
3805 enum tree_code ccode = ERROR_MARK;
3806 tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3807 enum tree_code code = gimple_assign_rhs_code (stmt);
3808 tree lhs = gimple_assign_lhs (cast_stmt ? cast_stmt : stmt);
3809 tree rhs1 = gimple_assign_rhs1 (stmt);
3810 tree rhs2 = gimple_assign_rhs2 (stmt);
3811 tree multop = NULL_TREE, divlhs = NULL_TREE;
3812 gimple *cur_use_stmt = use_stmt;
3814 if (code == MULT_EXPR)
3816 if (!is_gimple_assign (use_stmt))
3817 return 0;
3818 if (gimple_assign_rhs_code (use_stmt) != TRUNC_DIV_EXPR)
3819 return 0;
3820 if (gimple_assign_rhs1 (use_stmt) != lhs)
3821 return 0;
3822 if (cast_stmt)
3824 if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs1))
3825 multop = rhs2;
3826 else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs2))
3827 multop = rhs1;
3828 else
3829 return 0;
3831 else if (gimple_assign_rhs2 (use_stmt) == rhs1)
3832 multop = rhs2;
3833 else if (operand_equal_p (gimple_assign_rhs2 (use_stmt), rhs2, 0))
3834 multop = rhs1;
3835 else
3836 return 0;
3837 if (stmt_ends_bb_p (use_stmt))
3838 return 0;
3839 divlhs = gimple_assign_lhs (use_stmt);
3840 if (!divlhs)
3841 return 0;
3842 use_operand_p use;
3843 if (!single_imm_use (divlhs, &use, &cur_use_stmt))
3844 return 0;
3845 if (cast_stmt && gimple_assign_cast_p (cur_use_stmt))
3847 tree cast_lhs = gimple_assign_lhs (cur_use_stmt);
3848 if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
3849 && TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
3850 && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
3851 == TYPE_PRECISION (TREE_TYPE (divlhs)))
3852 && single_imm_use (cast_lhs, &use, &cur_use_stmt))
3854 cast_stmt = NULL;
3855 divlhs = cast_lhs;
3857 else
3858 return 0;
3861 if (gimple_code (cur_use_stmt) == GIMPLE_COND)
3863 ccode = gimple_cond_code (cur_use_stmt);
3864 crhs1 = gimple_cond_lhs (cur_use_stmt);
3865 crhs2 = gimple_cond_rhs (cur_use_stmt);
3867 else if (is_gimple_assign (cur_use_stmt))
3869 if (gimple_assign_rhs_class (cur_use_stmt) == GIMPLE_BINARY_RHS)
3871 ccode = gimple_assign_rhs_code (cur_use_stmt);
3872 crhs1 = gimple_assign_rhs1 (cur_use_stmt);
3873 crhs2 = gimple_assign_rhs2 (cur_use_stmt);
3875 else if (gimple_assign_rhs_code (cur_use_stmt) == COND_EXPR)
3877 tree cond = gimple_assign_rhs1 (cur_use_stmt);
3878 if (COMPARISON_CLASS_P (cond))
3880 ccode = TREE_CODE (cond);
3881 crhs1 = TREE_OPERAND (cond, 0);
3882 crhs2 = TREE_OPERAND (cond, 1);
3884 else
3885 return 0;
3887 else
3888 return 0;
3890 else
3891 return 0;
3893 if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3894 return 0;
3896 switch (ccode)
3898 case GT_EXPR:
3899 case LE_EXPR:
3900 if (maxval)
3902 /* r = a + b; r > maxval or r <= maxval */
3903 if (crhs1 == lhs
3904 && TREE_CODE (crhs2) == INTEGER_CST
3905 && tree_int_cst_equal (crhs2, maxval))
3906 return ccode == GT_EXPR ? 1 : -1;
3907 break;
3909 /* r = a - b; r > a or r <= a
3910 r = a + b; a > r or a <= r or b > r or b <= r. */
3911 if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3912 || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3913 && crhs2 == lhs))
3914 return ccode == GT_EXPR ? 1 : -1;
3915 /* r = ~a; b > r or b <= r. */
3916 if (code == BIT_NOT_EXPR && crhs2 == lhs)
3918 if (other)
3919 *other = crhs1;
3920 return ccode == GT_EXPR ? 1 : -1;
3922 break;
3923 case LT_EXPR:
3924 case GE_EXPR:
3925 if (maxval)
3926 break;
3927 /* r = a - b; a < r or a >= r
3928 r = a + b; r < a or r >= a or r < b or r >= b. */
3929 if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3930 || (code == PLUS_EXPR && crhs1 == lhs
3931 && (crhs2 == rhs1 || crhs2 == rhs2)))
3932 return ccode == LT_EXPR ? 1 : -1;
3933 /* r = ~a; r < b or r >= b. */
3934 if (code == BIT_NOT_EXPR && crhs1 == lhs)
3936 if (other)
3937 *other = crhs2;
3938 return ccode == LT_EXPR ? 1 : -1;
3940 break;
3941 case EQ_EXPR:
3942 case NE_EXPR:
3943 /* r = a * b; _1 = r / a; _1 == b
3944 r = a * b; _1 = r / b; _1 == a
3945 r = a * b; _1 = r / a; _1 != b
3946 r = a * b; _1 = r / b; _1 != a. */
3947 if (code == MULT_EXPR)
3949 if (cast_stmt)
3951 if ((crhs1 == divlhs && arith_cast_equal_p (crhs2, multop))
3952 || (crhs2 == divlhs && arith_cast_equal_p (crhs1, multop)))
3954 use_stmt = cur_use_stmt;
3955 return ccode == NE_EXPR ? 1 : -1;
3958 else if ((crhs1 == divlhs && operand_equal_p (crhs2, multop, 0))
3959 || (crhs2 == divlhs && crhs1 == multop))
3961 use_stmt = cur_use_stmt;
3962 return ccode == NE_EXPR ? 1 : -1;
3965 break;
3966 default:
3967 break;
3969 return 0;
3972 /* Recognize for unsigned x
3973 x = y - z;
3974 if (x > y)
3975 where there are other uses of x and replace it with
3976 _7 = .SUB_OVERFLOW (y, z);
3977 x = REALPART_EXPR <_7>;
3978 _8 = IMAGPART_EXPR <_7>;
3979 if (_8)
3980 and similarly for addition.
3982 Also recognize:
3983 yc = (type) y;
3984 zc = (type) z;
3985 x = yc + zc;
3986 if (x > max)
3987 where y and z have unsigned types with maximum max
3988 and there are other uses of x and all of those cast x
3989 back to that unsigned type and again replace it with
3990 _7 = .ADD_OVERFLOW (y, z);
3991 _9 = REALPART_EXPR <_7>;
3992 _8 = IMAGPART_EXPR <_7>;
3993 if (_8)
3994 and replace (utype) x with _9.
3996 Also recognize:
3997 x = ~z;
3998 if (y > x)
3999 and replace it with
4000 _7 = .ADD_OVERFLOW (y, z);
4001 _8 = IMAGPART_EXPR <_7>;
4002 if (_8)
4004 And also recognize:
4005 z = x * y;
4006 if (x != 0)
4007 goto <bb 3>; [50.00%]
4008 else
4009 goto <bb 4>; [50.00%]
4011 <bb 3> [local count: 536870913]:
4012 _2 = z / x;
4013 _9 = _2 != y;
4014 _10 = (int) _9;
4016 <bb 4> [local count: 1073741824]:
4017 # iftmp.0_3 = PHI <_10(3), 0(2)>
4018 and replace it with
4019 _7 = .MUL_OVERFLOW (x, y);
4020 z = IMAGPART_EXPR <_7>;
4021 _8 = IMAGPART_EXPR <_7>;
4022 _9 = _8 != 0;
4023 iftmp.0_3 = (int) _9; */
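 
 /* Illustrative only (not part of the pass itself): a minimal C-source sketch,
    with hypothetical function names, of the unsigned wrap-around checks the
    matcher below targets.  Assuming optimization is enabled and the target
    provides the uaddv4/usubv4 (or mulv4/umulv4) patterns, the comparison
    against an operand is expected to be rewritten into a test of the
    IMAGPART_EXPR of an .ADD_OVERFLOW/.SUB_OVERFLOW call as described above.
 
      unsigned int
      add_check (unsigned int y, unsigned int z, int *ovf)
      {
        unsigned int x = y + z;
        *ovf = x < y;         // wrap-around test on unsigned addition
        return x;
      }
 
      unsigned int
      sub_check (unsigned int y, unsigned int z, int *ovf)
      {
        unsigned int x = y - z;
        *ovf = x > y;         // wrap-around test on unsigned subtraction
        return x;
      }  */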
4025 static bool
4026 match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
4027 enum tree_code code, bool *cfg_changed)
4029 tree lhs = gimple_assign_lhs (stmt);
4030 tree type = TREE_TYPE (lhs);
4031 use_operand_p use_p;
4032 imm_use_iterator iter;
4033 bool use_seen = false;
4034 bool ovf_use_seen = false;
4035 gimple *use_stmt;
4036 gimple *add_stmt = NULL;
4037 bool add_first = false;
4038 gimple *cond_stmt = NULL;
4039 gimple *cast_stmt = NULL;
4040 tree cast_lhs = NULL_TREE;
4042 gcc_checking_assert (code == PLUS_EXPR
4043 || code == MINUS_EXPR
4044 || code == MULT_EXPR
4045 || code == BIT_NOT_EXPR);
4046 if (!INTEGRAL_TYPE_P (type)
4047 || !TYPE_UNSIGNED (type)
4048 || has_zero_uses (lhs)
4049 || (code != PLUS_EXPR
4050 && code != MULT_EXPR
4051 && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
4052 TYPE_MODE (type)) == CODE_FOR_nothing))
4053 return false;
4055 tree rhs1 = gimple_assign_rhs1 (stmt);
4056 tree rhs2 = gimple_assign_rhs2 (stmt);
4057 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4059 use_stmt = USE_STMT (use_p);
4060 if (is_gimple_debug (use_stmt))
4061 continue;
4063 tree other = NULL_TREE;
4064 if (arith_overflow_check_p (stmt, NULL, use_stmt, NULL_TREE, &other))
4066 if (code == BIT_NOT_EXPR)
4068 gcc_assert (other);
4069 if (TREE_CODE (other) != SSA_NAME)
4070 return false;
4071 if (rhs2 == NULL)
4072 rhs2 = other;
4073 else
4074 return false;
4075 cond_stmt = use_stmt;
4077 ovf_use_seen = true;
4079 else
4081 use_seen = true;
4082 if (code == MULT_EXPR
4083 && cast_stmt == NULL
4084 && gimple_assign_cast_p (use_stmt))
4086 cast_lhs = gimple_assign_lhs (use_stmt);
4087 if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
4088 && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
4089 && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
4090 == TYPE_PRECISION (TREE_TYPE (lhs))))
4091 cast_stmt = use_stmt;
4092 else
4093 cast_lhs = NULL_TREE;
4096 if (ovf_use_seen && use_seen)
4097 break;
4100 if (!ovf_use_seen
4101 && code == MULT_EXPR
4102 && cast_stmt)
4104 if (TREE_CODE (rhs1) != SSA_NAME
4105 || (TREE_CODE (rhs2) != SSA_NAME && TREE_CODE (rhs2) != INTEGER_CST))
4106 return false;
4107 FOR_EACH_IMM_USE_FAST (use_p, iter, cast_lhs)
4109 use_stmt = USE_STMT (use_p);
4110 if (is_gimple_debug (use_stmt))
4111 continue;
4113 if (arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4114 NULL_TREE, NULL))
4115 ovf_use_seen = true;
4118 else
4120 cast_stmt = NULL;
4121 cast_lhs = NULL_TREE;
4124 tree maxval = NULL_TREE;
4125 if (!ovf_use_seen
4126 || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
4127 || (code == PLUS_EXPR
4128 && optab_handler (uaddv4_optab,
4129 TYPE_MODE (type)) == CODE_FOR_nothing)
4130 || (code == MULT_EXPR
4131 && optab_handler (cast_stmt ? mulv4_optab : umulv4_optab,
4132 TYPE_MODE (type)) == CODE_FOR_nothing
4133 && (use_seen
4134 || cast_stmt
4135 || !can_mult_highpart_p (TYPE_MODE (type), true))))
4137 if (code != PLUS_EXPR)
4138 return false;
4139 if (TREE_CODE (rhs1) != SSA_NAME
4140 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1)))
4141 return false;
4142 rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1));
4143 tree type1 = TREE_TYPE (rhs1);
4144 if (!INTEGRAL_TYPE_P (type1)
4145 || !TYPE_UNSIGNED (type1)
4146 || TYPE_PRECISION (type1) >= TYPE_PRECISION (type)
4147 || (TYPE_PRECISION (type1)
4148 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1))))
4149 return false;
4150 if (TREE_CODE (rhs2) == INTEGER_CST)
4152 if (wi::ne_p (wi::rshift (wi::to_wide (rhs2),
4153 TYPE_PRECISION (type1),
4154 UNSIGNED), 0))
4155 return false;
4156 rhs2 = fold_convert (type1, rhs2);
4158 else
4160 if (TREE_CODE (rhs2) != SSA_NAME
4161 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2)))
4162 return false;
4163 rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2));
4164 tree type2 = TREE_TYPE (rhs2);
4165 if (!INTEGRAL_TYPE_P (type2)
4166 || !TYPE_UNSIGNED (type2)
4167 || TYPE_PRECISION (type2) >= TYPE_PRECISION (type)
4168 || (TYPE_PRECISION (type2)
4169 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2))))
4170 return false;
4172 if (TYPE_PRECISION (type1) >= TYPE_PRECISION (TREE_TYPE (rhs2)))
4173 type = type1;
4174 else
4175 type = TREE_TYPE (rhs2);
4177 if (TREE_CODE (type) != INTEGER_TYPE
4178 || optab_handler (uaddv4_optab,
4179 TYPE_MODE (type)) == CODE_FOR_nothing)
4180 return false;
4182 maxval = wide_int_to_tree (type, wi::max_value (TYPE_PRECISION (type),
4183 UNSIGNED));
4184 ovf_use_seen = false;
4185 use_seen = false;
4186 basic_block use_bb = NULL;
4187 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4189 use_stmt = USE_STMT (use_p);
4190 if (is_gimple_debug (use_stmt))
4191 continue;
4193 if (arith_overflow_check_p (stmt, NULL, use_stmt, maxval, NULL))
4195 ovf_use_seen = true;
4196 use_bb = gimple_bb (use_stmt);
4198 else
4200 if (!gimple_assign_cast_p (use_stmt)
4201 || gimple_assign_rhs_code (use_stmt) == VIEW_CONVERT_EXPR)
4202 return false;
4203 tree use_lhs = gimple_assign_lhs (use_stmt);
4204 if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
4205 || (TYPE_PRECISION (TREE_TYPE (use_lhs))
4206 > TYPE_PRECISION (type)))
4207 return false;
4208 use_seen = true;
4211 if (!ovf_use_seen)
4212 return false;
4213 if (!useless_type_conversion_p (type, TREE_TYPE (rhs1)))
4215 if (!use_seen)
4216 return false;
4217 tree new_rhs1 = make_ssa_name (type);
4218 gimple *g = gimple_build_assign (new_rhs1, NOP_EXPR, rhs1);
4219 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4220 rhs1 = new_rhs1;
4222 else if (!useless_type_conversion_p (type, TREE_TYPE (rhs2)))
4224 if (!use_seen)
4225 return false;
4226 tree new_rhs2 = make_ssa_name (type);
4227 gimple *g = gimple_build_assign (new_rhs2, NOP_EXPR, rhs2);
4228 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4229 rhs2 = new_rhs2;
4231 else if (!use_seen)
4233 /* If there are no uses of the wider addition, check if
4234 forwprop has not created a narrower addition.
4235 Require it to be in the same bb as the overflow check. */
4236 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs1)
4238 use_stmt = USE_STMT (use_p);
4239 if (is_gimple_debug (use_stmt))
4240 continue;
4242 if (use_stmt == stmt)
4243 continue;
4245 if (!is_gimple_assign (use_stmt)
4246 || gimple_bb (use_stmt) != use_bb
4247 || gimple_assign_rhs_code (use_stmt) != PLUS_EXPR)
4248 continue;
4250 if (gimple_assign_rhs1 (use_stmt) == rhs1)
4252 if (!operand_equal_p (gimple_assign_rhs2 (use_stmt),
4253 rhs2, 0))
4254 continue;
4256 else if (gimple_assign_rhs2 (use_stmt) == rhs1)
4258 if (gimple_assign_rhs1 (use_stmt) != rhs2)
4259 continue;
4261 else
4262 continue;
4264 add_stmt = use_stmt;
4265 break;
4267 if (add_stmt == NULL)
4268 return false;
4270 /* If stmt and add_stmt are in the same bb, we need to find out
4271 which one is earlier. If they are in different bbs, we've
4272 checked add_stmt is in the same bb as one of the uses of the
4273 stmt lhs, so stmt needs to dominate add_stmt too. */
4274 if (gimple_bb (stmt) == gimple_bb (add_stmt))
4276 gimple_stmt_iterator gsif = *gsi;
4277 gimple_stmt_iterator gsib = *gsi;
4278 int i;
4279 /* Search both forward and backward from stmt and have a small
4280 upper bound. */
4281 for (i = 0; i < 128; i++)
4283 if (!gsi_end_p (gsib))
4285 gsi_prev_nondebug (&gsib);
4286 if (gsi_stmt (gsib) == add_stmt)
4288 add_first = true;
4289 break;
4292 else if (gsi_end_p (gsif))
4293 break;
4294 if (!gsi_end_p (gsif))
4296 gsi_next_nondebug (&gsif);
4297 if (gsi_stmt (gsif) == add_stmt)
4298 break;
4301 if (i == 128)
4302 return false;
4303 if (add_first)
4304 *gsi = gsi_for_stmt (add_stmt);
4309 if (code == BIT_NOT_EXPR)
4310 *gsi = gsi_for_stmt (cond_stmt);
4312 auto_vec<gimple *, 8> mul_stmts;
4313 if (code == MULT_EXPR && cast_stmt)
4315 type = TREE_TYPE (cast_lhs);
4316 gimple *g = SSA_NAME_DEF_STMT (rhs1);
4317 if (gimple_assign_cast_p (g)
4318 && useless_type_conversion_p (type,
4319 TREE_TYPE (gimple_assign_rhs1 (g)))
4320 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4321 rhs1 = gimple_assign_rhs1 (g);
4322 else
4324 g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs1);
4325 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4326 rhs1 = gimple_assign_lhs (g);
4327 mul_stmts.quick_push (g);
4329 if (TREE_CODE (rhs2) == INTEGER_CST)
4330 rhs2 = fold_convert (type, rhs2);
4331 else
4333 g = SSA_NAME_DEF_STMT (rhs2);
4334 if (gimple_assign_cast_p (g)
4335 && useless_type_conversion_p (type,
4336 TREE_TYPE (gimple_assign_rhs1 (g)))
4337 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4338 rhs2 = gimple_assign_rhs1 (g);
4339 else
4341 g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs2);
4342 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4343 rhs2 = gimple_assign_lhs (g);
4344 mul_stmts.quick_push (g);
4348 tree ctype = build_complex_type (type);
4349 gcall *g = gimple_build_call_internal (code == MULT_EXPR
4350 ? IFN_MUL_OVERFLOW
4351 : code != MINUS_EXPR
4352 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
4353 2, rhs1, rhs2);
4354 tree ctmp = make_ssa_name (ctype);
4355 gimple_call_set_lhs (g, ctmp);
4356 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4357 tree new_lhs = (maxval || cast_stmt) ? make_ssa_name (type) : lhs;
4358 gassign *g2;
4359 if (code != BIT_NOT_EXPR)
4361 g2 = gimple_build_assign (new_lhs, REALPART_EXPR,
4362 build1 (REALPART_EXPR, type, ctmp));
4363 if (maxval || cast_stmt)
4365 gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4366 if (add_first)
4367 *gsi = gsi_for_stmt (stmt);
4369 else
4370 gsi_replace (gsi, g2, true);
4371 if (code == MULT_EXPR)
4373 mul_stmts.quick_push (g);
4374 mul_stmts.quick_push (g2);
4375 if (cast_stmt)
4377 g2 = gimple_build_assign (lhs, NOP_EXPR, new_lhs);
4378 gsi_replace (gsi, g2, true);
4379 mul_stmts.quick_push (g2);
4383 tree ovf = make_ssa_name (type);
4384 g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
4385 build1 (IMAGPART_EXPR, type, ctmp));
4386 if (code != BIT_NOT_EXPR)
4387 gsi_insert_after (gsi, g2, GSI_NEW_STMT);
4388 else
4389 gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4390 if (code == MULT_EXPR)
4391 mul_stmts.quick_push (g2);
4393 FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs)
4395 if (is_gimple_debug (use_stmt))
4396 continue;
4398 gimple *orig_use_stmt = use_stmt;
4399 int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4400 maxval, NULL);
4401 if (ovf_use == 0)
4403 gcc_assert (code != BIT_NOT_EXPR);
4404 if (maxval)
4406 tree use_lhs = gimple_assign_lhs (use_stmt);
4407 gimple_assign_set_rhs1 (use_stmt, new_lhs);
4408 if (useless_type_conversion_p (TREE_TYPE (use_lhs),
4409 TREE_TYPE (new_lhs)))
4410 gimple_assign_set_rhs_code (use_stmt, SSA_NAME);
4411 update_stmt (use_stmt);
4413 continue;
4415 if (gimple_code (use_stmt) == GIMPLE_COND)
4417 gcond *cond_stmt = as_a <gcond *> (use_stmt);
4418 gimple_cond_set_lhs (cond_stmt, ovf);
4419 gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
4420 gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4422 else
4424 gcc_checking_assert (is_gimple_assign (use_stmt));
4425 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
4427 gimple_assign_set_rhs1 (use_stmt, ovf);
4428 gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
4429 gimple_assign_set_rhs_code (use_stmt,
4430 ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4432 else
4434 gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
4435 == COND_EXPR);
4436 tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
4437 boolean_type_node, ovf,
4438 build_int_cst (type, 0));
4439 gimple_assign_set_rhs1 (use_stmt, cond);
4442 update_stmt (use_stmt);
4443 if (code == MULT_EXPR && use_stmt != orig_use_stmt)
4445 gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt);
4446 maybe_optimize_guarding_check (mul_stmts, use_stmt, orig_use_stmt,
4447 cfg_changed);
4448 use_operand_p use;
4449 gimple *cast_stmt;
4450 if (single_imm_use (gimple_assign_lhs (orig_use_stmt), &use,
4451 &cast_stmt)
4452 && gimple_assign_cast_p (cast_stmt))
4454 gimple_stmt_iterator gsi3 = gsi_for_stmt (cast_stmt);
4455 gsi_remove (&gsi3, true);
4456 release_ssa_name (gimple_assign_lhs (cast_stmt));
4458 gsi_remove (&gsi2, true);
4459 release_ssa_name (gimple_assign_lhs (orig_use_stmt));
4462 if (maxval)
4464 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
4465 gsi_remove (&gsi2, true);
4466 if (add_stmt)
4468 gimple *g = gimple_build_assign (gimple_assign_lhs (add_stmt),
4469 new_lhs);
4470 gsi2 = gsi_for_stmt (add_stmt);
4471 gsi_replace (&gsi2, g, true);
4474 else if (code == BIT_NOT_EXPR)
4476 *gsi = gsi_for_stmt (stmt);
4477 gsi_remove (gsi, true);
4478 release_ssa_name (lhs);
4479 return true;
4481 return false;
4484 /* Helper of match_uaddc_usubc. Look through an integral cast
4485    which should preserve a [0, 1] range value (unless the source has
4486    a 1-bit signed type) and the cast has a single use.  */
4488 static gimple *
4489 uaddc_cast (gimple *g)
4491 if (!gimple_assign_cast_p (g))
4492 return g;
4493 tree op = gimple_assign_rhs1 (g);
4494 if (TREE_CODE (op) == SSA_NAME
4495 && INTEGRAL_TYPE_P (TREE_TYPE (op))
4496 && (TYPE_PRECISION (TREE_TYPE (op)) > 1
4497 || TYPE_UNSIGNED (TREE_TYPE (op)))
4498 && has_single_use (gimple_assign_lhs (g)))
4499 return SSA_NAME_DEF_STMT (op);
4500 return g;
4503 /* Helper of match_uaddc_usubc. Look through a NE_EXPR
4504 comparison with 0 which also preserves [0, 1] value range. */
4506 static gimple *
4507 uaddc_ne0 (gimple *g)
4509 if (is_gimple_assign (g)
4510 && gimple_assign_rhs_code (g) == NE_EXPR
4511 && integer_zerop (gimple_assign_rhs2 (g))
4512 && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
4513 && has_single_use (gimple_assign_lhs (g)))
4514 return SSA_NAME_DEF_STMT (gimple_assign_rhs1 (g));
4515 return g;
4518 /* Return true if G is {REAL,IMAG}PART_EXPR PART with SSA_NAME
4519 operand. */
4521 static bool
4522 uaddc_is_cplxpart (gimple *g, tree_code part)
4524 return (is_gimple_assign (g)
4525 && gimple_assign_rhs_code (g) == part
4526 && TREE_CODE (TREE_OPERAND (gimple_assign_rhs1 (g), 0)) == SSA_NAME);
4529 /* Try to match e.g.
4530 _29 = .ADD_OVERFLOW (_3, _4);
4531 _30 = REALPART_EXPR <_29>;
4532 _31 = IMAGPART_EXPR <_29>;
4533 _32 = .ADD_OVERFLOW (_30, _38);
4534 _33 = REALPART_EXPR <_32>;
4535 _34 = IMAGPART_EXPR <_32>;
4536 _35 = _31 + _34;
4538 _36 = .UADDC (_3, _4, _38);
4539 _33 = REALPART_EXPR <_36>;
4540 _35 = IMAGPART_EXPR <_36>;
4542 _22 = .SUB_OVERFLOW (_6, _5);
4543 _23 = REALPART_EXPR <_22>;
4544 _24 = IMAGPART_EXPR <_22>;
4545 _25 = .SUB_OVERFLOW (_23, _37);
4546 _26 = REALPART_EXPR <_25>;
4547 _27 = IMAGPART_EXPR <_25>;
4548 _28 = _24 | _27;
4550 _29 = .USUBC (_6, _5, _37);
4551 _26 = REALPART_EXPR <_29>;
4552 _288 = IMAGPART_EXPR <_29>;
4553 provided _38 or _37 above have [0, 1] range
4554 and _3, _4 and _30 or _6, _5 and _23 are unsigned
4555 integral types with the same precision. Whether + or | or ^ is
4556    used on the IMAGPART_EXPR results doesn't matter; with one of the
4557    added or subtracted operands in the [0, 1] range, at most one
4558 .ADD_OVERFLOW or .SUB_OVERFLOW will indicate overflow. */
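 
 /* Illustrative only: a C-source sketch, with hypothetical names, of a
    carry-propagating limb addition that this matcher targets.
    __builtin_add_overflow already expands to .ADD_OVERFLOW, so with a
    uaddc5 pattern for the mode the two overflow calls and the combined
    carry below are expected to be merged into a single .UADDC call.
    CARRY_IN must be known to be 0 or 1 for the match to apply.
 
      unsigned long
      limb_addc (unsigned long a, unsigned long b, _Bool carry_in,
                 _Bool *carry_out)
      {
        unsigned long s1, s2;
        _Bool c1 = __builtin_add_overflow (a, b, &s1);
        _Bool c2 = __builtin_add_overflow (s1, (unsigned long) carry_in, &s2);
        *carry_out = c1 | c2;  // at most one of the two additions wraps
        return s2;
      }  */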
4560 static bool
4561 match_uaddc_usubc (gimple_stmt_iterator *gsi, gimple *stmt, tree_code code)
4563 tree rhs[4];
4564 rhs[0] = gimple_assign_rhs1 (stmt);
4565 rhs[1] = gimple_assign_rhs2 (stmt);
4566 rhs[2] = NULL_TREE;
4567 rhs[3] = NULL_TREE;
4568 tree type = TREE_TYPE (rhs[0]);
4569 if (!INTEGRAL_TYPE_P (type) || !TYPE_UNSIGNED (type))
4570 return false;
4572 if (code != BIT_IOR_EXPR && code != BIT_XOR_EXPR)
4574 /* If overflow flag is ignored on the MSB limb, we can end up with
4575 the most significant limb handled as r = op1 + op2 + ovf1 + ovf2;
4576 or r = op1 - op2 - ovf1 - ovf2; or various equivalent expressions
4577 thereof. Handle those like the ovf = ovf1 + ovf2; case to recognize
4578 the limb below the MSB, but also create another .UADDC/.USUBC call
4579 for the last limb.
4581 First look through assignments with the same rhs code as CODE,
4582 with the exception that subtraction of a constant is canonicalized
4583 into addition of its negation. rhs[0] will be minuend for
4584 subtractions and one of addends for addition, all other assigned
4585 rhs[i] operands will be subtrahends or other addends. */
4586 while (TREE_CODE (rhs[0]) == SSA_NAME && !rhs[3])
4588 gimple *g = SSA_NAME_DEF_STMT (rhs[0]);
4589 if (has_single_use (rhs[0])
4590 && is_gimple_assign (g)
4591 && (gimple_assign_rhs_code (g) == code
4592 || (code == MINUS_EXPR
4593 && gimple_assign_rhs_code (g) == PLUS_EXPR
4594 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST)))
4596 tree r2 = gimple_assign_rhs2 (g);
4597 if (gimple_assign_rhs_code (g) != code)
4599 r2 = const_unop (NEGATE_EXPR, TREE_TYPE (r2), r2);
4600 if (!r2)
4601 break;
4603 rhs[0] = gimple_assign_rhs1 (g);
4604 tree &r = rhs[2] ? rhs[3] : rhs[2];
4605 r = r2;
4607 else
4608 break;
4610 while (TREE_CODE (rhs[1]) == SSA_NAME && !rhs[3])
4612 gimple *g = SSA_NAME_DEF_STMT (rhs[1]);
4613 if (has_single_use (rhs[1])
4614 && is_gimple_assign (g)
4615 && gimple_assign_rhs_code (g) == PLUS_EXPR)
4617 rhs[1] = gimple_assign_rhs1 (g);
4618 if (rhs[2])
4619 rhs[3] = gimple_assign_rhs2 (g);
4620 else
4621 rhs[2] = gimple_assign_rhs2 (g);
4623 else
4624 break;
4626 /* If there are just 3 addends or one minuend and two subtrahends,
4627 check for UADDC or USUBC being pattern recognized earlier.
4628 Say r = op1 + op2 + ovf1 + ovf2; where the (ovf1 + ovf2) part
4629 got pattern matched earlier as __imag__ .UADDC (arg1, arg2, arg3)
4630 etc. */
4631 if (rhs[2] && !rhs[3])
4633 for (int i = (code == MINUS_EXPR ? 1 : 0); i < 3; ++i)
4634 if (TREE_CODE (rhs[i]) == SSA_NAME)
4636 gimple *im = uaddc_cast (SSA_NAME_DEF_STMT (rhs[i]));
4637 im = uaddc_ne0 (im);
4638 if (uaddc_is_cplxpart (im, IMAGPART_EXPR))
4640 /* We found one of the 3 addends or 2 subtrahends to be
4641 __imag__ of something, verify it is .UADDC/.USUBC. */
4642 tree rhs1 = gimple_assign_rhs1 (im);
4643 gimple *ovf = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs1, 0));
4644 if (gimple_call_internal_p (ovf, code == PLUS_EXPR
4645 ? IFN_UADDC : IFN_USUBC)
4646 && (optab_handler (code == PLUS_EXPR
4647 ? uaddc5_optab : usubc5_optab,
4648 TYPE_MODE (type))
4649 != CODE_FOR_nothing))
4651 /* And in that case build another .UADDC/.USUBC
4652                      call for the most significant limb addition.
4653 Overflow bit is ignored here. */
4654 if (i != 2)
4655 std::swap (rhs[i], rhs[2]);
4656 gimple *g
4657 = gimple_build_call_internal (code == PLUS_EXPR
4658 ? IFN_UADDC
4659 : IFN_USUBC,
4660 3, rhs[0], rhs[1],
4661 rhs[2]);
4662 tree nlhs = make_ssa_name (build_complex_type (type));
4663 gimple_call_set_lhs (g, nlhs);
4664 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4665 tree ilhs = gimple_assign_lhs (stmt);
4666 g = gimple_build_assign (ilhs, REALPART_EXPR,
4667 build1 (REALPART_EXPR,
4668 TREE_TYPE (ilhs),
4669 nlhs));
4670 gsi_replace (gsi, g, true);
4671 return true;
4675 return false;
4677 if (code == MINUS_EXPR && !rhs[2])
4678 return false;
4679 if (code == MINUS_EXPR)
4680 /* Code below expects rhs[0] and rhs[1] to have the IMAGPART_EXPRs.
4681 So, for MINUS_EXPR swap the single added rhs operand (others are
4682 subtracted) to rhs[3]. */
4683 std::swap (rhs[0], rhs[3]);
4685 /* Walk from both operands of STMT (for +/- even sometimes from
4686 all the 4 addends or 3 subtrahends), see through casts and != 0
4687 statements which would preserve [0, 1] range of values and
4688 check which is initialized from __imag__. */
4689 gimple *im1 = NULL, *im2 = NULL;
4690 for (int i = 0; i < (code == MINUS_EXPR ? 3 : 4); i++)
4691 if (rhs[i] && TREE_CODE (rhs[i]) == SSA_NAME)
4693 gimple *im = uaddc_cast (SSA_NAME_DEF_STMT (rhs[i]));
4694 im = uaddc_ne0 (im);
4695 if (uaddc_is_cplxpart (im, IMAGPART_EXPR))
4697 if (im1 == NULL)
4699 im1 = im;
4700 if (i != 0)
4701 std::swap (rhs[0], rhs[i]);
4703 else
4705 im2 = im;
4706 if (i != 1)
4707 std::swap (rhs[1], rhs[i]);
4708 break;
4712 /* If we don't find at least two, punt. */
4713 if (!im2)
4714 return false;
4715 /* Check they are __imag__ of .ADD_OVERFLOW or .SUB_OVERFLOW call results,
4716 either both .ADD_OVERFLOW or both .SUB_OVERFLOW and that we have
4717 uaddc5/usubc5 named pattern for the corresponding mode. */
4718 gimple *ovf1
4719 = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im1), 0));
4720 gimple *ovf2
4721 = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im2), 0));
4722 internal_fn ifn;
4723 if (!is_gimple_call (ovf1)
4724 || !gimple_call_internal_p (ovf1)
4725 || ((ifn = gimple_call_internal_fn (ovf1)) != IFN_ADD_OVERFLOW
4726 && ifn != IFN_SUB_OVERFLOW)
4727 || !gimple_call_internal_p (ovf2, ifn)
4728 || optab_handler (ifn == IFN_ADD_OVERFLOW ? uaddc5_optab : usubc5_optab,
4729 TYPE_MODE (type)) == CODE_FOR_nothing
4730 || (rhs[2]
4731 && optab_handler (code == PLUS_EXPR ? uaddc5_optab : usubc5_optab,
4732 TYPE_MODE (type)) == CODE_FOR_nothing))
4733 return false;
4734 tree arg1, arg2, arg3 = NULL_TREE;
4735 gimple *re1 = NULL, *re2 = NULL;
4736 /* On one of the two calls, one of the .ADD_OVERFLOW/.SUB_OVERFLOW arguments
4737 should be initialized from __real__ of the other of the two calls.
4738 Though, for .SUB_OVERFLOW, it has to be the first argument, not the
4739 second one. */
4740 for (int i = (ifn == IFN_ADD_OVERFLOW ? 1 : 0); i >= 0; --i)
4741 for (gimple *ovf = ovf1; ovf; ovf = (ovf == ovf1 ? ovf2 : NULL))
4743 tree arg = gimple_call_arg (ovf, i);
4744 if (TREE_CODE (arg) != SSA_NAME)
4745 continue;
4746 re1 = SSA_NAME_DEF_STMT (arg);
4747 if (uaddc_is_cplxpart (re1, REALPART_EXPR)
4748 && (SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (re1), 0))
4749 == (ovf == ovf1 ? ovf2 : ovf1)))
4751 if (ovf == ovf1)
4753 /* Make sure ovf2 is the .*_OVERFLOW call with argument
4754 initialized from __real__ of ovf1. */
4755 std::swap (rhs[0], rhs[1]);
4756 std::swap (im1, im2);
4757 std::swap (ovf1, ovf2);
4759 arg3 = gimple_call_arg (ovf, 1 - i);
4760 i = -1;
4761 break;
4764 if (!arg3)
4765 return false;
4766 arg1 = gimple_call_arg (ovf1, 0);
4767 arg2 = gimple_call_arg (ovf1, 1);
4768 if (!types_compatible_p (type, TREE_TYPE (arg1)))
4769 return false;
4770 int kind[2] = { 0, 0 };
4771 tree arg_im[2] = { NULL_TREE, NULL_TREE };
4772 /* At least one of arg2 and arg3 should have type compatible
4773 with arg1/rhs[0], and the other one should have value in [0, 1]
4774 range. If both are in [0, 1] range and type compatible with
4775 arg1/rhs[0], try harder to find after looking through casts,
4776 != 0 comparisons which one is initialized to __imag__ of
4777 .{ADD,SUB}_OVERFLOW or .U{ADD,SUB}C call results. */
4778 for (int i = 0; i < 2; ++i)
4780 tree arg = i == 0 ? arg2 : arg3;
4781 if (types_compatible_p (type, TREE_TYPE (arg)))
4782 kind[i] = 1;
4783 if (!INTEGRAL_TYPE_P (TREE_TYPE (arg))
4784 || (TYPE_PRECISION (TREE_TYPE (arg)) == 1
4785 && !TYPE_UNSIGNED (TREE_TYPE (arg))))
4786 continue;
4787 if (tree_zero_one_valued_p (arg))
4788 kind[i] |= 2;
4789 if (TREE_CODE (arg) == SSA_NAME)
4791 gimple *g = SSA_NAME_DEF_STMT (arg);
4792 if (gimple_assign_cast_p (g))
4794 tree op = gimple_assign_rhs1 (g);
4795 if (TREE_CODE (op) == SSA_NAME
4796 && INTEGRAL_TYPE_P (TREE_TYPE (op)))
4797 g = SSA_NAME_DEF_STMT (op);
4799 g = uaddc_ne0 (g);
4800 if (!uaddc_is_cplxpart (g, IMAGPART_EXPR))
4801 continue;
4802 arg_im[i] = gimple_assign_lhs (g);
4803 g = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (g), 0));
4804 if (!is_gimple_call (g) || !gimple_call_internal_p (g))
4805 continue;
4806 switch (gimple_call_internal_fn (g))
4808 case IFN_ADD_OVERFLOW:
4809 case IFN_SUB_OVERFLOW:
4810 case IFN_UADDC:
4811 case IFN_USUBC:
4812 break;
4813 default:
4814 continue;
4816 kind[i] |= 4;
4819 /* Make arg2 the one with compatible type and arg3 the one
4820      with [0, 1] range.  If both are true for both operands,
4821 prefer as arg3 result of __imag__ of some ifn. */
4822 if ((kind[0] & 1) == 0 || ((kind[1] & 1) != 0 && kind[0] > kind[1]))
4824 std::swap (arg2, arg3);
4825 std::swap (kind[0], kind[1]);
4826 std::swap (arg_im[0], arg_im[1]);
4828 if ((kind[0] & 1) == 0 || (kind[1] & 6) == 0)
4829 return false;
4830 if (!has_single_use (gimple_assign_lhs (im1))
4831 || !has_single_use (gimple_assign_lhs (im2))
4832 || !has_single_use (gimple_assign_lhs (re1))
4833 || num_imm_uses (gimple_call_lhs (ovf1)) != 2)
4834 return false;
4835 /* Check that ovf2's result is used in __real__ and set re2
4836 to that statement. */
4837 use_operand_p use_p;
4838 imm_use_iterator iter;
4839 tree lhs = gimple_call_lhs (ovf2);
4840 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4842 gimple *use_stmt = USE_STMT (use_p);
4843 if (is_gimple_debug (use_stmt))
4844 continue;
4845 if (use_stmt == im2)
4846 continue;
4847 if (re2)
4848 return false;
4849 if (!uaddc_is_cplxpart (use_stmt, REALPART_EXPR))
4850 return false;
4851 re2 = use_stmt;
4853 /* Build .UADDC/.USUBC call which will be placed before the stmt. */
4854 gimple_stmt_iterator gsi2 = gsi_for_stmt (ovf2);
4855 gimple *g;
4856 if ((kind[1] & 4) != 0 && types_compatible_p (type, TREE_TYPE (arg_im[1])))
4857 arg3 = arg_im[1];
4858 if ((kind[1] & 1) == 0)
4860 if (TREE_CODE (arg3) == INTEGER_CST)
4861 arg3 = fold_convert (type, arg3);
4862 else
4864 g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, arg3);
4865 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4866 arg3 = gimple_assign_lhs (g);
4869 g = gimple_build_call_internal (ifn == IFN_ADD_OVERFLOW
4870 ? IFN_UADDC : IFN_USUBC,
4871 3, arg1, arg2, arg3);
4872 tree nlhs = make_ssa_name (TREE_TYPE (lhs));
4873 gimple_call_set_lhs (g, nlhs);
4874 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4875 /* In the case where stmt is | or ^ of two overflow flags
4876 or addition of those, replace stmt with __imag__ of the above
4877 added call. In case of arg1 + arg2 + (ovf1 + ovf2) or
4878 arg1 - arg2 - (ovf1 + ovf2) just emit it before stmt. */
4879 tree ilhs = rhs[2] ? make_ssa_name (type) : gimple_assign_lhs (stmt);
4880 g = gimple_build_assign (ilhs, IMAGPART_EXPR,
4881 build1 (IMAGPART_EXPR, TREE_TYPE (ilhs), nlhs));
4882 if (rhs[2])
4883 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4884 else
4885 gsi_replace (gsi, g, true);
4886 /* Remove some statements which can't be kept in the IL because they
4887 use SSA_NAME whose setter is going to be removed too. */
4888 tree rhs1 = rhs[1];
4889 for (int i = 0; i < 2; i++)
4890 if (rhs1 == gimple_assign_lhs (im2))
4891 break;
4892 else
4894 g = SSA_NAME_DEF_STMT (rhs1);
4895 rhs1 = gimple_assign_rhs1 (g);
4896 gsi2 = gsi_for_stmt (g);
4897 gsi_remove (&gsi2, true);
4899 gcc_checking_assert (rhs1 == gimple_assign_lhs (im2));
4900 gsi2 = gsi_for_stmt (im2);
4901 gsi_remove (&gsi2, true);
4902 /* Replace the re2 statement with __real__ of the newly added
4903 .UADDC/.USUBC call. */
4904 if (re2)
4906 gsi2 = gsi_for_stmt (re2);
4907 tree rlhs = gimple_assign_lhs (re2);
4908 g = gimple_build_assign (rlhs, REALPART_EXPR,
4909 build1 (REALPART_EXPR, TREE_TYPE (rlhs), nlhs));
4910 gsi_replace (&gsi2, g, true);
4912 if (rhs[2])
4914 /* If this is the arg1 + arg2 + (ovf1 + ovf2) or
4915 arg1 - arg2 - (ovf1 + ovf2) case for the most significant limb,
4916 replace stmt with __real__ of another .UADDC/.USUBC call which
4917 handles the most significant limb. Overflow flag from this is
4918 ignored. */
4919 g = gimple_build_call_internal (code == PLUS_EXPR
4920 ? IFN_UADDC : IFN_USUBC,
4921 3, rhs[3], rhs[2], ilhs);
4922 nlhs = make_ssa_name (TREE_TYPE (lhs));
4923 gimple_call_set_lhs (g, nlhs);
4924 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4925 ilhs = gimple_assign_lhs (stmt);
4926 g = gimple_build_assign (ilhs, REALPART_EXPR,
4927 build1 (REALPART_EXPR, TREE_TYPE (ilhs), nlhs));
4928 gsi_replace (gsi, g, true);
4930 if (TREE_CODE (arg3) == SSA_NAME)
4932 /* When pattern recognizing the second least significant limb
4933 above (i.e. first pair of .{ADD,SUB}_OVERFLOW calls for one limb),
4934 check if the [0, 1] range argument (i.e. carry in) isn't the
4935 result of another .{ADD,SUB}_OVERFLOW call (one handling the
4936 least significant limb). Again look through casts and != 0. */
4937 gimple *im3 = SSA_NAME_DEF_STMT (arg3);
4938 for (int i = 0; i < 2; ++i)
4940 gimple *im4 = uaddc_cast (im3);
4941 if (im4 == im3)
4942 break;
4943 else
4944 im3 = im4;
4946 im3 = uaddc_ne0 (im3);
4947 if (uaddc_is_cplxpart (im3, IMAGPART_EXPR))
4949 gimple *ovf3
4950 = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im3), 0));
4951 if (gimple_call_internal_p (ovf3, ifn))
4953 lhs = gimple_call_lhs (ovf3);
4954 arg1 = gimple_call_arg (ovf3, 0);
4955 arg2 = gimple_call_arg (ovf3, 1);
4956 if (types_compatible_p (type, TREE_TYPE (TREE_TYPE (lhs)))
4957 && types_compatible_p (type, TREE_TYPE (arg1))
4958 && types_compatible_p (type, TREE_TYPE (arg2)))
4960 /* And if it is initialized from result of __imag__
4961 of .{ADD,SUB}_OVERFLOW call, replace that
4962 call with .U{ADD,SUB}C call with the same arguments,
4963 just 0 added as third argument. This isn't strictly
4964 necessary, .ADD_OVERFLOW (x, y) and .UADDC (x, y, 0)
4965 produce the same result, but may result in better
4966 generated code on some targets where the backend can
4967                 better prepare for how the result will be used.  */
4968 g = gimple_build_call_internal (ifn == IFN_ADD_OVERFLOW
4969 ? IFN_UADDC : IFN_USUBC,
4970 3, arg1, arg2,
4971 build_zero_cst (type));
4972 gimple_call_set_lhs (g, lhs);
4973 gsi2 = gsi_for_stmt (ovf3);
4974 gsi_replace (&gsi2, g, true);
4979 return true;
4982 /* Return true if target has support for divmod. */
4984 static bool
4985 target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
4987 /* If target supports hardware divmod insn, use it for divmod. */
4988 if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
4989 return true;
4991 /* Check if libfunc for divmod is available. */
4992 rtx libfunc = optab_libfunc (divmod_optab, mode);
4993 if (libfunc != NULL_RTX)
4995 /* If optab_handler exists for div_optab, perhaps in a wider mode,
4996 we don't want to use the libfunc even if it exists for given mode. */
4997 machine_mode div_mode;
4998 FOR_EACH_MODE_FROM (div_mode, mode)
4999 if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
5000 return false;
5002 return targetm.expand_divmod_libfunc != NULL;
5005 return false;
5008 /* Check if stmt is candidate for divmod transform. */
5010 static bool
5011 divmod_candidate_p (gassign *stmt)
5013 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
5014 machine_mode mode = TYPE_MODE (type);
5015 optab divmod_optab, div_optab;
5017 if (TYPE_UNSIGNED (type))
5019 divmod_optab = udivmod_optab;
5020 div_optab = udiv_optab;
5022 else
5024 divmod_optab = sdivmod_optab;
5025 div_optab = sdiv_optab;
5028 tree op1 = gimple_assign_rhs1 (stmt);
5029 tree op2 = gimple_assign_rhs2 (stmt);
5031 /* Disable the transform if either is a constant, since division-by-constant
5032 may have specialized expansion. */
5033 if (CONSTANT_CLASS_P (op1))
5034 return false;
5036 if (CONSTANT_CLASS_P (op2))
5038 if (integer_pow2p (op2))
5039 return false;
5041 if (element_precision (type) <= HOST_BITS_PER_WIDE_INT
5042 && element_precision (type) <= BITS_PER_WORD)
5043 return false;
5045       /* If the divisor is not a power of 2 and the precision is wider than
5046 HWI, expand_divmod punts on that, so in that case it is better
5047 to use divmod optab or libfunc. Similarly if choose_multiplier
5048 might need pre/post shifts of BITS_PER_WORD or more. */
5051 /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
5052 expand using the [su]divv optabs. */
5053 if (TYPE_OVERFLOW_TRAPS (type))
5054 return false;
5056 if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
5057 return false;
5059 return true;
5062 /* This function looks for:
5063 t1 = a TRUNC_DIV_EXPR b;
5064 t2 = a TRUNC_MOD_EXPR b;
5065 and transforms it to the following sequence:
5066 complex_tmp = DIVMOD (a, b);
5067         t1 = REALPART_EXPR(complex_tmp);
5068         t2 = IMAGPART_EXPR(complex_tmp);
5069 For conditions enabling the transform see divmod_candidate_p().
5071 The pass has three parts:
5072 1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
5073 other trunc_div_expr and trunc_mod_expr stmts.
5074 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
5075 to stmts vector.
5076 3) Insert DIVMOD call just before top_stmt and update entries in
5077         stmts vector to use the return value of DIVMOD (REALPART_EXPR for div,
5078 IMAGPART_EXPR for mod). */
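 
 /* Illustrative only: a C-source sketch, with hypothetical names, of the
    pattern described above.  Assuming the target or its runtime provides a
    divmod implementation for the mode (see target_supports_divmod_p), the
    division and the modulo below are expected to share one DIVMOD call.
 
      void
      quot_rem (int a, int b, int *q, int *r)
      {
        *q = a / b;    // TRUNC_DIV_EXPR
        *r = a % b;    // TRUNC_MOD_EXPR
      }  */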
5080 static bool
5081 convert_to_divmod (gassign *stmt)
5083 if (stmt_can_throw_internal (cfun, stmt)
5084 || !divmod_candidate_p (stmt))
5085 return false;
5087 tree op1 = gimple_assign_rhs1 (stmt);
5088 tree op2 = gimple_assign_rhs2 (stmt);
5090 imm_use_iterator use_iter;
5091 gimple *use_stmt;
5092 auto_vec<gimple *> stmts;
5094 gimple *top_stmt = stmt;
5095 basic_block top_bb = gimple_bb (stmt);
5097 /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
5098      at least stmt and possibly other trunc_div/trunc_mod stmts
5099 having same operands as stmt. */
5101 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
5103 if (is_gimple_assign (use_stmt)
5104 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
5105 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
5106 && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
5107 && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
5109 if (stmt_can_throw_internal (cfun, use_stmt))
5110 continue;
5112 basic_block bb = gimple_bb (use_stmt);
5114 if (bb == top_bb)
5116 if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
5117 top_stmt = use_stmt;
5119 else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
5121 top_bb = bb;
5122 top_stmt = use_stmt;
5127 tree top_op1 = gimple_assign_rhs1 (top_stmt);
5128 tree top_op2 = gimple_assign_rhs2 (top_stmt);
5130 stmts.safe_push (top_stmt);
5131 bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
5133    /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
5134 to stmts vector. The 2nd loop will always add stmt to stmts vector, since
5135 gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
5136      2nd loop ends up adding at least a single trunc_mod_expr stmt.  */
5138 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
5140 if (is_gimple_assign (use_stmt)
5141 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
5142 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
5143 && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
5144 && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
5146 if (use_stmt == top_stmt
5147 || stmt_can_throw_internal (cfun, use_stmt)
5148 || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
5149 continue;
5151 stmts.safe_push (use_stmt);
5152 if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
5153 div_seen = true;
5157 if (!div_seen)
5158 return false;
5160 /* Part 3: Create libcall to internal fn DIVMOD:
5161 divmod_tmp = DIVMOD (op1, op2). */
5163 gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
5164 tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
5165 call_stmt, "divmod_tmp");
5166 gimple_call_set_lhs (call_stmt, res);
5167 /* We rejected throwing statements above. */
5168 gimple_call_set_nothrow (call_stmt, true);
5170 /* Insert the call before top_stmt. */
5171 gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
5172 gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
5174 widen_mul_stats.divmod_calls_inserted++;
5176 /* Update all statements in stmts vector:
5177 lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
5178 lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */
5180 for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
5182 tree new_rhs;
5184 switch (gimple_assign_rhs_code (use_stmt))
5186 case TRUNC_DIV_EXPR:
5187 new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
5188 break;
5190 case TRUNC_MOD_EXPR:
5191 new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
5192 break;
5194 default:
5195 gcc_unreachable ();
5198 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
5199 gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
5200 update_stmt (use_stmt);
5203 return true;
5206 /* Process a single gimple assignment STMT, which has a RSHIFT_EXPR as
5207 its rhs, and try to convert it into a MULT_HIGHPART_EXPR. The return
5208 value is true iff we converted the statement. */
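 
 /* Illustrative only: a C-source sketch, with a hypothetical name, of the
    shifted widening multiply this function targets.  Assuming the target has
    a [su]mul_highpart pattern for the mode, the widening multiply plus the
    right shift by the operand precision is expected to become a single
    MULT_HIGHPART_EXPR.
 
      unsigned int
      mul_high (unsigned int a, unsigned int b)
      {
        return ((unsigned long long) a * b) >> 32;  // high half of the product
      }  */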
5210 static bool
5211 convert_mult_to_highpart (gassign *stmt, gimple_stmt_iterator *gsi)
5213 tree lhs = gimple_assign_lhs (stmt);
5214 tree stype = TREE_TYPE (lhs);
5215 tree sarg0 = gimple_assign_rhs1 (stmt);
5216 tree sarg1 = gimple_assign_rhs2 (stmt);
5218 if (TREE_CODE (stype) != INTEGER_TYPE
5219 || TREE_CODE (sarg1) != INTEGER_CST
5220 || TREE_CODE (sarg0) != SSA_NAME
5221 || !tree_fits_uhwi_p (sarg1)
5222 || !has_single_use (sarg0))
5223 return false;
5225 gassign *def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (sarg0));
5226 if (!def)
5227 return false;
5229 enum tree_code mcode = gimple_assign_rhs_code (def);
5230 if (mcode == NOP_EXPR)
5232 tree tmp = gimple_assign_rhs1 (def);
5233 if (TREE_CODE (tmp) != SSA_NAME || !has_single_use (tmp))
5234 return false;
5235 def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (tmp));
5236 if (!def)
5237 return false;
5238 mcode = gimple_assign_rhs_code (def);
5241 if (mcode != WIDEN_MULT_EXPR
5242 || gimple_bb (def) != gimple_bb (stmt))
5243 return false;
5244 tree mtype = TREE_TYPE (gimple_assign_lhs (def));
5245 if (TREE_CODE (mtype) != INTEGER_TYPE
5246 || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype))
5247 return false;
5249 tree mop1 = gimple_assign_rhs1 (def);
5250 tree mop2 = gimple_assign_rhs2 (def);
5251 tree optype = TREE_TYPE (mop1);
5252 bool unsignedp = TYPE_UNSIGNED (optype);
5253 unsigned int prec = TYPE_PRECISION (optype);
5255 if (unsignedp != TYPE_UNSIGNED (mtype)
5256 || TYPE_PRECISION (mtype) != 2 * prec)
5257 return false;
5259 unsigned HOST_WIDE_INT bits = tree_to_uhwi (sarg1);
5260 if (bits < prec || bits >= 2 * prec)
5261 return false;
5263 /* For the time being, require operands to have the same sign. */
5264 if (unsignedp != TYPE_UNSIGNED (TREE_TYPE (mop2)))
5265 return false;
5267 machine_mode mode = TYPE_MODE (optype);
5268 optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
5269 if (optab_handler (tab, mode) == CODE_FOR_nothing)
5270 return false;
5272 location_t loc = gimple_location (stmt);
5273 tree highpart1 = build_and_insert_binop (gsi, loc, "highparttmp",
5274 MULT_HIGHPART_EXPR, mop1, mop2);
5275 tree highpart2 = highpart1;
5276 tree ntype = optype;
5278 if (TYPE_UNSIGNED (stype) != TYPE_UNSIGNED (optype))
5280 ntype = TYPE_UNSIGNED (stype) ? unsigned_type_for (optype)
5281 : signed_type_for (optype);
5282 highpart2 = build_and_insert_cast (gsi, loc, ntype, highpart1);
5284 if (bits > prec)
5285 highpart2 = build_and_insert_binop (gsi, loc, "highparttmp",
5286 RSHIFT_EXPR, highpart2,
5287 build_int_cst (ntype, bits - prec));
5289 gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, highpart2);
5290 gsi_replace (gsi, new_stmt, true);
5292 widen_mul_stats.highpart_mults_inserted++;
5293 return true;
5296 /* If target has spaceship<MODE>3 expander, pattern recognize
5297 <bb 2> [local count: 1073741824]:
5298 if (a_2(D) == b_3(D))
5299 goto <bb 6>; [34.00%]
5300 else
5301 goto <bb 3>; [66.00%]
5303 <bb 3> [local count: 708669601]:
5304 if (a_2(D) < b_3(D))
5305 goto <bb 6>; [1.04%]
5306 else
5307 goto <bb 4>; [98.96%]
5309 <bb 4> [local count: 701299439]:
5310 if (a_2(D) > b_3(D))
5311 goto <bb 5>; [48.89%]
5312 else
5313 goto <bb 6>; [51.11%]
5315 <bb 5> [local count: 342865295]:
5317 <bb 6> [local count: 1073741824]:
5318 and turn it into:
5319 <bb 2> [local count: 1073741824]:
5320 _1 = .SPACESHIP (a_2(D), b_3(D));
5321 if (_1 == 0)
5322 goto <bb 6>; [34.00%]
5323 else
5324 goto <bb 3>; [66.00%]
5326 <bb 3> [local count: 708669601]:
5327 if (_1 == -1)
5328 goto <bb 6>; [1.04%]
5329 else
5330 goto <bb 4>; [98.96%]
5332 <bb 4> [local count: 701299439]:
5333 if (_1 == 1)
5334 goto <bb 5>; [48.89%]
5335 else
5336 goto <bb 6>; [51.11%]
5338 <bb 5> [local count: 342865295]:
5340 <bb 6> [local count: 1073741824]:
5341 so that the backend can emit optimal comparison and
5342 conditional jump sequence. */
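 
 /* Illustrative only: a C++-source sketch, with a hypothetical name, that
    typically produces the comparison cascade shown above for floating-point
    operands; given a spaceship<MODE>3 expander, the three conditional jumps
    are expected to be rewritten to test the result of one .SPACESHIP call.
 
      #include <compare>
 
      std::partial_ordering
      cmp (double a, double b)
      {
        return a <=> b;  // lowered to ==, <, > tests on the same operands
      }  */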
5344 static void
5345 optimize_spaceship (gcond *stmt)
5347 enum tree_code code = gimple_cond_code (stmt);
5348 if (code != EQ_EXPR && code != NE_EXPR)
5349 return;
5350 tree arg1 = gimple_cond_lhs (stmt);
5351 tree arg2 = gimple_cond_rhs (stmt);
5352 if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1))
5353 || optab_handler (spaceship_optab,
5354 TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing
5355 || operand_equal_p (arg1, arg2, 0))
5356 return;
5358 basic_block bb0 = gimple_bb (stmt), bb1, bb2 = NULL;
5359 edge em1 = NULL, e1 = NULL, e2 = NULL;
5360 bb1 = EDGE_SUCC (bb0, 1)->dest;
5361 if (((EDGE_SUCC (bb0, 0)->flags & EDGE_TRUE_VALUE) != 0) ^ (code == EQ_EXPR))
5362 bb1 = EDGE_SUCC (bb0, 0)->dest;
5364 gcond *g = safe_dyn_cast <gcond *> (*gsi_last_bb (bb1));
5365 if (g == NULL
5366 || !single_pred_p (bb1)
5367 || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
5368 ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
5369 : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
5370 || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
5371 || !cond_only_block_p (bb1))
5372 return;
5374 enum tree_code ccode = (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
5375 ? LT_EXPR : GT_EXPR);
5376 switch (gimple_cond_code (g))
5378 case LT_EXPR:
5379 case LE_EXPR:
5380 break;
5381 case GT_EXPR:
5382 case GE_EXPR:
5383 ccode = ccode == LT_EXPR ? GT_EXPR : LT_EXPR;
5384 break;
5385 default:
5386 return;
5389 for (int i = 0; i < 2; ++i)
5391 /* With NaNs, </<=/>/>= are false, so we need to look for the
5392 third comparison on the false edge from whatever non-equality
5393 comparison the second comparison is. */
5394 if (HONOR_NANS (TREE_TYPE (arg1))
5395 && (EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0)
5396 continue;
5398 bb2 = EDGE_SUCC (bb1, i)->dest;
5399 g = safe_dyn_cast <gcond *> (*gsi_last_bb (bb2));
5400 if (g == NULL
5401 || !single_pred_p (bb2)
5402 || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
5403 ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
5404 : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
5405 || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
5406 || !cond_only_block_p (bb2)
5407 || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
5408 continue;
5410 enum tree_code ccode2
5411 = (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR);
5412 switch (gimple_cond_code (g))
5414 case LT_EXPR:
5415 case LE_EXPR:
5416 break;
5417 case GT_EXPR:
5418 case GE_EXPR:
5419 ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
5420 break;
5421 default:
5422 continue;
5424 if (HONOR_NANS (TREE_TYPE (arg1)) && ccode == ccode2)
5425 continue;
5427 if ((ccode == LT_EXPR)
5428 ^ ((EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0))
5430 em1 = EDGE_SUCC (bb1, 1 - i);
5431 e1 = EDGE_SUCC (bb2, 0);
5432 e2 = EDGE_SUCC (bb2, 1);
5433 if ((ccode2 == LT_EXPR) ^ ((e1->flags & EDGE_TRUE_VALUE) == 0))
5434 std::swap (e1, e2);
5436 else
5438 e1 = EDGE_SUCC (bb1, 1 - i);
5439 em1 = EDGE_SUCC (bb2, 0);
5440 e2 = EDGE_SUCC (bb2, 1);
5441 if ((ccode2 != LT_EXPR) ^ ((em1->flags & EDGE_TRUE_VALUE) == 0))
5442 std::swap (em1, e2);
5444 break;
5447 if (em1 == NULL)
5449 if ((ccode == LT_EXPR)
5450 ^ ((EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0))
5452 em1 = EDGE_SUCC (bb1, 1);
5453 e1 = EDGE_SUCC (bb1, 0);
5454 e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
5456 else
5458 em1 = EDGE_SUCC (bb1, 0);
5459 e1 = EDGE_SUCC (bb1, 1);
5460 e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
5464 gcall *gc = gimple_build_call_internal (IFN_SPACESHIP, 2, arg1, arg2);
5465 tree lhs = make_ssa_name (integer_type_node);
5466 gimple_call_set_lhs (gc, lhs);
5467 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5468 gsi_insert_before (&gsi, gc, GSI_SAME_STMT);
5470 gimple_cond_set_lhs (stmt, lhs);
5471 gimple_cond_set_rhs (stmt, integer_zero_node);
5472 update_stmt (stmt);
5474 gcond *cond = as_a <gcond *> (*gsi_last_bb (bb1));
5475 gimple_cond_set_lhs (cond, lhs);
5476 if (em1->src == bb1 && e2 != em1)
5478 gimple_cond_set_rhs (cond, integer_minus_one_node);
5479 gimple_cond_set_code (cond, (em1->flags & EDGE_TRUE_VALUE)
5480 ? EQ_EXPR : NE_EXPR);
5482 else
5484 gcc_assert (e1->src == bb1 && e2 != e1);
5485 gimple_cond_set_rhs (cond, integer_one_node);
5486 gimple_cond_set_code (cond, (e1->flags & EDGE_TRUE_VALUE)
5487 ? EQ_EXPR : NE_EXPR);
5489 update_stmt (cond);
5491 if (e2 != e1 && e2 != em1)
5493 cond = as_a <gcond *> (*gsi_last_bb (bb2));
5494 gimple_cond_set_lhs (cond, lhs);
5495 if (em1->src == bb2)
5496 gimple_cond_set_rhs (cond, integer_minus_one_node);
5497 else
5499 gcc_assert (e1->src == bb2);
5500 gimple_cond_set_rhs (cond, integer_one_node);
5502 gimple_cond_set_code (cond,
5503 (e2->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR);
5504 update_stmt (cond);
5507 wide_int wm1 = wi::minus_one (TYPE_PRECISION (integer_type_node));
5508 wide_int w2 = wi::two (TYPE_PRECISION (integer_type_node));
5509 value_range vr (TREE_TYPE (lhs), wm1, w2);
5510 set_range_info (lhs, vr);
5514 /* Find integer multiplications where the operands are extended from
5515 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
5516 or MULT_HIGHPART_EXPR where appropriate. */
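 
 /* Illustrative only: the canonical widening-multiply source pattern, with a
    hypothetical name, handled by this pass.  Assuming the target provides a
    widening multiply pattern for the mode pair, the two sign extensions plus
    the MULT_EXPR below are expected to become a single WIDEN_MULT_EXPR.
 
      long long
      widen_mul (int a, int b)
      {
        return (long long) a * b;  // operands extended from int
      }  */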
5518 namespace {
5520 const pass_data pass_data_optimize_widening_mul =
5522 GIMPLE_PASS, /* type */
5523 "widening_mul", /* name */
5524 OPTGROUP_NONE, /* optinfo_flags */
5525 TV_TREE_WIDEN_MUL, /* tv_id */
5526 PROP_ssa, /* properties_required */
5527 0, /* properties_provided */
5528 0, /* properties_destroyed */
5529 0, /* todo_flags_start */
5530 TODO_update_ssa, /* todo_flags_finish */
5533 class pass_optimize_widening_mul : public gimple_opt_pass
5535 public:
5536 pass_optimize_widening_mul (gcc::context *ctxt)
5537 : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
5540 /* opt_pass methods: */
5541 bool gate (function *) final override
5543 return flag_expensive_optimizations && optimize;
5546 unsigned int execute (function *) final override;
5548 }; // class pass_optimize_widening_mul
5550 /* Walker class to perform the transformation in reverse dominance order. */
5552 class math_opts_dom_walker : public dom_walker
5554 public:
5555 /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
5556      if walking modifies the CFG.  */
5558 math_opts_dom_walker (bool *cfg_changed_p)
5559 : dom_walker (CDI_DOMINATORS), m_last_result_set (),
5560 m_cfg_changed_p (cfg_changed_p) {}
5562 /* The actual actions performed in the walk. */
5564 void after_dom_children (basic_block) final override;
5566 /* Set of results of chains of multiply and add statement combinations that
5567 were not transformed into FMAs because of active deferring. */
5568 hash_set<tree> m_last_result_set;
5570 /* Pointer to a flag of the user that needs to be set if CFG has been
5571 modified. */
5572 bool *m_cfg_changed_p;
5575 void
5576 math_opts_dom_walker::after_dom_children (basic_block bb)
5578 gimple_stmt_iterator gsi;
5580 fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
5582 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
5584 gimple *stmt = gsi_stmt (gsi);
5585 enum tree_code code;
5587 if (is_gimple_assign (stmt))
5589 code = gimple_assign_rhs_code (stmt);
5590 switch (code)
5592 case MULT_EXPR:
5593 if (!convert_mult_to_widen (stmt, &gsi)
5594 && !convert_expand_mult_copysign (stmt, &gsi)
5595 && convert_mult_to_fma (stmt,
5596 gimple_assign_rhs1 (stmt),
5597 gimple_assign_rhs2 (stmt),
5598 &fma_state))
5600 gsi_remove (&gsi, true);
5601 release_defs (stmt);
5602 continue;
5604 match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
5605 break;
5607 case PLUS_EXPR:
5608 case MINUS_EXPR:
5609 if (!convert_plusminus_to_widen (&gsi, stmt, code))
5611 match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
5612 if (gsi_stmt (gsi) == stmt)
5613 match_uaddc_usubc (&gsi, stmt, code);
5615 break;
5617 case BIT_NOT_EXPR:
5618 if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
5619 continue;
5620 break;
5622 case TRUNC_MOD_EXPR:
5623 convert_to_divmod (as_a<gassign *> (stmt));
5624 break;
5626 case RSHIFT_EXPR:
5627 convert_mult_to_highpart (as_a<gassign *> (stmt), &gsi);
5628 break;
5630 case BIT_IOR_EXPR:
5631 case BIT_XOR_EXPR:
5632 match_uaddc_usubc (&gsi, stmt, code);
5633 break;
5635 default:;
5638 else if (is_gimple_call (stmt))
5640 switch (gimple_call_combined_fn (stmt))
5642 CASE_CFN_POW:
5643 if (gimple_call_lhs (stmt)
5644 && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
5645 && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
5646 &dconst2)
5647 && convert_mult_to_fma (stmt,
5648 gimple_call_arg (stmt, 0),
5649 gimple_call_arg (stmt, 0),
5650 &fma_state))
5652 unlink_stmt_vdef (stmt);
5653 if (gsi_remove (&gsi, true)
5654 && gimple_purge_dead_eh_edges (bb))
5655 *m_cfg_changed_p = true;
5656 release_defs (stmt);
5657 continue;
5659 break;
5661 case CFN_COND_MUL:
5662 if (convert_mult_to_fma (stmt,
5663 gimple_call_arg (stmt, 1),
5664 gimple_call_arg (stmt, 2),
5665 &fma_state,
5666 gimple_call_arg (stmt, 0)))
5669 gsi_remove (&gsi, true);
5670 release_defs (stmt);
5671 continue;
5673 break;
5675 case CFN_COND_LEN_MUL:
5676 if (convert_mult_to_fma (stmt,
5677 gimple_call_arg (stmt, 1),
5678 gimple_call_arg (stmt, 2),
5679 &fma_state,
5680 gimple_call_arg (stmt, 0),
5681 gimple_call_arg (stmt, 4),
5682 gimple_call_arg (stmt, 5)))
5685 gsi_remove (&gsi, true);
5686 release_defs (stmt);
5687 continue;
5689 break;
5691 case CFN_LAST:
5692 cancel_fma_deferring (&fma_state);
5693 break;
5695 default:
5696 break;
5699 else if (gimple_code (stmt) == GIMPLE_COND)
5700 optimize_spaceship (as_a <gcond *> (stmt));
5701 gsi_next (&gsi);
5703 if (fma_state.m_deferring_p
5704 && fma_state.m_initial_phi)
5706 gcc_checking_assert (fma_state.m_last_result);
5707 if (!last_fma_candidate_feeds_initial_phi (&fma_state,
5708 &m_last_result_set))
5709 cancel_fma_deferring (&fma_state);
5710 else
5711 m_last_result_set.add (fma_state.m_last_result);
5716 unsigned int
5717 pass_optimize_widening_mul::execute (function *fun)
5719 bool cfg_changed = false;
5721 memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
5722 calculate_dominance_info (CDI_DOMINATORS);
5723 renumber_gimple_stmt_uids (cfun);
5725 math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5727 statistics_counter_event (fun, "widening multiplications inserted",
5728 widen_mul_stats.widen_mults_inserted);
5729 statistics_counter_event (fun, "widening maccs inserted",
5730 widen_mul_stats.maccs_inserted);
5731 statistics_counter_event (fun, "fused multiply-adds inserted",
5732 widen_mul_stats.fmas_inserted);
5733 statistics_counter_event (fun, "divmod calls inserted",
5734 widen_mul_stats.divmod_calls_inserted);
5735 statistics_counter_event (fun, "highpart multiplications inserted",
5736 widen_mul_stats.highpart_mults_inserted);
5738 return cfg_changed ? TODO_cleanup_cfg : 0;
5741 } // anon namespace
5743 gimple_opt_pass *
5744 make_pass_optimize_widening_mul (gcc::context *ctxt)
5746 return new pass_optimize_widening_mul (ctxt);