gcc/tree-ssa-math-opts.c
1 /* Global, SSA-based optimizations using mathematical identities.
2 Copyright (C) 2005-2022 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* Currently, the only mini-pass in this file tries to CSE reciprocal
21 operations. These are common in sequences such as this one:
23 modulus = sqrt(x*x + y*y + z*z);
24 x = x / modulus;
25 y = y / modulus;
26 z = z / modulus;
28 that can be optimized to
30 modulus = sqrt(x*x + y*y + z*z);
31 rmodulus = 1.0 / modulus;
32 x = x * rmodulus;
33 y = y * rmodulus;
34 z = z * rmodulus;
36 We do this for loop invariant divisors, and with this pass whenever
37 we notice that a division has the same divisor multiple times.
39 Of course, like in PRE, we don't insert a division if a dominator
40 already has one. However, this cannot be done as an extension of
41 PRE for several reasons.
43 First of all, experiments showed that the
44 transformation is not always useful if there are only two divisions
45 by the same divisor. This is probably because modern processors
46 can pipeline the divisions; on older, in-order processors it should
47 still be effective to optimize two divisions by the same number.
48 We make this a param, and it shall be called N in the remainder of
49 this comment.
51 Second, if trapping math is active, we have less freedom on where
52 to insert divisions: we can only do so in basic blocks that already
53 contain one. (If divisions don't trap, we can instead insert
54 divisions elsewhere, namely in blocks that are common dominators
55 of those that have the division.)
57 We really don't want to compute the reciprocal unless a division will
58 be found. To do this, we won't insert the division in a basic block
59 that has fewer than N divisions *post-dominating* it.
61 The algorithm constructs a subset of the dominator tree, holding the
62 blocks containing the divisions and the common dominators to them,
63 and walks it twice. The first walk is in post-order, and it annotates
64 each block with the number of divisions that post-dominate it: this
65 gives information on where divisions can be inserted profitably.
66 The second walk is in pre-order, and it inserts divisions as explained
67 above, and replaces divisions by multiplications.
69 In the best case, the cost of the pass is O(n_statements). In the
70 worst case, the cost is due to creating the dominator tree subset,
71 with a cost of O(n_basic_blocks ^ 2); however this can only happen
72 for n_statements / n_basic_blocks statements. So, the amortized cost
73 of creating the dominator tree subset is O(n_basic_blocks) and the
74 worst-case cost of the pass is O(n_statements * n_basic_blocks).
76 More practically, the cost will be small because there are few
77 divisions, and they tend to be in the same basic block, so insert_bb
78 is called very few times.
80 If we did this using domwalk.c, an efficient implementation would have
81 to work on all the variables in a single pass, because we could not
82 work on just a subset of the dominator tree, as we do now, and the
83 cost would also be something like O(n_statements * n_basic_blocks).
84 The data structures would be more complex in order to work on all the
85 variables in a single pass. */
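/* A rough illustrative sketch (hypothetical blocks, assuming N == 2 and
   non-trapping math): if BB2 contains a = x / d and a later block BB3,
   which post-dominates BB2, contains b = y / d, then two divisions by d
   post-dominate BB2.  Its merit reaches N, so the pass emits
   t = 1.0 / d in BB2 ahead of the first division and rewrites the uses
   into a = x * t and b = y * t.  With trapping math BB2 is still a
   valid insertion point, because it already contains a division.  */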
87 #include "config.h"
88 #include "system.h"
89 #include "coretypes.h"
90 #include "backend.h"
91 #include "target.h"
92 #include "rtl.h"
93 #include "tree.h"
94 #include "gimple.h"
95 #include "predict.h"
96 #include "alloc-pool.h"
97 #include "tree-pass.h"
98 #include "ssa.h"
99 #include "optabs-tree.h"
100 #include "gimple-pretty-print.h"
101 #include "alias.h"
102 #include "fold-const.h"
103 #include "gimple-fold.h"
104 #include "gimple-iterator.h"
105 #include "gimplify.h"
106 #include "gimplify-me.h"
107 #include "stor-layout.h"
108 #include "tree-cfg.h"
109 #include "tree-dfa.h"
110 #include "tree-ssa.h"
111 #include "builtins.h"
112 #include "internal-fn.h"
113 #include "case-cfn-macros.h"
114 #include "optabs-libfuncs.h"
115 #include "tree-eh.h"
116 #include "targhooks.h"
117 #include "domwalk.h"
118 #include "tree-ssa-math-opts.h"
120 /* This structure represents one basic block that either computes a
121 division, or is a common dominator for basic blocks that compute a
122 division. */
123 struct occurrence {
124 /* The basic block represented by this structure. */
125 basic_block bb = basic_block();
127 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
128 inserted in BB. */
129 tree recip_def = tree();
131 /* If non-NULL, the SSA_NAME holding the definition for a squared
132 reciprocal inserted in BB. */
133 tree square_recip_def = tree();
135 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
136 was inserted in BB. */
137 gimple *recip_def_stmt = nullptr;
139 /* Pointer to a list of "struct occurrence"s for blocks dominated
140 by BB. */
141 struct occurrence *children = nullptr;
143 /* Pointer to the next "struct occurrence" in the list of blocks
144 sharing a common dominator. */
145 struct occurrence *next = nullptr;
147 /* The number of divisions that are in BB before compute_merit. The
148 number of divisions that are in BB or post-dominate it after
149 compute_merit. */
150 int num_divisions = 0;
152 /* True if the basic block has a division, false if it is a common
153 dominator for basic blocks that do. If it is false and trapping
154 math is active, BB is not a candidate for inserting a reciprocal. */
155 bool bb_has_division = false;
157 /* Construct a struct occurrence for basic block BB, whose
158 children list is headed by CHILDREN. */
159 occurrence (basic_block bb, struct occurrence *children)
160 : bb (bb), children (children)
162 bb->aux = this;
165 /* Destroy a struct occurrence and remove it from its basic block. */
166 ~occurrence ()
168 bb->aux = nullptr;
171 /* Allocate memory for a struct occurrence from OCC_POOL. */
172 static void* operator new (size_t);
174 /* Return memory for a struct occurrence to OCC_POOL. */
175 static void operator delete (void*, size_t);
178 static struct
180 /* Number of 1.0/X ops inserted. */
181 int rdivs_inserted;
183 /* Number of 1.0/FUNC ops inserted. */
184 int rfuncs_inserted;
185 } reciprocal_stats;
187 static struct
189 /* Number of cexpi calls inserted. */
190 int inserted;
192 /* Number of conversions removed. */
193 int conv_removed;
195 } sincos_stats;
197 static struct
199 /* Number of widening multiplication ops inserted. */
200 int widen_mults_inserted;
202 /* Number of integer multiply-and-accumulate ops inserted. */
203 int maccs_inserted;
205 /* Number of fp fused multiply-add ops inserted. */
206 int fmas_inserted;
208 /* Number of divmod calls inserted. */
209 int divmod_calls_inserted;
211 /* Number of highpart multiplication ops inserted. */
212 int highpart_mults_inserted;
213 } widen_mul_stats;
215 /* The instance of "struct occurrence" representing the highest
216 interesting block in the dominator tree. */
217 static struct occurrence *occ_head;
219 /* Allocation pool for getting instances of "struct occurrence". */
220 static object_allocator<occurrence> *occ_pool;
222 void* occurrence::operator new (size_t n)
224 gcc_assert (n == sizeof(occurrence));
225 return occ_pool->allocate_raw ();
228 void occurrence::operator delete (void *occ, size_t n)
230 gcc_assert (n == sizeof(occurrence));
231 occ_pool->remove_raw (occ);
234 /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
235 list of "struct occurrence"s, one per basic block, having IDOM as
236 their common dominator.
238 We try to insert NEW_OCC as deep as possible in the tree, and we also
239 insert any other block that is a common dominator for BB and one
240 block already in the tree. */
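/* An illustrative walk through the cases below (hypothetical blocks):
   suppose the list under IDOM already holds an occurrence for BB4 and
   we insert NEW_OCC for BB6.  If BB6 dominates BB4, BB4's occurrence
   becomes a child of NEW_OCC.  If BB4 dominates BB6, we recurse into
   BB4's children.  If their nearest common dominator is some BB2
   distinct from both and from IDOM, a fresh occurrence is created for
   BB2, BB4 and BB6 become its children, and the scan continues with
   BB2 in place of BB6 so that further blocks dominated by BB2 are
   collected as well.  */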
242 static void
243 insert_bb (struct occurrence *new_occ, basic_block idom,
244 struct occurrence **p_head)
246 struct occurrence *occ, **p_occ;
248 for (p_occ = p_head; (occ = *p_occ) != NULL; )
250 basic_block bb = new_occ->bb, occ_bb = occ->bb;
251 basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
252 if (dom == bb)
254 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
255 from its list. */
256 *p_occ = occ->next;
257 occ->next = new_occ->children;
258 new_occ->children = occ;
260 /* Try the next block (it may as well be dominated by BB). */
263 else if (dom == occ_bb)
265 /* OCC_BB dominates BB. Tail recurse to look deeper. */
266 insert_bb (new_occ, dom, &occ->children);
267 return;
270 else if (dom != idom)
272 gcc_assert (!dom->aux);
274 /* There is a dominator between IDOM and BB, add it and make
275 two children out of NEW_OCC and OCC. First, remove OCC from
276 its list. */
277 *p_occ = occ->next;
278 new_occ->next = occ;
279 occ->next = NULL;
281 /* None of the previous blocks has DOM as a dominator: if we tail
282 recursed, we would reexamine them uselessly. Just switch BB with
283 DOM, and go on looking for blocks dominated by DOM. */
284 new_occ = new occurrence (dom, new_occ);
287 else
289 /* Nothing special, go on with the next element. */
290 p_occ = &occ->next;
294 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
295 new_occ->next = *p_head;
296 *p_head = new_occ;
299 /* Register that we found a division in BB.
300 IMPORTANCE is a measure of how much weighting to give
301 that division. Use IMPORTANCE = 2 to register a single
302 division. If the division is going to be found multiple
303 times use 1 (as it is with squares). */
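/* Worked example of the weighting: a plain division a / x is registered
   once with IMPORTANCE 2, contributing 2 to num_divisions.  A division
   a / (x * x) is reached twice by the counting loop in
   execute_cse_reciprocals_1 (once per use of x in the square) and is
   registered twice with IMPORTANCE 1, again contributing 2 in total.
   insert_reciprocals later halves num_divisions to recover the real
   division count before comparing it against the target threshold.  */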
305 static inline void
306 register_division_in (basic_block bb, int importance)
308 struct occurrence *occ;
310 occ = (struct occurrence *) bb->aux;
311 if (!occ)
313 occ = new occurrence (bb, NULL);
314 insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
317 occ->bb_has_division = true;
318 occ->num_divisions += importance;
322 /* Compute the number of divisions that postdominate each block in OCC and
323 its children. */
325 static void
326 compute_merit (struct occurrence *occ)
328 struct occurrence *occ_child;
329 basic_block dom = occ->bb;
331 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
333 basic_block bb;
334 if (occ_child->children)
335 compute_merit (occ_child);
337 if (flag_exceptions)
338 bb = single_noncomplex_succ (dom);
339 else
340 bb = dom;
342 if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
343 occ->num_divisions += occ_child->num_divisions;
348 /* Return whether USE_STMT is a floating-point division by DEF. */
349 static inline bool
350 is_division_by (gimple *use_stmt, tree def)
352 return is_gimple_assign (use_stmt)
353 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
354 && gimple_assign_rhs2 (use_stmt) == def
355 /* Do not recognize x / x as valid division, as we are getting
356 confused later by replacing all immediate uses of x in such
357 a stmt. */
358 && gimple_assign_rhs1 (use_stmt) != def
359 && !stmt_can_throw_internal (cfun, use_stmt);
362 /* Return TRUE if USE_STMT is a multiplication of DEF by A. */
363 static inline bool
364 is_mult_by (gimple *use_stmt, tree def, tree a)
366 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
367 && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
369 tree op0 = gimple_assign_rhs1 (use_stmt);
370 tree op1 = gimple_assign_rhs2 (use_stmt);
372 return (op0 == def && op1 == a)
373 || (op0 == a && op1 == def);
375 return 0;
378 /* Return whether USE_STMT is DEF * DEF. */
379 static inline bool
380 is_square_of (gimple *use_stmt, tree def)
382 return is_mult_by (use_stmt, def, def);
385 /* Return whether USE_STMT is a floating-point division by
386 DEF * DEF. */
387 static inline bool
388 is_division_by_square (gimple *use_stmt, tree def)
390 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
391 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
392 && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
393 && !stmt_can_throw_internal (cfun, use_stmt))
395 tree denominator = gimple_assign_rhs2 (use_stmt);
396 if (TREE_CODE (denominator) == SSA_NAME)
397 return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
399 return 0;
402 /* Walk the subset of the dominator tree rooted at OCC, setting the
403 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
404 the given basic block. The field may be left NULL, of course,
405 if it is not possible or profitable to do the optimization.
407 DEF_GSI is an iterator pointing at the statement defining DEF.
408 If RECIP_DEF is set, a dominator already has a computation that can
409 be used.
411 If should_insert_square_recip is set, then this also inserts
412 the square of the reciprocal immediately after the definition
413 of the reciprocal. */
415 static void
416 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
417 tree def, tree recip_def, tree square_recip_def,
418 int should_insert_square_recip, int threshold)
420 tree type;
421 gassign *new_stmt, *new_square_stmt;
422 gimple_stmt_iterator gsi;
423 struct occurrence *occ_child;
425 if (!recip_def
426 && (occ->bb_has_division || !flag_trapping_math)
427 /* Divide by two as all divisions are counted twice in
428 the costing loop. */
429 && occ->num_divisions / 2 >= threshold)
431 /* Make a variable with the replacement and substitute it. */
432 type = TREE_TYPE (def);
433 recip_def = create_tmp_reg (type, "reciptmp");
434 new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
435 build_one_cst (type), def);
437 if (should_insert_square_recip)
439 square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
440 new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
441 recip_def, recip_def);
444 if (occ->bb_has_division)
446 /* Case 1: insert before an existing division. */
447 gsi = gsi_after_labels (occ->bb);
448 while (!gsi_end_p (gsi)
449 && (!is_division_by (gsi_stmt (gsi), def))
450 && (!is_division_by_square (gsi_stmt (gsi), def)))
451 gsi_next (&gsi);
453 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
454 if (should_insert_square_recip)
455 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
457 else if (def_gsi && occ->bb == gsi_bb (*def_gsi))
459 /* Case 2: insert right after the definition. Note that this will
460 never happen if the definition statement can throw, because in
461 that case the sole successor of the statement's basic block will
462 dominate all the uses as well. */
463 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
464 if (should_insert_square_recip)
465 gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
467 else
469 /* Case 3: insert in a basic block not containing defs/uses. */
470 gsi = gsi_after_labels (occ->bb);
471 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
472 if (should_insert_square_recip)
473 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
476 reciprocal_stats.rdivs_inserted++;
478 occ->recip_def_stmt = new_stmt;
481 occ->recip_def = recip_def;
482 occ->square_recip_def = square_recip_def;
483 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
484 insert_reciprocals (def_gsi, occ_child, def, recip_def,
485 square_recip_def, should_insert_square_recip,
486 threshold);
489 /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
490 Take as argument the use for (x * x). */
491 static inline void
492 replace_reciprocal_squares (use_operand_p use_p)
494 gimple *use_stmt = USE_STMT (use_p);
495 basic_block bb = gimple_bb (use_stmt);
496 struct occurrence *occ = (struct occurrence *) bb->aux;
498 if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
499 && occ->recip_def)
501 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
502 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
503 gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
504 SET_USE (use_p, occ->square_recip_def);
505 fold_stmt_inplace (&gsi);
506 update_stmt (use_stmt);
511 /* Replace the division at USE_P with a multiplication by the reciprocal, if
512 possible. */
514 static inline void
515 replace_reciprocal (use_operand_p use_p)
517 gimple *use_stmt = USE_STMT (use_p);
518 basic_block bb = gimple_bb (use_stmt);
519 struct occurrence *occ = (struct occurrence *) bb->aux;
521 if (optimize_bb_for_speed_p (bb)
522 && occ->recip_def && use_stmt != occ->recip_def_stmt)
524 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
525 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
526 SET_USE (use_p, occ->recip_def);
527 fold_stmt_inplace (&gsi);
528 update_stmt (use_stmt);
533 /* Free OCC and return one more "struct occurrence" to be freed. */
535 static struct occurrence *
536 free_bb (struct occurrence *occ)
538 struct occurrence *child, *next;
540 /* First get the two pointers hanging off OCC. */
541 next = occ->next;
542 child = occ->children;
543 delete occ;
545 /* Now ensure that we don't recurse unless it is necessary. */
546 if (!child)
547 return next;
548 else
550 while (next)
551 next = free_bb (next);
553 return child;
557 /* Transform sequences like
558 t = sqrt (a)
559 x = 1.0 / t;
560 r1 = x * x;
561 r2 = a * x;
562 into:
563 t = sqrt (a)
564 r1 = 1.0 / a;
565 r2 = t;
566 x = r1 * r2;
567 depending on the uses of x, r1, r2. This removes one multiplication and
568 allows the sqrt and division operations to execute in parallel.
569 DEF_GSI is the gsi of the initial division by sqrt that defines
570 DEF (x in the example above). */
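/* The identities behind the rewrite, spelled out (valid only under
   -funsafe-math-optimizations, ignoring rounding and special values):
   with t = sqrt (a) and x = 1.0 / t,
     r1 = x * x = 1.0 / (sqrt (a) * sqrt (a)) = 1.0 / a
     r2 = a * x = a / sqrt (a)                = sqrt (a) = t
   so r1 becomes a division that no longer waits for the sqrt result,
   r2 is simply the sqrt itself, and x, if still needed elsewhere, is
   recovered as r1 * r2 = 1.0 / sqrt (a).  */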
572 static void
573 optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
575 gimple *use_stmt;
576 imm_use_iterator use_iter;
577 gimple *stmt = gsi_stmt (*def_gsi);
578 tree x = def;
579 tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
580 tree div_rhs1 = gimple_assign_rhs1 (stmt);
582 if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
583 || TREE_CODE (div_rhs1) != REAL_CST
584 || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
585 return;
587 gcall *sqrt_stmt
588 = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
590 if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
591 return;
593 switch (gimple_call_combined_fn (sqrt_stmt))
595 CASE_CFN_SQRT:
596 CASE_CFN_SQRT_FN:
597 break;
599 default:
600 return;
602 tree a = gimple_call_arg (sqrt_stmt, 0);
604 /* We have 'a' and 'x'. Now analyze the uses of 'x'. */
606 /* Statements that use x in x * x. */
607 auto_vec<gimple *> sqr_stmts;
608 /* Statements that use x in a * x. */
609 auto_vec<gimple *> mult_stmts;
610 bool has_other_use = false;
611 bool mult_on_main_path = false;
613 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
615 if (is_gimple_debug (use_stmt))
616 continue;
617 if (is_square_of (use_stmt, x))
619 sqr_stmts.safe_push (use_stmt);
620 if (gimple_bb (use_stmt) == gimple_bb (stmt))
621 mult_on_main_path = true;
623 else if (is_mult_by (use_stmt, x, a))
625 mult_stmts.safe_push (use_stmt);
626 if (gimple_bb (use_stmt) == gimple_bb (stmt))
627 mult_on_main_path = true;
629 else
630 has_other_use = true;
633 /* In the x * x and a * x cases we just rewire stmt operands or
634 remove multiplications. In the has_other_use case we introduce
635 a multiplication so make sure we don't introduce a multiplication
636 on a path where there was none. */
637 if (has_other_use && !mult_on_main_path)
638 return;
640 if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
641 return;
643 /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
644 to be able to compose it from the sqr and mult cases. */
645 if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
646 return;
648 if (dump_file)
650 fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
651 print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
652 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
653 fprintf (dump_file, "\n");
656 bool delete_div = !has_other_use;
657 tree sqr_ssa_name = NULL_TREE;
658 if (!sqr_stmts.is_empty ())
660 /* r1 = x * x. Transform the original
661 x = 1.0 / t
662 into
663 tmp1 = 1.0 / a
664 r1 = tmp1. */
666 sqr_ssa_name
667 = make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");
669 if (dump_file)
671 fprintf (dump_file, "Replacing original division\n");
672 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
673 fprintf (dump_file, "with new division\n");
675 stmt
676 = gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
677 gimple_assign_rhs1 (stmt), a);
678 gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
679 gsi_remove (def_gsi, true);
680 *def_gsi = gsi_for_stmt (stmt);
681 fold_stmt_inplace (def_gsi);
682 update_stmt (stmt);
684 if (dump_file)
685 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
687 delete_div = false;
688 gimple *sqr_stmt;
689 unsigned int i;
690 FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
692 gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
693 gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
694 update_stmt (sqr_stmt);
697 if (!mult_stmts.is_empty ())
699 /* r2 = a * x. Transform this into:
700 r2 = t (The original sqrt (a)). */
701 unsigned int i;
702 gimple *mult_stmt = NULL;
703 FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
705 gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);
707 if (dump_file)
709 fprintf (dump_file, "Replacing squaring multiplication\n");
710 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
711 fprintf (dump_file, "with assignment\n");
713 gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
714 fold_stmt_inplace (&gsi2);
715 update_stmt (mult_stmt);
716 if (dump_file)
717 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
721 if (has_other_use)
723 /* Using the two temporaries tmp1, tmp2 from above
724 the original x is now:
725 x = tmp1 * tmp2. */
726 gcc_assert (orig_sqrt_ssa_name);
727 gcc_assert (sqr_ssa_name);
729 gimple *new_stmt
730 = gimple_build_assign (x, MULT_EXPR,
731 orig_sqrt_ssa_name, sqr_ssa_name);
732 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
733 update_stmt (stmt);
735 else if (delete_div)
737 /* Remove the original division. */
738 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
739 gsi_remove (&gsi2, true);
740 release_defs (stmt);
742 else
743 release_ssa_name (x);
746 /* Look for floating-point divisions among DEF's uses, and try to
747 replace them by multiplications with the reciprocal. Add
748 as many statements computing the reciprocal as needed.
750 DEF must be a GIMPLE register of a floating-point type. */
752 static void
753 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
755 use_operand_p use_p, square_use_p;
756 imm_use_iterator use_iter, square_use_iter;
757 tree square_def;
758 struct occurrence *occ;
759 int count = 0;
760 int threshold;
761 int square_recip_count = 0;
762 int sqrt_recip_count = 0;
764 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
765 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
767 /* If DEF is a square (x * x), count the number of divisions by x.
768 If there are more divisions by x than by (DEF * DEF), prefer to optimize
769 the reciprocal of x instead of DEF. This improves cases like:
770 def = x * x
771 t0 = a / def
772 t1 = b / def
773 t2 = c / x
774 Reciprocal optimization of x results in 1 division rather than 2 or 3. */
775 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
777 if (is_gimple_assign (def_stmt)
778 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
779 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
780 && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
782 tree op0 = gimple_assign_rhs1 (def_stmt);
784 FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
786 gimple *use_stmt = USE_STMT (use_p);
787 if (is_division_by (use_stmt, op0))
788 sqrt_recip_count++;
792 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
794 gimple *use_stmt = USE_STMT (use_p);
795 if (is_division_by (use_stmt, def))
797 register_division_in (gimple_bb (use_stmt), 2);
798 count++;
801 if (is_square_of (use_stmt, def))
803 square_def = gimple_assign_lhs (use_stmt);
804 FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
806 gimple *square_use_stmt = USE_STMT (square_use_p);
807 if (is_division_by (square_use_stmt, square_def))
809 /* This is executed twice for each division by a square. */
810 register_division_in (gimple_bb (square_use_stmt), 1);
811 square_recip_count++;
817 /* Square reciprocals were counted twice above. */
818 square_recip_count /= 2;
820 /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x). */
821 if (sqrt_recip_count > square_recip_count)
822 goto out;
824 /* Do the expensive part only if we can hope to optimize something. */
825 if (count + square_recip_count >= threshold && count >= 1)
827 gimple *use_stmt;
828 for (occ = occ_head; occ; occ = occ->next)
830 compute_merit (occ);
831 insert_reciprocals (def_gsi, occ, def, NULL, NULL,
832 square_recip_count, threshold);
835 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
837 if (is_division_by (use_stmt, def))
839 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
840 replace_reciprocal (use_p);
842 else if (square_recip_count > 0 && is_square_of (use_stmt, def))
844 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
846 /* Find all uses of the square that are divisions and
847 * replace them by multiplications with the inverse. */
848 imm_use_iterator square_iterator;
849 gimple *powmult_use_stmt = USE_STMT (use_p);
850 tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
852 FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
853 square_iterator, powmult_def_name)
854 FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
856 gimple *powmult_use_stmt = USE_STMT (square_use_p);
857 if (is_division_by (powmult_use_stmt, powmult_def_name))
858 replace_reciprocal_squares (square_use_p);
865 out:
866 for (occ = occ_head; occ; )
867 occ = free_bb (occ);
869 occ_head = NULL;
872 /* Return an internal function that implements the reciprocal of CALL,
873 or IFN_LAST if there is no such function that the target supports. */
875 internal_fn
876 internal_fn_reciprocal (gcall *call)
878 internal_fn ifn;
880 switch (gimple_call_combined_fn (call))
882 CASE_CFN_SQRT:
883 CASE_CFN_SQRT_FN:
884 ifn = IFN_RSQRT;
885 break;
887 default:
888 return IFN_LAST;
891 tree_pair types = direct_internal_fn_types (ifn, call);
892 if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
893 return IFN_LAST;
895 return ifn;
898 /* Go through all the floating-point SSA_NAMEs, and call
899 execute_cse_reciprocals_1 on each of them. */
900 namespace {
902 const pass_data pass_data_cse_reciprocals =
904 GIMPLE_PASS, /* type */
905 "recip", /* name */
906 OPTGROUP_NONE, /* optinfo_flags */
907 TV_TREE_RECIP, /* tv_id */
908 PROP_ssa, /* properties_required */
909 0, /* properties_provided */
910 0, /* properties_destroyed */
911 0, /* todo_flags_start */
912 TODO_update_ssa, /* todo_flags_finish */
915 class pass_cse_reciprocals : public gimple_opt_pass
917 public:
918 pass_cse_reciprocals (gcc::context *ctxt)
919 : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
922 /* opt_pass methods: */
923 virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
924 virtual unsigned int execute (function *);
926 }; // class pass_cse_reciprocals
928 unsigned int
929 pass_cse_reciprocals::execute (function *fun)
931 basic_block bb;
932 tree arg;
934 occ_pool = new object_allocator<occurrence> ("dominators for recip");
936 memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
937 calculate_dominance_info (CDI_DOMINATORS);
938 calculate_dominance_info (CDI_POST_DOMINATORS);
940 if (flag_checking)
941 FOR_EACH_BB_FN (bb, fun)
942 gcc_assert (!bb->aux);
944 for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
945 if (FLOAT_TYPE_P (TREE_TYPE (arg))
946 && is_gimple_reg (arg))
948 tree name = ssa_default_def (fun, arg);
949 if (name)
950 execute_cse_reciprocals_1 (NULL, name);
953 FOR_EACH_BB_FN (bb, fun)
955 tree def;
957 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
958 gsi_next (&gsi))
960 gphi *phi = gsi.phi ();
961 def = PHI_RESULT (phi);
962 if (! virtual_operand_p (def)
963 && FLOAT_TYPE_P (TREE_TYPE (def)))
964 execute_cse_reciprocals_1 (NULL, def);
967 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
968 gsi_next (&gsi))
970 gimple *stmt = gsi_stmt (gsi);
972 if (gimple_has_lhs (stmt)
973 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
974 && FLOAT_TYPE_P (TREE_TYPE (def))
975 && TREE_CODE (def) == SSA_NAME)
977 execute_cse_reciprocals_1 (&gsi, def);
978 stmt = gsi_stmt (gsi);
979 if (flag_unsafe_math_optimizations
980 && is_gimple_assign (stmt)
981 && gimple_assign_lhs (stmt) == def
982 && !stmt_can_throw_internal (cfun, stmt)
983 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
984 optimize_recip_sqrt (&gsi, def);
988 if (optimize_bb_for_size_p (bb))
989 continue;
991 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
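/* For example (sketch): given t = sqrt (b); x = a / t; where every use
   of t is such a division, the call is rewritten to compute a
   reciprocal square root t = rsqrt (b) instead, either through the
   internal function IFN_RSQRT or through a target builtin from
   targetm.builtin_reciprocal, and the division becomes the
   multiplication x = a * t.  */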
992 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
993 gsi_next (&gsi))
995 gimple *stmt = gsi_stmt (gsi);
997 if (is_gimple_assign (stmt)
998 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
1000 tree arg1 = gimple_assign_rhs2 (stmt);
1001 gimple *stmt1;
1003 if (TREE_CODE (arg1) != SSA_NAME)
1004 continue;
1006 stmt1 = SSA_NAME_DEF_STMT (arg1);
1008 if (is_gimple_call (stmt1)
1009 && gimple_call_lhs (stmt1))
1011 bool fail;
1012 imm_use_iterator ui;
1013 use_operand_p use_p;
1014 tree fndecl = NULL_TREE;
1016 gcall *call = as_a <gcall *> (stmt1);
1017 internal_fn ifn = internal_fn_reciprocal (call);
1018 if (ifn == IFN_LAST)
1020 fndecl = gimple_call_fndecl (call);
1021 if (!fndecl
1022 || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
1023 continue;
1024 fndecl = targetm.builtin_reciprocal (fndecl);
1025 if (!fndecl)
1026 continue;
1029 /* Check that all uses of the SSA name are divisions,
1030 otherwise replacing the defining statement will do
1031 the wrong thing. */
1032 fail = false;
1033 FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
1035 gimple *stmt2 = USE_STMT (use_p);
1036 if (is_gimple_debug (stmt2))
1037 continue;
1038 if (!is_gimple_assign (stmt2)
1039 || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
1040 || gimple_assign_rhs1 (stmt2) == arg1
1041 || gimple_assign_rhs2 (stmt2) != arg1)
1043 fail = true;
1044 break;
1047 if (fail)
1048 continue;
1050 gimple_replace_ssa_lhs (call, arg1);
1051 if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
1053 auto_vec<tree, 4> args;
1054 for (unsigned int i = 0;
1055 i < gimple_call_num_args (call); i++)
1056 args.safe_push (gimple_call_arg (call, i));
1057 gcall *stmt2;
1058 if (ifn == IFN_LAST)
1059 stmt2 = gimple_build_call_vec (fndecl, args);
1060 else
1061 stmt2 = gimple_build_call_internal_vec (ifn, args);
1062 gimple_call_set_lhs (stmt2, arg1);
1063 gimple_move_vops (stmt2, call);
1064 gimple_call_set_nothrow (stmt2,
1065 gimple_call_nothrow_p (call));
1066 gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
1067 gsi_replace (&gsi2, stmt2, true);
1069 else
1071 if (ifn == IFN_LAST)
1072 gimple_call_set_fndecl (call, fndecl);
1073 else
1074 gimple_call_set_internal_fn (call, ifn);
1075 update_stmt (call);
1077 reciprocal_stats.rfuncs_inserted++;
1079 FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
1081 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1082 gimple_assign_set_rhs_code (stmt, MULT_EXPR);
1083 fold_stmt_inplace (&gsi);
1084 update_stmt (stmt);
1091 statistics_counter_event (fun, "reciprocal divs inserted",
1092 reciprocal_stats.rdivs_inserted);
1093 statistics_counter_event (fun, "reciprocal functions inserted",
1094 reciprocal_stats.rfuncs_inserted);
1096 free_dominance_info (CDI_DOMINATORS);
1097 free_dominance_info (CDI_POST_DOMINATORS);
1098 delete occ_pool;
1099 return 0;
1102 } // anon namespace
1104 gimple_opt_pass *
1105 make_pass_cse_reciprocals (gcc::context *ctxt)
1107 return new pass_cse_reciprocals (ctxt);
1110 /* If NAME is the result of a type conversion, look for other
1111 equivalent dominating or dominated conversions, and replace all
1112 uses with the earliest dominating name, removing the redundant
1113 conversions. Return the prevailing name. */
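/* For example (hypothetical SSA names): if _1 = (double) s_5 appears in
   a block dominating _2 = (double) s_5, all uses of _2 are rewritten to
   use _1, the second conversion is removed, and _1 is returned as the
   prevailing name.  */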
1115 static tree
1116 execute_cse_conv_1 (tree name)
1118 if (SSA_NAME_IS_DEFAULT_DEF (name)
1119 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1120 return name;
1122 gimple *def_stmt = SSA_NAME_DEF_STMT (name);
1124 if (!gimple_assign_cast_p (def_stmt))
1125 return name;
1127 tree src = gimple_assign_rhs1 (def_stmt);
1129 if (TREE_CODE (src) != SSA_NAME)
1130 return name;
1132 imm_use_iterator use_iter;
1133 gimple *use_stmt;
1135 /* Find the earliest dominating def. */
1136 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1138 if (use_stmt == def_stmt
1139 || !gimple_assign_cast_p (use_stmt))
1140 continue;
1142 tree lhs = gimple_assign_lhs (use_stmt);
1144 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1145 || (gimple_assign_rhs1 (use_stmt)
1146 != gimple_assign_rhs1 (def_stmt))
1147 || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1148 continue;
1150 bool use_dominates;
1151 if (gimple_bb (def_stmt) == gimple_bb (use_stmt))
1153 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1154 while (!gsi_end_p (gsi) && gsi_stmt (gsi) != def_stmt)
1155 gsi_next (&gsi);
1156 use_dominates = !gsi_end_p (gsi);
1158 else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
1159 gimple_bb (def_stmt)))
1160 use_dominates = false;
1161 else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (def_stmt),
1162 gimple_bb (use_stmt)))
1163 use_dominates = true;
1164 else
1165 continue;
1167 if (use_dominates)
1169 std::swap (name, lhs);
1170 std::swap (def_stmt, use_stmt);
1174 /* Now go through all uses of SRC again, replacing the equivalent
1175 dominated conversions. We may replace defs that were not
1176 dominated by the then-prevailing defs when we first visited
1177 them. */
1178 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1180 if (use_stmt == def_stmt
1181 || !gimple_assign_cast_p (use_stmt))
1182 continue;
1184 tree lhs = gimple_assign_lhs (use_stmt);
1186 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1187 || (gimple_assign_rhs1 (use_stmt)
1188 != gimple_assign_rhs1 (def_stmt))
1189 || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1190 continue;
1192 if (gimple_bb (def_stmt) == gimple_bb (use_stmt)
1193 || dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
1194 gimple_bb (def_stmt)))
1196 sincos_stats.conv_removed++;
1198 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1199 replace_uses_by (lhs, name);
1200 gsi_remove (&gsi, true);
1204 return name;
1207 /* Records an occurrence at statement USE_STMT in the vector STMTS
1208 if it is dominated by *TOP_BB, dominates it, or *TOP_BB is not yet
1209 initialized. Returns true if the occurrence was pushed on
1210 the vector. Adjusts *TOP_BB to be the basic block dominating all
1211 statements in the vector. */
1213 static bool
1214 maybe_record_sincos (vec<gimple *> *stmts,
1215 basic_block *top_bb, gimple *use_stmt)
1217 basic_block use_bb = gimple_bb (use_stmt);
1218 if (*top_bb
1219 && (*top_bb == use_bb
1220 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
1221 stmts->safe_push (use_stmt);
1222 else if (!*top_bb
1223 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
1225 stmts->safe_push (use_stmt);
1226 *top_bb = use_bb;
1228 else
1229 return false;
1231 return true;
1234 /* Look for sin, cos and cexpi calls with the same argument NAME and
1235 create a single call to cexpi CSEing the result in this case.
1236 We first walk over all immediate uses of the argument collecting
1237 statements that we can CSE in a vector and in a second pass replace
1238 the statement rhs with a REALPART or IMAGPART expression on the
1239 result of the cexpi call we insert before the use statement that
1240 dominates all other candidates. */
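/* For example (hypothetical SSA names):
     c_2 = __builtin_cos (x_1);
     s_3 = __builtin_sin (x_1);
   becomes
     cs_4 = __builtin_cexpi (x_1);
     c_2 = REALPART_EXPR <cs_4>;
     s_3 = IMAGPART_EXPR <cs_4>;
   with the cexpi call inserted in the block that dominates both of the
   original calls.  */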
1242 static bool
1243 execute_cse_sincos_1 (tree name)
1245 gimple_stmt_iterator gsi;
1246 imm_use_iterator use_iter;
1247 tree fndecl, res, type = NULL_TREE;
1248 gimple *def_stmt, *use_stmt, *stmt;
1249 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
1250 auto_vec<gimple *> stmts;
1251 basic_block top_bb = NULL;
1252 int i;
1253 bool cfg_changed = false;
1255 name = execute_cse_conv_1 (name);
1257 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
1259 if (gimple_code (use_stmt) != GIMPLE_CALL
1260 || !gimple_call_lhs (use_stmt))
1261 continue;
1263 switch (gimple_call_combined_fn (use_stmt))
1265 CASE_CFN_COS:
1266 seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1267 break;
1269 CASE_CFN_SIN:
1270 seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1271 break;
1273 CASE_CFN_CEXPI:
1274 seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1275 break;
1277 default:;
1278 continue;
1281 tree t = mathfn_built_in_type (gimple_call_combined_fn (use_stmt));
1282 if (!type)
1284 type = t;
1285 t = TREE_TYPE (name);
1287 /* This checks that NAME has the right type in the first round,
1288 and, in subsequent rounds, that the built_in type is the same
1289 type, or a compatible type. */
1290 if (type != t && !types_compatible_p (type, t))
1291 return false;
1293 if (seen_cos + seen_sin + seen_cexpi <= 1)
1294 return false;
1296 /* Simply insert cexpi at the beginning of top_bb but not earlier than
1297 the name def statement. */
1298 fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
1299 if (!fndecl)
1300 return false;
1301 stmt = gimple_build_call (fndecl, 1, name);
1302 res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
1303 gimple_call_set_lhs (stmt, res);
1305 def_stmt = SSA_NAME_DEF_STMT (name);
1306 if (!SSA_NAME_IS_DEFAULT_DEF (name)
1307 && gimple_code (def_stmt) != GIMPLE_PHI
1308 && gimple_bb (def_stmt) == top_bb)
1310 gsi = gsi_for_stmt (def_stmt);
1311 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1313 else
1315 gsi = gsi_after_labels (top_bb);
1316 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1318 sincos_stats.inserted++;
1320 /* And adjust the recorded old call sites. */
1321 for (i = 0; stmts.iterate (i, &use_stmt); ++i)
1323 tree rhs = NULL;
1325 switch (gimple_call_combined_fn (use_stmt))
1327 CASE_CFN_COS:
1328 rhs = fold_build1 (REALPART_EXPR, type, res);
1329 break;
1331 CASE_CFN_SIN:
1332 rhs = fold_build1 (IMAGPART_EXPR, type, res);
1333 break;
1335 CASE_CFN_CEXPI:
1336 rhs = res;
1337 break;
1339 default:;
1340 gcc_unreachable ();
1343 /* Replace call with a copy. */
1344 stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
1346 gsi = gsi_for_stmt (use_stmt);
1347 gsi_replace (&gsi, stmt, true);
1348 if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
1349 cfg_changed = true;
1352 return cfg_changed;
1355 /* To evaluate powi(x,n), the floating point value x raised to the
1356 constant integer exponent n, we use a hybrid algorithm that
1357 combines the "window method" with look-up tables. For an
1358 introduction to exponentiation algorithms and "addition chains",
1359 see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1360 "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1361 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1362 Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
1364 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
1365 multiplications to inline before calling the system library's pow
1366 function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1367 so this default never requires calling pow, powf or powl. */
1369 #ifndef POWI_MAX_MULTS
1370 #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
1371 #endif
1373 /* The size of the "optimal power tree" lookup table. All
1374 exponents less than this value are simply looked up in the
1375 powi_table below. This threshold is also used to size the
1376 cache of pseudo registers that hold intermediate results. */
1377 #define POWI_TABLE_SIZE 256
1379 /* The size, in bits of the window, used in the "window method"
1380 exponentiation algorithm. This is equivalent to a radix of
1381 (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
1382 #define POWI_WINDOW_SIZE 3
1384 /* The following table is an efficient representation of an
1385 "optimal power tree". For each value, i, the corresponding
1386 value, j, in the table states that an optimal evaluation
1387 sequence for calculating pow(x,i) can be found by evaluating
1388 pow(x,j)*pow(x,i-j). An optimal power tree for the first
1389 100 integers is given in Knuth's "Seminumerical algorithms". */
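/* Reading the table below, for example: powi_table[15] == 9, so x**15
   is evaluated as x**9 * x**6; powi_table[9] == 6 and powi_table[6] == 3
   give x**9 == x**6 * x**3 and x**6 == x**3 * x**3; with
   x**3 == x**2 * x and x**2 == x * x the whole chain takes only five
   multiplications.  */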
1391 static const unsigned char powi_table[POWI_TABLE_SIZE] =
1393 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
1394 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
1395 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
1396 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
1397 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
1398 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
1399 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
1400 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
1401 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
1402 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
1403 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
1404 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
1405 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
1406 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
1407 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
1408 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
1409 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
1410 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
1411 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
1412 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
1413 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
1414 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
1415 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
1416 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
1417 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
1418 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
1419 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
1420 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
1421 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
1422 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
1423 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
1424 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
1428 /* Return the number of multiplications required to calculate
1429 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
1430 subroutine of powi_cost. CACHE is an array indicating
1431 which exponents have already been calculated. */
1433 static int
1434 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1436 /* If we've already calculated this exponent, then this evaluation
1437 doesn't require any additional multiplications. */
1438 if (cache[n])
1439 return 0;
1441 cache[n] = true;
1442 return powi_lookup_cost (n - powi_table[n], cache)
1443 + powi_lookup_cost (powi_table[n], cache) + 1;
1446 /* Return the number of multiplications required to calculate
1447 powi(x,n) for an arbitrary x, given the exponent N. This
1448 function needs to be kept in sync with powi_as_mults below. */
1450 static int
1451 powi_cost (HOST_WIDE_INT n)
1453 bool cache[POWI_TABLE_SIZE];
1454 unsigned HOST_WIDE_INT digit;
1455 unsigned HOST_WIDE_INT val;
1456 int result;
1458 if (n == 0)
1459 return 0;
1461 /* Ignore the reciprocal when calculating the cost. */
1462 val = (n < 0) ? -n : n;
1464 /* Initialize the exponent cache. */
1465 memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
1466 cache[1] = true;
1468 result = 0;
1470 while (val >= POWI_TABLE_SIZE)
1472 if (val & 1)
1474 digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
1475 result += powi_lookup_cost (digit, cache)
1476 + POWI_WINDOW_SIZE + 1;
1477 val >>= POWI_WINDOW_SIZE;
1479 else
1481 val >>= 1;
1482 result++;
1486 return result + powi_lookup_cost (val, cache);
1489 /* Recursive subroutine of powi_as_mults. This function takes the
1490 array, CACHE, of already calculated exponents and an exponent N and
1491 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1493 static tree
1494 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1495 HOST_WIDE_INT n, tree *cache)
1497 tree op0, op1, ssa_target;
1498 unsigned HOST_WIDE_INT digit;
1499 gassign *mult_stmt;
1501 if (n < POWI_TABLE_SIZE && cache[n])
1502 return cache[n];
1504 ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1506 if (n < POWI_TABLE_SIZE)
1508 cache[n] = ssa_target;
1509 op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1510 op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1512 else if (n & 1)
1514 digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1515 op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1516 op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1518 else
1520 op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1521 op1 = op0;
1524 mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1525 gimple_set_location (mult_stmt, loc);
1526 gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1528 return ssa_target;
1531 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1532 This function needs to be kept in sync with powi_cost above. */
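/* For instance, with ARG0 = x and N = -3 this emits roughly
   (hypothetical temporary names)
     powmult_1 = x * x;
     powmult_2 = x * powmult_1;
     powmult_3 = 1.0 / powmult_2;
   and returns powmult_3: two multiplications, matching powi_cost (-3),
   plus the final reciprocal which powi_cost deliberately ignores.  */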
1534 tree
1535 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1536 tree arg0, HOST_WIDE_INT n)
1538 tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1539 gassign *div_stmt;
1540 tree target;
1542 if (n == 0)
1543 return build_one_cst (type);
1545 memset (cache, 0, sizeof (cache));
1546 cache[1] = arg0;
1548 result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
1549 if (n >= 0)
1550 return result;
1552 /* If the original exponent was negative, reciprocate the result. */
1553 target = make_temp_ssa_name (type, NULL, "powmult");
1554 div_stmt = gimple_build_assign (target, RDIV_EXPR,
1555 build_real (type, dconst1), result);
1556 gimple_set_location (div_stmt, loc);
1557 gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1559 return target;
1562 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1563 location info LOC. If the arguments are appropriate, create an
1564 equivalent sequence of statements prior to GSI using an optimal
1565 number of multiplications, and return an expression holding the
1566 result. */
1568 static tree
1569 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1570 tree arg0, HOST_WIDE_INT n)
1572 /* Avoid largest negative number. */
1573 if (n != -n
1574 && ((n >= -1 && n <= 2)
1575 || (optimize_function_for_speed_p (cfun)
1576 && powi_cost (n) <= POWI_MAX_MULTS)))
1577 return powi_as_mults (gsi, loc, arg0, n);
1579 return NULL_TREE;
1582 /* Build a gimple call statement that calls FN with argument ARG.
1583 Set the lhs of the call statement to a fresh SSA name. Insert the
1584 statement prior to GSI's current position, and return the fresh
1585 SSA name. */
1587 static tree
1588 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1589 tree fn, tree arg)
1591 gcall *call_stmt;
1592 tree ssa_target;
1594 call_stmt = gimple_build_call (fn, 1, arg);
1595 ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1596 gimple_set_lhs (call_stmt, ssa_target);
1597 gimple_set_location (call_stmt, loc);
1598 gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1600 return ssa_target;
1603 /* Build a gimple binary operation with the given CODE and arguments
1604 ARG0, ARG1, assigning the result to a new SSA name built from
1605 NAME. Insert the statement prior to GSI's current position, and
1606 return the fresh SSA name. */
1608 static tree
1609 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1610 const char *name, enum tree_code code,
1611 tree arg0, tree arg1)
1613 tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1614 gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1615 gimple_set_location (stmt, loc);
1616 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1617 return result;
1620 /* Build a gimple reference operation with the given CODE and argument
1621 ARG, assigning the result to a new SSA name of TYPE with NAME.
1622 Insert the statement prior to GSI's current position, and return
1623 the fresh SSA name. */
1625 static inline tree
1626 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1627 const char *name, enum tree_code code, tree arg0)
1629 tree result = make_temp_ssa_name (type, NULL, name);
1630 gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1631 gimple_set_location (stmt, loc);
1632 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1633 return result;
1636 /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1637 prior to GSI's current position, and return the fresh SSA name. */
1639 static tree
1640 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1641 tree type, tree val)
1643 tree result = make_ssa_name (type);
1644 gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1645 gimple_set_location (stmt, loc);
1646 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1647 return result;
1650 struct pow_synth_sqrt_info
1652 bool *factors;
1653 unsigned int deepest;
1654 unsigned int num_mults;
1657 /* Return true iff the real value C can be represented as a
1658 sum of powers of 0.5 up to N. That is:
1659 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1660 Record in INFO the various parameters of the synthesis algorithm such
1661 as the factors a[i], the maximum 0.5 power and the number of
1662 multiplications that will be required. */
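/* Worked example: for C == 0.625 and N >= 3 the loop below finds
   0.625 == 0.5 + 0.125 == 0.5**1 + 0.5**3, so INFO->factors becomes
   { true, false, true }, INFO->deepest == 3 and INFO->num_mults == 1,
   i.e. one multiply to combine sqrt (x) with sqrt (sqrt (sqrt (x))).  */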
1664 bool
1665 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1666 struct pow_synth_sqrt_info *info)
1668 REAL_VALUE_TYPE factor = dconsthalf;
1669 REAL_VALUE_TYPE remainder = c;
1671 info->deepest = 0;
1672 info->num_mults = 0;
1673 memset (info->factors, 0, n * sizeof (bool));
1675 for (unsigned i = 0; i < n; i++)
1677 REAL_VALUE_TYPE res;
1679 /* If something inexact happened bail out now. */
1680 if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1681 return false;
1683 /* We have hit zero. The number is representable as a sum
1684 of powers of 0.5. */
1685 if (real_equal (&res, &dconst0))
1687 info->factors[i] = true;
1688 info->deepest = i + 1;
1689 return true;
1691 else if (!REAL_VALUE_NEGATIVE (res))
1693 remainder = res;
1694 info->factors[i] = true;
1695 info->num_mults++;
1697 else
1698 info->factors[i] = false;
1700 real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1702 return false;
1705 /* Return the tree corresponding to FN being applied
1706 to ARG N times at GSI and LOC.
1707 Look up previous results from CACHE if need be.
1708 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1710 static tree
1711 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1712 tree fn, location_t loc, tree *cache)
1714 tree res = cache[n];
1715 if (!res)
1717 tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1718 res = build_and_insert_call (gsi, loc, fn, prev);
1719 cache[n] = res;
1722 return res;
1725 /* Print to STREAM the repeated application of function FNAME to ARG
1726 N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1727 "foo (foo (x))". */
1729 static void
1730 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1731 unsigned int n)
1733 if (n == 0)
1734 fprintf (stream, "%s", arg);
1735 else
1737 fprintf (stream, "%s (", fname);
1738 print_nested_fn (stream, fname, arg, n - 1);
1739 fprintf (stream, ")");
1743 /* Print to STREAM the fractional sequence of sqrt chains
1744 applied to ARG, described by INFO. Used for the dump file. */
1746 static void
1747 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1748 struct pow_synth_sqrt_info *info)
1750 for (unsigned int i = 0; i < info->deepest; i++)
1752 bool is_set = info->factors[i];
1753 if (is_set)
1755 print_nested_fn (stream, "sqrt", arg, i + 1);
1756 if (i != info->deepest - 1)
1757 fprintf (stream, " * ");
1762 /* Print to STREAM a representation of raising ARG to an integer
1763 power N. Used for the dump file. */
1765 static void
1766 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1768 if (n > 1)
1769 fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1770 else if (n == 1)
1771 fprintf (stream, "%s", arg);
1774 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1775 square roots. Place at GSI and LOC. Limit the maximum depth
1776 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1777 result of the expanded sequence or NULL_TREE if the expansion failed.
1779 This routine assumes that ARG1 is a real number with a fractional part
1780 (the integer exponent case will have been handled earlier in
1781 gimple_expand_builtin_pow).
1783 For ARG1 > 0.0:
1784 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1785 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1786 FRAC_PART == ARG1 - WHOLE_PART:
1787 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1788 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1789 if it can be expressed as such, that is if FRAC_PART satisfies:
1790 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1791 where integer a[i] is either 0 or 1.
1793 Example:
1794 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1795 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1797 For ARG1 < 0.0 there are two approaches:
1798 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1799 is calculated as above.
1801 Example:
1802 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1803 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1805 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1806 FRAC_PART := ARG1 - WHOLE_PART
1807 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1808 Example:
1809 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1810 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1812 For ARG1 < 0.0 we choose between (A) and (B) depending on
1813 how many multiplications we'd have to do.
1814 So, for the example in (B): POW (x, -5.875), if we were to
1815 follow algorithm (A) we would produce:
1816 1.0 / (POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X))))
1817 which contains more multiplications than approach (B).
1819 Hopefully, this approach will eliminate potentially expensive POW library
1820 calls when unsafe floating point math is enabled and allow the compiler to
1821 further optimise the multiplies, square roots and divides produced by this
1822 function. */
1824 static tree
1825 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1826 tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1828 tree type = TREE_TYPE (arg0);
1829 machine_mode mode = TYPE_MODE (type);
1830 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1831 bool one_over = true;
1833 if (!sqrtfn)
1834 return NULL_TREE;
1836 if (TREE_CODE (arg1) != REAL_CST)
1837 return NULL_TREE;
1839 REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1841 gcc_assert (max_depth > 0);
1842 tree *cache = XALLOCAVEC (tree, max_depth + 1);
1844 struct pow_synth_sqrt_info synth_info;
1845 synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1846 synth_info.deepest = 0;
1847 synth_info.num_mults = 0;
1849 bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1850 REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1852 /* The whole and fractional parts of exp. */
1853 REAL_VALUE_TYPE whole_part;
1854 REAL_VALUE_TYPE frac_part;
1856 real_floor (&whole_part, mode, &exp);
1857 real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1860 REAL_VALUE_TYPE ceil_whole = dconst0;
1861 REAL_VALUE_TYPE ceil_fract = dconst0;
1863 if (neg_exp)
1865 real_ceil (&ceil_whole, mode, &exp);
1866 real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1869 if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1870 return NULL_TREE;
1872 /* Check whether it's more profitable to not use 1.0 / ... */
1873 if (neg_exp)
1875 struct pow_synth_sqrt_info alt_synth_info;
1876 alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1877 alt_synth_info.deepest = 0;
1878 alt_synth_info.num_mults = 0;
1880 if (representable_as_half_series_p (ceil_fract, max_depth,
1881 &alt_synth_info)
1882 && alt_synth_info.deepest <= synth_info.deepest
1883 && alt_synth_info.num_mults < synth_info.num_mults)
1885 whole_part = ceil_whole;
1886 frac_part = ceil_fract;
1887 synth_info.deepest = alt_synth_info.deepest;
1888 synth_info.num_mults = alt_synth_info.num_mults;
1889 memcpy (synth_info.factors, alt_synth_info.factors,
1890 (max_depth + 1) * sizeof (bool));
1891 one_over = false;
1895 HOST_WIDE_INT n = real_to_integer (&whole_part);
1896 REAL_VALUE_TYPE cint;
1897 real_from_integer (&cint, VOIDmode, n, SIGNED);
1899 if (!real_identical (&whole_part, &cint))
1900 return NULL_TREE;
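/* Give up if open-coding the integer part plus the sqrt chain would exceed
   the multiplication budget used for powi expansion.  */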
1902 if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1903 return NULL_TREE;
1905 memset (cache, 0, (max_depth + 1) * sizeof (tree));
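/* x**0 is 1.0 and x**1 is x itself, so only N > 1 needs a powi expansion.  */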
1907 tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1909 /* Calculate the integer part of the exponent. */
1910 if (n > 1)
1912 integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1913 if (!integer_res)
1914 return NULL_TREE;
1917 if (dump_file)
1919 char string[64];
1921 real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1922 fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1924 if (neg_exp)
1926 if (one_over)
1928 fprintf (dump_file, "1.0 / (");
1929 dump_integer_part (dump_file, "x", n);
1930 if (n > 0)
1931 fprintf (dump_file, " * ");
1932 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1933 fprintf (dump_file, ")");
1935 else
1937 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1938 fprintf (dump_file, " / (");
1939 dump_integer_part (dump_file, "x", n);
1940 fprintf (dump_file, ")");
1943 else
1945 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1946 if (n > 0)
1947 fprintf (dump_file, " * ");
1948 dump_integer_part (dump_file, "x", n);
1951 fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1955 tree fract_res = NULL_TREE;
1956 cache[0] = arg0;
1958 /* Calculate the fractional part of the exponent. */
1959 for (unsigned i = 0; i < synth_info.deepest; i++)
1961 if (synth_info.factors[i])
1963 tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1965 if (!fract_res)
1966 fract_res = sqrt_chain;
1968 else
1969 fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1970 fract_res, sqrt_chain);
1974 tree res = NULL_TREE;
1976 if (neg_exp)
1978 if (one_over)
1980 if (n > 0)
1981 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1982 fract_res, integer_res);
1983 else
1984 res = fract_res;
1986 res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1987 build_real (type, dconst1), res);
1989 else
1991 res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1992 fract_res, integer_res);
1995 else
1996 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1997 fract_res, integer_res);
1998 return res;
2001 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
2002 with location info LOC. If possible, create an equivalent and
2003 less expensive sequence of statements prior to GSI, and return an
2004 expression holding the result. */
2006 static tree
2007 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
2008 tree arg0, tree arg1)
2010 REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
2011 REAL_VALUE_TYPE c2, dconst3;
2012 HOST_WIDE_INT n;
2013 tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
2014 machine_mode mode;
2015 bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
2016 bool hw_sqrt_exists, c_is_int, c2_is_int;
2018 dconst1_4 = dconst1;
2019 SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
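/* dconst1_4 is now 0.25: start from 1.0 and decrement the binary exponent
   twice.  */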
2021 /* If the exponent isn't a constant, there's nothing of interest
2022 to be done. */
2023 if (TREE_CODE (arg1) != REAL_CST)
2024 return NULL_TREE;
2026 /* Don't perform the operation if flag_signaling_nans is on
2027 and the operand is a signaling NaN. */
2028 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
2029 && ((TREE_CODE (arg0) == REAL_CST
2030 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
2031 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
2032 return NULL_TREE;
2034 /* If the exponent is equivalent to an integer, expand to an optimal
2035 multiplication sequence when profitable. */
2036 c = TREE_REAL_CST (arg1);
2037 n = real_to_integer (&c);
2038 real_from_integer (&cint, VOIDmode, n, SIGNED);
2039 c_is_int = real_identical (&c, &cint);
2041 if (c_is_int
2042 && ((n >= -1 && n <= 2)
2043 || (flag_unsafe_math_optimizations
2044 && speed_p
2045 && powi_cost (n) <= POWI_MAX_MULTS)))
2046 return gimple_expand_builtin_powi (gsi, loc, arg0, n);
2048 /* Attempt various optimizations using sqrt and cbrt. */
2049 type = TREE_TYPE (arg0);
2050 mode = TYPE_MODE (type);
2051 sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2053 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
2054 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
2055 sqrt(-0) = -0. */
2056 if (sqrtfn
2057 && real_equal (&c, &dconsthalf)
2058 && !HONOR_SIGNED_ZEROS (mode))
2059 return build_and_insert_call (gsi, loc, sqrtfn, arg0);
2061 hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
2063 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
2064 optimizations since 1./3. is not exactly representable. If x
2065 is negative and finite, the correct value of pow(x,1./3.) is
2066 a NaN with the "invalid" exception raised, because the value
2067 of 1./3. actually has an even denominator. The correct value
2068 of cbrt(x) is a negative real value. */
2069 cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
2070 dconst1_3 = real_value_truncate (mode, dconst_third ());
2072 if (flag_unsafe_math_optimizations
2073 && cbrtfn
2074 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2075 && real_equal (&c, &dconst1_3))
2076 return build_and_insert_call (gsi, loc, cbrtfn, arg0);
2078 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
2079 if we don't have a hardware sqrt insn. */
2080 dconst1_6 = dconst1_3;
2081 SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
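/* dconst1_6 is now 1./6.: halve 1./3. by decrementing its binary exponent.  */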
2083 if (flag_unsafe_math_optimizations
2084 && sqrtfn
2085 && cbrtfn
2086 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2087 && speed_p
2088 && hw_sqrt_exists
2089 && real_equal (&c, &dconst1_6))
2091 /* sqrt(x) */
2092 sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
2094 /* cbrt(sqrt(x)) */
2095 return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
2099 /* Attempt to expand the POW as a product of square root chains.
2100 Expand the 0.25 case even when optimising for size. */
2101 if (flag_unsafe_math_optimizations
2102 && sqrtfn
2103 && hw_sqrt_exists
2104 && (speed_p || real_equal (&c, &dconst1_4))
2105 && !HONOR_SIGNED_ZEROS (mode))
2107 unsigned int max_depth = speed_p
2108 ? param_max_pow_sqrt_depth
2109 : 2;
2111 tree expand_with_sqrts
2112 = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
2114 if (expand_with_sqrts)
2115 return expand_with_sqrts;
2118 real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
2119 n = real_to_integer (&c2);
2120 real_from_integer (&cint, VOIDmode, n, SIGNED);
2121 c2_is_int = real_identical (&c2, &cint);
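/* c2 holds 2*c here; c2_is_int therefore records whether the exponent is an
   integral multiple of 0.5 (e.g. 2.5), a case excluded from the cbrt-based
   expansion below.  */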
2123 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2125 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
2126 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
2128 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
2129 different from pow(x, 1./3.) due to rounding and behavior with
2130 negative x, we need to constrain this transformation to unsafe
2131 math and positive x or finite math. */
2132 real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
2133 real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
2134 real_round (&c2, mode, &c2);
2135 n = real_to_integer (&c2);
2136 real_from_integer (&cint, VOIDmode, n, SIGNED);
2137 real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
2138 real_convert (&c2, mode, &c2);
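/* At this point n == round (3*c) and c2 == n/3 converted back to MODE, so
   real_identical (&c2, &c) below checks that the exponent is exactly a third
   of an integer in this mode.  */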
2140 if (flag_unsafe_math_optimizations
2141 && cbrtfn
2142 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2143 && real_identical (&c2, &c)
2144 && !c2_is_int
2145 && optimize_function_for_speed_p (cfun)
2146 && powi_cost (n / 3) <= POWI_MAX_MULTS)
2148 tree powi_x_ndiv3 = NULL_TREE;
2150 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
2151 possible or profitable, give up. Skip the degenerate case when
2152 abs(n) < 3, where the result is always 1. */
2153 if (absu_hwi (n) >= 3)
2155 powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
2156 abs_hwi (n / 3));
2157 if (!powi_x_ndiv3)
2158 return NULL_TREE;
2161 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
2162 as that creates an unnecessary variable. Instead, just produce
2163 either cbrt(x) or cbrt(x) * cbrt(x). */
2164 cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
2166 if (absu_hwi (n) % 3 == 1)
2167 powi_cbrt_x = cbrt_x;
2168 else
2169 powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2170 cbrt_x, cbrt_x);
2172 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
2173 if (absu_hwi (n) < 3)
2174 result = powi_cbrt_x;
2175 else
2176 result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2177 powi_x_ndiv3, powi_cbrt_x);
2179 /* If n is negative, reciprocate the result. */
2180 if (n < 0)
2181 result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
2182 build_real (type, dconst1), result);
2184 return result;
2187 /* No optimizations succeeded. */
2188 return NULL_TREE;
2191 /* ARG is the argument to a cabs builtin call in GSI with location info
2192 LOC. Create a sequence of statements prior to GSI that calculates
2193 sqrt(R*R + I*I), where R and I are the real and imaginary components
2194 of ARG, respectively. Return an expression holding the result. */
2196 static tree
2197 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
2199 tree real_part, imag_part, addend1, addend2, sum, result;
2200 tree type = TREE_TYPE (TREE_TYPE (arg));
2201 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2202 machine_mode mode = TYPE_MODE (type);
2204 if (!flag_unsafe_math_optimizations
2205 || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
2206 || !sqrtfn
2207 || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
2208 return NULL_TREE;
2210 real_part = build_and_insert_ref (gsi, loc, type, "cabs",
2211 REALPART_EXPR, arg);
2212 addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2213 real_part, real_part);
2214 imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
2215 IMAGPART_EXPR, arg);
2216 addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2217 imag_part, imag_part);
2218 sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
2219 result = build_and_insert_call (gsi, loc, sqrtfn, sum);
2221 return result;
2224 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2225 on the SSA_NAME argument of each of them. Also expand powi(x,n) into
2226 an optimal number of multiplies, when n is a constant. */
2228 namespace {
2230 const pass_data pass_data_cse_sincos =
2232 GIMPLE_PASS, /* type */
2233 "sincos", /* name */
2234 OPTGROUP_NONE, /* optinfo_flags */
2235 TV_TREE_SINCOS, /* tv_id */
2236 PROP_ssa, /* properties_required */
2237 PROP_gimple_opt_math, /* properties_provided */
2238 0, /* properties_destroyed */
2239 0, /* todo_flags_start */
2240 TODO_update_ssa, /* todo_flags_finish */
2243 class pass_cse_sincos : public gimple_opt_pass
2245 public:
2246 pass_cse_sincos (gcc::context *ctxt)
2247 : gimple_opt_pass (pass_data_cse_sincos, ctxt)
2250 /* opt_pass methods: */
2251 virtual bool gate (function *)
2253 /* We no longer require either sincos or cexp, since powi expansion
2254 piggybacks on this pass. */
2255 return optimize;
2258 virtual unsigned int execute (function *);
2260 }; // class pass_cse_sincos
2262 unsigned int
2263 pass_cse_sincos::execute (function *fun)
2265 basic_block bb;
2266 bool cfg_changed = false;
2268 calculate_dominance_info (CDI_DOMINATORS);
2269 memset (&sincos_stats, 0, sizeof (sincos_stats));
2271 FOR_EACH_BB_FN (bb, fun)
2273 gimple_stmt_iterator gsi;
2274 bool cleanup_eh = false;
2276 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2278 gimple *stmt = gsi_stmt (gsi);
2280 /* Only the last stmt in a bb could throw, no need to call
2281 gimple_purge_dead_eh_edges if we change something in the middle
2282 of a basic block. */
2283 cleanup_eh = false;
2285 if (is_gimple_call (stmt)
2286 && gimple_call_lhs (stmt))
2288 tree arg, arg0, arg1, result;
2289 HOST_WIDE_INT n;
2290 location_t loc;
2292 switch (gimple_call_combined_fn (stmt))
2294 CASE_CFN_COS:
2295 CASE_CFN_SIN:
2296 CASE_CFN_CEXPI:
2297 arg = gimple_call_arg (stmt, 0);
2298 /* Make sure we have either sincos or cexp. */
2299 if (!targetm.libc_has_function (function_c99_math_complex,
2300 TREE_TYPE (arg))
2301 && !targetm.libc_has_function (function_sincos,
2302 TREE_TYPE (arg)))
2303 break;
2305 if (TREE_CODE (arg) == SSA_NAME)
2306 cfg_changed |= execute_cse_sincos_1 (arg);
2307 break;
2309 CASE_CFN_POW:
2310 arg0 = gimple_call_arg (stmt, 0);
2311 arg1 = gimple_call_arg (stmt, 1);
2313 loc = gimple_location (stmt);
2314 result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
2316 if (result)
2318 tree lhs = gimple_get_lhs (stmt);
2319 gassign *new_stmt = gimple_build_assign (lhs, result);
2320 gimple_set_location (new_stmt, loc);
2321 unlink_stmt_vdef (stmt);
2322 gsi_replace (&gsi, new_stmt, true);
2323 cleanup_eh = true;
2324 if (gimple_vdef (stmt))
2325 release_ssa_name (gimple_vdef (stmt));
2327 break;
2329 CASE_CFN_POWI:
2330 arg0 = gimple_call_arg (stmt, 0);
2331 arg1 = gimple_call_arg (stmt, 1);
2332 loc = gimple_location (stmt);
2334 if (real_minus_onep (arg0))
2336 tree t0, t1, cond, one, minus_one;
2337 gassign *stmt;
2339 t0 = TREE_TYPE (arg0);
2340 t1 = TREE_TYPE (arg1);
2341 one = build_real (t0, dconst1);
2342 minus_one = build_real (t0, dconstm1);
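/* powi (-1, n) is 1.0 for even N and -1.0 for odd N, so test the low bit
   of the exponent and select between the two constants.  */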
2344 cond = make_temp_ssa_name (t1, NULL, "powi_cond");
2345 stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2346 arg1, build_int_cst (t1, 1));
2347 gimple_set_location (stmt, loc);
2348 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2350 result = make_temp_ssa_name (t0, NULL, "powi");
2351 stmt = gimple_build_assign (result, COND_EXPR, cond,
2352 minus_one, one);
2353 gimple_set_location (stmt, loc);
2354 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2356 else
2358 if (!tree_fits_shwi_p (arg1))
2359 break;
2361 n = tree_to_shwi (arg1);
2362 result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
2365 if (result)
2367 tree lhs = gimple_get_lhs (stmt);
2368 gassign *new_stmt = gimple_build_assign (lhs, result);
2369 gimple_set_location (new_stmt, loc);
2370 unlink_stmt_vdef (stmt);
2371 gsi_replace (&gsi, new_stmt, true);
2372 cleanup_eh = true;
2373 if (gimple_vdef (stmt))
2374 release_ssa_name (gimple_vdef (stmt));
2376 break;
2378 CASE_CFN_CABS:
2379 arg0 = gimple_call_arg (stmt, 0);
2380 loc = gimple_location (stmt);
2381 result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
2383 if (result)
2385 tree lhs = gimple_get_lhs (stmt);
2386 gassign *new_stmt = gimple_build_assign (lhs, result);
2387 gimple_set_location (new_stmt, loc);
2388 unlink_stmt_vdef (stmt);
2389 gsi_replace (&gsi, new_stmt, true);
2390 cleanup_eh = true;
2391 if (gimple_vdef (stmt))
2392 release_ssa_name (gimple_vdef (stmt));
2394 break;
2396 default:;
2400 if (cleanup_eh)
2401 cfg_changed |= gimple_purge_dead_eh_edges (bb);
2404 statistics_counter_event (fun, "sincos statements inserted",
2405 sincos_stats.inserted);
2406 statistics_counter_event (fun, "conv statements removed",
2407 sincos_stats.conv_removed);
2409 return cfg_changed ? TODO_cleanup_cfg : 0;
2412 } // anon namespace
2414 gimple_opt_pass *
2415 make_pass_cse_sincos (gcc::context *ctxt)
2417 return new pass_cse_sincos (ctxt);
2420 /* Return true if stmt is a type conversion operation that can be stripped
2421 when used in a widening multiply operation. */
2422 static bool
2423 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2425 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2427 if (TREE_CODE (result_type) == INTEGER_TYPE)
2429 tree op_type;
2430 tree inner_op_type;
2432 if (!CONVERT_EXPR_CODE_P (rhs_code))
2433 return false;
2435 op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2437 /* If the type of OP has the same precision as the result, then
2438 we can strip this conversion. The multiply operation will be
2439 selected to create the correct extension as a by-product. */
2440 if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2441 return true;
2443 /* We can also strip a conversion if it preserves the signed-ness of
2444 the operation and doesn't narrow the range. */
2445 inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2447 /* If the inner-most type is unsigned, then we can strip any
2448 intermediate widening operation. If it's signed, then the
2449 intermediate widening operation must also be signed. */
2450 if ((TYPE_UNSIGNED (inner_op_type)
2451 || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2452 && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2453 return true;
2455 return false;
2458 return rhs_code == FIXED_CONVERT_EXPR;
2461 /* Return true if RHS is a suitable operand for a widening multiplication,
2462 assuming a target type of TYPE.
2463 There are two cases:
2465 - RHS makes some value at least twice as wide. Store that value
2466 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2468 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2469 but leave *TYPE_OUT untouched. */
2471 static bool
2472 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2473 tree *new_rhs_out)
2475 gimple *stmt;
2476 tree type1, rhs1;
2478 if (TREE_CODE (rhs) == SSA_NAME)
2480 stmt = SSA_NAME_DEF_STMT (rhs);
2481 if (is_gimple_assign (stmt))
2483 if (! widening_mult_conversion_strippable_p (type, stmt))
2484 rhs1 = rhs;
2485 else
2487 rhs1 = gimple_assign_rhs1 (stmt);
2489 if (TREE_CODE (rhs1) == INTEGER_CST)
2491 *new_rhs_out = rhs1;
2492 *type_out = NULL;
2493 return true;
2497 else
2498 rhs1 = rhs;
2500 type1 = TREE_TYPE (rhs1);
2502 if (TREE_CODE (type1) != TREE_CODE (type)
2503 || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2504 return false;
2506 *new_rhs_out = rhs1;
2507 *type_out = type1;
2508 return true;
2511 if (TREE_CODE (rhs) == INTEGER_CST)
2513 *new_rhs_out = rhs;
2514 *type_out = NULL;
2515 return true;
2518 return false;
2521 /* Return true if STMT performs a widening multiplication, assuming the
2522 output type is TYPE. If so, store the unwidened types of the operands
2523 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2524 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2525 and *TYPE2_OUT would give the operands of the multiplication. */
2527 static bool
2528 is_widening_mult_p (gimple *stmt,
2529 tree *type1_out, tree *rhs1_out,
2530 tree *type2_out, tree *rhs2_out)
2532 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2534 if (TREE_CODE (type) == INTEGER_TYPE)
2536 if (TYPE_OVERFLOW_TRAPS (type))
2537 return false;
2539 else if (TREE_CODE (type) != FIXED_POINT_TYPE)
2540 return false;
2542 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2543 rhs1_out))
2544 return false;
2546 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2547 rhs2_out))
2548 return false;
2550 if (*type1_out == NULL)
2552 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2553 return false;
2554 *type1_out = *type2_out;
2557 if (*type2_out == NULL)
2559 if (!int_fits_type_p (*rhs2_out, *type1_out))
2560 return false;
2561 *type2_out = *type1_out;
2564 /* Ensure that the larger of the two operands comes first. */
2565 if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2567 std::swap (*type1_out, *type2_out);
2568 std::swap (*rhs1_out, *rhs2_out);
2571 return true;
2574 /* Check to see if the CALL statement is an invocation of copysign
2575 with 1.0 as its first argument. */
2576 static bool
2577 is_copysign_call_with_1 (gimple *call)
2579 gcall *c = dyn_cast <gcall *> (call);
2580 if (! c)
2581 return false;
2583 enum combined_fn code = gimple_call_combined_fn (c);
2585 if (code == CFN_LAST)
2586 return false;
2588 if (builtin_fn_p (code))
2590 switch (as_builtin_fn (code))
2592 CASE_FLT_FN (BUILT_IN_COPYSIGN):
2593 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2594 return real_onep (gimple_call_arg (c, 0));
2595 default:
2596 return false;
2600 if (internal_fn_p (code))
2602 switch (as_internal_fn (code))
2604 case IFN_COPYSIGN:
2605 return real_onep (gimple_call_arg (c, 0));
2606 default:
2607 return false;
2611 return false;
2614 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2615 This only happens when the xorsign optab is defined. If the
2616 pattern is not an xorsign pattern or if expansion fails, FALSE is
2617 returned, otherwise TRUE is returned. */
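/* copysign (1, y) is +-1.0 carrying only the sign of y, so x * copysign (1, y)
   just gives x with its sign xor-ed with the sign of y; xorsign computes that
   directly on the sign bit.  */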
2618 static bool
2619 convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2621 tree treeop0, treeop1, lhs, type;
2622 location_t loc = gimple_location (stmt);
2623 lhs = gimple_assign_lhs (stmt);
2624 treeop0 = gimple_assign_rhs1 (stmt);
2625 treeop1 = gimple_assign_rhs2 (stmt);
2626 type = TREE_TYPE (lhs);
2627 machine_mode mode = TYPE_MODE (type);
2629 if (HONOR_SNANS (type))
2630 return false;
2632 if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2634 gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2635 if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
2637 call0 = SSA_NAME_DEF_STMT (treeop1);
2638 if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2639 return false;
2641 treeop1 = treeop0;
2643 if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2644 return false;
2646 gcall *c = as_a<gcall*> (call0);
2647 treeop0 = gimple_call_arg (c, 1);
2649 gcall *call_stmt
2650 = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2651 gimple_set_lhs (call_stmt, lhs);
2652 gimple_set_location (call_stmt, loc);
2653 gsi_replace (gsi, call_stmt, true);
2654 return true;
2657 return false;
2660 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2661 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2662 value is true iff we converted the statement. */
2664 static bool
2665 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2667 tree lhs, rhs1, rhs2, type, type1, type2;
2668 enum insn_code handler;
2669 scalar_int_mode to_mode, from_mode, actual_mode;
2670 optab op;
2671 int actual_precision;
2672 location_t loc = gimple_location (stmt);
2673 bool from_unsigned1, from_unsigned2;
2675 lhs = gimple_assign_lhs (stmt);
2676 type = TREE_TYPE (lhs);
2677 if (TREE_CODE (type) != INTEGER_TYPE)
2678 return false;
2680 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2681 return false;
2683 to_mode = SCALAR_INT_TYPE_MODE (type);
2684 from_mode = SCALAR_INT_TYPE_MODE (type1);
2685 if (to_mode == from_mode)
2686 return false;
2688 from_unsigned1 = TYPE_UNSIGNED (type1);
2689 from_unsigned2 = TYPE_UNSIGNED (type2);
2691 if (from_unsigned1 && from_unsigned2)
2692 op = umul_widen_optab;
2693 else if (!from_unsigned1 && !from_unsigned2)
2694 op = smul_widen_optab;
2695 else
2696 op = usmul_widen_optab;
2698 handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2699 &actual_mode);
2701 if (handler == CODE_FOR_nothing)
2703 if (op != smul_widen_optab)
2705 /* We can use a signed multiply with unsigned types as long as
2706 there is a wider mode to use, or it is the smaller of the two
2707 types that is unsigned. Note that type1 >= type2, always. */
2708 if ((TYPE_UNSIGNED (type1)
2709 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2710 || (TYPE_UNSIGNED (type2)
2711 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2713 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2714 || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2715 return false;
2718 op = smul_widen_optab;
2719 handler = find_widening_optab_handler_and_mode (op, to_mode,
2720 from_mode,
2721 &actual_mode);
2723 if (handler == CODE_FOR_nothing)
2724 return false;
2726 from_unsigned1 = from_unsigned2 = false;
2728 else
2730 /* Expand can synthesize smul_widen_optab if the target
2731 supports umul_widen_optab. */
2732 op = umul_widen_optab;
2733 handler = find_widening_optab_handler_and_mode (op, to_mode,
2734 from_mode,
2735 &actual_mode);
2736 if (handler == CODE_FOR_nothing)
2737 return false;
2741 /* Ensure that the inputs to the handler are in the correct precision
2742 for the opcode. This will be the full mode size. */
2743 actual_precision = GET_MODE_PRECISION (actual_mode);
2744 if (2 * actual_precision > TYPE_PRECISION (type))
2745 return false;
2746 if (actual_precision != TYPE_PRECISION (type1)
2747 || from_unsigned1 != TYPE_UNSIGNED (type1))
2748 rhs1 = build_and_insert_cast (gsi, loc,
2749 build_nonstandard_integer_type
2750 (actual_precision, from_unsigned1), rhs1);
2751 if (actual_precision != TYPE_PRECISION (type2)
2752 || from_unsigned2 != TYPE_UNSIGNED (type2))
2753 rhs2 = build_and_insert_cast (gsi, loc,
2754 build_nonstandard_integer_type
2755 (actual_precision, from_unsigned2), rhs2);
2757 /* Handle constants. */
2758 if (TREE_CODE (rhs1) == INTEGER_CST)
2759 rhs1 = fold_convert (type1, rhs1);
2760 if (TREE_CODE (rhs2) == INTEGER_CST)
2761 rhs2 = fold_convert (type2, rhs2);
2763 gimple_assign_set_rhs1 (stmt, rhs1);
2764 gimple_assign_set_rhs2 (stmt, rhs2);
2765 gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2766 update_stmt (stmt);
2767 widen_mul_stats.widen_mults_inserted++;
2768 return true;
2771 /* Process a single gimple statement STMT, which is found at the
2772 iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
2773 rhs (given by CODE), and try to convert it into a
2774 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
2775 is true iff we converted the statement. */
2777 static bool
2778 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2779 enum tree_code code)
2781 gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2782 gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2783 tree type, type1, type2, optype;
2784 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2785 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2786 optab this_optab;
2787 enum tree_code wmult_code;
2788 enum insn_code handler;
2789 scalar_mode to_mode, from_mode, actual_mode;
2790 location_t loc = gimple_location (stmt);
2791 int actual_precision;
2792 bool from_unsigned1, from_unsigned2;
2794 lhs = gimple_assign_lhs (stmt);
2795 type = TREE_TYPE (lhs);
2796 if (TREE_CODE (type) != INTEGER_TYPE
2797 && TREE_CODE (type) != FIXED_POINT_TYPE)
2798 return false;
2800 if (code == MINUS_EXPR)
2801 wmult_code = WIDEN_MULT_MINUS_EXPR;
2802 else
2803 wmult_code = WIDEN_MULT_PLUS_EXPR;
2805 rhs1 = gimple_assign_rhs1 (stmt);
2806 rhs2 = gimple_assign_rhs2 (stmt);
2808 if (TREE_CODE (rhs1) == SSA_NAME)
2810 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2811 if (is_gimple_assign (rhs1_stmt))
2812 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2815 if (TREE_CODE (rhs2) == SSA_NAME)
2817 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2818 if (is_gimple_assign (rhs2_stmt))
2819 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2822 /* Allow for one conversion statement between the multiply
2823 and addition/subtraction statement. If there is more than
2824 one conversion then we assume it would invalidate this
2825 transformation. If that's not the case then it should have
2826 been folded before now. */
2827 if (CONVERT_EXPR_CODE_P (rhs1_code))
2829 conv1_stmt = rhs1_stmt;
2830 rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2831 if (TREE_CODE (rhs1) == SSA_NAME)
2833 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2834 if (is_gimple_assign (rhs1_stmt))
2835 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2837 else
2838 return false;
2840 if (CONVERT_EXPR_CODE_P (rhs2_code))
2842 conv2_stmt = rhs2_stmt;
2843 rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2844 if (TREE_CODE (rhs2) == SSA_NAME)
2846 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2847 if (is_gimple_assign (rhs2_stmt))
2848 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2850 else
2851 return false;
2854 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2855 is_widening_mult_p, but we still need the rhs values it returns.
2857 It might also appear that it would be sufficient to use the existing
2858 operands of the widening multiply, but that would limit the choice of
2859 multiply-and-accumulate instructions.
2861 If the widened-multiplication result has more than one use, it is
2862 probably wiser not to do the conversion. Also restrict this operation
2863 to a single basic block to avoid moving the multiply to a different block
2864 with a higher execution frequency. */
2865 if (code == PLUS_EXPR
2866 && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2868 if (!has_single_use (rhs1)
2869 || gimple_bb (rhs1_stmt) != gimple_bb (stmt)
2870 || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2871 &type2, &mult_rhs2))
2872 return false;
2873 add_rhs = rhs2;
2874 conv_stmt = conv1_stmt;
2876 else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2878 if (!has_single_use (rhs2)
2879 || gimple_bb (rhs2_stmt) != gimple_bb (stmt)
2880 || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2881 &type2, &mult_rhs2))
2882 return false;
2883 add_rhs = rhs1;
2884 conv_stmt = conv2_stmt;
2886 else
2887 return false;
2889 to_mode = SCALAR_TYPE_MODE (type);
2890 from_mode = SCALAR_TYPE_MODE (type1);
2891 if (to_mode == from_mode)
2892 return false;
2894 from_unsigned1 = TYPE_UNSIGNED (type1);
2895 from_unsigned2 = TYPE_UNSIGNED (type2);
2896 optype = type1;
2898 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
2899 if (from_unsigned1 != from_unsigned2)
2901 if (!INTEGRAL_TYPE_P (type))
2902 return false;
2903 /* We can use a signed multiply with unsigned types as long as
2904 there is a wider mode to use, or it is the smaller of the two
2905 types that is unsigned. Note that type1 >= type2, always. */
2906 if ((from_unsigned1
2907 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2908 || (from_unsigned2
2909 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2911 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2912 || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2913 return false;
2916 from_unsigned1 = from_unsigned2 = false;
2917 optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2918 false);
2921 /* If there was a conversion between the multiply and addition
2922 then we need to make sure it fits a multiply-and-accumulate.
2923 There should be a single mode change which does not change the
2924 value. */
2925 if (conv_stmt)
2927 /* We use the original, unmodified data types for this. */
2928 tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
2929 tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
2930 int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
2931 bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
2933 if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
2935 /* Conversion is a truncate. */
2936 if (TYPE_PRECISION (to_type) < data_size)
2937 return false;
2939 else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
2941 /* Conversion is an extend. Check it's the right sort. */
2942 if (TYPE_UNSIGNED (from_type) != is_unsigned
2943 && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
2944 return false;
2946 /* else convert is a no-op for our purposes. */
2949 /* Verify that the machine can perform a widening multiply
2950 accumulate in this mode/signedness combination, otherwise
2951 this transformation is likely to pessimize code. */
2952 this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
2953 handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
2954 from_mode, &actual_mode);
2956 if (handler == CODE_FOR_nothing)
2957 return false;
2959 /* Ensure that the inputs to the handler are in the correct precision
2960 for the opcode. This will be the full mode size. */
2961 actual_precision = GET_MODE_PRECISION (actual_mode);
2962 if (actual_precision != TYPE_PRECISION (type1)
2963 || from_unsigned1 != TYPE_UNSIGNED (type1))
2964 mult_rhs1 = build_and_insert_cast (gsi, loc,
2965 build_nonstandard_integer_type
2966 (actual_precision, from_unsigned1),
2967 mult_rhs1);
2968 if (actual_precision != TYPE_PRECISION (type2)
2969 || from_unsigned2 != TYPE_UNSIGNED (type2))
2970 mult_rhs2 = build_and_insert_cast (gsi, loc,
2971 build_nonstandard_integer_type
2972 (actual_precision, from_unsigned2),
2973 mult_rhs2);
2975 if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
2976 add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
2978 /* Handle constants. */
2979 if (TREE_CODE (mult_rhs1) == INTEGER_CST)
2980 mult_rhs1 = fold_convert (type1, mult_rhs1);
2981 if (TREE_CODE (mult_rhs2) == INTEGER_CST)
2982 mult_rhs2 = fold_convert (type2, mult_rhs2);
2984 gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
2985 add_rhs);
2986 update_stmt (gsi_stmt (*gsi));
2987 widen_mul_stats.maccs_inserted++;
2988 return true;
2991 /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
2992 OP2 and which we know is used in statements that can be, together with the
2993 multiplication, converted to FMAs, perform the transformation. */
2995 static void
2996 convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
2998 tree type = TREE_TYPE (mul_result);
2999 gimple *use_stmt;
3000 imm_use_iterator imm_iter;
3001 gcall *fma_stmt;
3003 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3005 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3006 tree addop, mulop1 = op1, result = mul_result;
3007 bool negate_p = false;
3008 gimple_seq seq = NULL;
3010 if (is_gimple_debug (use_stmt))
3011 continue;
3013 if (is_gimple_assign (use_stmt)
3014 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3016 result = gimple_assign_lhs (use_stmt);
3017 use_operand_p use_p;
3018 gimple *neguse_stmt;
3019 single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3020 gsi_remove (&gsi, true);
3021 release_defs (use_stmt);
3023 use_stmt = neguse_stmt;
3024 gsi = gsi_for_stmt (use_stmt);
3025 negate_p = true;
3028 tree cond, else_value, ops[3];
3029 tree_code code;
3030 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
3031 ops, &else_value))
3032 gcc_unreachable ();
3033 addop = ops[0] == result ? ops[1] : ops[0];
3035 if (code == MINUS_EXPR)
3037 if (ops[0] == result)
3038 /* a * b - c -> a * b + (-c) */
3039 addop = gimple_build (&seq, NEGATE_EXPR, type, addop);
3040 else
3041 /* a - b * c -> (-b) * c + a */
3042 negate_p = !negate_p;
3045 if (negate_p)
3046 mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1);
3048 if (seq)
3049 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
3051 if (cond)
3052 fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
3053 op2, addop, else_value);
3054 else
3055 fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
3056 gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
3057 gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun,
3058 use_stmt));
3059 gsi_replace (&gsi, fma_stmt, true);
3060 /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
3061 regardless of where the negation occurs. */
3062 gimple *orig_stmt = gsi_stmt (gsi);
3063 if (fold_stmt (&gsi, follow_all_ssa_edges))
3065 if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
3066 gcc_unreachable ();
3067 update_stmt (gsi_stmt (gsi));
3070 if (dump_file && (dump_flags & TDF_DETAILS))
3072 fprintf (dump_file, "Generated FMA ");
3073 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3074 fprintf (dump_file, "\n");
3077 /* If the FMA result is negated in a single use, fold the negation
3078 too. */
3079 orig_stmt = gsi_stmt (gsi);
3080 use_operand_p use_p;
3081 gimple *neg_stmt;
3082 if (is_gimple_call (orig_stmt)
3083 && gimple_call_internal_p (orig_stmt)
3084 && gimple_call_lhs (orig_stmt)
3085 && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME
3086 && single_imm_use (gimple_call_lhs (orig_stmt), &use_p, &neg_stmt)
3087 && is_gimple_assign (neg_stmt)
3088 && gimple_assign_rhs_code (neg_stmt) == NEGATE_EXPR
3089 && !stmt_could_throw_p (cfun, neg_stmt))
3091 gsi = gsi_for_stmt (neg_stmt);
3092 if (fold_stmt (&gsi, follow_all_ssa_edges))
3094 if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (gsi)))
3095 gcc_unreachable ();
3096 update_stmt (gsi_stmt (gsi));
3097 if (dump_file && (dump_flags & TDF_DETAILS))
3099 fprintf (dump_file, "Folded FMA negation ");
3100 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3101 fprintf (dump_file, "\n");
3106 widen_mul_stats.fmas_inserted++;
3110 /* Data necessary to perform the actual transformation from a multiplication
3111 and an addition to an FMA once the decision to do so has been taken, and to
3112 then delete the multiplication statement from the function IL. */
3114 struct fma_transformation_info
3116 gimple *mul_stmt;
3117 tree mul_result;
3118 tree op1;
3119 tree op2;
3122 /* Structure containing the current state of FMA deferring, i.e. whether we are
3123 deferring, whether to continue deferring, and all data necessary to come
3124 back and perform all deferred transformations. */
3126 class fma_deferring_state
3128 public:
3129 /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually
3130 do any deferring. */
3132 fma_deferring_state (bool perform_deferring)
3133 : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
3134 m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
3136 /* List of FMA candidates for which the transformation has been determined
3137 possible but which, at this point in the BB analysis, we do not consider
3138 beneficial. */
3139 auto_vec<fma_transformation_info, 8> m_candidates;
3141 /* Set of results of multiplication that are part of an already deferred FMA
3142 candidates. */
3143 hash_set<tree> m_mul_result_set;
3145 /* The PHI that supposedly feeds back the result of an FMA to another over the
3146 loop boundary. */
3147 gphi *m_initial_phi;
3149 /* Result of the last produced FMA candidate or NULL if there has not been
3150 one. */
3151 tree m_last_result;
3153 /* If true, deferring might still be profitable. If false, transform all
3154 candidates and no longer defer. */
3155 bool m_deferring_p;
3158 /* Transform all deferred FMA candidates and mark STATE as no longer
3159 deferring. */
3161 static void
3162 cancel_fma_deferring (fma_deferring_state *state)
3164 if (!state->m_deferring_p)
3165 return;
3167 for (unsigned i = 0; i < state->m_candidates.length (); i++)
3169 if (dump_file && (dump_flags & TDF_DETAILS))
3170 fprintf (dump_file, "Generating deferred FMA\n");
3172 const fma_transformation_info &fti = state->m_candidates[i];
3173 convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
3175 gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
3176 gsi_remove (&gsi, true);
3177 release_defs (fti.mul_stmt);
3179 state->m_deferring_p = false;
3182 /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3183 Otherwise return NULL. */
3185 static gphi *
3186 result_of_phi (tree op)
3188 if (TREE_CODE (op) != SSA_NAME)
3189 return NULL;
3191 return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3194 /* After processing statements of a BB and recording STATE, return true if the
3195 initial phi is fed by the last FMA candidate result or one such result from
3196 previously processed BBs marked in LAST_RESULT_SET. */
3198 static bool
3199 last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3200 hash_set<tree> *last_result_set)
3202 ssa_op_iter iter;
3203 use_operand_p use;
3204 FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3206 tree t = USE_FROM_PTR (use);
3207 if (t == state->m_last_result
3208 || last_result_set->contains (t))
3209 return true;
3212 return false;
3215 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3216 with uses in additions and subtractions to form fused multiply-add
3217 operations. Returns true if successful and MUL_STMT should be removed.
3218 If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3219 on MUL_COND, otherwise it is unconditional.
3221 If STATE indicates that we are deferring FMA transformation, that means
3222 that we do not produce FMAs for basic blocks which look like:
3224 <bb 6>
3225 # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3226 _65 = _14 * _16;
3227 accumulator_66 = _65 + accumulator_111;
3229 or its unrolled version, i.e. with several FMA candidates that feed the result
3230 of one into the addend of another. Instead, we add them to a list in STATE
3231 and if we later discover an FMA candidate that is not part of such a chain,
3232 we go back and perform all deferred past candidates. */
3234 static bool
3235 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3236 fma_deferring_state *state, tree mul_cond = NULL_TREE)
3238 tree mul_result = gimple_get_lhs (mul_stmt);
3239 /* If there isn't a LHS then this can't be an FMA. There can be no LHS
3240 if the statement was left just for the side-effects. */
3241 if (!mul_result)
3242 return false;
3243 tree type = TREE_TYPE (mul_result);
3244 gimple *use_stmt, *neguse_stmt;
3245 use_operand_p use_p;
3246 imm_use_iterator imm_iter;
3248 if (FLOAT_TYPE_P (type)
3249 && flag_fp_contract_mode == FP_CONTRACT_OFF)
3250 return false;
3252 /* We don't want to do bitfield reduction ops. */
3253 if (INTEGRAL_TYPE_P (type)
3254 && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
3255 return false;
3257 /* If the target doesn't support it, don't generate it. We assume that
3258 if fma isn't available then fms, fnma or fnms are not either. */
3259 optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt));
3260 if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
3261 return false;
3263 /* If the multiplication has zero uses, it is kept around probably because
3264 of -fnon-call-exceptions. Don't optimize it away in that case,
3265 it is DCE's job. */
3266 if (has_zero_uses (mul_result))
3267 return false;
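/* Deferring is only considered for types no wider than the
   avoid-fma-max-bits parameter.  */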
3269 bool check_defer
3270 = (state->m_deferring_p
3271 && maybe_le (tree_to_poly_int64 (TYPE_SIZE (type)),
3272 param_avoid_fma_max_bits));
3273 bool defer = check_defer;
3274 bool seen_negate_p = false;
3275 /* Make sure that the multiplication statement becomes dead after
3276 the transformation, thus that all uses are transformed to FMAs.
3277 This means we assume that an FMA operation has the same cost
3278 as an addition. */
3279 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3281 tree result = mul_result;
3282 bool negate_p = false;
3284 use_stmt = USE_STMT (use_p);
3286 if (is_gimple_debug (use_stmt))
3287 continue;
3289 /* For now restrict these operations to single basic blocks. In theory
3290 we would want to support sinking the multiplication in
3291 m = a*b;
3292 if ()
3293 ma = m + c;
3294 else
3295 d = m;
3296 to form an fma in the then block and sink the multiplication to the
3297 else block. */
3298 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3299 return false;
3301 /* A negate on the multiplication leads to FNMA. */
3302 if (is_gimple_assign (use_stmt)
3303 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3305 ssa_op_iter iter;
3306 use_operand_p usep;
3308 /* If (due to earlier missed optimizations) we have two
3309 negates of the same value, treat them as equivalent
3310 to a single negate with multiple uses. */
3311 if (seen_negate_p)
3312 return false;
3314 result = gimple_assign_lhs (use_stmt);
3316 /* Make sure the negate statement becomes dead with this
3317 single transformation. */
3318 if (!single_imm_use (gimple_assign_lhs (use_stmt),
3319 &use_p, &neguse_stmt))
3320 return false;
3322 /* Make sure the multiplication isn't also used on that stmt. */
3323 FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3324 if (USE_FROM_PTR (usep) == mul_result)
3325 return false;
3327 /* Re-validate. */
3328 use_stmt = neguse_stmt;
3329 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3330 return false;
3332 negate_p = seen_negate_p = true;
3335 tree cond, else_value, ops[3];
3336 tree_code code;
3337 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3338 &else_value))
3339 return false;
3341 switch (code)
3343 case MINUS_EXPR:
3344 if (ops[1] == result)
3345 negate_p = !negate_p;
3346 break;
3347 case PLUS_EXPR:
3348 break;
3349 default:
3350 /* FMA can only be formed from PLUS and MINUS. */
3351 return false;
3354 if (mul_cond && cond != mul_cond)
3355 return false;
3357 if (cond)
3359 if (cond == result || else_value == result)
3360 return false;
3361 if (!direct_internal_fn_supported_p (IFN_COND_FMA, type, opt_type))
3362 return false;
3365 /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3366 we'll visit later, we might be able to get a more profitable
3367 match with fnma.
3368 OTOH, if we don't, a negate / fma pair has likely lower latency
3369 than a mult / subtract pair. */
3370 if (code == MINUS_EXPR
3371 && !negate_p
3372 && ops[0] == result
3373 && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
3374 && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3375 && TREE_CODE (ops[1]) == SSA_NAME
3376 && has_single_use (ops[1]))
3378 gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3379 if (is_gimple_assign (stmt2)
3380 && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3381 return false;
3384 /* We can't handle a * b + a * b. */
3385 if (ops[0] == ops[1])
3386 return false;
3387 /* If deferring, make sure we are not looking at an instruction that
3388 wouldn't have existed if we were not. */
3389 if (state->m_deferring_p
3390 && (state->m_mul_result_set.contains (ops[0])
3391 || state->m_mul_result_set.contains (ops[1])))
3392 return false;
3394 if (check_defer)
3396 tree use_lhs = gimple_get_lhs (use_stmt);
3397 if (state->m_last_result)
3399 if (ops[1] == state->m_last_result
3400 || ops[0] == state->m_last_result)
3401 defer = true;
3402 else
3403 defer = false;
3405 else
3407 gcc_checking_assert (!state->m_initial_phi);
3408 gphi *phi;
3409 if (ops[0] == result)
3410 phi = result_of_phi (ops[1]);
3411 else
3413 gcc_assert (ops[1] == result);
3414 phi = result_of_phi (ops[0]);
3417 if (phi)
3419 state->m_initial_phi = phi;
3420 defer = true;
3422 else
3423 defer = false;
3426 state->m_last_result = use_lhs;
3427 check_defer = false;
3429 else
3430 defer = false;
3432 /* While it is possible to validate whether or not the exact form that
3433 we've recognized is available in the backend, the assumption is that
3434 if the deferring logic above did not trigger, the transformation is
3435 never a loss. For instance, suppose the target only has the plain FMA
3436 pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged
3437 MUL+SUB for FMA+NEG, which is still two operations. Consider
3438 -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3439 form the two NEGs are independent and could be run in parallel. */
3442 if (defer)
3444 fma_transformation_info fti;
3445 fti.mul_stmt = mul_stmt;
3446 fti.mul_result = mul_result;
3447 fti.op1 = op1;
3448 fti.op2 = op2;
3449 state->m_candidates.safe_push (fti);
3450 state->m_mul_result_set.add (mul_result);
3452 if (dump_file && (dump_flags & TDF_DETAILS))
3454 fprintf (dump_file, "Deferred generating FMA for multiplication ");
3455 print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
3456 fprintf (dump_file, "\n");
3459 return false;
3461 else
3463 if (state->m_deferring_p)
3464 cancel_fma_deferring (state);
3465 convert_mult_to_fma_1 (mul_result, op1, op2);
3466 return true;
3471 /* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have
3472 a check for non-zero like:
3473 _1 = x_4(D) * y_5(D);
3474 *res_7(D) = _1;
3475 if (x_4(D) != 0)
3476 goto <bb 3>; [50.00%]
3477 else
3478 goto <bb 4>; [50.00%]
3480 <bb 3> [local count: 536870913]:
3481 _2 = _1 / x_4(D);
3482 _9 = _2 != y_5(D);
3483 _10 = (int) _9;
3485 <bb 4> [local count: 1073741824]:
3486 # iftmp.0_3 = PHI <_10(3), 0(2)>
3487 then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
3488 optimize the x_4(D) != 0 condition to 1. */
3490 static void
3491 maybe_optimize_guarding_check (vec<gimple *> &mul_stmts, gimple *cond_stmt,
3492 gimple *div_stmt, bool *cfg_changed)
3494 basic_block bb = gimple_bb (cond_stmt);
3495 if (gimple_bb (div_stmt) != bb || !single_pred_p (bb))
3496 return;
3497 edge pred_edge = single_pred_edge (bb);
3498 basic_block pred_bb = pred_edge->src;
3499 if (EDGE_COUNT (pred_bb->succs) != 2)
3500 return;
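/* other_edge is the successor edge of PRED_BB that is not PRED_EDGE.  */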
3501 edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge);
3502 edge other_succ_edge = NULL;
3503 if (gimple_code (cond_stmt) == GIMPLE_COND)
3505 if (EDGE_COUNT (bb->succs) != 2)
3506 return;
3507 other_succ_edge = EDGE_SUCC (bb, 0);
3508 if (gimple_cond_code (cond_stmt) == NE_EXPR)
3510 if (other_succ_edge->flags & EDGE_TRUE_VALUE)
3511 other_succ_edge = EDGE_SUCC (bb, 1);
3513 else if (other_succ_edge->flags & EDGE_FALSE_VALUE)
3514 other_succ_edge = EDGE_SUCC (bb, 0);
3515 if (other_edge->dest != other_succ_edge->dest)
3516 return;
3518 else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb))
3519 return;
3520 gimple *zero_cond = last_stmt (pred_bb);
3521 if (zero_cond == NULL
3522 || gimple_code (zero_cond) != GIMPLE_COND
3523 || (gimple_cond_code (zero_cond)
3524 != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR))
3525 || !integer_zerop (gimple_cond_rhs (zero_cond)))
3526 return;
3527 tree zero_cond_lhs = gimple_cond_lhs (zero_cond);
3528 if (TREE_CODE (zero_cond_lhs) != SSA_NAME)
3529 return;
3530 if (gimple_assign_rhs2 (div_stmt) != zero_cond_lhs)
3532 /* Allow the divisor to be the result of a same precision cast
3533 from zero_cond_lhs. */
3534 tree rhs2 = gimple_assign_rhs2 (div_stmt);
3535 if (TREE_CODE (rhs2) != SSA_NAME)
3536 return;
3537 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3538 if (!gimple_assign_cast_p (g)
3539 || gimple_assign_rhs1 (g) != gimple_cond_lhs (zero_cond)
3540 || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs))
3541 || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs))
3542 != TYPE_PRECISION (TREE_TYPE (rhs2))))
3543 return;
3545 gimple_stmt_iterator gsi = gsi_after_labels (bb);
3546 mul_stmts.quick_push (div_stmt);
3547 if (is_gimple_debug (gsi_stmt (gsi)))
3548 gsi_next_nondebug (&gsi);
3549 unsigned cast_count = 0;
3550 while (gsi_stmt (gsi) != cond_stmt)
3552 /* If the original mul_stmt has a single use, allow it in the same bb;
3553 we are then looking just at __builtin_mul_overflow_p.
3554 Though, in that case the original mul_stmt will be replaced
3555 by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts. */
3556 gimple *mul_stmt;
3557 unsigned int i;
3558 bool ok = false;
3559 FOR_EACH_VEC_ELT (mul_stmts, i, mul_stmt)
3561 if (gsi_stmt (gsi) == mul_stmt)
3563 ok = true;
3564 break;
3567 if (!ok && gimple_assign_cast_p (gsi_stmt (gsi)) && ++cast_count < 4)
3568 ok = true;
3569 if (!ok)
3570 return;
3571 gsi_next_nondebug (&gsi);
3573 if (gimple_code (cond_stmt) == GIMPLE_COND)
3575 basic_block succ_bb = other_edge->dest;
3576 for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (gpi);
3577 gsi_next (&gpi))
3579 gphi *phi = gpi.phi ();
3580 tree v1 = gimple_phi_arg_def (phi, other_edge->dest_idx);
3581 tree v2 = gimple_phi_arg_def (phi, other_succ_edge->dest_idx);
3582 if (!operand_equal_p (v1, v2, 0))
3583 return;
3586 else
3588 tree lhs = gimple_assign_lhs (cond_stmt);
3589 if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
3590 return;
3591 gsi_next_nondebug (&gsi);
3592 if (!gsi_end_p (gsi))
3594 if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3595 return;
3596 gimple *cast_stmt = gsi_stmt (gsi);
3597 if (!gimple_assign_cast_p (cast_stmt))
3598 return;
3599 tree new_lhs = gimple_assign_lhs (cast_stmt);
3600 gsi_next_nondebug (&gsi);
3601 if (!gsi_end_p (gsi)
3602 || !new_lhs
3603 || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs))
3604 || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1)
3605 return;
3606 lhs = new_lhs;
3608 edge succ_edge = single_succ_edge (bb);
3609 basic_block succ_bb = succ_edge->dest;
3610 gsi = gsi_start_phis (succ_bb);
3611 if (gsi_end_p (gsi))
3612 return;
3613 gphi *phi = as_a <gphi *> (gsi_stmt (gsi));
3614 gsi_next (&gsi);
3615 if (!gsi_end_p (gsi))
3616 return;
3617 if (gimple_phi_arg_def (phi, succ_edge->dest_idx) != lhs)
3618 return;
3619 tree other_val = gimple_phi_arg_def (phi, other_edge->dest_idx);
3620 if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3622 tree cond = gimple_assign_rhs1 (cond_stmt);
3623 if (TREE_CODE (cond) == NE_EXPR)
3625 if (!operand_equal_p (other_val,
3626 gimple_assign_rhs3 (cond_stmt), 0))
3627 return;
3629 else if (!operand_equal_p (other_val,
3630 gimple_assign_rhs2 (cond_stmt), 0))
3631 return;
3633 else if (gimple_assign_rhs_code (cond_stmt) == NE_EXPR)
3635 if (!integer_zerop (other_val))
3636 return;
3638 else if (!integer_onep (other_val))
3639 return;
3641 gcond *zero_gcond = as_a <gcond *> (zero_cond);
3642 if (pred_edge->flags & EDGE_TRUE_VALUE)
3643 gimple_cond_make_true (zero_gcond);
3644 else
3645 gimple_cond_make_false (zero_gcond);
3646 update_stmt (zero_cond);
3647 *cfg_changed = true;
3650 /* Helper function for arith_overflow_check_p. Return true
3651 if VAL1 is equal to VAL2 cast to the corresponding integral type
3652 with the other signedness, or vice versa. */
3654 static bool
3655 arith_cast_equal_p (tree val1, tree val2)
3657 if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
3658 return wi::eq_p (wi::to_wide (val1), wi::to_wide (val2));
3659 else if (TREE_CODE (val1) != SSA_NAME || TREE_CODE (val2) != SSA_NAME)
3660 return false;
3661 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1))
3662 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1)) == val2)
3663 return true;
3664 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2))
3665 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2)) == val1)
3666 return true;
3667 return false;
3670 /* Helper function of match_arith_overflow. Return 1
3671 if USE_STMT is unsigned overflow check ovf != 0 for
3672 STMT, -1 if USE_STMT is unsigned overflow check ovf == 0
3673 and 0 otherwise. */
3675 static int
3676 arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
3677 tree maxval, tree *other)
3679 enum tree_code ccode = ERROR_MARK;
3680 tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3681 enum tree_code code = gimple_assign_rhs_code (stmt);
3682 tree lhs = gimple_assign_lhs (cast_stmt ? cast_stmt : stmt);
3683 tree rhs1 = gimple_assign_rhs1 (stmt);
3684 tree rhs2 = gimple_assign_rhs2 (stmt);
3685 tree multop = NULL_TREE, divlhs = NULL_TREE;
3686 gimple *cur_use_stmt = use_stmt;
3688 if (code == MULT_EXPR)
3690 if (!is_gimple_assign (use_stmt))
3691 return 0;
3692 if (gimple_assign_rhs_code (use_stmt) != TRUNC_DIV_EXPR)
3693 return 0;
3694 if (gimple_assign_rhs1 (use_stmt) != lhs)
3695 return 0;
3696 if (cast_stmt)
3698 if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs1))
3699 multop = rhs2;
3700 else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs2))
3701 multop = rhs1;
3702 else
3703 return 0;
3705 else if (gimple_assign_rhs2 (use_stmt) == rhs1)
3706 multop = rhs2;
3707 else if (operand_equal_p (gimple_assign_rhs2 (use_stmt), rhs2, 0))
3708 multop = rhs1;
3709 else
3710 return 0;
3711 if (stmt_ends_bb_p (use_stmt))
3712 return 0;
3713 divlhs = gimple_assign_lhs (use_stmt);
3714 if (!divlhs)
3715 return 0;
3716 use_operand_p use;
3717 if (!single_imm_use (divlhs, &use, &cur_use_stmt))
3718 return 0;
3720 if (gimple_code (cur_use_stmt) == GIMPLE_COND)
3722 ccode = gimple_cond_code (cur_use_stmt);
3723 crhs1 = gimple_cond_lhs (cur_use_stmt);
3724 crhs2 = gimple_cond_rhs (cur_use_stmt);
3726 else if (is_gimple_assign (cur_use_stmt))
3728 if (gimple_assign_rhs_class (cur_use_stmt) == GIMPLE_BINARY_RHS)
3730 ccode = gimple_assign_rhs_code (cur_use_stmt);
3731 crhs1 = gimple_assign_rhs1 (cur_use_stmt);
3732 crhs2 = gimple_assign_rhs2 (cur_use_stmt);
3734 else if (gimple_assign_rhs_code (cur_use_stmt) == COND_EXPR)
3736 tree cond = gimple_assign_rhs1 (cur_use_stmt);
3737 if (COMPARISON_CLASS_P (cond))
3739 ccode = TREE_CODE (cond);
3740 crhs1 = TREE_OPERAND (cond, 0);
3741 crhs2 = TREE_OPERAND (cond, 1);
3743 else
3744 return 0;
3746 else
3747 return 0;
3749 else
3750 return 0;
3752 if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3753 return 0;
3755 switch (ccode)
3757 case GT_EXPR:
3758 case LE_EXPR:
3759 if (maxval)
3761 /* r = a + b; r > maxval or r <= maxval */
3762 if (crhs1 == lhs
3763 && TREE_CODE (crhs2) == INTEGER_CST
3764 && tree_int_cst_equal (crhs2, maxval))
3765 return ccode == GT_EXPR ? 1 : -1;
3766 break;
3768 /* r = a - b; r > a or r <= a
3769 r = a + b; a > r or a <= r or b > r or b <= r. */
3770 if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3771 || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3772 && crhs2 == lhs))
3773 return ccode == GT_EXPR ? 1 : -1;
3774 /* r = ~a; b > r or b <= r. */
3775 if (code == BIT_NOT_EXPR && crhs2 == lhs)
3777 if (other)
3778 *other = crhs1;
3779 return ccode == GT_EXPR ? 1 : -1;
3781 break;
3782 case LT_EXPR:
3783 case GE_EXPR:
3784 if (maxval)
3785 break;
3786 /* r = a - b; a < r or a >= r
3787 r = a + b; r < a or r >= a or r < b or r >= b. */
3788 if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3789 || (code == PLUS_EXPR && crhs1 == lhs
3790 && (crhs2 == rhs1 || crhs2 == rhs2)))
3791 return ccode == LT_EXPR ? 1 : -1;
3792 /* r = ~a; r < b or r >= b. */
3793 if (code == BIT_NOT_EXPR && crhs1 == lhs)
3795 if (other)
3796 *other = crhs2;
3797 return ccode == LT_EXPR ? 1 : -1;
3799 break;
3800 case EQ_EXPR:
3801 case NE_EXPR:
3802 /* r = a * b; _1 = r / a; _1 == b
3803 r = a * b; _1 = r / b; _1 == a
3804 r = a * b; _1 = r / a; _1 != b
3805 r = a * b; _1 = r / b; _1 != a. */
3806 if (code == MULT_EXPR)
3808 if (cast_stmt)
3810 if ((crhs1 == divlhs && arith_cast_equal_p (crhs2, multop))
3811 || (crhs2 == divlhs && arith_cast_equal_p (crhs1, multop)))
3813 use_stmt = cur_use_stmt;
3814 return ccode == NE_EXPR ? 1 : -1;
3817 else if ((crhs1 == divlhs && operand_equal_p (crhs2, multop, 0))
3818 || (crhs2 == divlhs && crhs1 == multop))
3820 use_stmt = cur_use_stmt;
3821 return ccode == NE_EXPR ? 1 : -1;
3824 break;
3825 default:
3826 break;
3828 return 0;
3831 /* Recognize for unsigned x
3832 x = y - z;
3833 if (x > y)
3834 where there are other uses of x and replace it with
3835 _7 = .SUB_OVERFLOW (y, z);
3836 x = REALPART_EXPR <_7>;
3837 _8 = IMAGPART_EXPR <_7>;
3838 if (_8)
3839 and similarly for addition.
3841 Also recognize:
3842 yc = (type) y;
3843 zc = (type) z;
3844 x = yc + zc;
3845 if (x > max)
3846 where y and z have unsigned types with maximum max
3847 and there are other uses of x and all of those cast x
3848 back to that unsigned type and again replace it with
3849 _7 = .ADD_OVERFLOW (y, z);
3850 _9 = REALPART_EXPR <_7>;
3851 _8 = IMAGPART_EXPR <_7>;
3852 if (_8)
3853 and replace (utype) x with _9.
3855 Also recognize:
3856 x = ~z;
3857 if (y > x)
3858 and replace it with
3859 _7 = .ADD_OVERFLOW (y, z);
3860 _8 = IMAGPART_EXPR <_7>;
3861 if (_8)
3863 And also recognize:
3864 z = x * y;
3865 if (x != 0)
3866 goto <bb 3>; [50.00%]
3867 else
3868 goto <bb 4>; [50.00%]
3870 <bb 3> [local count: 536870913]:
3871 _2 = z / x;
3872 _9 = _2 != y;
3873 _10 = (int) _9;
3875 <bb 4> [local count: 1073741824]:
3876 # iftmp.0_3 = PHI <_10(3), 0(2)>
3877 and replace it with
3878 _7 = .MUL_OVERFLOW (x, y);
3879 z = REALPART_EXPR <_7>;
3880 _8 = IMAGPART_EXPR <_7>;
3881 _9 = _8 != 0;
3882 iftmp.0_3 = (int) _9; */
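/* A minimal C-level sketch, assuming unsigned arithmetic and illustrative
   names only, of source code that produces the first pattern above:

     unsigned
     sub_sat (unsigned y, unsigned z)
     {
       unsigned x = y - z;
       if (x > y)		/* wrap-around check on the subtraction  */
	 x = 0;
       return x;
     }

   After the transform the comparison reads the IMAGPART_EXPR (the overflow
   flag) of the .SUB_OVERFLOW result instead of re-comparing the difference
   against Y, which typically lets the target use its borrow flag directly.  */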
3884 static bool
3885 match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
3886 enum tree_code code, bool *cfg_changed)
3888 tree lhs = gimple_assign_lhs (stmt);
3889 tree type = TREE_TYPE (lhs);
3890 use_operand_p use_p;
3891 imm_use_iterator iter;
3892 bool use_seen = false;
3893 bool ovf_use_seen = false;
3894 gimple *use_stmt;
3895 gimple *add_stmt = NULL;
3896 bool add_first = false;
3897 gimple *cond_stmt = NULL;
3898 gimple *cast_stmt = NULL;
3899 tree cast_lhs = NULL_TREE;
3901 gcc_checking_assert (code == PLUS_EXPR
3902 || code == MINUS_EXPR
3903 || code == MULT_EXPR
3904 || code == BIT_NOT_EXPR);
3905 if (!INTEGRAL_TYPE_P (type)
3906 || !TYPE_UNSIGNED (type)
3907 || has_zero_uses (lhs)
3908 || (code != PLUS_EXPR
3909 && code != MULT_EXPR
3910 && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
3911 TYPE_MODE (type)) == CODE_FOR_nothing))
3912 return false;
3914 tree rhs1 = gimple_assign_rhs1 (stmt);
3915 tree rhs2 = gimple_assign_rhs2 (stmt);
3916 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
3918 use_stmt = USE_STMT (use_p);
3919 if (is_gimple_debug (use_stmt))
3920 continue;
3922 tree other = NULL_TREE;
3923 if (arith_overflow_check_p (stmt, NULL, use_stmt, NULL_TREE, &other))
3925 if (code == BIT_NOT_EXPR)
3927 gcc_assert (other);
3928 if (TREE_CODE (other) != SSA_NAME)
3929 return false;
3930 if (rhs2 == NULL)
3931 rhs2 = other;
3932 else
3933 return false;
3934 cond_stmt = use_stmt;
3936 ovf_use_seen = true;
3938 else
3940 use_seen = true;
3941 if (code == MULT_EXPR
3942 && cast_stmt == NULL
3943 && gimple_assign_cast_p (use_stmt))
3945 cast_lhs = gimple_assign_lhs (use_stmt);
3946 if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
3947 && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
3948 && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
3949 == TYPE_PRECISION (TREE_TYPE (lhs))))
3950 cast_stmt = use_stmt;
3951 else
3952 cast_lhs = NULL_TREE;
3955 if (ovf_use_seen && use_seen)
3956 break;
3959 if (!ovf_use_seen
3960 && code == MULT_EXPR
3961 && cast_stmt)
3963 if (TREE_CODE (rhs1) != SSA_NAME
3964 || (TREE_CODE (rhs2) != SSA_NAME && TREE_CODE (rhs2) != INTEGER_CST))
3965 return false;
3966 FOR_EACH_IMM_USE_FAST (use_p, iter, cast_lhs)
3968 use_stmt = USE_STMT (use_p);
3969 if (is_gimple_debug (use_stmt))
3970 continue;
3972 if (arith_overflow_check_p (stmt, cast_stmt, use_stmt,
3973 NULL_TREE, NULL))
3974 ovf_use_seen = true;
3977 else
3979 cast_stmt = NULL;
3980 cast_lhs = NULL_TREE;
3983 tree maxval = NULL_TREE;
3984 if (!ovf_use_seen
3985 || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
3986 || (code == PLUS_EXPR
3987 && optab_handler (uaddv4_optab,
3988 TYPE_MODE (type)) == CODE_FOR_nothing)
3989 || (code == MULT_EXPR
3990 && optab_handler (cast_stmt ? mulv4_optab : umulv4_optab,
3991 TYPE_MODE (type)) == CODE_FOR_nothing))
3993 if (code != PLUS_EXPR)
3994 return false;
3995 if (TREE_CODE (rhs1) != SSA_NAME
3996 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1)))
3997 return false;
3998 rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1));
3999 tree type1 = TREE_TYPE (rhs1);
4000 if (!INTEGRAL_TYPE_P (type1)
4001 || !TYPE_UNSIGNED (type1)
4002 || TYPE_PRECISION (type1) >= TYPE_PRECISION (type)
4003 || (TYPE_PRECISION (type1)
4004 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1))))
4005 return false;
4006 if (TREE_CODE (rhs2) == INTEGER_CST)
4008 if (wi::ne_p (wi::rshift (wi::to_wide (rhs2),
4009 TYPE_PRECISION (type1),
4010 UNSIGNED), 0))
4011 return false;
4012 rhs2 = fold_convert (type1, rhs2);
4014 else
4016 if (TREE_CODE (rhs2) != SSA_NAME
4017 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2)))
4018 return false;
4019 rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2));
4020 tree type2 = TREE_TYPE (rhs2);
4021 if (!INTEGRAL_TYPE_P (type2)
4022 || !TYPE_UNSIGNED (type2)
4023 || TYPE_PRECISION (type2) >= TYPE_PRECISION (type)
4024 || (TYPE_PRECISION (type2)
4025 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2))))
4026 return false;
4028 if (TYPE_PRECISION (type1) >= TYPE_PRECISION (TREE_TYPE (rhs2)))
4029 type = type1;
4030 else
4031 type = TREE_TYPE (rhs2);
4033 if (TREE_CODE (type) != INTEGER_TYPE
4034 || optab_handler (uaddv4_optab,
4035 TYPE_MODE (type)) == CODE_FOR_nothing)
4036 return false;
4038 maxval = wide_int_to_tree (type, wi::max_value (TYPE_PRECISION (type),
4039 UNSIGNED));
4040 ovf_use_seen = false;
4041 use_seen = false;
4042 basic_block use_bb = NULL;
4043 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4045 use_stmt = USE_STMT (use_p);
4046 if (is_gimple_debug (use_stmt))
4047 continue;
4049 if (arith_overflow_check_p (stmt, NULL, use_stmt, maxval, NULL))
4051 ovf_use_seen = true;
4052 use_bb = gimple_bb (use_stmt);
4054 else
4056 if (!gimple_assign_cast_p (use_stmt)
4057 || gimple_assign_rhs_code (use_stmt) == VIEW_CONVERT_EXPR)
4058 return false;
4059 tree use_lhs = gimple_assign_lhs (use_stmt);
4060 if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
4061 || (TYPE_PRECISION (TREE_TYPE (use_lhs))
4062 > TYPE_PRECISION (type)))
4063 return false;
4064 use_seen = true;
4067 if (!ovf_use_seen)
4068 return false;
4069 if (!useless_type_conversion_p (type, TREE_TYPE (rhs1)))
4071 if (!use_seen)
4072 return false;
4073 tree new_rhs1 = make_ssa_name (type);
4074 gimple *g = gimple_build_assign (new_rhs1, NOP_EXPR, rhs1);
4075 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4076 rhs1 = new_rhs1;
4078 else if (!useless_type_conversion_p (type, TREE_TYPE (rhs2)))
4080 if (!use_seen)
4081 return false;
4082 tree new_rhs2 = make_ssa_name (type);
4083 gimple *g = gimple_build_assign (new_rhs2, NOP_EXPR, rhs2);
4084 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4085 rhs2 = new_rhs2;
4087 else if (!use_seen)
4089 /* If there are no uses of the wider addition, check whether
4090    forwprop has created a narrower addition instead.
4091    Require it to be in the same bb as the overflow check.  */
4092 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs1)
4094 use_stmt = USE_STMT (use_p);
4095 if (is_gimple_debug (use_stmt))
4096 continue;
4098 if (use_stmt == stmt)
4099 continue;
4101 if (!is_gimple_assign (use_stmt)
4102 || gimple_bb (use_stmt) != use_bb
4103 || gimple_assign_rhs_code (use_stmt) != PLUS_EXPR)
4104 continue;
4106 if (gimple_assign_rhs1 (use_stmt) == rhs1)
4108 if (!operand_equal_p (gimple_assign_rhs2 (use_stmt),
4109 rhs2, 0))
4110 continue;
4112 else if (gimple_assign_rhs2 (use_stmt) == rhs1)
4114 if (gimple_assign_rhs1 (use_stmt) != rhs2)
4115 continue;
4117 else
4118 continue;
4120 add_stmt = use_stmt;
4121 break;
4123 if (add_stmt == NULL)
4124 return false;
4126 /* If stmt and add_stmt are in the same bb, we need to find out
4127 which one is earlier. If they are in different bbs, we've
4128 checked add_stmt is in the same bb as one of the uses of the
4129 stmt lhs, so stmt needs to dominate add_stmt too. */
4130 if (gimple_bb (stmt) == gimple_bb (add_stmt))
4132 gimple_stmt_iterator gsif = *gsi;
4133 gimple_stmt_iterator gsib = *gsi;
4134 int i;
4135 /* Search both forward and backward from stmt, with a small
4136    upper bound.  */
4137 for (i = 0; i < 128; i++)
4139 if (!gsi_end_p (gsib))
4141 gsi_prev_nondebug (&gsib);
4142 if (gsi_stmt (gsib) == add_stmt)
4144 add_first = true;
4145 break;
4148 else if (gsi_end_p (gsif))
4149 break;
4150 if (!gsi_end_p (gsif))
4152 gsi_next_nondebug (&gsif);
4153 if (gsi_stmt (gsif) == add_stmt)
4154 break;
4157 if (i == 128)
4158 return false;
4159 if (add_first)
4160 *gsi = gsi_for_stmt (add_stmt);
4165 if (code == BIT_NOT_EXPR)
4166 *gsi = gsi_for_stmt (cond_stmt);
4168 auto_vec<gimple *, 8> mul_stmts;
4169 if (code == MULT_EXPR && cast_stmt)
4171 type = TREE_TYPE (cast_lhs);
4172 gimple *g = SSA_NAME_DEF_STMT (rhs1);
4173 if (gimple_assign_cast_p (g)
4174 && useless_type_conversion_p (type,
4175 TREE_TYPE (gimple_assign_rhs1 (g)))
4176 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4177 rhs1 = gimple_assign_rhs1 (g);
4178 else
4180 g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs1);
4181 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4182 rhs1 = gimple_assign_lhs (g);
4183 mul_stmts.quick_push (g);
4185 if (TREE_CODE (rhs2) == INTEGER_CST)
4186 rhs2 = fold_convert (type, rhs2);
4187 else
4189 g = SSA_NAME_DEF_STMT (rhs2);
4190 if (gimple_assign_cast_p (g)
4191 && useless_type_conversion_p (type,
4192 TREE_TYPE (gimple_assign_rhs1 (g)))
4193 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4194 rhs2 = gimple_assign_rhs1 (g);
4195 else
4197 g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs2);
4198 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4199 rhs2 = gimple_assign_lhs (g);
4200 mul_stmts.quick_push (g);
4204 tree ctype = build_complex_type (type);
4205 gcall *g = gimple_build_call_internal (code == MULT_EXPR
4206 ? IFN_MUL_OVERFLOW
4207 : code != MINUS_EXPR
4208 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
4209 2, rhs1, rhs2);
4210 tree ctmp = make_ssa_name (ctype);
4211 gimple_call_set_lhs (g, ctmp);
4212 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4213 tree new_lhs = (maxval || cast_stmt) ? make_ssa_name (type) : lhs;
4214 gassign *g2;
4215 if (code != BIT_NOT_EXPR)
4217 g2 = gimple_build_assign (new_lhs, REALPART_EXPR,
4218 build1 (REALPART_EXPR, type, ctmp));
4219 if (maxval || cast_stmt)
4221 gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4222 if (add_first)
4223 *gsi = gsi_for_stmt (stmt);
4225 else
4226 gsi_replace (gsi, g2, true);
4227 if (code == MULT_EXPR)
4229 mul_stmts.quick_push (g);
4230 mul_stmts.quick_push (g2);
4231 if (cast_stmt)
4233 g2 = gimple_build_assign (lhs, NOP_EXPR, new_lhs);
4234 gsi_replace (gsi, g2, true);
4235 mul_stmts.quick_push (g2);
4239 tree ovf = make_ssa_name (type);
4240 g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
4241 build1 (IMAGPART_EXPR, type, ctmp));
4242 if (code != BIT_NOT_EXPR)
4243 gsi_insert_after (gsi, g2, GSI_NEW_STMT);
4244 else
4245 gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4246 if (code == MULT_EXPR)
4247 mul_stmts.quick_push (g2);
4249 FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs)
4251 if (is_gimple_debug (use_stmt))
4252 continue;
4254 gimple *orig_use_stmt = use_stmt;
4255 int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4256 maxval, NULL);
4257 if (ovf_use == 0)
4259 gcc_assert (code != BIT_NOT_EXPR);
4260 if (maxval)
4262 tree use_lhs = gimple_assign_lhs (use_stmt);
4263 gimple_assign_set_rhs1 (use_stmt, new_lhs);
4264 if (useless_type_conversion_p (TREE_TYPE (use_lhs),
4265 TREE_TYPE (new_lhs)))
4266 gimple_assign_set_rhs_code (use_stmt, SSA_NAME);
4267 update_stmt (use_stmt);
4269 continue;
4271 if (gimple_code (use_stmt) == GIMPLE_COND)
4273 gcond *cond_stmt = as_a <gcond *> (use_stmt);
4274 gimple_cond_set_lhs (cond_stmt, ovf);
4275 gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
4276 gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4278 else
4280 gcc_checking_assert (is_gimple_assign (use_stmt));
4281 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
4283 gimple_assign_set_rhs1 (use_stmt, ovf);
4284 gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
4285 gimple_assign_set_rhs_code (use_stmt,
4286 ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4288 else
4290 gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
4291 == COND_EXPR);
4292 tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
4293 boolean_type_node, ovf,
4294 build_int_cst (type, 0));
4295 gimple_assign_set_rhs1 (use_stmt, cond);
4298 update_stmt (use_stmt);
4299 if (code == MULT_EXPR && use_stmt != orig_use_stmt)
4301 gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt);
4302 maybe_optimize_guarding_check (mul_stmts, use_stmt, orig_use_stmt,
4303 cfg_changed);
4304 gsi_remove (&gsi2, true);
4305 release_ssa_name (gimple_assign_lhs (orig_use_stmt));
4308 if (maxval)
4310 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
4311 gsi_remove (&gsi2, true);
4312 if (add_stmt)
4314 gimple *g = gimple_build_assign (gimple_assign_lhs (add_stmt),
4315 new_lhs);
4316 gsi2 = gsi_for_stmt (add_stmt);
4317 gsi_replace (&gsi2, g, true);
4320 else if (code == BIT_NOT_EXPR)
4322 *gsi = gsi_for_stmt (stmt);
4323 gsi_remove (gsi, true);
4324 release_ssa_name (lhs);
4325 return true;
4327 return false;
4330 /* Return true if target has support for divmod. */
4332 static bool
4333 target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
4335 /* If target supports hardware divmod insn, use it for divmod. */
4336 if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
4337 return true;
4339 /* Check if libfunc for divmod is available. */
4340 rtx libfunc = optab_libfunc (divmod_optab, mode);
4341 if (libfunc != NULL_RTX)
3343 /* If an optab_handler exists for div_optab, perhaps in a wider mode,
3344    we don't want to use the libfunc even if it exists for the given mode.  */
4345 machine_mode div_mode;
4346 FOR_EACH_MODE_FROM (div_mode, mode)
4347 if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
4348 return false;
4350 return targetm.expand_divmod_libfunc != NULL;
4353 return false;
4356 /* Check if stmt is candidate for divmod transform. */
4358 static bool
4359 divmod_candidate_p (gassign *stmt)
4361 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
4362 machine_mode mode = TYPE_MODE (type);
4363 optab divmod_optab, div_optab;
4365 if (TYPE_UNSIGNED (type))
4367 divmod_optab = udivmod_optab;
4368 div_optab = udiv_optab;
4370 else
4372 divmod_optab = sdivmod_optab;
4373 div_optab = sdiv_optab;
4376 tree op1 = gimple_assign_rhs1 (stmt);
4377 tree op2 = gimple_assign_rhs2 (stmt);
4379 /* Disable the transform if either is a constant, since division-by-constant
4380 may have specialized expansion. */
4381 if (CONSTANT_CLASS_P (op1))
4382 return false;
4384 if (CONSTANT_CLASS_P (op2))
4386 if (integer_pow2p (op2))
4387 return false;
4389 if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT
4390 && TYPE_PRECISION (type) <= BITS_PER_WORD)
4391 return false;
4393 /* If the divisor is not power of 2 and the precision wider than
4394 HWI, expand_divmod punts on that, so in that case it is better
4395 to use divmod optab or libfunc. Similarly if choose_multiplier
4396 might need pre/post shifts of BITS_PER_WORD or more. */
4399 /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
4400 expand using the [su]divv optabs. */
4401 if (TYPE_OVERFLOW_TRAPS (type))
4402 return false;
4404 if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
4405 return false;
4407 return true;
4410 /* This function looks for:
4411 t1 = a TRUNC_DIV_EXPR b;
4412 t2 = a TRUNC_MOD_EXPR b;
4413 and transforms it to the following sequence:
4414 complex_tmp = DIVMOD (a, b);
4415 t1 = REALPART_EXPR(complex_tmp);
4416 t2 = IMAGPART_EXPR(complex_tmp);
4417 For conditions enabling the transform, see divmod_candidate_p().
4419 The pass has three parts:
4420 1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
4421 other trunc_div_expr and trunc_mod_expr stmts.
4422 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
4423 to stmts vector.
4424 3) Insert DIVMOD call just before top_stmt and update entries in
4425 stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
4426 IMAGPART_EXPR for mod). */
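/* A minimal C-level sketch of code that benefits from this transform,
   assuming a non-constant divisor; names are illustrative only:

     void
     split (unsigned a, unsigned b, unsigned *q, unsigned *r)
     {
       *q = a / b;	/* TRUNC_DIV_EXPR  */
       *r = a % b;	/* TRUNC_MOD_EXPR  */
     }

   Both statements end up reading the single DIVMOD result, so targets
   whose division instruction (or libcall) produces quotient and remainder
   together only pay for one division.  */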
4428 static bool
4429 convert_to_divmod (gassign *stmt)
4431 if (stmt_can_throw_internal (cfun, stmt)
4432 || !divmod_candidate_p (stmt))
4433 return false;
4435 tree op1 = gimple_assign_rhs1 (stmt);
4436 tree op2 = gimple_assign_rhs2 (stmt);
4438 imm_use_iterator use_iter;
4439 gimple *use_stmt;
4440 auto_vec<gimple *> stmts;
4442 gimple *top_stmt = stmt;
4443 basic_block top_bb = gimple_bb (stmt);
4445 /* Part 1: Try to set top_stmt to the "topmost" stmt that dominates
4446    at least stmt and possibly other trunc_div/trunc_mod stmts
4447    having the same operands as stmt.  */
4449 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
4451 if (is_gimple_assign (use_stmt)
4452 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4453 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4454 && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
4455 && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
4457 if (stmt_can_throw_internal (cfun, use_stmt))
4458 continue;
4460 basic_block bb = gimple_bb (use_stmt);
4462 if (bb == top_bb)
4464 if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
4465 top_stmt = use_stmt;
4467 else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
4469 top_bb = bb;
4470 top_stmt = use_stmt;
4475 tree top_op1 = gimple_assign_rhs1 (top_stmt);
4476 tree top_op2 = gimple_assign_rhs2 (top_stmt);
4478 stmts.safe_push (top_stmt);
4479 bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
4481 /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
4482    to the stmts vector.  The 2nd loop will always add stmt to the stmts
4483    vector, since gimple_bb (top_stmt) dominates gimple_bb (stmt), so it
4484    ends up adding at least one trunc_mod_expr stmt.  */
4486 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
4488 if (is_gimple_assign (use_stmt)
4489 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4490 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4491 && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
4492 && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
4494 if (use_stmt == top_stmt
4495 || stmt_can_throw_internal (cfun, use_stmt)
4496 || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
4497 continue;
4499 stmts.safe_push (use_stmt);
4500 if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
4501 div_seen = true;
4505 if (!div_seen)
4506 return false;
4508 /* Part 3: Create libcall to internal fn DIVMOD:
4509 divmod_tmp = DIVMOD (op1, op2). */
4511 gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
4512 tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
4513 call_stmt, "divmod_tmp");
4514 gimple_call_set_lhs (call_stmt, res);
4515 /* We rejected throwing statements above. */
4516 gimple_call_set_nothrow (call_stmt, true);
4518 /* Insert the call before top_stmt. */
4519 gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
4520 gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
4522 widen_mul_stats.divmod_calls_inserted++;
4524 /* Update all statements in stmts vector:
4525 lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
4526 lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */
4528 for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
4530 tree new_rhs;
4532 switch (gimple_assign_rhs_code (use_stmt))
4534 case TRUNC_DIV_EXPR:
4535 new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
4536 break;
4538 case TRUNC_MOD_EXPR:
4539 new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
4540 break;
4542 default:
4543 gcc_unreachable ();
4546 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
4547 gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
4548 update_stmt (use_stmt);
4551 return true;
4554 /* Process a single gimple assignment STMT, which has a RSHIFT_EXPR as
4555 its rhs, and try to convert it into a MULT_HIGHPART_EXPR. The return
4556 value is true iff we converted the statement. */
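/* A minimal C-level sketch of the shape this matches, assuming a target
   with a 32x32->64 widening multiply and a highpart-multiply pattern;
   names and widths are illustrative only:

     unsigned int
     mulhi (unsigned int a, unsigned int b)
     {
       return ((unsigned long long) a * b) >> 32;
     }

   The widened multiply followed by a right shift by at least the operand
   precision becomes a single MULT_HIGHPART_EXPR when the target provides
   a [us]mul_highpart pattern.  */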
4558 static bool
4559 convert_mult_to_highpart (gassign *stmt, gimple_stmt_iterator *gsi)
4561 tree lhs = gimple_assign_lhs (stmt);
4562 tree stype = TREE_TYPE (lhs);
4563 tree sarg0 = gimple_assign_rhs1 (stmt);
4564 tree sarg1 = gimple_assign_rhs2 (stmt);
4566 if (TREE_CODE (stype) != INTEGER_TYPE
4567 || TREE_CODE (sarg1) != INTEGER_CST
4568 || TREE_CODE (sarg0) != SSA_NAME
4569 || !tree_fits_uhwi_p (sarg1)
4570 || !has_single_use (sarg0))
4571 return false;
4573 gassign *def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (sarg0));
4574 if (!def)
4575 return false;
4577 enum tree_code mcode = gimple_assign_rhs_code (def);
4578 if (mcode == NOP_EXPR)
4580 tree tmp = gimple_assign_rhs1 (def);
4581 if (TREE_CODE (tmp) != SSA_NAME || !has_single_use (tmp))
4582 return false;
4583 def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (tmp));
4584 if (!def)
4585 return false;
4586 mcode = gimple_assign_rhs_code (def);
4589 if (mcode != WIDEN_MULT_EXPR
4590 || gimple_bb (def) != gimple_bb (stmt))
4591 return false;
4592 tree mtype = TREE_TYPE (gimple_assign_lhs (def));
4593 if (TREE_CODE (mtype) != INTEGER_TYPE
4594 || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype))
4595 return false;
4597 tree mop1 = gimple_assign_rhs1 (def);
4598 tree mop2 = gimple_assign_rhs2 (def);
4599 tree optype = TREE_TYPE (mop1);
4600 bool unsignedp = TYPE_UNSIGNED (optype);
4601 unsigned int prec = TYPE_PRECISION (optype);
4603 if (unsignedp != TYPE_UNSIGNED (mtype)
4604 || TYPE_PRECISION (mtype) != 2 * prec)
4605 return false;
4607 unsigned HOST_WIDE_INT bits = tree_to_uhwi (sarg1);
4608 if (bits < prec || bits >= 2 * prec)
4609 return false;
4611 machine_mode mode = TYPE_MODE (optype);
4612 optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
4613 if (optab_handler (tab, mode) == CODE_FOR_nothing)
4614 return false;
4616 location_t loc = gimple_location (stmt);
4617 tree highpart1 = build_and_insert_binop (gsi, loc, "highparttmp",
4618 MULT_HIGHPART_EXPR, mop1, mop2);
4619 tree highpart2 = highpart1;
4620 tree ntype = optype;
4622 if (TYPE_UNSIGNED (stype) != TYPE_UNSIGNED (optype))
4624 ntype = TYPE_UNSIGNED (stype) ? unsigned_type_for (optype)
4625 : signed_type_for (optype);
4626 highpart2 = build_and_insert_cast (gsi, loc, ntype, highpart1);
4628 if (bits > prec)
4629 highpart2 = build_and_insert_binop (gsi, loc, "highparttmp",
4630 RSHIFT_EXPR, highpart2,
4631 build_int_cst (ntype, bits - prec));
4633 gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, highpart2);
4634 gsi_replace (gsi, new_stmt, true);
4636 widen_mul_stats.highpart_mults_inserted++;
4637 return true;
4641 /* Find integer multiplications where the operands are extended from
4642 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
4643 or MULT_HIGHPART_EXPR where appropriate. */
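/* A minimal C-level sketch of a widening multiply this pass targets,
   assuming 32-bit int and 64-bit long long; names are illustrative only:

     long long
     widen (int a, int b)
     {
       return (long long) a * (long long) b;
     }

   Rather than extending both operands to 64 bits and performing a full
   64x64 multiply, the MULT_EXPR is rewritten as a WIDEN_MULT_EXPR so the
   target's 32x32->64 multiply instruction can be used.  */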
4645 namespace {
4647 const pass_data pass_data_optimize_widening_mul =
4649 GIMPLE_PASS, /* type */
4650 "widening_mul", /* name */
4651 OPTGROUP_NONE, /* optinfo_flags */
4652 TV_TREE_WIDEN_MUL, /* tv_id */
4653 PROP_ssa, /* properties_required */
4654 0, /* properties_provided */
4655 0, /* properties_destroyed */
4656 0, /* todo_flags_start */
4657 TODO_update_ssa, /* todo_flags_finish */
4660 class pass_optimize_widening_mul : public gimple_opt_pass
4662 public:
4663 pass_optimize_widening_mul (gcc::context *ctxt)
4664 : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
4667 /* opt_pass methods: */
4668 virtual bool gate (function *)
4670 return flag_expensive_optimizations && optimize;
4673 virtual unsigned int execute (function *);
4675 }; // class pass_optimize_widening_mul
4677 /* Walker class to perform the transformation in reverse dominance order. */
4679 class math_opts_dom_walker : public dom_walker
4681 public:
4682 /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
4683 if walking modifies the CFG.  */
4685 math_opts_dom_walker (bool *cfg_changed_p)
4686 : dom_walker (CDI_DOMINATORS), m_last_result_set (),
4687 m_cfg_changed_p (cfg_changed_p) {}
4689 /* The actual actions performed in the walk. */
4691 virtual void after_dom_children (basic_block);
4693 /* Set of results of chains of multiply and add statement combinations that
4694 were not transformed into FMAs because of active deferring. */
4695 hash_set<tree> m_last_result_set;
4697 /* Pointer to a flag of the user that needs to be set if CFG has been
4698 modified. */
4699 bool *m_cfg_changed_p;
4702 void
4703 math_opts_dom_walker::after_dom_children (basic_block bb)
4705 gimple_stmt_iterator gsi;
4707 fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
4709 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
4711 gimple *stmt = gsi_stmt (gsi);
4712 enum tree_code code;
4714 if (is_gimple_assign (stmt))
4716 code = gimple_assign_rhs_code (stmt);
4717 switch (code)
4719 case MULT_EXPR:
4720 if (!convert_mult_to_widen (stmt, &gsi)
4721 && !convert_expand_mult_copysign (stmt, &gsi)
4722 && convert_mult_to_fma (stmt,
4723 gimple_assign_rhs1 (stmt),
4724 gimple_assign_rhs2 (stmt),
4725 &fma_state))
4727 gsi_remove (&gsi, true);
4728 release_defs (stmt);
4729 continue;
4731 match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
4732 break;
4734 case PLUS_EXPR:
4735 case MINUS_EXPR:
4736 if (!convert_plusminus_to_widen (&gsi, stmt, code))
4737 match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
4738 break;
4740 case BIT_NOT_EXPR:
4741 if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
4742 continue;
4743 break;
4745 case TRUNC_MOD_EXPR:
4746 convert_to_divmod (as_a<gassign *> (stmt));
4747 break;
4749 case RSHIFT_EXPR:
4750 convert_mult_to_highpart (as_a<gassign *> (stmt), &gsi);
4751 break;
4753 default:;
4756 else if (is_gimple_call (stmt))
4758 switch (gimple_call_combined_fn (stmt))
4760 CASE_CFN_POW:
4761 if (gimple_call_lhs (stmt)
4762 && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
4763 && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
4764 &dconst2)
4765 && convert_mult_to_fma (stmt,
4766 gimple_call_arg (stmt, 0),
4767 gimple_call_arg (stmt, 0),
4768 &fma_state))
4770 unlink_stmt_vdef (stmt);
4771 if (gsi_remove (&gsi, true)
4772 && gimple_purge_dead_eh_edges (bb))
4773 *m_cfg_changed_p = true;
4774 release_defs (stmt);
4775 continue;
4777 break;
4779 case CFN_COND_MUL:
4780 if (convert_mult_to_fma (stmt,
4781 gimple_call_arg (stmt, 1),
4782 gimple_call_arg (stmt, 2),
4783 &fma_state,
4784 gimple_call_arg (stmt, 0)))
4787 gsi_remove (&gsi, true);
4788 release_defs (stmt);
4789 continue;
4791 break;
4793 case CFN_LAST:
4794 cancel_fma_deferring (&fma_state);
4795 break;
4797 default:
4798 break;
4801 gsi_next (&gsi);
4803 if (fma_state.m_deferring_p
4804 && fma_state.m_initial_phi)
4806 gcc_checking_assert (fma_state.m_last_result);
4807 if (!last_fma_candidate_feeds_initial_phi (&fma_state,
4808 &m_last_result_set))
4809 cancel_fma_deferring (&fma_state);
4810 else
4811 m_last_result_set.add (fma_state.m_last_result);
4816 unsigned int
4817 pass_optimize_widening_mul::execute (function *fun)
4819 bool cfg_changed = false;
4821 memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
4822 calculate_dominance_info (CDI_DOMINATORS);
4823 renumber_gimple_stmt_uids (cfun);
4825 math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4827 statistics_counter_event (fun, "widening multiplications inserted",
4828 widen_mul_stats.widen_mults_inserted);
4829 statistics_counter_event (fun, "widening maccs inserted",
4830 widen_mul_stats.maccs_inserted);
4831 statistics_counter_event (fun, "fused multiply-adds inserted",
4832 widen_mul_stats.fmas_inserted);
4833 statistics_counter_event (fun, "divmod calls inserted",
4834 widen_mul_stats.divmod_calls_inserted);
4835 statistics_counter_event (fun, "highpart multiplications inserted",
4836 widen_mul_stats.highpart_mults_inserted);
4838 return cfg_changed ? TODO_cleanup_cfg : 0;
4841 } // anon namespace
4843 gimple_opt_pass *
4844 make_pass_optimize_widening_mul (gcc::context *ctxt)
4846 return new pass_optimize_widening_mul (ctxt);