gcc/tree-ssa-math-opts.cc

   1 /* Global, SSA-based optimizations using mathematical identities.
   2    Copyright (C) 2005-2023 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
/* The first mini-pass in this file tries to CSE reciprocal
   operations.  These are common in sequences such as this one:
  22
  23         modulus = sqrt(x*x + y*y + z*z);
  24         x = x / modulus;
  25         y = y / modulus;
  26         z = z / modulus;
  27
  28    that can be optimized to
  29
  30         modulus = sqrt(x*x + y*y + z*z);
  31         rmodulus = 1.0 / modulus;
  32         x = x * rmodulus;
  33         y = y * rmodulus;
  34         z = z * rmodulus;
  35
  36    We do this for loop invariant divisors, and with this pass whenever
  37    we notice that a division has the same divisor multiple times.
  38
  39    Of course, like in PRE, we don't insert a division if a dominator
  40    already has one.  However, this cannot be done as an extension of
  41    PRE for several reasons.
  42
  43    First of all, with some experiments it was found out that the
  44    transformation is not always useful if there are only two divisions
  45    by the same divisor.  This is probably because modern processors
  46    can pipeline the divisions; on older, in-order processors it should
  47    still be effective to optimize two divisions by the same number.
  48    We make this a param, and it shall be called N in the remainder of
  49    this comment.
  50
  51    Second, if trapping math is active, we have less freedom on where
  52    to insert divisions: we can only do so in basic blocks that already
  53    contain one.  (If divisions don't trap, instead, we can insert
  54    divisions elsewhere, which will be in blocks that are common dominators
  55    of those that have the division).
  56
   We really don't want to compute the reciprocal unless a division will
   be found.  To do this, we won't insert the division in a basic block
   that has fewer than N divisions *post-dominating* it.
  60
   The algorithm constructs a subset of the dominator tree, holding the
   blocks containing the divisions and the common dominators to them,
   and walks it twice.  The first walk is in post-order, and it annotates
  64    each block with the number of divisions that post-dominate it: this
  65    gives information on where divisions can be inserted profitably.
  66    The second walk is in pre-order, and it inserts divisions as explained
  67    above, and replaces divisions by multiplications.
  68
  69    In the best case, the cost of the pass is O(n_statements).  In the
  70    worst-case, the cost is due to creating the dominator tree subset,
  71    with a cost of O(n_basic_blocks ^ 2); however this can only happen
  72    for n_statements / n_basic_blocks statements.  So, the amortized cost
  73    of creating the dominator tree subset is O(n_basic_blocks) and the
  74    worst-case cost of the pass is O(n_statements * n_basic_blocks).
  75
  76    More practically, the cost will be small because there are few
  77    divisions, and they tend to be in the same basic block, so insert_bb
  78    is called very few times.
  79
  80    If we did this using domwalk.cc, an efficient implementation would have
  81    to work on all the variables in a single pass, because we could not
  82    work on just a subset of the dominator tree, as we do now, and the
  83    cost would also be something like O(n_statements * n_basic_blocks).
  84    The data structures would be more complex in order to work on all the
  85    variables in a single pass.  */
  86
  87 #include "config.h"
  88 #include "system.h"
  89 #include "coretypes.h"
  90 #include "backend.h"
  91 #include "target.h"
  92 #include "rtl.h"
  93 #include "tree.h"
  94 #include "gimple.h"
  95 #include "predict.h"
  96 #include "alloc-pool.h"
  97 #include "tree-pass.h"
  98 #include "ssa.h"
  99 #include "optabs-tree.h"
 100 #include "gimple-pretty-print.h"
 101 #include "alias.h"
 102 #include "fold-const.h"
 103 #include "gimple-iterator.h"
 104 #include "gimple-fold.h"
 105 #include "gimplify.h"
 106 #include "gimplify-me.h"
 107 #include "stor-layout.h"
 108 #include "tree-cfg.h"
 109 #include "tree-dfa.h"
 110 #include "tree-ssa.h"
 111 #include "builtins.h"
 112 #include "internal-fn.h"
 113 #include "case-cfn-macros.h"
 114 #include "optabs-libfuncs.h"
 115 #include "tree-eh.h"
 116 #include "targhooks.h"
 117 #include "domwalk.h"
 118 #include "tree-ssa-math-opts.h"
 119
 120 /* This structure represents one basic block that either computes a
 121    division, or is a common dominator for basic block that compute a
 122    division.  */
 123 struct occurrence {
 124   /* The basic block represented by this structure.  */
 125   basic_block bb = basic_block();
 126
 127   /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
 128      inserted in BB.  */
 129   tree recip_def = tree();
 130
 131   /* If non-NULL, the SSA_NAME holding the definition for a squared
 132      reciprocal inserted in BB.  */
 133   tree square_recip_def = tree();
 134
 135   /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
 136      was inserted in BB.  */
 137   gimple *recip_def_stmt = nullptr;
 138
 139   /* Pointer to a list of "struct occurrence"s for blocks dominated
 140      by BB.  */
 141   struct occurrence *children = nullptr;
 142
 143   /* Pointer to the next "struct occurrence"s in the list of blocks
 144      sharing a common dominator.  */
 145   struct occurrence *next = nullptr;
 146
 147   /* The number of divisions that are in BB before compute_merit.  The
 148      number of divisions that are in BB or post-dominate it after
 149      compute_merit.  */
 150   int num_divisions = 0;
 151
 152   /* True if the basic block has a division, false if it is a common
 153      dominator for basic blocks that do.  If it is false and trapping
 154      math is active, BB is not a candidate for inserting a reciprocal.  */
 155   bool bb_has_division = false;
 156
 157   /* Construct a struct occurrence for basic block BB, and whose
 158      children list is headed by CHILDREN.  */
 159   occurrence (basic_block bb, struct occurrence *children)
 160   : bb (bb), children (children)
 161   {
 162     bb->aux = this;
 163   }
 164
 165   /* Destroy a struct occurrence and remove it from its basic block.  */
 166   ~occurrence ()
 167   {
 168     bb->aux = nullptr;
 169   }
 170
 171   /* Allocate memory for a struct occurrence from OCC_POOL.  */
 172   static void* operator new (size_t);
 173
 174   /* Return memory for a struct occurrence to OCC_POOL.  */
 175   static void operator delete (void*, size_t);
 176 };
 177
/* Counters for the reciprocal transformations.  RDIVS_INSERTED is
   incremented by insert_reciprocals each time it emits a new 1.0/X
   statement.  */
static struct
{
  /* Number of 1.0/X ops inserted.  */
  int rdivs_inserted;

  /* Number of 1.0/FUNC ops inserted.  */
  int rfuncs_inserted;
} reciprocal_stats;
 186
/* Counters for inserted cexpi calls and removed conversions.  */
static struct
{
  /* Number of cexpi calls inserted.  */
  int inserted;

  /* Number of conversions removed.  */
  int conv_removed;

} sincos_stats;
 196
/* Counters for the multiplication-related operations inserted:
   widening multiplies, integer multiply-and-accumulates, floating-point
   fused multiply-adds, divmod calls and highpart multiplies.  */
static struct
{
  /* Number of widening multiplication ops inserted.  */
  int widen_mults_inserted;

  /* Number of integer multiply-and-accumulate ops inserted.  */
  int maccs_inserted;

  /* Number of fp fused multiply-add ops inserted.  */
  int fmas_inserted;

  /* Number of divmod calls inserted.  */
  int divmod_calls_inserted;

  /* Number of highpart multiplication ops inserted.  */
  int highpart_mults_inserted;
} widen_mul_stats;
 214
/* The instance of "struct occurrence" representing the highest
   interesting block in the dominator tree.  It heads the list that
   register_division_in grows through insert_bb, and it is reset to
   NULL once execute_cse_reciprocals_1 has freed that list.  */
static struct occurrence *occ_head;

/* Allocation pool for getting instances of "struct occurrence".
   occurrence::operator new and occurrence::operator delete dereference
   it, so it must be non-NULL whenever an occurrence is created or
   destroyed.  */
static object_allocator<occurrence> *occ_pool;
 221
 222 void* occurrence::operator new (size_t n)
 223 {
 224   gcc_assert (n == sizeof(occurrence));
 225   return occ_pool->allocate_raw ();
 226 }
 227
 228 void occurrence::operator delete (void *occ, size_t n)
 229 {
 230   gcc_assert (n == sizeof(occurrence));
 231   occ_pool->remove_raw (occ);
 232 }
 233
/* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
   list of "struct occurrence"s, one per basic block, having IDOM as
   their common dominator.

   We try to insert NEW_OCC as deep as possible in the tree, and we also
   insert any other block that is a common dominator for NEW_OCC's block
   (called BB below) and one block already in the tree.

   The list is edited in place through P_HEAD.  When an intermediate
   dominator has to be added, a fresh "struct occurrence" is allocated
   for it and it takes NEW_OCC's place for the remainder of the walk.  */

static void
insert_bb (struct occurrence *new_occ, basic_block idom,
           struct occurrence **p_head)
{
  struct occurrence *occ, **p_occ;

  /* P_OCC always addresses the link that leads to OCC, so OCC can be
     unlinked simply by storing into *P_OCC.  The loop has no increment:
     each branch below either unlinks OCC (which already exposes the
     next element in *P_OCC), returns, or advances P_OCC itself.  */
  for (p_occ = p_head; (occ = *p_occ) != NULL; )
    {
      basic_block bb = new_occ->bb, occ_bb = occ->bb;
      basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
      if (dom == bb)
        {
          /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
             from its list.  */
          *p_occ = occ->next;
          occ->next = new_occ->children;
          new_occ->children = occ;

          /* Try the next block (it may as well be dominated by BB).  */
        }

      else if (dom == occ_bb)
        {
          /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
          insert_bb (new_occ, dom, &occ->children);
          return;
        }

      else if (dom != idom)
        {
          /* DOM must not already be represented in the tree.  */
          gcc_assert (!dom->aux);

          /* There is a dominator between IDOM and BB, add it and make
             two children out of NEW_OCC and OCC.  First, remove OCC from
             its list.  */
          *p_occ = occ->next;
          new_occ->next = occ;
          occ->next = NULL;

          /* None of the previous blocks has DOM as a dominator: if we tail
             recursed, we would reexamine them uselessly. Just switch BB with
             DOM, and go on looking for blocks dominated by DOM.  */
          new_occ = new occurrence (dom, new_occ);
        }

      else
        {
          /* Nothing special, go on with the next element.  */
          p_occ = &occ->next;
        }
    }

  /* No deeper place was found.  Link NEW_OCC (which by now may stand for
     an intermediate dominator created above) at the head of the list
     that P_HEAD points to.  */
  new_occ->next = *p_head;
  *p_head = new_occ;
}
 298
 299 /* Register that we found a division in BB.
 300    IMPORTANCE is a measure of how much weighting to give
 301    that division.  Use IMPORTANCE = 2 to register a single
 302    division.  If the division is going to be found multiple
 303    times use 1 (as it is with squares).  */
 304
 305 static inline void
 306 register_division_in (basic_block bb, int importance)
 307 {
 308   struct occurrence *occ;
 309
 310   occ = (struct occurrence *) bb->aux;
 311   if (!occ)
 312     {
 313       occ = new occurrence (bb, NULL);
 314       insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
 315     }
 316
 317   occ->bb_has_division = true;
 318   occ->num_divisions += importance;
 319 }
 320
 321
 322 /* Compute the number of divisions that postdominate each block in OCC and
 323    its children.  */
 324
 325 static void
 326 compute_merit (struct occurrence *occ)
 327 {
 328   struct occurrence *occ_child;
 329   basic_block dom = occ->bb;
 330
 331   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
 332     {
 333       basic_block bb;
 334       if (occ_child->children)
 335         compute_merit (occ_child);
 336
 337       if (flag_exceptions)
 338         bb = single_noncomplex_succ (dom);
 339       else
 340         bb = dom;
 341
 342       if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
 343         occ->num_divisions += occ_child->num_divisions;
 344     }
 345 }
 346
 347
 348 /* Return whether USE_STMT is a floating-point division by DEF.  */
 349 static inline bool
 350 is_division_by (gimple *use_stmt, tree def)
 351 {
 352   return is_gimple_assign (use_stmt)
 353          && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
 354          && gimple_assign_rhs2 (use_stmt) == def
 355          /* Do not recognize x / x as valid division, as we are getting
 356             confused later by replacing all immediate uses x in such
 357             a stmt.  */
 358          && gimple_assign_rhs1 (use_stmt) != def
 359          && !stmt_can_throw_internal (cfun, use_stmt);
 360 }
 361
 362 /* Return TRUE if USE_STMT is a multiplication of DEF by A.  */
 363 static inline bool
 364 is_mult_by (gimple *use_stmt, tree def, tree a)
 365 {
 366   if (gimple_code (use_stmt) == GIMPLE_ASSIGN
 367       && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
 368     {
 369       tree op0 = gimple_assign_rhs1 (use_stmt);
 370       tree op1 = gimple_assign_rhs2 (use_stmt);
 371
 372       return (op0 == def && op1 == a)
 373               || (op0 == a && op1 == def);
 374     }
 375   return 0;
 376 }
 377
 378 /* Return whether USE_STMT is DEF * DEF.  */
 379 static inline bool
 380 is_square_of (gimple *use_stmt, tree def)
 381 {
 382   return is_mult_by (use_stmt, def, def);
 383 }
 384
 385 /* Return whether USE_STMT is a floating-point division by
 386    DEF * DEF.  */
 387 static inline bool
 388 is_division_by_square (gimple *use_stmt, tree def)
 389 {
 390   if (gimple_code (use_stmt) == GIMPLE_ASSIGN
 391       && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
 392       && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
 393       && !stmt_can_throw_internal (cfun, use_stmt))
 394     {
 395       tree denominator = gimple_assign_rhs2 (use_stmt);
 396       if (TREE_CODE (denominator) == SSA_NAME)
 397         return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
 398     }
 399   return 0;
 400 }
 401
/* Walk the subset of the dominator tree rooted at OCC, setting the
   RECIP_DEF field to a definition of 1.0 / DEF that can be used in
   the given basic block.  The field may be left NULL, of course,
   if it is not possible or profitable to do the optimization.

   DEF_GSI is an iterator pointing at the statement defining DEF.  It
   may be NULL, in which case a reciprocal is never placed right after
   the definition.
   If RECIP_DEF is set, a dominator already has a computation that can
   be used; SQUARE_RECIP_DEF is the squared reciprocal inherited the
   same way, if any.

   If SHOULD_INSERT_SQUARE_RECIP is set, then this also inserts
   the square of the reciprocal immediately after the definition
   of the reciprocal.

   THRESHOLD is the minimum number of divisions that must be counted
   for OCC's block before a reciprocal is inserted there.  */

static void
insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
                    tree def, tree recip_def, tree square_recip_def,
                    int should_insert_square_recip, int threshold)
{
  tree type;
  gassign *new_stmt, *new_square_stmt;
  gimple_stmt_iterator gsi;
  struct occurrence *occ_child;

  /* Insert here only if no dominator provided a reciprocal already, the
     block is an allowed insertion point, and enough divisions benefit.  */
  if (!recip_def
      && (occ->bb_has_division || !flag_trapping_math)
      /* Divide by two as all divisions are counted twice in
         the costing loop.  */
      && occ->num_divisions / 2 >= threshold)
    {
      /* Make a variable with the replacement and substitute it.  */
      type = TREE_TYPE (def);
      recip_def = create_tmp_reg (type, "reciptmp");
      new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
                                      build_one_cst (type), def);

      /* NEW_SQUARE_STMT is only built, and only used below, when the
         squared reciprocal was requested.  */
      if (should_insert_square_recip)
        {
          square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
          new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
                                                 recip_def, recip_def);
        }

      if (occ->bb_has_division)
        {
          /* Case 1: insert before an existing division.  */
          gsi = gsi_after_labels (occ->bb);
          while (!gsi_end_p (gsi)
                 && (!is_division_by (gsi_stmt (gsi), def))
                 && (!is_division_by_square (gsi_stmt (gsi), def)))
            gsi_next (&gsi);

          gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
          if (should_insert_square_recip)
            gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
        }
      else if (def_gsi && occ->bb == gsi_bb (*def_gsi))
        {
          /* Case 2: insert right after the definition.  Note that this will
             never happen if the definition statement can throw, because in
             that case the sole successor of the statement's basic block will
             dominate all the uses as well.  */
          gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
          if (should_insert_square_recip)
            gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
        }
      else
        {
          /* Case 3: insert in a basic block not containing defs/uses.  */
          gsi = gsi_after_labels (occ->bb);
          gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
          if (should_insert_square_recip)
            gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
        }

      reciprocal_stats.rdivs_inserted++;

      /* Remembered so that replace_reciprocal leaves this statement
         alone.  */
      occ->recip_def_stmt = new_stmt;
    }

  /* Record what is available in this block (either inherited from a
     dominator or just created) and hand it down to the children.  */
  occ->recip_def = recip_def;
  occ->square_recip_def = square_recip_def;
  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
    insert_reciprocals (def_gsi, occ_child, def, recip_def,
                        square_recip_def, should_insert_square_recip,
                        threshold);
}
 488
 489 /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
 490    Take as argument the use for (x * x).  */
 491 static inline void
 492 replace_reciprocal_squares (use_operand_p use_p)
 493 {
 494   gimple *use_stmt = USE_STMT (use_p);
 495   basic_block bb = gimple_bb (use_stmt);
 496   struct occurrence *occ = (struct occurrence *) bb->aux;
 497
 498   if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
 499       && occ->recip_def)
 500     {
 501       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
 502       gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
 503       gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
 504       SET_USE (use_p, occ->square_recip_def);
 505       fold_stmt_inplace (&gsi);
 506       update_stmt (use_stmt);
 507     }
 508 }
 509
 510
 511 /* Replace the division at USE_P with a multiplication by the reciprocal, if
 512    possible.  */
 513
 514 static inline void
 515 replace_reciprocal (use_operand_p use_p)
 516 {
 517   gimple *use_stmt = USE_STMT (use_p);
 518   basic_block bb = gimple_bb (use_stmt);
 519   struct occurrence *occ = (struct occurrence *) bb->aux;
 520
 521   if (optimize_bb_for_speed_p (bb)
 522       && occ->recip_def && use_stmt != occ->recip_def_stmt)
 523     {
 524       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
 525       gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
 526       SET_USE (use_p, occ->recip_def);
 527       fold_stmt_inplace (&gsi);
 528       update_stmt (use_stmt);
 529     }
 530 }
 531
 532
 533 /* Free OCC and return one more "struct occurrence" to be freed.  */
 534
 535 static struct occurrence *
 536 free_bb (struct occurrence *occ)
 537 {
 538   struct occurrence *child, *next;
 539
 540   /* First get the two pointers hanging off OCC.  */
 541   next = occ->next;
 542   child = occ->children;
 543   delete occ;
 544
 545   /* Now ensure that we don't recurse unless it is necessary.  */
 546   if (!child)
 547     return next;
 548   else
 549     {
 550       while (next)
 551         next = free_bb (next);
 552
 553       return child;
 554     }
 555 }
 556
/* Transform sequences like
   t = sqrt (a)
   x = 1.0 / t;
   r1 = x * x;
   r2 = a * x;
   into:
   t = sqrt (a)
   r1 = 1.0 / a;
   r2 = t;
   x = r1 * r2;
   depending on the uses of x, r1, r2.  This removes one multiplication and
   allows the sqrt and division operations to execute in parallel.
   DEF_GSI is the gsi of the initial division by sqrt that defines
   DEF (x in the example above).  When the division is replaced or a
   statement is added after it, *DEF_GSI is updated to point at the new
   statement.  */

static void
optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
{
  gimple *use_stmt;
  imm_use_iterator use_iter;
  gimple *stmt = gsi_stmt (*def_gsi);
  tree x = def;
  tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
  tree div_rhs1 = gimple_assign_rhs1 (stmt);

  /* Only handle a division of the real constant 1.0 by an SSA name.  */
  if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
      || TREE_CODE (div_rhs1) != REAL_CST
      || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
    return;

  /* The divisor has to be the result of a call...  */
  gcall *sqrt_stmt
    = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));

  if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
    return;

  /* ... and that call has to be one of the sqrt functions.  */
  switch (gimple_call_combined_fn (sqrt_stmt))
    {
    CASE_CFN_SQRT:
    CASE_CFN_SQRT_FN:
      break;

    default:
      return;
    }
  tree a = gimple_call_arg (sqrt_stmt, 0);

  /* We have 'a' and 'x'.  Now analyze the uses of 'x'.  */

  /* Statements that use x in x * x.  */
  auto_vec<gimple *> sqr_stmts;
  /* Statements that use x in a * x.  */
  auto_vec<gimple *> mult_stmts;
  bool has_other_use = false;
  /* Set when one of the collected multiplications is in the same basic
     block as the division.  */
  bool mult_on_main_path = false;

  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
    {
      /* Debug statements neither count as uses nor get rewritten.  */
      if (is_gimple_debug (use_stmt))
        continue;
      if (is_square_of (use_stmt, x))
        {
          sqr_stmts.safe_push (use_stmt);
          if (gimple_bb (use_stmt) == gimple_bb (stmt))
            mult_on_main_path = true;
        }
      else if (is_mult_by (use_stmt, x, a))
        {
          mult_stmts.safe_push (use_stmt);
          if (gimple_bb (use_stmt) == gimple_bb (stmt))
            mult_on_main_path = true;
        }
      else
        has_other_use = true;
    }

  /* In the x * x and a * x cases we just rewire stmt operands or
     remove multiplications.  In the has_other_use case we introduce
     a multiplication so make sure we don't introduce a multiplication
     on a path where there was none.  */
  if (has_other_use && !mult_on_main_path)
    return;

  /* Nothing to rewrite.  */
  if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
    return;

  /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
     to be able to compose it from the sqr and mult cases.  */
  if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
    return;

  if (dump_file)
    {
      fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
      print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
      print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
      fprintf (dump_file, "\n");
    }

  /* The division is a candidate for removal only while nothing else
     needs x; the x * x handling below clears this again because it
     replaces the division itself.  */
  bool delete_div = !has_other_use;
  tree sqr_ssa_name = NULL_TREE;
  if (!sqr_stmts.is_empty ())
    {
      /* r1 = x * x.  Transform the original
         x = 1.0 / t
         into
         tmp1 = 1.0 / a
         r1 = tmp1.  */

      sqr_ssa_name
        = make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");

      if (dump_file)
        {
          fprintf (dump_file, "Replacing original division\n");
          print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
          fprintf (dump_file, "with new division\n");
        }
      /* From here on STMT is the new division by A; the old statement
         is removed and *DEF_GSI is re-pointed at its replacement.  */
      stmt
        = gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
                               gimple_assign_rhs1 (stmt), a);
      gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
      gsi_remove (def_gsi, true);
      *def_gsi = gsi_for_stmt (stmt);
      fold_stmt_inplace (def_gsi);
      update_stmt (stmt);

      if (dump_file)
        print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);

      delete_div = false;
      gimple *sqr_stmt;
      unsigned int i;
      /* Every former x * x now simply copies the new quotient.  */
      FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
        {
          gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
          gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
          update_stmt (sqr_stmt);
        }
    }
  if (!mult_stmts.is_empty ())
    {
      /* r2 = a * x.  Transform this into:
         r2 = t (The original sqrt (a)).  */
      unsigned int i;
      gimple *mult_stmt = NULL;
      FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
        {
          gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);

          if (dump_file)
            {
              fprintf (dump_file, "Replacing squaring multiplication\n");
              print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
              fprintf (dump_file, "with assignment\n");
            }
          gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
          fold_stmt_inplace (&gsi2);
          update_stmt (mult_stmt);
          if (dump_file)
            print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
      }
    }

  if (has_other_use)
    {
      /* Using the two temporaries tmp1, tmp2 from above
         the original x is now:
         x = tmp1 * tmp2.  */
      gcc_assert (orig_sqrt_ssa_name);
      /* Guaranteed by the early return above: with other uses present
         both the x * x and the a * x lists are non-empty.  */
      gcc_assert (sqr_ssa_name);

      gimple *new_stmt
        = gimple_build_assign (x, MULT_EXPR,
                               orig_sqrt_ssa_name, sqr_ssa_name);
      gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
      update_stmt (stmt);
    }
  else if (delete_div)
    {
      /* Remove the original division.  */
      gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
      gsi_remove (&gsi2, true);
      release_defs (stmt);
    }
  else
    /* The division defining x was replaced above and no statement
       defines x any more, so only the name is left to release.  */
    release_ssa_name (x);
}
 745
/* Look for floating-point divisions among DEF's uses, and try to
   replace them by multiplications with the reciprocal.  Add
   as many statements computing the reciprocal as needed.

   DEF must be a GIMPLE register of a floating-point type.

   DEF_GSI is passed on to insert_reciprocals as the iterator for the
   statement defining DEF.

   The occurrence tree built while scanning the uses is always freed
   before returning, and OCC_HEAD is reset to NULL.  */

static void
execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
{
  use_operand_p use_p, square_use_p;
  imm_use_iterator use_iter, square_use_iter;
  tree square_def;
  struct occurrence *occ;
  /* Number of divisions by DEF.  */
  int count = 0;
  int threshold;
  /* Number of divisions by DEF * DEF (after the halving below).  */
  int square_recip_count = 0;
  /* When DEF is itself a square x * x, number of divisions by x.  */
  int sqrt_recip_count = 0;

  gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
  threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));

  /* If DEF is a square (x * x), count the number of divisions by x.
     If there are more divisions by x than by (DEF * DEF), prefer to optimize
     the reciprocal of x instead of DEF.  This improves cases like:
       def = x * x
       t0 = a / def
       t1 = b / def
       t2 = c / x
     Reciprocal optimization of x results in 1 division rather than 2 or 3.  */
  gimple *def_stmt = SSA_NAME_DEF_STMT (def);

  if (is_gimple_assign (def_stmt)
      && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
      && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
    {
      tree op0 = gimple_assign_rhs1 (def_stmt);

      FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
        {
          gimple *use_stmt = USE_STMT (use_p);
          if (is_division_by (use_stmt, op0))
            sqrt_recip_count++;
        }
    }

  /* Register every division by DEF, and every division by a square of
     DEF, in the occurrence tree.  */
  FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_division_by (use_stmt, def))
        {
          register_division_in (gimple_bb (use_stmt), 2);
          count++;
        }

      if (is_square_of (use_stmt, def))
        {
          square_def = gimple_assign_lhs (use_stmt);
          FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
            {
              gimple *square_use_stmt = USE_STMT (square_use_p);
              if (is_division_by (square_use_stmt, square_def))
                {
                  /* This is executed twice for each division by a square.  */
                  register_division_in (gimple_bb (square_use_stmt), 1);
                  square_recip_count++;
                }
            }
        }
    }

  /* Square reciprocals were counted twice above.  */
  square_recip_count /= 2;

  /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x).  */
  if (sqrt_recip_count > square_recip_count)
    goto out;

  /* Do the expensive part only if we can hope to optimize something.  */
  if (count + square_recip_count >= threshold && count >= 1)
    {
      gimple *use_stmt;
      /* First decide where reciprocals go and insert them...  */
      for (occ = occ_head; occ; occ = occ->next)
        {
          compute_merit (occ);
          insert_reciprocals (def_gsi, occ, def, NULL, NULL,
                              square_recip_count, threshold);
        }

      /* ... then rewrite the divisions to use them.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
        {
          if (is_division_by (use_stmt, def))
            {
              FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
                replace_reciprocal (use_p);
            }
          else if (square_recip_count > 0 && is_square_of (use_stmt, def))
            {
              FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
                {
                  /* Find all uses of the square that are divisions and
                   * replace them by multiplications with the inverse.  */
                  imm_use_iterator square_iterator;
                  gimple *powmult_use_stmt = USE_STMT (use_p);
                  tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);

                  FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
                                         square_iterator, powmult_def_name)
                    FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
                      {
                        /* Note that this declaration shadows the outer
                           POWMULT_USE_STMT.  */
                        gimple *powmult_use_stmt = USE_STMT (square_use_p);
                        if (is_division_by (powmult_use_stmt, powmult_def_name))
                          replace_reciprocal_squares (square_use_p);
                      }
                }
            }
        }
    }

out:
  /* Tear down the occurrence tree; free_bb hands back the next node that
     still needs freeing until none is left.  */
  for (occ = occ_head; occ; )
    occ = free_bb (occ);

  occ_head = NULL;
}
 871
 872 /* Return the internal function computing the reciprocal of CALL's value
 873    (only sqrt -> IFN_RSQRT), or IFN_LAST if unknown or unsupported.  */
 874
 875 internal_fn
 876 internal_fn_reciprocal (gcall *call)
 877 {
 878   internal_fn recip_ifn = IFN_LAST;
 879
 880   switch (gimple_call_combined_fn (call))
 881     {
 882     CASE_CFN_SQRT:
 883     CASE_CFN_SQRT_FN:
 884       recip_ifn = IFN_RSQRT;
 885       break;
 886     default:
 887       break;
 888     }
 889   if (recip_ifn == IFN_LAST)
 890     return IFN_LAST;
 891
 892   tree_pair types = direct_internal_fn_types (recip_ifn, call);
 893   if (direct_internal_fn_supported_p (recip_ifn, types, OPTIMIZE_FOR_SPEED))
 894     return recip_ifn;
 895   return IFN_LAST;
 896 }
 897
 898 /* Go through all the floating-point SSA_NAMEs, and call
 899    execute_cse_reciprocals_1 on each of them.  */
 900 namespace {
 901
     /* Descriptor for the reciprocal CSE pass: a GIMPLE pass named "recip",
        timed under TV_TREE_RECIP, that requires SSA form and requests
        TODO_update_ssa once it has finished.  */
 902 const pass_data pass_data_cse_reciprocals =
 903 {
 904   GIMPLE_PASS, /* type */
 905   "recip", /* name */
 906   OPTGROUP_NONE, /* optinfo_flags */
 907   TV_TREE_RECIP, /* tv_id */
 908   PROP_ssa, /* properties_required */
 909   0, /* properties_provided */
 910   0, /* properties_destroyed */
 911   0, /* todo_flags_start */
 912   TODO_update_ssa, /* todo_flags_finish */
 913 };
 914
 915 class pass_cse_reciprocals : public gimple_opt_pass
 916 {
 917 public:
 918   pass_cse_reciprocals (gcc::context *ctxt)
 919     : gimple_opt_pass (pass_data_cse_reciprocals, ctxt) {}
 920
 921   /* opt_pass methods.  The gate lets the pass run only when optimizing
 922      and flag_reciprocal_math is set.  */
 923   unsigned int execute (function *) final override;
 924   bool gate (function *) final override
 925   {
 926     const bool optimizing = optimize != 0;
 927     return optimizing && flag_reciprocal_math != 0;
 928   }
 929 }; // class pass_cse_reciprocals
 930
     /* Run the reciprocal CSE pass over FUN.  execute_cse_reciprocals_1 is
        invoked on the default definition of every floating-point register
        argument, on every floating-point PHI result and on every
        floating-point SSA name defined by a statement.  Blocks that are not
        optimized for size are then scanned a second time to turn
        a / func (b) into a * rfunc (b).  Always returns 0.  */
 931 unsigned int
 932 pass_cse_reciprocals::execute (function *fun)
 933 {
 934   basic_block bb;
 935   tree arg;
 936
       /* Pool of occurrence objects; it is deleted again before returning.  */
 937   occ_pool = new object_allocator<occurrence> ("dominators for recip");
 938
 939   memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
 940   calculate_dominance_info (CDI_DOMINATORS);
 941   calculate_dominance_info (CDI_POST_DOMINATORS);
 942
       /* With checking enabled, insist that no block has AUX set on entry.  */
 943   if (flag_checking)
 944     FOR_EACH_BB_FN (bb, fun)
 945       gcc_assert (!bb->aux);
 946
       /* Floating-point arguments held in registers are handled through
          their default definitions, where one exists.  */
 947   for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
 948     if (FLOAT_TYPE_P (TREE_TYPE (arg))
 949         && is_gimple_reg (arg))
 950       {
 951         tree name = ssa_default_def (fun, arg);
 952         if (name)
 953           execute_cse_reciprocals_1 (NULL, name);
 954       }
 955
 956   FOR_EACH_BB_FN (bb, fun)
 957     {
 958       tree def;
 959
           /* Floating-point, non-virtual PHI results.  */
 960       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
 961            gsi_next (&gsi))
 962         {
 963           gphi *phi = gsi.phi ();
 964           def = PHI_RESULT (phi);
 965           if (! virtual_operand_p (def)
 966               && FLOAT_TYPE_P (TREE_TYPE (def)))
 967             execute_cse_reciprocals_1 (NULL, def);
 968         }
 969
           /* Floating-point SSA names defined by ordinary statements.  */
 970       for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
 971            gsi_next (&gsi))
 972         {
 973           gimple *stmt = gsi_stmt (gsi);
 974
 975           if (gimple_has_lhs (stmt)
 976               && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
 977               && FLOAT_TYPE_P (TREE_TYPE (def))
 978               && TREE_CODE (def) == SSA_NAME)
 979             {
 980               execute_cse_reciprocals_1 (&gsi, def);
                   /* Re-read the statement at GSI after the call above.
                      NOTE(review): presumably the call may leave GSI at a
                      different statement -- confirm.  */
 981               stmt = gsi_stmt (gsi);
                   /* Under -funsafe-math-optimizations, a non-throwing
                      division that still defines DEF is also offered to
                      optimize_recip_sqrt.  */
 982               if (flag_unsafe_math_optimizations
 983                   && is_gimple_assign (stmt)
 984                   && gimple_assign_lhs (stmt) == def
 985                   && !stmt_can_throw_internal (cfun, stmt)
 986                   && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
 987                 optimize_recip_sqrt (&gsi, def);
 988             }
 989         }
 990
           /* The function-call rewrite below is not done for blocks that
              are optimized for size.  */
 991       if (optimize_bb_for_size_p (bb))
 992         continue;
 993
 994       /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
 995       for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
 996            gsi_next (&gsi))
 997         {
 998           gimple *stmt = gsi_stmt (gsi);
 999
1000           if (is_gimple_assign (stmt)
1001               && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
1002             {
1003               tree arg1 = gimple_assign_rhs2 (stmt);
1004               gimple *stmt1;
1005
                   /* Only an SSA divisor can be traced to a defining call.  */
1006               if (TREE_CODE (arg1) != SSA_NAME)
1007                 continue;
1008
1009               stmt1 = SSA_NAME_DEF_STMT (arg1);
1010
1011               if (is_gimple_call (stmt1)
1012                   && gimple_call_lhs (stmt1))
1013                 {
1014                   bool fail;
1015                   imm_use_iterator ui;
1016                   use_operand_p use_p;
1017                   tree fndecl = NULL_TREE;
1018
                       /* Prefer an internal reciprocal function.  Failing
                          that, ask the target for the reciprocal of a
                          BUILT_IN_MD builtin; FNDECL stays NULL_TREE when
                          the internal function is used.  */
1019                   gcall *call = as_a <gcall *> (stmt1);
1020                   internal_fn ifn = internal_fn_reciprocal (call);
1021                   if (ifn == IFN_LAST)
1022                     {
1023                       fndecl = gimple_call_fndecl (call);
1024                       if (!fndecl
1025                           || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
1026                         continue;
1027                       fndecl = targetm.builtin_reciprocal (fndecl);
1028                       if (!fndecl)
1029                         continue;
1030                     }
1031
1032                   /* Check that all uses of the SSA name are divisions,
1033                      otherwise replacing the defining statement will do
1034                      the wrong thing.  */
1035                   fail = false;
1036                   FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
1037                     {
1038                       gimple *stmt2 = USE_STMT (use_p);
1039                       if (is_gimple_debug (stmt2))
1040                         continue;
                           /* ARG1 must be the divisor and must not also be
                              the dividend.  */
1041                       if (!is_gimple_assign (stmt2)
1042                           || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
1043                           || gimple_assign_rhs1 (stmt2) == arg1
1044                           || gimple_assign_rhs2 (stmt2) != arg1)
1045                         {
1046                           fail = true;
1047                           break;
1048                         }
1049                     }
1050                   if (fail)
1051                     continue;
1052
1053                   gimple_replace_ssa_lhs (call, arg1);
                       /* When the existing call and its replacement differ in
                          kind (internal function versus function decl), a new
                          call statement is built from the old arguments and
                          substituted; otherwise the callee is changed in
                          place.  */
1054                   if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
1055                     {
1056                       auto_vec<tree, 4> args;
1057                       for (unsigned int i = 0;
1058                            i < gimple_call_num_args (call); i++)
1059                         args.safe_push (gimple_call_arg (call, i));
1060                       gcall *stmt2;
1061                       if (ifn == IFN_LAST)
1062                         stmt2 = gimple_build_call_vec (fndecl, args);
1063                       else
1064                         stmt2 = gimple_build_call_internal_vec (ifn, args);
1065                       gimple_call_set_lhs (stmt2, arg1);
1066                       gimple_move_vops (stmt2, call);
1067                       gimple_call_set_nothrow (stmt2,
1068                                                gimple_call_nothrow_p (call));
1069                       gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
1070                       gsi_replace (&gsi2, stmt2, true);
1071                     }
1072                   else
1073                     {
1074                       if (ifn == IFN_LAST)
1075                         gimple_call_set_fndecl (call, fndecl);
1076                       else
1077                         gimple_call_set_internal_fn (call, ifn);
1078                       update_stmt (call);
1079                     }
1080                   reciprocal_stats.rfuncs_inserted++;
1081
                       /* ARG1 now holds the reciprocal, so every division by
                          it becomes a multiplication.  This walk reuses STMT
                          and declares a GSI that shadows the loop iterator.
                          NOTE(review): the check above skips debug
                          statements, but this walk visits every statement
                          using ARG1 -- confirm debug uses cannot reach the
                          rhs-code update.  */
1082                   FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
1083                     {
1084                       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1085                       gimple_assign_set_rhs_code (stmt, MULT_EXPR);
1086                       fold_stmt_inplace (&gsi);
1087                       update_stmt (stmt);
1088                     }
1089                 }
1090             }
1091         }
1092     }
1093
1094   statistics_counter_event (fun, "reciprocal divs inserted",
1095                             reciprocal_stats.rdivs_inserted);
1096   statistics_counter_event (fun, "reciprocal functions inserted",
1097                             reciprocal_stats.rfuncs_inserted);
1098
1099   free_dominance_info (CDI_DOMINATORS);
1100   free_dominance_info (CDI_POST_DOMINATORS);
1101   delete occ_pool;
1102   return 0;
1103 }
1104
1105 } // anon namespace
1106
     /* Return a newly allocated pass_cse_reciprocals object created for
        CTXT.  */
1107 gimple_opt_pass *
1108 make_pass_cse_reciprocals (gcc::context *ctxt)
1109 {
1110   return new pass_cse_reciprocals (ctxt);
1111 }
1112
1113 /* If NAME is the result of a type conversion, look for other
1114    equivalent dominating or dominated conversions, and replace all
1115    uses with the earliest dominating name, removing the redundant
1116    conversions.  Return the prevailing name.  NAME is returned
        unchanged when it is a default definition, occurs in an abnormal
        PHI, or is not defined by a cast of an SSA name.  *CFG_CHANGED is
        set to true if removing a conversion purged dead EH edges; it is
        never cleared here.  */
1117
1118 static tree
1119 execute_cse_conv_1 (tree name, bool *cfg_changed)
1120 {
1121   if (SSA_NAME_IS_DEFAULT_DEF (name)
1122       || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1123     return name;
1124
1125   gimple *def_stmt = SSA_NAME_DEF_STMT (name);
1126
1127   if (!gimple_assign_cast_p (def_stmt))
1128     return name;
1129
       /* SRC is the value being converted; its other uses are searched for
          equivalent conversions.  */
1130   tree src = gimple_assign_rhs1 (def_stmt);
1131
1132   if (TREE_CODE (src) != SSA_NAME)
1133     return name;
1134
1135   imm_use_iterator use_iter;
1136   gimple *use_stmt;
1137
1138   /* Find the earliest dominating def.  */
1139   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1140     {
1141       if (use_stmt == def_stmt
1142           || !gimple_assign_cast_p (use_stmt))
1143         continue;
1144
1145       tree lhs = gimple_assign_lhs (use_stmt);
1146
           /* Only casts of the same operand to a compatible type, whose
              result is not used in an abnormal PHI, are candidates.  */
1147       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1148           || (gimple_assign_rhs1 (use_stmt)
1149               != gimple_assign_rhs1 (def_stmt))
1150           || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1151         continue;
1152
1153       bool use_dominates;
1154       if (gimple_bb (def_stmt) == gimple_bb (use_stmt))
1155         {
               /* Same block: walk forward from USE_STMT; reaching DEF_STMT
                  means USE_STMT comes first.  */
1156           gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1157           while (!gsi_end_p (gsi) && gsi_stmt (gsi) != def_stmt)
1158             gsi_next (&gsi);
1159           use_dominates = !gsi_end_p (gsi);
1160         }
1161       else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
1162                                gimple_bb (def_stmt)))
1163         use_dominates = false;
1164       else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (def_stmt),
1165                                gimple_bb (use_stmt)))
1166         use_dominates = true;
1167       else
1168         continue;
1169
           /* A dominating conversion takes over as the prevailing
              NAME / DEF_STMT for the rest of the walk.  */
1170       if (use_dominates)
1171         {
1172           std::swap (name, lhs);
1173           std::swap (def_stmt, use_stmt);
1174         }
1175     }
1176
1177   /* Now go through all uses of SRC again, replacing the equivalent
1178      dominated conversions.  We may replace defs that were not
1179      dominated by the then-prevailing defs when we first visited
1180      them.  */
1181   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1182     {
1183       if (use_stmt == def_stmt
1184           || !gimple_assign_cast_p (use_stmt))
1185         continue;
1186
1187       tree lhs = gimple_assign_lhs (use_stmt);
1188
1189       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1190           || (gimple_assign_rhs1 (use_stmt)
1191               != gimple_assign_rhs1 (def_stmt))
1192           || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1193         continue;
1194
1195       basic_block use_bb = gimple_bb (use_stmt);
1196       if (gimple_bb (def_stmt) == use_bb
1197           || dominated_by_p (CDI_DOMINATORS, use_bb, gimple_bb (def_stmt)))
1198         {
1199           sincos_stats.conv_removed++;
1200
               /* Redirect the users of LHS to NAME, then delete the
                  redundant conversion and release its definitions.  */
1201           gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1202           replace_uses_by (lhs, name);
1203           if (gsi_remove (&gsi, true)
1204               && gimple_purge_dead_eh_edges (use_bb))
1205             *cfg_changed = true;
1206           release_defs (use_stmt);
1207         }
1208     }
1209
1210   return name;
1211 }
1212
1213 /* Try to record USE_STMT in the vector STMTS.  This succeeds when
1214    *TOP_BB is still unset, when USE_STMT's block is *TOP_BB or is
1215    dominated by it, or when that block dominates *TOP_BB; in the first
1216    and last cases *TOP_BB is updated to USE_STMT's block.  Return true
1217    iff USE_STMT was pushed onto STMTS.  */
1218
1219 static bool
1220 maybe_record_sincos (vec<gimple *> *stmts,
1221                      basic_block *top_bb, gimple *use_stmt)
1222 {
1223   basic_block bb = gimple_bb (use_stmt);
1224   basic_block top = *top_bb;
1225   /* USE_STMT already sits in or below the current top block.  */
1226   bool below_top = (top != NULL
1227                     && (top == bb
1228                         || dominated_by_p (CDI_DOMINATORS, bb, top)));
1229   if (!below_top)
1230     {
1231       /* Otherwise its block must become the new top, or we give up.  */
1232       if (top != NULL && !dominated_by_p (CDI_DOMINATORS, top, bb))
1233         return false;
1234       *top_bb = bb;
1235     }
1236   stmts->safe_push (use_stmt);
1237   return true;
1238 }
1239
1240 /* Look for sin, cos and cexpi calls with the same argument NAME and
1241    create a single call to cexpi CSEing the result in this case.
1242    We first walk over all immediate uses of the argument collecting
1243    statements that we can CSE in a vector and in a second pass replace
1244    the statement rhs with a REALPART or IMAGPART expression on the
1245    result of the cexpi call we insert before the use statement that
1246    dominates all other candidates.
     
        Return true if the CFG was changed.  That can already happen in
        execute_cse_conv_1 (dead EH edges purged while removing a redundant
        conversion), so every exit reports CFG_CHANGED, including the ones
        taken when no sincos CSE is performed.  */
1247
1248 static bool
1249 execute_cse_sincos_1 (tree name)
1250 {
1251   gimple_stmt_iterator gsi;
1252   imm_use_iterator use_iter;
1253   tree fndecl, res, type = NULL_TREE;
1254   gimple *def_stmt, *use_stmt, *stmt;
1255   int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
1256   auto_vec<gimple *> stmts;
1257   basic_block top_bb = NULL;
1258   int i;
1259   bool cfg_changed = false;
1260
       /* Continue with the prevailing name among equivalent conversions.  */
1261   name = execute_cse_conv_1 (name, &cfg_changed);
1262
1263   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
1264     {
1265       if (gimple_code (use_stmt) != GIMPLE_CALL
1266           || !gimple_call_lhs (use_stmt))
1267         continue;
1268
           /* Each SEEN_* flag records whether at least one call of that
              kind was accepted by maybe_record_sincos.  */
1269       switch (gimple_call_combined_fn (use_stmt))
1270         {
1271         CASE_CFN_COS:
1272           seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1273           break;
1274
1275         CASE_CFN_SIN:
1276           seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1277           break;
1278
1279         CASE_CFN_CEXPI:
1280           seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1281           break;
1282
1283         default:;
1284           continue;
1285         }
1286
1287       tree t = mathfn_built_in_type (gimple_call_combined_fn (use_stmt));
1288       if (!type)
1289         {
1290           type = t;
1291           t = TREE_TYPE (name);
1292         }
1293       /* This checks that NAME has the right type in the first round,
1294          and, in subsequent rounds, that the built_in type is the same
1295          type, or a compatible type.  */
1296       if (type != t && !types_compatible_p (type, t))
1297         return cfg_changed;
1298     }
       /* At least two different kinds of call are needed for a CSE.  */
1299   if (seen_cos + seen_sin + seen_cexpi <= 1)
1300     return cfg_changed;
1301
1302   /* Simply insert cexpi at the beginning of top_bb but not earlier than
1303      the name def statement.  */
1304   fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
1305   if (!fndecl)
1306     return cfg_changed;
1307   stmt = gimple_build_call (fndecl, 1, name);
1308   res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
1309   gimple_call_set_lhs (stmt, res);
1310
1311   def_stmt = SSA_NAME_DEF_STMT (name);
1312   if (!SSA_NAME_IS_DEFAULT_DEF (name)
1313       && gimple_code (def_stmt) != GIMPLE_PHI
1314       && gimple_bb (def_stmt) == top_bb)
1315     {
           /* NAME is defined by a statement inside TOP_BB: the call has to
              follow that definition.  */
1316       gsi = gsi_for_stmt (def_stmt);
1317       gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1318     }
1319   else
1320     {
1321       gsi = gsi_after_labels (top_bb);
1322       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1323     }
1324   sincos_stats.inserted++;
1325
1326   /* And adjust the recorded old call sites.  */
1327   for (i = 0; stmts.iterate (i, &use_stmt); ++i)
1328     {
1329       tree rhs = NULL;
1330
1331       switch (gimple_call_combined_fn (use_stmt))
1332         {
1333         CASE_CFN_COS:
1334           rhs = fold_build1 (REALPART_EXPR, type, res);
1335           break;
1336
1337         CASE_CFN_SIN:
1338           rhs = fold_build1 (IMAGPART_EXPR, type, res);
1339           break;
1340
1341         CASE_CFN_CEXPI:
1342           rhs = res;
1343           break;
1344
1345         default:;
1346           gcc_unreachable ();
1347         }
1348
1349       /* Replace call with a copy.  */
1350       stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
1351
1352       gsi = gsi_for_stmt (use_stmt);
1353       gsi_replace (&gsi, stmt, true);
1354       if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
1355         cfg_changed = true;
1356     }
1357
1358   return cfg_changed;
1359 }
1360
1361 /* To evaluate powi(x,n), the floating point value x raised to the
1362    constant integer exponent n, we use a hybrid algorithm that
1363    combines the "window method" with look-up tables.  For an
1364    introduction to exponentiation algorithms and "addition chains",
1365    see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1366    "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1367    3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1368    Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */
1369
1370 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
1371    multiplications to inline before calling the system library's pow
1372    function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1373    so this default never requires calling pow, powf or powl.  The
        #ifndef lets a definition made earlier take precedence.  */
1374
1375 #ifndef POWI_MAX_MULTS
1376 #define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
1377 #endif
1378
1379 /* The size of the "optimal power tree" lookup table.  All
1380    exponents less than this value are simply looked up in the
1381    powi_table below.  This threshold is also used to size the
1382    cache of pseudo registers that hold intermediate results.  */
1383 #define POWI_TABLE_SIZE 256
1384
1385 /* The size, in bits, of the window used in the "window method"
1386    exponentiation algorithm.  This is equivalent to a radix of
1387    (1<<POWI_WINDOW_SIZE), i.e. 8, in the corresponding "m-ary method".  */
1388 #define POWI_WINDOW_SIZE 3
1389
1390 /* The following table is an efficient representation of an
1391    "optimal power tree".  For each value, i, the corresponding
1392    value, j, in the table states that an optimal evaluation
1393    sequence for calculating pow(x,i) can be found by evaluating
1394    pow(x,j)*pow(x,i-j).  An optimal power tree for the first
1395    100 integers is given in Knuth's "Seminumerical algorithms".  */
1396
1397 static const unsigned char powi_table[POWI_TABLE_SIZE] =
1398   {
1399       0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
1400       4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
1401       8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
1402      12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
1403      16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
1404      20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
1405      24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
1406      28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
1407      32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
1408      36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
1409      40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
1410      44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
1411      48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
1412      52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
1413      56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
1414      60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
1415      64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
1416      68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
1417      72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
1418      76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
1419      80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
1420      84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
1421      88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
1422      92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
1423      96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
1424     100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
1425     104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
1426     108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
1427     112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
1428     116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
1429     120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
1430     124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
1431   };
1432
1433
1434 /* Count the multiplications still needed to form powi(x,n) for an N
1435    below POWI_TABLE_SIZE, following the splits recorded in powi_table.
1436    Helper for powi_cost.  CACHE[i] is true for exponents that are
1437    already available; every exponent visited here is marked in it.  */
1438
1439 static int
1440 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1441 {
1442   /* A cached exponent costs nothing extra.  */
1443   if (cache[n])
1444     return 0;
1445   cache[n] = true;
1446
1447   const unsigned HOST_WIDE_INT split = powi_table[n];
1448   const int rest_cost = powi_lookup_cost (n - split, cache);
1449   return rest_cost + powi_lookup_cost (split, cache) + 1;
1450 }
1451
1452 /* Return how many multiplications expanding powi(x,n) takes for
1453    exponent N, not counting the reciprocal of a negative N.  Must
1454    stay in sync with powi_as_mults below.  */
1455
1456 static int
1457 powi_cost (HOST_WIDE_INT n)
1458 {
1459   if (n == 0)
1460     return 0;
1461
1462   /* Exponents below POWI_TABLE_SIZE that are already accounted for;
1463      x**1 is marked as available from the start.  */
1464   bool seen[POWI_TABLE_SIZE];
1465   memset (seen, 0, sizeof (seen));
1466   seen[1] = true;
1467
1468   /* A negative exponent only adds the final reciprocal, which is not
1469      counted here, so work on the magnitude.  */
1470   unsigned HOST_WIDE_INT remaining = absu_hwi (n);
1471   int mults = 0;
1472
1473   /* Peel the exponent down until it fits the lookup table: an odd
1474      value sheds a whole window of low bits, an even one a single
1475      bit.  */
1476   while (remaining >= POWI_TABLE_SIZE)
1477     {
1478       if ((remaining & 1) == 0)
1479         {
1480           remaining >>= 1;
1481           mults++;
1482           continue;
1483         }
1484
1485       const unsigned HOST_WIDE_INT window
1486         = remaining & ((1 << POWI_WINDOW_SIZE) - 1);
1487       mults += powi_lookup_cost (window, seen) + POWI_WINDOW_SIZE + 1;
1488       remaining >>= POWI_WINDOW_SIZE;
1489     }
1490
1491   mults += powi_lookup_cost (remaining, seen);
1492   return mults;
1493 }
1494
1495 /* Worker for powi_as_mults.  Emit before GSI, with location LOC, the
1496    multiplications computing CACHE[1]**N in TYPE and return the SSA name
1497    holding the result.  CACHE holds powers below POWI_TABLE_SIZE.  */
1498
1499 static tree
1500 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1501                  unsigned HOST_WIDE_INT n, tree *cache)
1502 {
1503   const bool in_table = n < POWI_TABLE_SIZE;
1504   if (in_table && cache[n])
1505     return cache[n];
1506
1507   /* Allocate the result name before recursing, and publish it in CACHE
1508      right away when N is a table exponent.  */
1509   tree product = make_temp_ssa_name (type, NULL, "powmult");
1510   if (in_table)
1511     cache[n] = product;
1512
1513   tree lhs_pow, rhs_pow;
1514   if (!in_table && (n & 1) == 0)
1515     {
1516       /* Large even exponent: square the half power.  */
1517       lhs_pow = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1518       rhs_pow = lhs_pow;
1519     }
1520   else
1521     {
1522       /* Split N either as the power tree dictates or, for a large odd
1523          exponent, at its low POWI_WINDOW_SIZE bits.  */
1524       unsigned HOST_WIDE_INT part
1525         = in_table ? powi_table[n] : n & ((1 << POWI_WINDOW_SIZE) - 1);
1526       lhs_pow = powi_as_mults_1 (gsi, loc, type, n - part, cache);
1527       rhs_pow = powi_as_mults_1 (gsi, loc, type, part, cache);
1528     }
1529
1530   gassign *mult = gimple_build_assign (product, MULT_EXPR, lhs_pow, rhs_pow);
1531   gimple_set_location (mult, loc);
1532   gsi_insert_before (gsi, mult, GSI_SAME_STMT);
1533
1534   return product;
1535 }
1536
1537 /* Expand ARG0**N before GSI as multiplications of ARG0 with itself and
1538    return the result.  Keep in sync with powi_cost above.  */
1539
1540 tree
1541 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1542                tree arg0, HOST_WIDE_INT n)
1543 {
1544   tree type = TREE_TYPE (arg0);
1545   if (n == 0)
1546     return build_one_cst (type);
1547
1548   /* Cache of powers already materialized; ARG0 itself is ARG0**1.  */
1549   tree powers[POWI_TABLE_SIZE];
1550   memset (powers, 0, sizeof (powers));
1551   powers[1] = arg0;
1552
1553   tree abs_pow = powi_as_mults_1 (gsi, loc, type, absu_hwi (n), powers);
1554   if (n < 0)
1555     {
1556       /* Negative exponent: emit 1.0 / ARG0**|N| as the final value.  */
1557       tree recip = make_temp_ssa_name (type, NULL, "powmult");
1558       tree one = build_real (type, dconst1);
1559       gassign *div = gimple_build_assign (recip, RDIV_EXPR, one,
1560                                           abs_pow);
1561       gimple_set_location (div, loc);
1562       gsi_insert_before (gsi, div, GSI_SAME_STMT);
1563       return recip;
1564     }
1565   return abs_pow;
1566 }
1567
1568 /* ARG0 and N are the operands of a powi builtin at GSI with location
1569    LOC.  When N lies in [-1, 2], or the function is optimized for speed
1570    and powi_cost (N) does not exceed POWI_MAX_MULTS, emit an equivalent
1571    multiplication sequence before GSI and return an expression holding
1572    the result.  Otherwise return NULL_TREE.  */
1573
1574 static tree
1575 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1576                             tree arg0, HOST_WIDE_INT n)
1577 {
1578   /* The cost is only computed when the small-exponent test fails and
1579      the function is being optimized for speed.  */
1580   bool expand = -1 <= n && n <= 2;
1581   if (!expand && optimize_function_for_speed_p (cfun))
1582     expand = powi_cost (n) <= POWI_MAX_MULTS;
1583   return expand ? powi_as_mults (gsi, loc, arg0, n) : NULL_TREE;
1584 }
1585
1586 /* Emit a call to FN with the single argument ARG in front of the
1587    statement GSI points at, giving the call location LOC.  The value
1588    is stored in a fresh SSA name of ARG's type, and that name is
1589    returned.  */
1590
1591 static tree
1592 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1593                        tree fn, tree arg)
1594 {
1595   gcall *call = gimple_build_call (fn, 1, arg);
1596
1597   /* The result name is created with ARG's type.  */
1598   tree result = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1599   gimple_set_lhs (call, result);
1600
1601   /* Place the finished call in front of the statement at GSI.  */
1602   gimple_set_location (call, loc);
1603   gsi_insert_before (gsi, call, GSI_SAME_STMT);
1604   return result;
1605 }
1606
1607 /* Emit the binary operation ARG0 CODE ARG1 in front of the statement
1608    GSI points at, giving it location LOC.  The result goes into a
1609    fresh SSA name of ARG0's type whose temporary is called NAME; that
1610    SSA name is returned.  */
1611
1612 static tree
1613 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1614                         const char *name, enum tree_code code,
1615                         tree arg0, tree arg1)
1616 {
1617   tree lhs = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1618   gassign *binop = gimple_build_assign (lhs, code, arg0, arg1);
1619   gimple_set_location (binop, loc);
1620   gsi_insert_before (gsi, binop, GSI_SAME_STMT);
1621   return lhs;
1622 }
1623
1624 /* Emit "tmp = CODE <ARG0>" before the statement GSI points at, where
1625    tmp is a fresh SSA name of TYPE whose temporary is called NAME and
1626    the CODE tree is built with TYPE.  Uses location LOC; returns tmp.  */
1627
1628 static inline tree
1629 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1630                       const char *name, enum tree_code code, tree arg0)
1631 {
1632   tree lhs = make_temp_ssa_name (type, NULL, name);
1633   tree ref = build1 (code, type, arg0);
1634   gimple *ref_stmt = gimple_build_assign (lhs, ref);
1635   gimple_set_location (ref_stmt, loc);
1636   gsi_insert_before (gsi, ref_stmt, GSI_SAME_STMT);
1637   return lhs;
1638 }
1639
1640 /* Emit a NOP_EXPR conversion of VAL to TYPE before the statement GSI
1641    points at, with location LOC, and return the fresh SSA name.  */
1642
1643 static tree
1644 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1645                        tree type, tree val)
1646 {
1647   tree converted = make_ssa_name (type);
1648   gassign *cast = gimple_build_assign (converted, NOP_EXPR, val);
1649   gimple_set_location (cast, loc);
1650   gsi_insert_before (gsi, cast, GSI_SAME_STMT);
1651   return converted;
1652 }
1653
     /* Description of a constant as a sum of powers of 0.5, as filled in
        by representable_as_half_series_p.  */
1654 struct pow_synth_sqrt_info
1655 {
       /* FACTORS[i] is true when 0.5**(i+1) takes part in the sum.  The
          array is not allocated by representable_as_half_series_p, which
          only clears and fills the entries it was asked to consider.  */
1656   bool *factors;
       /* One more than the index of the last factor set, or 0 if the
          decomposition did not succeed.  */
1657   unsigned int deepest;
       /* Number of factors set other than the final one.  */
1658   unsigned int num_mults;
1659 };
1660
1661 /* Return true iff the real value C is exactly a sum of distinct
1662    powers of 0.5 with exponents 1..N, that is
1663    C == SUM<i from 1..N> (a[i]*(0.5**i)) with every a[i] either 0 or 1.
1664    INFO receives the parameters of the synthesis: the factors a[i],
1665    the deepest power of 0.5 used and the number of multiplications
1666    needed.  INFO->factors must have room for N entries.  */
1667
1668 bool
1669 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1670                                  struct pow_synth_sqrt_info *info)
1671 {
1672   info->deepest = 0;
1673   info->num_mults = 0;
1674   memset (info->factors, 0, n * sizeof (bool));
1675
1676   /* LEFT is what remains of C; HALF_POW walks 0.5, 0.25, 0.125, ...  */
1677   REAL_VALUE_TYPE left = c;
1678   REAL_VALUE_TYPE half_pow = dconsthalf;
1679
1680   for (unsigned depth = 0; depth != n; ++depth)
1681     {
1682       /* Try to take the current power out of LEFT; an inexact
1683          subtraction means C cannot be decomposed exactly.  */
1684       REAL_VALUE_TYPE diff;
1685       if (real_arithmetic (&diff, MINUS_EXPR, &left, &half_pow))
1686         return false;
1687
1688       if (real_equal (&diff, &dconst0))
1689         {
1690           /* Nothing left over: this power completes the sum.  */
1691           info->factors[depth] = true;
1692           info->deepest = depth + 1;
1693           return true;
1694         }
1695
1696       const bool fits = !REAL_VALUE_NEGATIVE (diff);
1697       info->factors[depth] = fits;
1698       if (fits)
1699         {
1700           left = diff;
1701           info->num_mults++;
1702         }
1703
1704       real_arithmetic (&half_pow, MULT_EXPR, &half_pow, &dconsthalf);
1705     }
1706   return false;
1707 }
1708
1709 /* Return the tree for FN applied N times to ARG, emitting any calls
1710    that are still missing before GSI with location LOC.  CACHE[k]
1711    holds FN applied k times where already built; CACHE[0] must be
1712    plain ARG.  All newly built levels are stored back into CACHE.  */
1713
1714 static tree
1715 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1716               tree fn, location_t loc, tree *cache)
1717 {
1718   /* Walk down to the deepest application already present in CACHE.  */
1719   unsigned int have = n;
1720   while (!cache[have])
1721     have--;
1722
1723   /* Then extend the chain one call at a time until depth N exists.  */
1724   for (unsigned int depth = have + 1; depth <= n; depth++)
1725     cache[depth] = build_and_insert_call (gsi, loc, fn, cache[depth - 1]);
1726   return cache[n];
1727 }
1728
/* Write to STREAM the textual form of FNAME applied N times to ARG.
   With FNAME = "foo", ARG = "x" and N = 2 the output is
   "foo (foo (x))"; with N = 0 only ARG is printed.  */

static void
print_nested_fn (FILE* stream, const char *fname, const char* arg,
                 unsigned int n)
{
  /* Opening part: one "FNAME (" per nesting level.  */
  for (unsigned int level = 0; level < n; level++)
    fprintf (stream, "%s (", fname);

  fprintf (stream, "%s", arg);

  /* Closing part: one ")" per nesting level.  */
  for (unsigned int level = 0; level < n; level++)
    fprintf (stream, ")");
}
1746
1747 /* Print to STREAM the fractional sequence of sqrt chains
1748    applied to ARG, described by INFO.  Used for the dump file.  */
1749
1750 static void
1751 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1752                                 struct pow_synth_sqrt_info *info)
1753 {
1754   for (unsigned int i = 0; i < info->deepest; i++)
1755     {
1756       bool is_set = info->factors[i];
1757       if (is_set)
1758         {
1759           print_nested_fn (stream, "sqrt", arg, i + 1);
1760           if (i != info->deepest - 1)
1761             fprintf (stream, " * ");
1762         }
1763     }
1764 }
1765
1766 /* Print to STREAM a representation of raising ARG to an integer
1767    power N.  Used for the dump file.  */
1768
1769 static void
1770 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1771 {
1772   if (n > 1)
1773     fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1774   else if (n == 1)
1775     fprintf (stream, "%s", arg);
1776 }
1777
1778 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1779    square roots.  Place at GSI and LOC.  Limit the maximum depth
1780    of the sqrt chains to MAX_DEPTH.  Return the tree holding the
1781    result of the expanded sequence or NULL_TREE if the expansion failed.
1782
1783    This routine assumes that ARG1 is a real number with a fractional part
1784    (the integer exponent case will have been handled earlier in
1785    gimple_expand_builtin_pow).
1786
1787    For ARG1 > 0.0:
1788    * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1789      FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1790                     FRAC_PART == ARG1 - WHOLE_PART:
1791      Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1792      POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1793      if it can be expressed as such, that is if FRAC_PART satisfies:
1794      FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1795      where integer a[i] is either 0 or 1.
1796
1797      Example:
1798      POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1799        --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1800
1801    For ARG1 < 0.0 there are two approaches:
1802    * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1803          is calculated as above.
1804
1805      Example:
1806      POW (x, -5.625) == 1.0 / POW (x, 5.625)
1807        -->  1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1808
1809    * (B) : WHOLE_PART := - ceil (abs (ARG1))
1810            FRAC_PART  := ARG1 - WHOLE_PART
1811      and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1812      Example:
1813      POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1814        --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1815
1816    For ARG1 < 0.0 we choose between (A) and (B) depending on
1817    how many multiplications we'd have to do.
1818    So, for the example in (B): POW (x, -5.875), if we were to
1819    follow algorithm (A) we would produce:
1820    1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1821    which contains more multiplications than approach (B).
1822
1823    Hopefully, this approach will eliminate potentially expensive POW library
1824    calls when unsafe floating point math is enabled and allow the compiler to
1825    further optimise the multiplies, square roots and divides produced by this
1826    function.  */
1827
1828 static tree
1829 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1830                      tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1831 {
1832   tree type = TREE_TYPE (arg0);
1833   machine_mode mode = TYPE_MODE (type);
1834   tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1835   bool one_over = true;
1836
1837   if (!sqrtfn)
1838     return NULL_TREE;
1839
1840   if (TREE_CODE (arg1) != REAL_CST)
1841     return NULL_TREE;
1842
1843   REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1844
1845   gcc_assert (max_depth > 0);
1846   tree *cache = XALLOCAVEC (tree, max_depth + 1);
1847
1848   struct pow_synth_sqrt_info synth_info;
1849   synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1850   synth_info.deepest = 0;
1851   synth_info.num_mults = 0;
1852
1853   bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1854   REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1855
1856   /* The whole and fractional parts of exp.  */
1857   REAL_VALUE_TYPE whole_part;
1858   REAL_VALUE_TYPE frac_part;
1859
1860   real_floor (&whole_part, mode, &exp);
1861   real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1862
1863
1864   REAL_VALUE_TYPE ceil_whole = dconst0;
1865   REAL_VALUE_TYPE ceil_fract = dconst0;
1866
1867   if (neg_exp)
1868     {
1869       real_ceil (&ceil_whole, mode, &exp);
1870       real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1871     }
1872
1873   if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1874     return NULL_TREE;
1875
1876   /* Check whether it's more profitable to not use 1.0 / ...  */
1877   if (neg_exp)
1878     {
1879       struct pow_synth_sqrt_info alt_synth_info;
1880       alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1881       alt_synth_info.deepest = 0;
1882       alt_synth_info.num_mults = 0;
1883
1884       if (representable_as_half_series_p (ceil_fract, max_depth,
1885                                            &alt_synth_info)
1886           && alt_synth_info.deepest <= synth_info.deepest
1887           && alt_synth_info.num_mults < synth_info.num_mults)
1888         {
1889           whole_part = ceil_whole;
1890           frac_part = ceil_fract;
1891           synth_info.deepest = alt_synth_info.deepest;
1892           synth_info.num_mults = alt_synth_info.num_mults;
1893           memcpy (synth_info.factors, alt_synth_info.factors,
1894                   (max_depth + 1) * sizeof (bool));
1895           one_over = false;
1896         }
1897     }
1898
1899   HOST_WIDE_INT n = real_to_integer (&whole_part);
1900   REAL_VALUE_TYPE cint;
1901   real_from_integer (&cint, VOIDmode, n, SIGNED);
1902
1903   if (!real_identical (&whole_part, &cint))
1904     return NULL_TREE;
1905
1906   if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1907     return NULL_TREE;
1908
1909   memset (cache, 0, (max_depth + 1) * sizeof (tree));
1910
1911   tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1912
1913   /* Calculate the integer part of the exponent.  */
1914   if (n > 1)
1915     {
1916       integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1917       if (!integer_res)
1918         return NULL_TREE;
1919     }
1920
1921   if (dump_file)
1922     {
1923       char string[64];
1924
1925       real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1926       fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1927
1928       if (neg_exp)
1929         {
1930           if (one_over)
1931             {
1932               fprintf (dump_file, "1.0 / (");
1933               dump_integer_part (dump_file, "x", n);
1934               if (n > 0)
1935                 fprintf (dump_file, " * ");
1936               dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1937               fprintf (dump_file, ")");
1938             }
1939           else
1940             {
1941               dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1942               fprintf (dump_file, " / (");
1943               dump_integer_part (dump_file, "x", n);
1944               fprintf (dump_file, ")");
1945             }
1946         }
1947       else
1948         {
1949           dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1950           if (n > 0)
1951             fprintf (dump_file, " * ");
1952           dump_integer_part (dump_file, "x", n);
1953         }
1954
1955       fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1956     }
1957
1958
1959   tree fract_res = NULL_TREE;
1960   cache[0] = arg0;
1961
1962   /* Calculate the fractional part of the exponent.  */
1963   for (unsigned i = 0; i < synth_info.deepest; i++)
1964     {
1965       if (synth_info.factors[i])
1966         {
1967           tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1968
1969           if (!fract_res)
1970               fract_res = sqrt_chain;
1971
1972           else
1973             fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1974                                            fract_res, sqrt_chain);
1975         }
1976     }
1977
1978   tree res = NULL_TREE;
1979
1980   if (neg_exp)
1981     {
1982       if (one_over)
1983         {
1984           if (n > 0)
1985             res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1986                                            fract_res, integer_res);
1987           else
1988             res = fract_res;
1989
1990           res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1991                                           build_real (type, dconst1), res);
1992         }
1993       else
1994         {
1995           res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1996                                          fract_res, integer_res);
1997         }
1998     }
1999   else
2000     res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2001                                    fract_res, integer_res);
2002   return res;
2003 }
2004
2005 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
2006    with location info LOC.  If possible, create an equivalent and
2007    less expensive sequence of statements prior to GSI, and return an
2008    expession holding the result.  */
2009
2010 static tree
2011 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
2012                            tree arg0, tree arg1)
2013 {
2014   REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
2015   REAL_VALUE_TYPE c2, dconst3;
2016   HOST_WIDE_INT n;
2017   tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
2018   machine_mode mode;
2019   bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
2020   bool hw_sqrt_exists, c_is_int, c2_is_int;
2021
2022   dconst1_4 = dconst1;
2023   SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
2024
2025   /* If the exponent isn't a constant, there's nothing of interest
2026      to be done.  */
2027   if (TREE_CODE (arg1) != REAL_CST)
2028     return NULL_TREE;
2029
2030   /* Don't perform the operation if flag_signaling_nans is on
2031      and the operand is a signaling NaN.  */
2032   if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
2033       && ((TREE_CODE (arg0) == REAL_CST
2034            && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
2035           || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
2036     return NULL_TREE;
2037
2038   /* If the exponent is equivalent to an integer, expand to an optimal
2039      multiplication sequence when profitable.  */
2040   c = TREE_REAL_CST (arg1);
2041   n = real_to_integer (&c);
2042   real_from_integer (&cint, VOIDmode, n, SIGNED);
2043   c_is_int = real_identical (&c, &cint);
2044
2045   if (c_is_int
2046       && ((n >= -1 && n <= 2)
2047           || (flag_unsafe_math_optimizations
2048               && speed_p
2049               && powi_cost (n) <= POWI_MAX_MULTS)))
2050     return gimple_expand_builtin_powi (gsi, loc, arg0, n);
2051
2052   /* Attempt various optimizations using sqrt and cbrt.  */
2053   type = TREE_TYPE (arg0);
2054   mode = TYPE_MODE (type);
2055   sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2056
2057   /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
2058      unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
2059      sqrt(-0) = -0.  */
2060   if (sqrtfn
2061       && real_equal (&c, &dconsthalf)
2062       && !HONOR_SIGNED_ZEROS (mode))
2063     return build_and_insert_call (gsi, loc, sqrtfn, arg0);
2064
2065   hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
2066
2067   /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
2068      optimizations since 1./3. is not exactly representable.  If x
2069      is negative and finite, the correct value of pow(x,1./3.) is
2070      a NaN with the "invalid" exception raised, because the value
2071      of 1./3. actually has an even denominator.  The correct value
2072      of cbrt(x) is a negative real value.  */
2073   cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
2074   dconst1_3 = real_value_truncate (mode, dconst_third ());
2075
2076   if (flag_unsafe_math_optimizations
2077       && cbrtfn
2078       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2079       && real_equal (&c, &dconst1_3))
2080     return build_and_insert_call (gsi, loc, cbrtfn, arg0);
2081
2082   /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
2083      if we don't have a hardware sqrt insn.  */
2084   dconst1_6 = dconst1_3;
2085   SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
2086
2087   if (flag_unsafe_math_optimizations
2088       && sqrtfn
2089       && cbrtfn
2090       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2091       && speed_p
2092       && hw_sqrt_exists
2093       && real_equal (&c, &dconst1_6))
2094     {
2095       /* sqrt(x)  */
2096       sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
2097
2098       /* cbrt(sqrt(x))  */
2099       return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
2100     }
2101
2102
2103   /* Attempt to expand the POW as a product of square root chains.
2104      Expand the 0.25 case even when otpimising for size.  */
2105   if (flag_unsafe_math_optimizations
2106       && sqrtfn
2107       && hw_sqrt_exists
2108       && (speed_p || real_equal (&c, &dconst1_4))
2109       && !HONOR_SIGNED_ZEROS (mode))
2110     {
2111       unsigned int max_depth = speed_p
2112                                 ? param_max_pow_sqrt_depth
2113                                 : 2;
2114
2115       tree expand_with_sqrts
2116         = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
2117
2118       if (expand_with_sqrts)
2119         return expand_with_sqrts;
2120     }
2121
2122   real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
2123   n = real_to_integer (&c2);
2124   real_from_integer (&cint, VOIDmode, n, SIGNED);
2125   c2_is_int = real_identical (&c2, &cint);
2126
2127   /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2128
2129      powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
2130      1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.
2131
2132      Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
2133      different from pow(x, 1./3.) due to rounding and behavior with
2134      negative x, we need to constrain this transformation to unsafe
2135      math and positive x or finite math.  */
2136   real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
2137   real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
2138   real_round (&c2, mode, &c2);
2139   n = real_to_integer (&c2);
2140   real_from_integer (&cint, VOIDmode, n, SIGNED);
2141   real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
2142   real_convert (&c2, mode, &c2);
2143
2144   if (flag_unsafe_math_optimizations
2145       && cbrtfn
2146       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2147       && real_identical (&c2, &c)
2148       && !c2_is_int
2149       && optimize_function_for_speed_p (cfun)
2150       && powi_cost (n / 3) <= POWI_MAX_MULTS)
2151     {
2152       tree powi_x_ndiv3 = NULL_TREE;
2153
2154       /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
2155          possible or profitable, give up.  Skip the degenerate case when
2156          abs(n) < 3, where the result is always 1.  */
2157       if (absu_hwi (n) >= 3)
2158         {
2159           powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
2160                                                      abs_hwi (n / 3));
2161           if (!powi_x_ndiv3)
2162             return NULL_TREE;
2163         }
2164
2165       /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
2166          as that creates an unnecessary variable.  Instead, just produce
2167          either cbrt(x) or cbrt(x) * cbrt(x).  */
2168       cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
2169
2170       if (absu_hwi (n) % 3 == 1)
2171         powi_cbrt_x = cbrt_x;
2172       else
2173         powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2174                                               cbrt_x, cbrt_x);
2175
2176       /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
2177       if (absu_hwi (n) < 3)
2178         result = powi_cbrt_x;
2179       else
2180         result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2181                                          powi_x_ndiv3, powi_cbrt_x);
2182
2183       /* If n is negative, reciprocate the result.  */
2184       if (n < 0)
2185         result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
2186                                          build_real (type, dconst1), result);
2187
2188       return result;
2189     }
2190
2191   /* No optimizations succeeded.  */
2192   return NULL_TREE;
2193 }
2194
2195 /* ARG is the argument to a cabs builtin call in GSI with location info
2196    LOC.  Create a sequence of statements prior to GSI that calculates
2197    sqrt(R*R + I*I), where R and I are the real and imaginary components
2198    of ARG, respectively.  Return an expression holding the result.  */
2199
2200 static tree
2201 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
2202 {
2203   tree real_part, imag_part, addend1, addend2, sum, result;
2204   tree type = TREE_TYPE (TREE_TYPE (arg));
2205   tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2206   machine_mode mode = TYPE_MODE (type);
2207
2208   if (!flag_unsafe_math_optimizations
2209       || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
2210       || !sqrtfn
2211       || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
2212     return NULL_TREE;
2213
2214   real_part = build_and_insert_ref (gsi, loc, type, "cabs",
2215                                     REALPART_EXPR, arg);
2216   addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2217                                     real_part, real_part);
2218   imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
2219                                     IMAGPART_EXPR, arg);
2220   addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2221                                     imag_part, imag_part);
2222   sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
2223   result = build_and_insert_call (gsi, loc, sqrtfn, sum);
2224
2225   return result;
2226 }
2227
2228 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2229    on the SSA_NAME argument of each of them.  */
2230
2231 namespace {
2232
2233 const pass_data pass_data_cse_sincos =
2234 {
2235   GIMPLE_PASS, /* type */
2236   "sincos", /* name */
2237   OPTGROUP_NONE, /* optinfo_flags */
2238   TV_TREE_SINCOS, /* tv_id */
2239   PROP_ssa, /* properties_required */
2240   PROP_gimple_opt_math, /* properties_provided */
2241   0, /* properties_destroyed */
2242   0, /* todo_flags_start */
2243   TODO_update_ssa, /* todo_flags_finish */
2244 };
2245
2246 class pass_cse_sincos : public gimple_opt_pass
2247 {
2248 public:
2249   pass_cse_sincos (gcc::context *ctxt)
2250     : gimple_opt_pass (pass_data_cse_sincos, ctxt)
2251   {}
2252
2253   /* opt_pass methods: */
2254   bool gate (function *) final override
2255     {
2256       return optimize;
2257     }
2258
2259   unsigned int execute (function *) final override;
2260
2261 }; // class pass_cse_sincos
2262
2263 unsigned int
2264 pass_cse_sincos::execute (function *fun)
2265 {
2266   basic_block bb;
2267   bool cfg_changed = false;
2268
2269   calculate_dominance_info (CDI_DOMINATORS);
2270   memset (&sincos_stats, 0, sizeof (sincos_stats));
2271
2272   FOR_EACH_BB_FN (bb, fun)
2273     {
2274       gimple_stmt_iterator gsi;
2275
2276       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2277         {
2278           gimple *stmt = gsi_stmt (gsi);
2279
2280           if (is_gimple_call (stmt)
2281               && gimple_call_lhs (stmt))
2282             {
2283               tree arg;
2284               switch (gimple_call_combined_fn (stmt))
2285                 {
2286                 CASE_CFN_COS:
2287                 CASE_CFN_SIN:
2288                 CASE_CFN_CEXPI:
2289                   arg = gimple_call_arg (stmt, 0);
2290                   /* Make sure we have either sincos or cexp.  */
2291                   if (!targetm.libc_has_function (function_c99_math_complex,
2292                                                   TREE_TYPE (arg))
2293                       && !targetm.libc_has_function (function_sincos,
2294                                                      TREE_TYPE (arg)))
2295                     break;
2296
2297                   if (TREE_CODE (arg) == SSA_NAME)
2298                     cfg_changed |= execute_cse_sincos_1 (arg);
2299                   break;
2300                 default:
2301                   break;
2302                 }
2303             }
2304         }
2305     }
2306
2307   statistics_counter_event (fun, "sincos statements inserted",
2308                             sincos_stats.inserted);
2309   statistics_counter_event (fun, "conv statements removed",
2310                             sincos_stats.conv_removed);
2311
2312   return cfg_changed ? TODO_cleanup_cfg : 0;
2313 }
2314
2315 } // anon namespace
2316
2317 gimple_opt_pass *
2318 make_pass_cse_sincos (gcc::context *ctxt)
2319 {
2320   return new pass_cse_sincos (ctxt);
2321 }
2322
2323 /* Expand powi(x,n) into an optimal number of multiplies, when n is a constant.
2324    Also expand CABS.  */
2325 namespace {
2326
2327 const pass_data pass_data_expand_powcabs =
2328 {
2329   GIMPLE_PASS, /* type */
2330   "powcabs", /* name */
2331   OPTGROUP_NONE, /* optinfo_flags */
2332   TV_TREE_POWCABS, /* tv_id */
2333   PROP_ssa, /* properties_required */
2334   0, /* properties_provided */
2335   0, /* properties_destroyed */
2336   0, /* todo_flags_start */
2337   TODO_update_ssa, /* todo_flags_finish */
2338 };
2339
2340 class pass_expand_powcabs : public gimple_opt_pass
2341 {
2342 public:
2343   pass_expand_powcabs (gcc::context *ctxt)
2344     : gimple_opt_pass (pass_data_expand_powcabs, ctxt)
2345   {}
2346
2347   /* opt_pass methods: */
2348   bool gate (function *) final override
2349     {
2350       return optimize;
2351     }
2352
2353   unsigned int execute (function *) final override;
2354
2355 }; // class pass_expand_powcabs
2356
2357 unsigned int
2358 pass_expand_powcabs::execute (function *fun)
2359 {
2360   basic_block bb;
2361   bool cfg_changed = false;
2362
2363   calculate_dominance_info (CDI_DOMINATORS);
2364
2365   FOR_EACH_BB_FN (bb, fun)
2366     {
2367       gimple_stmt_iterator gsi;
2368       bool cleanup_eh = false;
2369
2370       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2371         {
2372           gimple *stmt = gsi_stmt (gsi);
2373
2374           /* Only the last stmt in a bb could throw, no need to call
2375              gimple_purge_dead_eh_edges if we change something in the middle
2376              of a basic block.  */
2377           cleanup_eh = false;
2378
2379           if (is_gimple_call (stmt)
2380               && gimple_call_lhs (stmt))
2381             {
2382               tree arg0, arg1, result;
2383               HOST_WIDE_INT n;
2384               location_t loc;
2385
2386               switch (gimple_call_combined_fn (stmt))
2387                 {
2388                 CASE_CFN_POW:
2389                   arg0 = gimple_call_arg (stmt, 0);
2390                   arg1 = gimple_call_arg (stmt, 1);
2391
2392                   loc = gimple_location (stmt);
2393                   result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
2394
2395                   if (result)
2396                     {
2397                       tree lhs = gimple_get_lhs (stmt);
2398                       gassign *new_stmt = gimple_build_assign (lhs, result);
2399                       gimple_set_location (new_stmt, loc);
2400                       unlink_stmt_vdef (stmt);
2401                       gsi_replace (&gsi, new_stmt, true);
2402                       cleanup_eh = true;
2403                       if (gimple_vdef (stmt))
2404                         release_ssa_name (gimple_vdef (stmt));
2405                     }
2406                   break;
2407
2408                 CASE_CFN_POWI:
2409                   arg0 = gimple_call_arg (stmt, 0);
2410                   arg1 = gimple_call_arg (stmt, 1);
2411                   loc = gimple_location (stmt);
2412
2413                   if (real_minus_onep (arg0))
2414                     {
2415                       tree t0, t1, cond, one, minus_one;
2416                       gassign *stmt;
2417
2418                       t0 = TREE_TYPE (arg0);
2419                       t1 = TREE_TYPE (arg1);
2420                       one = build_real (t0, dconst1);
2421                       minus_one = build_real (t0, dconstm1);
2422
2423                       cond = make_temp_ssa_name (t1, NULL, "powi_cond");
2424                       stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2425                                                   arg1, build_int_cst (t1, 1));
2426                       gimple_set_location (stmt, loc);
2427                       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2428
2429                       result = make_temp_ssa_name (t0, NULL, "powi");
2430                       stmt = gimple_build_assign (result, COND_EXPR, cond,
2431                                                   minus_one, one);
2432                       gimple_set_location (stmt, loc);
2433                       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2434                     }
2435                   else
2436                     {
2437                       if (!tree_fits_shwi_p (arg1))
2438                         break;
2439
2440                       n = tree_to_shwi (arg1);
2441                       result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
2442                     }
2443
2444                   if (result)
2445                     {
2446                       tree lhs = gimple_get_lhs (stmt);
2447                       gassign *new_stmt = gimple_build_assign (lhs, result);
2448                       gimple_set_location (new_stmt, loc);
2449                       unlink_stmt_vdef (stmt);
2450                       gsi_replace (&gsi, new_stmt, true);
2451                       cleanup_eh = true;
2452                       if (gimple_vdef (stmt))
2453                         release_ssa_name (gimple_vdef (stmt));
2454                     }
2455                   break;
2456
2457                 CASE_CFN_CABS:
2458                   arg0 = gimple_call_arg (stmt, 0);
2459                   loc = gimple_location (stmt);
2460                   result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
2461
2462                   if (result)
2463                     {
2464                       tree lhs = gimple_get_lhs (stmt);
2465                       gassign *new_stmt = gimple_build_assign (lhs, result);
2466                       gimple_set_location (new_stmt, loc);
2467                       unlink_stmt_vdef (stmt);
2468                       gsi_replace (&gsi, new_stmt, true);
2469                       cleanup_eh = true;
2470                       if (gimple_vdef (stmt))
2471                         release_ssa_name (gimple_vdef (stmt));
2472                     }
2473                   break;
2474
2475                 default:;
2476                 }
2477             }
2478         }
2479       if (cleanup_eh)
2480         cfg_changed |= gimple_purge_dead_eh_edges (bb);
2481     }
2482
2483   return cfg_changed ? TODO_cleanup_cfg : 0;
2484 }
2485
2486 } // anon namespace
2487
2488 gimple_opt_pass *
2489 make_pass_expand_powcabs (gcc::context *ctxt)
2490 {
2491   return new pass_expand_powcabs (ctxt);
2492 }
2493
2494 /* Return true if stmt is a type conversion operation that can be stripped
2495    when used in a widening multiply operation.  */
2496 static bool
2497 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2498 {
2499   enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2500
2501   if (TREE_CODE (result_type) == INTEGER_TYPE)
2502     {
2503       tree op_type;
2504       tree inner_op_type;
2505
2506       if (!CONVERT_EXPR_CODE_P (rhs_code))
2507         return false;
2508
2509       op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2510
2511       /* If the type of OP has the same precision as the result, then
2512          we can strip this conversion.  The multiply operation will be
2513          selected to create the correct extension as a by-product.  */
2514       if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2515         return true;
2516
2517       /* We can also strip a conversion if it preserves the signed-ness of
2518          the operation and doesn't narrow the range.  */
2519       inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2520
2521       /* If the inner-most type is unsigned, then we can strip any
2522          intermediate widening operation.  If it's signed, then the
2523          intermediate widening operation must also be signed.  */
2524       if ((TYPE_UNSIGNED (inner_op_type)
2525            || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2526           && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2527         return true;
2528
2529       return false;
2530     }
2531
2532   return rhs_code == FIXED_CONVERT_EXPR;
2533 }
2534
2535 /* Return true if RHS is a suitable operand for a widening multiplication,
2536    assuming a target type of TYPE.
2537    There are two cases:
2538
2539      - RHS makes some value at least twice as wide.  Store that value
2540        in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2541
2542      - RHS is an integer constant.  Store that value in *NEW_RHS_OUT if so,
2543        but leave *TYPE_OUT untouched.  */
2544
2545 static bool
2546 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2547                         tree *new_rhs_out)
2548 {
2549   gimple *stmt;
2550   tree type1, rhs1;
2551
2552   if (TREE_CODE (rhs) == SSA_NAME)
2553     {
2554       stmt = SSA_NAME_DEF_STMT (rhs);
2555       if (is_gimple_assign (stmt))
2556         {
2557           if (! widening_mult_conversion_strippable_p (type, stmt))
2558             rhs1 = rhs;
2559           else
2560             {
2561               rhs1 = gimple_assign_rhs1 (stmt);
2562
2563               if (TREE_CODE (rhs1) == INTEGER_CST)
2564                 {
2565                   *new_rhs_out = rhs1;
2566                   *type_out = NULL;
2567                   return true;
2568                 }
2569             }
2570         }
2571       else
2572         rhs1 = rhs;
2573
2574       type1 = TREE_TYPE (rhs1);
2575
2576       if (TREE_CODE (type1) != TREE_CODE (type)
2577           || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2578         return false;
2579
2580       *new_rhs_out = rhs1;
2581       *type_out = type1;
2582       return true;
2583     }
2584
2585   if (TREE_CODE (rhs) == INTEGER_CST)
2586     {
2587       *new_rhs_out = rhs;
2588       *type_out = NULL;
2589       return true;
2590     }
2591
2592   return false;
2593 }
2594
2595 /* Return true if STMT performs a widening multiplication, assuming the
2596    output type is TYPE.  If so, store the unwidened types of the operands
2597    in *TYPE1_OUT and *TYPE2_OUT respectively.  Also fill *RHS1_OUT and
2598    *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2599    and *TYPE2_OUT would give the operands of the multiplication.  */
2600
2601 static bool
2602 is_widening_mult_p (gimple *stmt,
2603                     tree *type1_out, tree *rhs1_out,
2604                     tree *type2_out, tree *rhs2_out)
2605 {
2606   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2607
2608   if (TREE_CODE (type) == INTEGER_TYPE)
2609     {
2610       if (TYPE_OVERFLOW_TRAPS (type))
2611         return false;
2612     }
2613   else if (TREE_CODE (type) != FIXED_POINT_TYPE)
2614     return false;
2615
2616   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2617                                rhs1_out))
2618     return false;
2619
2620   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2621                                rhs2_out))
2622     return false;
2623
2624   if (*type1_out == NULL)
2625     {
2626       if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2627         return false;
2628       *type1_out = *type2_out;
2629     }
2630
2631   if (*type2_out == NULL)
2632     {
2633       if (!int_fits_type_p (*rhs2_out, *type1_out))
2634         return false;
2635       *type2_out = *type1_out;
2636     }
2637
2638   /* Ensure that the larger of the two operands comes first. */
2639   if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2640     {
2641       std::swap (*type1_out, *type2_out);
2642       std::swap (*rhs1_out, *rhs2_out);
2643     }
2644
2645   return true;
2646 }
2647
2648 /* Check to see if the CALL statement is an invocation of copysign
2649    with 1. being the first argument.  */
2650 static bool
2651 is_copysign_call_with_1 (gimple *call)
2652 {
2653   gcall *c = dyn_cast <gcall *> (call);
2654   if (! c)
2655     return false;
2656
2657   enum combined_fn code = gimple_call_combined_fn (c);
2658
2659   if (code == CFN_LAST)
2660     return false;
2661
2662   if (builtin_fn_p (code))
2663     {
2664       switch (as_builtin_fn (code))
2665         {
2666         CASE_FLT_FN (BUILT_IN_COPYSIGN):
2667         CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2668           return real_onep (gimple_call_arg (c, 0));
2669         default:
2670           return false;
2671         }
2672     }
2673
2674   if (internal_fn_p (code))
2675     {
2676       switch (as_internal_fn (code))
2677         {
2678         case IFN_COPYSIGN:
2679           return real_onep (gimple_call_arg (c, 0));
2680         default:
2681           return false;
2682         }
2683     }
2684
2685    return false;
2686 }
2687
2688 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2689    This only happens when the xorsign optab is defined, if the
2690    pattern is not a xorsign pattern or if expansion fails FALSE is
2691    returned, otherwise TRUE is returned.  */
2692 static bool
2693 convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2694 {
2695   tree treeop0, treeop1, lhs, type;
2696   location_t loc = gimple_location (stmt);
2697   lhs = gimple_assign_lhs (stmt);
2698   treeop0 = gimple_assign_rhs1 (stmt);
2699   treeop1 = gimple_assign_rhs2 (stmt);
2700   type = TREE_TYPE (lhs);
2701   machine_mode mode = TYPE_MODE (type);
2702
2703   if (HONOR_SNANS (type))
2704     return false;
2705
2706   if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2707     {
2708       gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2709       if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
2710         {
2711           call0 = SSA_NAME_DEF_STMT (treeop1);
2712           if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2713             return false;
2714
2715           treeop1 = treeop0;
2716         }
2717         if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2718           return false;
2719
2720         gcall *c = as_a<gcall*> (call0);
2721         treeop0 = gimple_call_arg (c, 1);
2722
2723         gcall *call_stmt
2724           = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2725         gimple_set_lhs (call_stmt, lhs);
2726         gimple_set_location (call_stmt, loc);
2727         gsi_replace (gsi, call_stmt, true);
2728         return true;
2729     }
2730
2731   return false;
2732 }
2733
2734 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2735    its rhs, and try to convert it into a WIDEN_MULT_EXPR.  The return
2736    value is true iff we converted the statement.  */
2737
2738 static bool
2739 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2740 {
2741   tree lhs, rhs1, rhs2, type, type1, type2;
2742   enum insn_code handler;
2743   scalar_int_mode to_mode, from_mode, actual_mode;
2744   optab op;
2745   int actual_precision;
2746   location_t loc = gimple_location (stmt);
2747   bool from_unsigned1, from_unsigned2;
2748
2749   lhs = gimple_assign_lhs (stmt);
2750   type = TREE_TYPE (lhs);
2751   if (TREE_CODE (type) != INTEGER_TYPE)
2752     return false;
2753
2754   if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2755     return false;
2756
2757   to_mode = SCALAR_INT_TYPE_MODE (type);
2758   from_mode = SCALAR_INT_TYPE_MODE (type1);
2759   if (to_mode == from_mode)
2760     return false;
2761
2762   from_unsigned1 = TYPE_UNSIGNED (type1);
2763   from_unsigned2 = TYPE_UNSIGNED (type2);
2764
2765   if (from_unsigned1 && from_unsigned2)
2766     op = umul_widen_optab;
2767   else if (!from_unsigned1 && !from_unsigned2)
2768     op = smul_widen_optab;
2769   else
2770     op = usmul_widen_optab;
2771
2772   handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2773                                                   &actual_mode);
2774
2775   if (handler == CODE_FOR_nothing)
2776     {
2777       if (op != smul_widen_optab)
2778         {
2779           /* We can use a signed multiply with unsigned types as long as
2780              there is a wider mode to use, or it is the smaller of the two
2781              types that is unsigned.  Note that type1 >= type2, always.  */
2782           if ((TYPE_UNSIGNED (type1)
2783                && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2784               || (TYPE_UNSIGNED (type2)
2785                   && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2786             {
2787               if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2788                   || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2789                 return false;
2790             }
2791
2792           op = smul_widen_optab;
2793           handler = find_widening_optab_handler_and_mode (op, to_mode,
2794                                                           from_mode,
2795                                                           &actual_mode);
2796
2797           if (handler == CODE_FOR_nothing)
2798             return false;
2799
2800           from_unsigned1 = from_unsigned2 = false;
2801         }
2802       else
2803         {
2804           /* Expand can synthesize smul_widen_optab if the target
2805              supports umul_widen_optab.  */
2806           op = umul_widen_optab;
2807           handler = find_widening_optab_handler_and_mode (op, to_mode,
2808                                                           from_mode,
2809                                                           &actual_mode);
2810           if (handler == CODE_FOR_nothing)
2811             return false;
2812         }
2813     }
2814
2815   /* Ensure that the inputs to the handler are in the correct precison
2816      for the opcode.  This will be the full mode size.  */
2817   actual_precision = GET_MODE_PRECISION (actual_mode);
2818   if (2 * actual_precision > TYPE_PRECISION (type))
2819     return false;
2820   if (actual_precision != TYPE_PRECISION (type1)
2821       || from_unsigned1 != TYPE_UNSIGNED (type1))
2822     rhs1 = build_and_insert_cast (gsi, loc,
2823                                   build_nonstandard_integer_type
2824                                     (actual_precision, from_unsigned1), rhs1);
2825   if (actual_precision != TYPE_PRECISION (type2)
2826       || from_unsigned2 != TYPE_UNSIGNED (type2))
2827     rhs2 = build_and_insert_cast (gsi, loc,
2828                                   build_nonstandard_integer_type
2829                                     (actual_precision, from_unsigned2), rhs2);
2830
2831   /* Handle constants.  */
2832   if (TREE_CODE (rhs1) == INTEGER_CST)
2833     rhs1 = fold_convert (type1, rhs1);
2834   if (TREE_CODE (rhs2) == INTEGER_CST)
2835     rhs2 = fold_convert (type2, rhs2);
2836
2837   gimple_assign_set_rhs1 (stmt, rhs1);
2838   gimple_assign_set_rhs2 (stmt, rhs2);
2839   gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2840   update_stmt (stmt);
2841   widen_mul_stats.widen_mults_inserted++;
2842   return true;
2843 }
2844
/* Process a single gimple statement STMT, which is found at the
   iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
   rhs (given by CODE), and try to convert it into a
   WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
   is true iff we converted the statement.

   For a PLUS_EXPR the multiplication may feed either operand; for a
   MINUS_EXPR only a multiplication feeding the second operand
   (a - b * c) is considered.  */

static bool
convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
                            enum tree_code code)
{
  gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
  gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
  tree type, type1, type2, optype;
  tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
  /* These stay ERROR_MARK unless the corresponding operand is an SSA
     name defined by an assignment.  */
  enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
  optab this_optab;
  enum tree_code wmult_code;
  enum insn_code handler;
  scalar_mode to_mode, from_mode, actual_mode;
  location_t loc = gimple_location (stmt);
  int actual_precision;
  bool from_unsigned1, from_unsigned2;

  lhs = gimple_assign_lhs (stmt);
  type = TREE_TYPE (lhs);
  if (TREE_CODE (type) != INTEGER_TYPE
      && TREE_CODE (type) != FIXED_POINT_TYPE)
    return false;

  /* Pick the tree code the statement would be rewritten to.  */
  if (code == MINUS_EXPR)
    wmult_code = WIDEN_MULT_MINUS_EXPR;
  else
    wmult_code = WIDEN_MULT_PLUS_EXPR;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);

  /* Look up the statements defining the two operands, if any.  */
  if (TREE_CODE (rhs1) == SSA_NAME)
    {
      rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
      if (is_gimple_assign (rhs1_stmt))
        rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
    }

  if (TREE_CODE (rhs2) == SSA_NAME)
    {
      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
      if (is_gimple_assign (rhs2_stmt))
        rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
    }

  /* Allow for one conversion statement between the multiply
     and addition/subtraction statement.  If there is more than
     one conversion then we assume they would invalidate this
     transformation.  If that's not the case then they should have
     been folded before now.

     When an operand is defined by a conversion, remember the
     conversion statement and continue with the converted value and
     its defining statement instead.  */
  if (CONVERT_EXPR_CODE_P (rhs1_code))
    {
      conv1_stmt = rhs1_stmt;
      rhs1 = gimple_assign_rhs1 (rhs1_stmt);
      if (TREE_CODE (rhs1) == SSA_NAME)
        {
          rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
          if (is_gimple_assign (rhs1_stmt))
            rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
        }
      else
        return false;
    }
  if (CONVERT_EXPR_CODE_P (rhs2_code))
    {
      conv2_stmt = rhs2_stmt;
      rhs2 = gimple_assign_rhs1 (rhs2_stmt);
      if (TREE_CODE (rhs2) == SSA_NAME)
        {
          rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
          if (is_gimple_assign (rhs2_stmt))
            rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
        }
      else
        return false;
    }

  /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
     is_widening_mult_p, but we still need the rhs returns.

     It might also appear that it would be sufficient to use the existing
     operands of the widening multiply, but that would limit the choice of
     multiply-and-accumulate instructions.

     If the widened-multiplication result has more than one use, it is
     probably wiser not to do the conversion.  Also restrict this operation
     to single basic block to avoid moving the multiply to a different block
     with a higher execution frequency.  */
  if (code == PLUS_EXPR
      && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
    {
      if (!has_single_use (rhs1)
          || gimple_bb (rhs1_stmt) != gimple_bb (stmt)
          || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
                                  &type2, &mult_rhs2))
        return false;
      /* The multiplication feeds the first operand; the second one is
         the value to accumulate.  */
      add_rhs = rhs2;
      conv_stmt = conv1_stmt;
    }
  else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
    {
      if (!has_single_use (rhs2)
          || gimple_bb (rhs2_stmt) != gimple_bb (stmt)
          || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
                                  &type2, &mult_rhs2))
        return false;
      /* The multiplication feeds the second operand; the first one is
         the value to accumulate.  */
      add_rhs = rhs1;
      conv_stmt = conv2_stmt;
    }
  else
    return false;

  to_mode = SCALAR_TYPE_MODE (type);
  from_mode = SCALAR_TYPE_MODE (type1);
  if (to_mode == from_mode)
    return false;

  /* Signedness the multiplication operands will have when fed to the
     instruction, and the type used to select the optab.  */
  from_unsigned1 = TYPE_UNSIGNED (type1);
  from_unsigned2 = TYPE_UNSIGNED (type2);
  optype = type1;

  /* There's no such thing as a mixed sign madd yet, so use a wider mode.  */
  if (from_unsigned1 != from_unsigned2)
    {
      if (!INTEGRAL_TYPE_P (type))
        return false;
      /* We can use a signed multiply with unsigned types as long as
         there is a wider mode to use, or it is the smaller of the two
         types that is unsigned.  Note that type1 >= type2, always.  */
      if ((from_unsigned1
           && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
          || (from_unsigned2
              && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
        {
          if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
              || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
            return false;
        }

      /* Treat both operands as signed from here on.  */
      from_unsigned1 = from_unsigned2 = false;
      optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
                                               false);
    }

  /* If there was a conversion between the multiply and addition
     then we need to make sure it fits a multiply-and-accumulate.
     There should be a single mode change which does not change the
     value.  */
  if (conv_stmt)
    {
      /* We use the original, unmodified data types for this.  */
      tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
      tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
      /* Number of bits assumed for the product of the two unwidened
         operands.  */
      int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
      bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);

      if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
        {
          /* Conversion is a truncate.  It must keep at least DATA_SIZE
             bits.  */
          if (TYPE_PRECISION (to_type) < data_size)
            return false;
        }
      else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
        {
          /* Conversion is an extend.  Check it's the right sort.  */
          if (TYPE_UNSIGNED (from_type) != is_unsigned
              && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
            return false;
        }
      /* else convert is a no-op for our purposes.  */
    }

  /* Verify that the machine can perform a widening multiply
     accumulate in this mode/signedness combination, otherwise
     this transformation is likely to pessimize code.  */
  this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
  handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
                                                  from_mode, &actual_mode);

  if (handler == CODE_FOR_nothing)
    return false;

  /* Ensure that the inputs to the handler are in the correct precision
     for the opcode.  This will be the full mode size.  */
  actual_precision = GET_MODE_PRECISION (actual_mode);
  if (actual_precision != TYPE_PRECISION (type1)
      || from_unsigned1 != TYPE_UNSIGNED (type1))
    mult_rhs1 = build_and_insert_cast (gsi, loc,
                                       build_nonstandard_integer_type
                                         (actual_precision, from_unsigned1),
                                       mult_rhs1);
  if (actual_precision != TYPE_PRECISION (type2)
      || from_unsigned2 != TYPE_UNSIGNED (type2))
    mult_rhs2 = build_and_insert_cast (gsi, loc,
                                       build_nonstandard_integer_type
                                         (actual_precision, from_unsigned2),
                                       mult_rhs2);

  /* The accumulated operand must have the type of the result.  */
  if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
    add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);

  /* Handle constants.  */
  if (TREE_CODE (mult_rhs1) == INTEGER_CST)
    mult_rhs1 = fold_convert (type1, mult_rhs1);
  if (TREE_CODE (mult_rhs2) == INTEGER_CST)
    mult_rhs2 = fold_convert (type2, mult_rhs2);

  /* Rewrite the statement at GSI into the three-operand widening
     multiply-accumulate form.  */
  gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
                                  add_rhs);
  update_stmt (gsi_stmt (*gsi));
  widen_mul_stats.maccs_inserted++;
  return true;
}
3064
/* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
   OP2 and which we know is used in statements that can be, together with the
   multiplication, converted to FMAs, perform the transformation.

   Every non-debug use of MUL_RESULT is replaced by a call to IFN_FMA, or
   to IFN_COND_FMA when the use is a conditional operation.  The
   multiplication statement itself is not removed here.  */

static void
convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
{
  tree type = TREE_TYPE (mul_result);
  gimple *use_stmt;
  imm_use_iterator imm_iter;
  gcall *fma_stmt;

  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      /* RESULT is the value whose use is being replaced: MUL_RESULT
         itself, or its negation if we look through a NEGATE_EXPR.  */
      tree addop, mulop1 = op1, result = mul_result;
      /* Whether the product has to be negated in the generated FMA.  */
      bool negate_p = false;
      /* Statements that have to be emitted in front of the FMA.  */
      gimple_seq seq = NULL;

      if (is_gimple_debug (use_stmt))
        continue;

      /* A use in a negation: delete the negate statement and continue
         with the statement using its result.  The return value of
         single_imm_use is not checked here.
         NOTE(review): presumably the caller has already verified that
         the negation has exactly one use -- confirm.  */
      if (is_gimple_assign (use_stmt)
          && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
        {
          result = gimple_assign_lhs (use_stmt);
          use_operand_p use_p;
          gimple *neguse_stmt;
          single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
          gsi_remove (&gsi, true);
          release_defs (use_stmt);

          use_stmt = neguse_stmt;
          gsi = gsi_for_stmt (use_stmt);
          negate_p = true;
        }

      /* The use must be decomposable into a (possibly conditional)
         operation; anything else is treated as an internal error.  */
      tree cond, else_value, ops[3];
      tree_code code;
      if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
                                              ops, &else_value))
        gcc_unreachable ();
      /* The addend is whichever of the first two operands is not the
         product.  */
      addop = ops[0] == result ? ops[1] : ops[0];

      if (code == MINUS_EXPR)
        {
          if (ops[0] == result)
            /* a * b - c -> a * b + (-c)  */
            addop = gimple_build (&seq, NEGATE_EXPR, type, addop);
          else
            /* a - b * c -> (-b) * c + a */
            negate_p = !negate_p;
        }

      /* Negating the product is done by negating its first factor.  */
      if (negate_p)
        mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1);

      if (seq)
        gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);

      if (cond)
        fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
                                               op2, addop, else_value);
      else
        fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
      /* The call takes over the lhs of the replaced statement and is
         marked nothrow exactly when that statement could not throw
         internally.  */
      gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
      gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun,
                                                                   use_stmt));
      gsi_replace (&gsi, fma_stmt, true);
      /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
         regardless of where the negation occurs.  */
      gimple *orig_stmt = gsi_stmt (gsi);
      if (fold_stmt (&gsi, follow_all_ssa_edges))
        {
          /* Folding is not expected to require EH cleanup here.  */
          if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
            gcc_unreachable ();
          update_stmt (gsi_stmt (gsi));
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Generated FMA ");
          print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
          fprintf (dump_file, "\n");
        }

      /* If the FMA result is negated in a single use, fold the negation
         too.  */
      orig_stmt = gsi_stmt (gsi);
      use_operand_p use_p;
      gimple *neg_stmt;
      if (is_gimple_call (orig_stmt)
          && gimple_call_internal_p (orig_stmt)
          && gimple_call_lhs (orig_stmt)
          && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME
          && single_imm_use (gimple_call_lhs (orig_stmt), &use_p, &neg_stmt)
          && is_gimple_assign (neg_stmt)
          && gimple_assign_rhs_code (neg_stmt) == NEGATE_EXPR
          && !stmt_could_throw_p (cfun, neg_stmt))
        {
          /* From here on GSI points at the negation statement.  */
          gsi = gsi_for_stmt (neg_stmt);
          if (fold_stmt (&gsi, follow_all_ssa_edges))
            {
              if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (gsi)))
                gcc_unreachable ();
              update_stmt (gsi_stmt (gsi));
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, "Folded FMA negation ");
                  print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
                  fprintf (dump_file, "\n");
                }
            }
        }

      /* Counted once per replaced use of MUL_RESULT.  */
      widen_mul_stats.fmas_inserted++;
    }
}
3183
/* Data necessary to perform the actual transformation from a multiplication
   and an addition to an FMA after the decision has been taken that it
   should be done, and to then delete the multiplication statement from the
   function IL.  */

struct fma_transformation_info
{
  /* The multiplication statement; it is removed from the IL once its
     uses have been converted.  */
  gimple *mul_stmt;
  /* The value computed by the multiplication, whose uses are turned
     into FMAs.  */
  tree mul_result;
  /* The two operands of the multiplication.  */
  tree op1;
  tree op2;
};
3195
3196 /* Structure containing the current state of FMA deferring, i.e. whether we are
3197    deferring, whether to continue deferring, and all data necessary to come
3198    back and perform all deferred transformations.  */
3199
3200 class fma_deferring_state
3201 {
3202 public:
3203   /* Class constructor.  Pass true as PERFORM_DEFERRING in order to actually
3204      do any deferring.  */
3205
3206   fma_deferring_state (bool perform_deferring)
3207     : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
3208       m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
3209
3210   /* List of FMA candidates for which we the transformation has been determined
3211      possible but we at this point in BB analysis we do not consider them
3212      beneficial.  */
3213   auto_vec<fma_transformation_info, 8> m_candidates;
3214
3215   /* Set of results of multiplication that are part of an already deferred FMA
3216      candidates.  */
3217   hash_set<tree> m_mul_result_set;
3218
3219   /* The PHI that supposedly feeds back result of a FMA to another over loop
3220      boundary.  */
3221   gphi *m_initial_phi;
3222
3223   /* Result of the last produced FMA candidate or NULL if there has not been
3224      one.  */
3225   tree m_last_result;
3226
3227   /* If true, deferring might still be profitable.  If false, transform all
3228      candidates and no longer defer.  */
3229   bool m_deferring_p;
3230 };
3231
3232 /* Transform all deferred FMA candidates and mark STATE as no longer
3233    deferring.  */
3234
3235 static void
3236 cancel_fma_deferring (fma_deferring_state *state)
3237 {
3238   if (!state->m_deferring_p)
3239     return;
3240
3241   for (unsigned i = 0; i < state->m_candidates.length (); i++)
3242     {
3243       if (dump_file && (dump_flags & TDF_DETAILS))
3244         fprintf (dump_file, "Generating deferred FMA\n");
3245
3246       const fma_transformation_info &fti = state->m_candidates[i];
3247       convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
3248
3249       gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
3250       gsi_remove (&gsi, true);
3251       release_defs (fti.mul_stmt);
3252     }
3253   state->m_deferring_p = false;
3254 }
3255
3256 /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3257    Otherwise return NULL.  */
3258
3259 static gphi *
3260 result_of_phi (tree op)
3261 {
3262   if (TREE_CODE (op) != SSA_NAME)
3263     return NULL;
3264
3265   return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3266 }
3267
3268 /* After processing statements of a BB and recording STATE, return true if the
3269    initial phi is fed by the last FMA candidate result ore one such result from
3270    previously processed BBs marked in LAST_RESULT_SET.  */
3271
3272 static bool
3273 last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3274                                       hash_set<tree> *last_result_set)
3275 {
3276   ssa_op_iter iter;
3277   use_operand_p use;
3278   FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3279     {
3280       tree t = USE_FROM_PTR (use);
3281       if (t == state->m_last_result
3282           || last_result_set->contains (t))
3283         return true;
3284     }
3285
3286   return false;
3287 }
3288
3289 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3290    with uses in additions and subtractions to form fused multiply-add
3291    operations.  Returns true if successful and MUL_STMT should be removed.
3292    If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3293    on MUL_COND, otherwise it is unconditional.
3294
3295    If STATE indicates that we are deferring FMA transformation, that means
3296    that we do not produce FMAs for basic blocks which look like:
3297
3298     <bb 6>
3299     # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3300     _65 = _14 * _16;
3301     accumulator_66 = _65 + accumulator_111;
3302
3303   or its unrolled version, i.e. with several FMA candidates that feed result
3304   of one into the addend of another.  Instead, we add them to a list in STATE
3305   and if we later discover an FMA candidate that is not part of such a chain,
3306   we go back and perform all deferred past candidates.  */
3307
3308 static bool
3309 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3310                      fma_deferring_state *state, tree mul_cond = NULL_TREE)
3311 {
3312   tree mul_result = gimple_get_lhs (mul_stmt);
3313   /* If there isn't a LHS then this can't be an FMA.  There can be no LHS
3314      if the statement was left just for the side-effects.  */
3315   if (!mul_result)
3316     return false;
3317   tree type = TREE_TYPE (mul_result);
3318   gimple *use_stmt, *neguse_stmt;
3319   use_operand_p use_p;
3320   imm_use_iterator imm_iter;
3321
3322   if (FLOAT_TYPE_P (type)
3323       && flag_fp_contract_mode == FP_CONTRACT_OFF)
3324     return false;
3325
3326   /* We don't want to do bitfield reduction ops.  */
3327   if (INTEGRAL_TYPE_P (type)
3328       && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
3329     return false;
3330
3331   /* If the target doesn't support it, don't generate it.  We assume that
3332      if fma isn't available then fms, fnma or fnms are not either.  */
3333   optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt));
3334   if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
3335     return false;
3336
3337   /* If the multiplication has zero uses, it is kept around probably because
3338      of -fnon-call-exceptions.  Don't optimize it away in that case,
3339      it is DCE job.  */
3340   if (has_zero_uses (mul_result))
3341     return false;
3342
3343   bool check_defer
3344     = (state->m_deferring_p
3345        && maybe_le (tree_to_poly_int64 (TYPE_SIZE (type)),
3346                     param_avoid_fma_max_bits));
3347   bool defer = check_defer;
3348   bool seen_negate_p = false;
3349   /* Make sure that the multiplication statement becomes dead after
3350      the transformation, thus that all uses are transformed to FMAs.
3351      This means we assume that an FMA operation has the same cost
3352      as an addition.  */
3353   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3354     {
3355       tree result = mul_result;
3356       bool negate_p = false;
3357
3358       use_stmt = USE_STMT (use_p);
3359
3360       if (is_gimple_debug (use_stmt))
3361         continue;
3362
3363       /* For now restrict this operations to single basic blocks.  In theory
3364          we would want to support sinking the multiplication in
3365          m = a*b;
3366          if ()
3367            ma = m + c;
3368          else
3369            d = m;
3370          to form a fma in the then block and sink the multiplication to the
3371          else block.  */
3372       if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3373         return false;
3374
3375       /* A negate on the multiplication leads to FNMA.  */
3376       if (is_gimple_assign (use_stmt)
3377           && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3378         {
3379           ssa_op_iter iter;
3380           use_operand_p usep;
3381
3382           /* If (due to earlier missed optimizations) we have two
3383              negates of the same value, treat them as equivalent
3384              to a single negate with multiple uses.  */
3385           if (seen_negate_p)
3386             return false;
3387
3388           result = gimple_assign_lhs (use_stmt);
3389
3390           /* Make sure the negate statement becomes dead with this
3391              single transformation.  */
3392           if (!single_imm_use (gimple_assign_lhs (use_stmt),
3393                                &use_p, &neguse_stmt))
3394             return false;
3395
3396           /* Make sure the multiplication isn't also used on that stmt.  */
3397           FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3398             if (USE_FROM_PTR (usep) == mul_result)
3399               return false;
3400
3401           /* Re-validate.  */
3402           use_stmt = neguse_stmt;
3403           if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3404             return false;
3405
3406           negate_p = seen_negate_p = true;
3407         }
3408
3409       tree cond, else_value, ops[3];
3410       tree_code code;
3411       if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3412                                               &else_value))
3413         return false;
3414
3415       switch (code)
3416         {
3417         case MINUS_EXPR:
3418           if (ops[1] == result)
3419             negate_p = !negate_p;
3420           break;
3421         case PLUS_EXPR:
3422           break;
3423         default:
3424           /* FMA can only be formed from PLUS and MINUS.  */
3425           return false;
3426         }
3427
3428       if (mul_cond && cond != mul_cond)
3429         return false;
3430
3431       if (cond)
3432         {
3433           if (cond == result || else_value == result)
3434             return false;
3435           if (!direct_internal_fn_supported_p (IFN_COND_FMA, type, opt_type))
3436             return false;
3437         }
3438
3439       /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3440          we'll visit later, we might be able to get a more profitable
3441          match with fnma.
3442          OTOH, if we don't, a negate / fma pair has likely lower latency
3443          that a mult / subtract pair.  */
3444       if (code == MINUS_EXPR
3445           && !negate_p
3446           && ops[0] == result
3447           && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
3448           && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3449           && TREE_CODE (ops[1]) == SSA_NAME
3450           && has_single_use (ops[1]))
3451         {
3452           gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3453           if (is_gimple_assign (stmt2)
3454               && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3455             return false;
3456         }
3457
3458       /* We can't handle a * b + a * b.  */
3459       if (ops[0] == ops[1])
3460         return false;
3461       /* If deferring, make sure we are not looking at an instruction that
3462          wouldn't have existed if we were not.  */
3463       if (state->m_deferring_p
3464           && (state->m_mul_result_set.contains (ops[0])
3465               || state->m_mul_result_set.contains (ops[1])))
3466         return false;
3467
3468       if (check_defer)
3469         {
3470           tree use_lhs = gimple_get_lhs (use_stmt);
3471           if (state->m_last_result)
3472             {
3473               if (ops[1] == state->m_last_result
3474                   || ops[0] == state->m_last_result)
3475                 defer = true;
3476               else
3477                 defer = false;
3478             }
3479           else
3480             {
3481               gcc_checking_assert (!state->m_initial_phi);
3482               gphi *phi;
3483               if (ops[0] == result)
3484                 phi = result_of_phi (ops[1]);
3485               else
3486                 {
3487                   gcc_assert (ops[1] == result);
3488                   phi = result_of_phi (ops[0]);
3489                 }
3490
3491               if (phi)
3492                 {
3493                   state->m_initial_phi = phi;
3494                   defer = true;
3495                 }
3496               else
3497                 defer = false;
3498             }
3499
3500           state->m_last_result = use_lhs;
3501           check_defer = false;
3502         }
3503       else
3504         defer = false;
3505
3506       /* While it is possible to validate whether or not the exact form that
3507          we've recognized is available in the backend, the assumption is that
3508          if the deferring logic above did not trigger, the transformation is
3509          never a loss.  For instance, suppose the target only has the plain FMA
3510          pattern available.  Consider a*b-c -> fma(a,b,-c): we've exchanged
3511          MUL+SUB for FMA+NEG, which is still two operations.  Consider
3512          -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3513          form the two NEGs are independent and could be run in parallel.  */
3514     }
3515
3516   if (defer)
3517     {
3518       fma_transformation_info fti;
3519       fti.mul_stmt = mul_stmt;
3520       fti.mul_result = mul_result;
3521       fti.op1 = op1;
3522       fti.op2 = op2;
3523       state->m_candidates.safe_push (fti);
3524       state->m_mul_result_set.add (mul_result);
3525
3526       if (dump_file && (dump_flags & TDF_DETAILS))
3527         {
3528           fprintf (dump_file, "Deferred generating FMA for multiplication ");
3529           print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
3530           fprintf (dump_file, "\n");
3531         }
3532
3533       return false;
3534     }
3535   else
3536     {
3537       if (state->m_deferring_p)
3538         cancel_fma_deferring (state);
3539       convert_mult_to_fma_1 (mul_result, op1, op2);
3540       return true;
3541     }
3542 }
3543
3544
3545 /* Helper function of match_arith_overflow.  For MUL_OVERFLOW, if we have
3546    a check for non-zero like:
3547    _1 = x_4(D) * y_5(D);
3548    *res_7(D) = _1;
3549    if (x_4(D) != 0)
3550      goto <bb 3>; [50.00%]
3551    else
3552      goto <bb 4>; [50.00%]
3553
3554    <bb 3> [local count: 536870913]:
3555    _2 = _1 / x_4(D);
3556    _9 = _2 != y_5(D);
3557    _10 = (int) _9;
3558
3559    <bb 4> [local count: 1073741824]:
3560    # iftmp.0_3 = PHI <_10(3), 0(2)>
3561    then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
3562    optimize the x_4(D) != 0 condition to 1.  */
3563
3564 static void
3565 maybe_optimize_guarding_check (vec<gimple *> &mul_stmts, gimple *cond_stmt,
3566                                gimple *div_stmt, bool *cfg_changed)
3567 {
3568   basic_block bb = gimple_bb (cond_stmt);
3569   if (gimple_bb (div_stmt) != bb || !single_pred_p (bb))
3570     return;
3571   edge pred_edge = single_pred_edge (bb);
3572   basic_block pred_bb = pred_edge->src;
3573   if (EDGE_COUNT (pred_bb->succs) != 2)
3574     return;
3575   edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge);
3576   edge other_succ_edge = NULL;
3577   if (gimple_code (cond_stmt) == GIMPLE_COND)
3578     {
3579       if (EDGE_COUNT (bb->succs) != 2)
3580         return;
3581       other_succ_edge = EDGE_SUCC (bb, 0);
3582       if (gimple_cond_code (cond_stmt) == NE_EXPR)
3583         {
3584           if (other_succ_edge->flags & EDGE_TRUE_VALUE)
3585             other_succ_edge = EDGE_SUCC (bb, 1);
3586         }
3587       else if (other_succ_edge->flags & EDGE_FALSE_VALUE)
3588         other_succ_edge = EDGE_SUCC (bb, 0);
3589       if (other_edge->dest != other_succ_edge->dest)
3590         return;
3591     }
3592   else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb))
3593     return;
3594   gimple *zero_cond = last_stmt (pred_bb);
3595   if (zero_cond == NULL
3596       || gimple_code (zero_cond) != GIMPLE_COND
3597       || (gimple_cond_code (zero_cond)
3598           != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR))
3599       || !integer_zerop (gimple_cond_rhs (zero_cond)))
3600     return;
3601   tree zero_cond_lhs = gimple_cond_lhs (zero_cond);
3602   if (TREE_CODE (zero_cond_lhs) != SSA_NAME)
3603     return;
3604   if (gimple_assign_rhs2 (div_stmt) != zero_cond_lhs)
3605     {
3606       /* Allow the divisor to be result of a same precision cast
3607          from zero_cond_lhs.  */
3608       tree rhs2 = gimple_assign_rhs2 (div_stmt);
3609       if (TREE_CODE (rhs2) != SSA_NAME)
3610         return;
3611       gimple *g = SSA_NAME_DEF_STMT (rhs2);
3612       if (!gimple_assign_cast_p (g)
3613           || gimple_assign_rhs1 (g) != gimple_cond_lhs (zero_cond)
3614           || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs))
3615           || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs))
3616               != TYPE_PRECISION (TREE_TYPE (rhs2))))
3617         return;
3618     }
3619   gimple_stmt_iterator gsi = gsi_after_labels (bb);
3620   mul_stmts.quick_push (div_stmt);
3621   if (is_gimple_debug (gsi_stmt (gsi)))
3622     gsi_next_nondebug (&gsi);
3623   unsigned cast_count = 0;
3624   while (gsi_stmt (gsi) != cond_stmt)
3625     {
3626       /* If original mul_stmt has a single use, allow it in the same bb,
3627          we are looking then just at __builtin_mul_overflow_p.
3628          Though, in that case the original mul_stmt will be replaced
3629          by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts.  */
3630       gimple *mul_stmt;
3631       unsigned int i;
3632       bool ok = false;
3633       FOR_EACH_VEC_ELT (mul_stmts, i, mul_stmt)
3634         {
3635           if (gsi_stmt (gsi) == mul_stmt)
3636             {
3637               ok = true;
3638               break;
3639             }
3640         }
3641       if (!ok && gimple_assign_cast_p (gsi_stmt (gsi)) && ++cast_count < 4)
3642         ok = true;
3643       if (!ok)
3644         return;
3645       gsi_next_nondebug (&gsi);
3646     }
3647   if (gimple_code (cond_stmt) == GIMPLE_COND)
3648     {
3649       basic_block succ_bb = other_edge->dest;
3650       for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (gpi);
3651            gsi_next (&gpi))
3652         {
3653           gphi *phi = gpi.phi ();
3654           tree v1 = gimple_phi_arg_def (phi, other_edge->dest_idx);
3655           tree v2 = gimple_phi_arg_def (phi, other_succ_edge->dest_idx);
3656           if (!operand_equal_p (v1, v2, 0))
3657             return;
3658         }
3659     }
3660   else
3661     {
3662       tree lhs = gimple_assign_lhs (cond_stmt);
3663       if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
3664         return;
3665       gsi_next_nondebug (&gsi);
3666       if (!gsi_end_p (gsi))
3667         {
3668           if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3669             return;
3670           gimple *cast_stmt = gsi_stmt (gsi);
3671           if (!gimple_assign_cast_p (cast_stmt))
3672             return;
3673           tree new_lhs = gimple_assign_lhs (cast_stmt);
3674           gsi_next_nondebug (&gsi);
3675           if (!gsi_end_p (gsi)
3676               || !new_lhs
3677               || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs))
3678               || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1)
3679             return;
3680           lhs = new_lhs;
3681         }
3682       edge succ_edge = single_succ_edge (bb);
3683       basic_block succ_bb = succ_edge->dest;
3684       gsi = gsi_start_phis (succ_bb);
3685       if (gsi_end_p (gsi))
3686         return;
3687       gphi *phi = as_a <gphi *> (gsi_stmt (gsi));
3688       gsi_next (&gsi);
3689       if (!gsi_end_p (gsi))
3690         return;
3691       if (gimple_phi_arg_def (phi, succ_edge->dest_idx) != lhs)
3692         return;
3693       tree other_val = gimple_phi_arg_def (phi, other_edge->dest_idx);
3694       if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3695         {
3696           tree cond = gimple_assign_rhs1 (cond_stmt);
3697           if (TREE_CODE (cond) == NE_EXPR)
3698             {
3699               if (!operand_equal_p (other_val,
3700                                     gimple_assign_rhs3 (cond_stmt), 0))
3701                 return;
3702             }
3703           else if (!operand_equal_p (other_val,
3704                                      gimple_assign_rhs2 (cond_stmt), 0))
3705             return;
3706         }
3707       else if (gimple_assign_rhs_code (cond_stmt) == NE_EXPR)
3708         {
3709           if (!integer_zerop (other_val))
3710             return;
3711         }
3712       else if (!integer_onep (other_val))
3713         return;
3714     }
3715   gcond *zero_gcond = as_a <gcond *> (zero_cond);
3716   if (pred_edge->flags & EDGE_TRUE_VALUE)
3717     gimple_cond_make_true (zero_gcond);
3718   else
3719     gimple_cond_make_false (zero_gcond);
3720   update_stmt (zero_cond);
3721   *cfg_changed = true;
3722 }
3723
3724 /* Helper function for arith_overflow_check_p.  Return true
3725    if VAL1 is equal to VAL2 cast to corresponding integral type
3726    with other signedness or vice versa.  */
3727
3728 static bool
3729 arith_cast_equal_p (tree val1, tree val2)
3730 {
3731   if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
3732     return wi::eq_p (wi::to_wide (val1), wi::to_wide (val2));
3733   else if (TREE_CODE (val1) != SSA_NAME || TREE_CODE (val2) != SSA_NAME)
3734     return false;
3735   if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1))
3736       && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1)) == val2)
3737     return true;
3738   if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2))
3739       && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2)) == val1)
3740     return true;
3741   return false;
3742 }
3743
3744 /* Helper function of match_arith_overflow.  Return 1
3745    if USE_STMT is unsigned overflow check ovf != 0 for
3746    STMT, -1 if USE_STMT is unsigned overflow check ovf == 0
3747    and 0 otherwise.  */
3748
3749 static int
3750 arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
3751                         tree maxval, tree *other)
3752 {
3753   enum tree_code ccode = ERROR_MARK;
3754   tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3755   enum tree_code code = gimple_assign_rhs_code (stmt);
3756   tree lhs = gimple_assign_lhs (cast_stmt ? cast_stmt : stmt);
3757   tree rhs1 = gimple_assign_rhs1 (stmt);
3758   tree rhs2 = gimple_assign_rhs2 (stmt);
3759   tree multop = NULL_TREE, divlhs = NULL_TREE;
3760   gimple *cur_use_stmt = use_stmt;
3761
3762   if (code == MULT_EXPR)
3763     {
3764       if (!is_gimple_assign (use_stmt))
3765         return 0;
3766       if (gimple_assign_rhs_code (use_stmt) != TRUNC_DIV_EXPR)
3767         return 0;
3768       if (gimple_assign_rhs1 (use_stmt) != lhs)
3769         return 0;
3770       if (cast_stmt)
3771         {
3772           if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs1))
3773             multop = rhs2;
3774           else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs2))
3775             multop = rhs1;
3776           else
3777             return 0;
3778         }
3779       else if (gimple_assign_rhs2 (use_stmt) == rhs1)
3780         multop = rhs2;
3781       else if (operand_equal_p (gimple_assign_rhs2 (use_stmt), rhs2, 0))
3782         multop = rhs1;
3783       else
3784         return 0;
3785       if (stmt_ends_bb_p (use_stmt))
3786         return 0;
3787       divlhs = gimple_assign_lhs (use_stmt);
3788       if (!divlhs)
3789         return 0;
3790       use_operand_p use;
3791       if (!single_imm_use (divlhs, &use, &cur_use_stmt))
3792         return 0;
3793     }
3794   if (gimple_code (cur_use_stmt) == GIMPLE_COND)
3795     {
3796       ccode = gimple_cond_code (cur_use_stmt);
3797       crhs1 = gimple_cond_lhs (cur_use_stmt);
3798       crhs2 = gimple_cond_rhs (cur_use_stmt);
3799     }
3800   else if (is_gimple_assign (cur_use_stmt))
3801     {
3802       if (gimple_assign_rhs_class (cur_use_stmt) == GIMPLE_BINARY_RHS)
3803         {
3804           ccode = gimple_assign_rhs_code (cur_use_stmt);
3805           crhs1 = gimple_assign_rhs1 (cur_use_stmt);
3806           crhs2 = gimple_assign_rhs2 (cur_use_stmt);
3807         }
3808       else if (gimple_assign_rhs_code (cur_use_stmt) == COND_EXPR)
3809         {
3810           tree cond = gimple_assign_rhs1 (cur_use_stmt);
3811           if (COMPARISON_CLASS_P (cond))
3812             {
3813               ccode = TREE_CODE (cond);
3814               crhs1 = TREE_OPERAND (cond, 0);
3815               crhs2 = TREE_OPERAND (cond, 1);
3816             }
3817           else
3818             return 0;
3819         }
3820       else
3821         return 0;
3822     }
3823   else
3824     return 0;
3825
3826   if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3827     return 0;
3828
3829   switch (ccode)
3830     {
3831     case GT_EXPR:
3832     case LE_EXPR:
3833       if (maxval)
3834         {
3835           /* r = a + b; r > maxval or r <= maxval  */
3836           if (crhs1 == lhs
3837               && TREE_CODE (crhs2) == INTEGER_CST
3838               && tree_int_cst_equal (crhs2, maxval))
3839             return ccode == GT_EXPR ? 1 : -1;
3840           break;
3841         }
3842       /* r = a - b; r > a or r <= a
3843          r = a + b; a > r or a <= r or b > r or b <= r.  */
3844       if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3845           || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3846               && crhs2 == lhs))
3847         return ccode == GT_EXPR ? 1 : -1;
3848       /* r = ~a; b > r or b <= r.  */
3849       if (code == BIT_NOT_EXPR && crhs2 == lhs)
3850         {
3851           if (other)
3852             *other = crhs1;
3853           return ccode == GT_EXPR ? 1 : -1;
3854         }
3855       break;
3856     case LT_EXPR:
3857     case GE_EXPR:
3858       if (maxval)
3859         break;
3860       /* r = a - b; a < r or a >= r
3861          r = a + b; r < a or r >= a or r < b or r >= b.  */
3862       if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3863           || (code == PLUS_EXPR && crhs1 == lhs
3864               && (crhs2 == rhs1 || crhs2 == rhs2)))
3865         return ccode == LT_EXPR ? 1 : -1;
3866       /* r = ~a; r < b or r >= b.  */
3867       if (code == BIT_NOT_EXPR && crhs1 == lhs)
3868         {
3869           if (other)
3870             *other = crhs2;
3871           return ccode == LT_EXPR ? 1 : -1;
3872         }
3873       break;
3874     case EQ_EXPR:
3875     case NE_EXPR:
3876       /* r = a * b; _1 = r / a; _1 == b
3877          r = a * b; _1 = r / b; _1 == a
3878          r = a * b; _1 = r / a; _1 != b
3879          r = a * b; _1 = r / b; _1 != a.  */
3880       if (code == MULT_EXPR)
3881         {
3882           if (cast_stmt)
3883             {
3884               if ((crhs1 == divlhs && arith_cast_equal_p (crhs2, multop))
3885                   || (crhs2 == divlhs && arith_cast_equal_p (crhs1, multop)))
3886                 {
3887                   use_stmt = cur_use_stmt;
3888                   return ccode == NE_EXPR ? 1 : -1;
3889                 }
3890             }
3891           else if ((crhs1 == divlhs && operand_equal_p (crhs2, multop, 0))
3892                    || (crhs2 == divlhs && crhs1 == multop))
3893             {
3894               use_stmt = cur_use_stmt;
3895               return ccode == NE_EXPR ? 1 : -1;
3896             }
3897         }
3898       break;
3899     default:
3900       break;
3901     }
3902   return 0;
3903 }
3904
3905 /* Recognize for unsigned x
3906    x = y - z;
3907    if (x > y)
3908    where there are other uses of x and replace it with
3909    _7 = .SUB_OVERFLOW (y, z);
3910    x = REALPART_EXPR <_7>;
3911    _8 = IMAGPART_EXPR <_7>;
3912    if (_8)
3913    and similarly for addition.
3914
3915    Also recognize:
3916    yc = (type) y;
3917    zc = (type) z;
3918    x = yc + zc;
3919    if (x > max)
3920    where y and z have unsigned types with maximum max
3921    and there are other uses of x and all of those cast x
3922    back to that unsigned type and again replace it with
3923    _7 = .ADD_OVERFLOW (y, z);
3924    _9 = REALPART_EXPR <_7>;
3925    _8 = IMAGPART_EXPR <_7>;
3926    if (_8)
3927    and replace (utype) x with _9.
3928
3929    Also recognize:
3930    x = ~z;
3931    if (y > x)
3932    and replace it with
3933    _7 = .ADD_OVERFLOW (y, z);
3934    _8 = IMAGPART_EXPR <_7>;
3935    if (_8)
3936
3937    And also recognize:
3938    z = x * y;
3939    if (x != 0)
3940      goto <bb 3>; [50.00%]
3941    else
3942      goto <bb 4>; [50.00%]
3943
3944    <bb 3> [local count: 536870913]:
3945    _2 = z / x;
3946    _9 = _2 != y;
3947    _10 = (int) _9;
3948
3949    <bb 4> [local count: 1073741824]:
3950    # iftmp.0_3 = PHI <_10(3), 0(2)>
3951    and replace it with
3952    _7 = .MUL_OVERFLOW (x, y);
3953    z = REALPART_EXPR <_7>;
3954    _8 = IMAGPART_EXPR <_7>;
3955    _9 = _8 != 0;
3956    iftmp.0_3 = (int) _9;  */
3957
3958 static bool
3959 match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
3960                       enum tree_code code, bool *cfg_changed)
3961 {
3962   tree lhs = gimple_assign_lhs (stmt);
3963   tree type = TREE_TYPE (lhs);
3964   use_operand_p use_p;
3965   imm_use_iterator iter;
3966   bool use_seen = false;
3967   bool ovf_use_seen = false;
3968   gimple *use_stmt;
3969   gimple *add_stmt = NULL;
3970   bool add_first = false;
3971   gimple *cond_stmt = NULL;
3972   gimple *cast_stmt = NULL;
3973   tree cast_lhs = NULL_TREE;
3974
3975   gcc_checking_assert (code == PLUS_EXPR
3976                        || code == MINUS_EXPR
3977                        || code == MULT_EXPR
3978                        || code == BIT_NOT_EXPR);
3979   if (!INTEGRAL_TYPE_P (type)
3980       || !TYPE_UNSIGNED (type)
3981       || has_zero_uses (lhs)
3982       || (code != PLUS_EXPR
3983           && code != MULT_EXPR
3984           && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
3985                             TYPE_MODE (type)) == CODE_FOR_nothing))
3986     return false;
3987
3988   tree rhs1 = gimple_assign_rhs1 (stmt);
3989   tree rhs2 = gimple_assign_rhs2 (stmt);
3990   FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
3991     {
3992       use_stmt = USE_STMT (use_p);
3993       if (is_gimple_debug (use_stmt))
3994         continue;
3995
3996       tree other = NULL_TREE;
3997       if (arith_overflow_check_p (stmt, NULL, use_stmt, NULL_TREE, &other))
3998         {
3999           if (code == BIT_NOT_EXPR)
4000             {
4001               gcc_assert (other);
4002               if (TREE_CODE (other) != SSA_NAME)
4003                 return false;
4004               if (rhs2 == NULL)
4005                 rhs2 = other;
4006               else
4007                 return false;
4008               cond_stmt = use_stmt;
4009             }
4010           ovf_use_seen = true;
4011         }
4012       else
4013         {
4014           use_seen = true;
4015           if (code == MULT_EXPR
4016               && cast_stmt == NULL
4017               && gimple_assign_cast_p (use_stmt))
4018             {
4019               cast_lhs = gimple_assign_lhs (use_stmt);
4020               if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
4021                   && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
4022                   && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
4023                       == TYPE_PRECISION (TREE_TYPE (lhs))))
4024                 cast_stmt = use_stmt;
4025               else
4026                 cast_lhs = NULL_TREE;
4027             }
4028         }
4029       if (ovf_use_seen && use_seen)
4030         break;
4031     }
4032
4033   if (!ovf_use_seen
4034       && code == MULT_EXPR
4035       && cast_stmt)
4036     {
4037       if (TREE_CODE (rhs1) != SSA_NAME
4038           || (TREE_CODE (rhs2) != SSA_NAME && TREE_CODE (rhs2) != INTEGER_CST))
4039         return false;
4040       FOR_EACH_IMM_USE_FAST (use_p, iter, cast_lhs)
4041         {
4042           use_stmt = USE_STMT (use_p);
4043           if (is_gimple_debug (use_stmt))
4044             continue;
4045
4046           if (arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4047                                       NULL_TREE, NULL))
4048             ovf_use_seen = true;
4049         }
4050     }
4051   else
4052     {
4053       cast_stmt = NULL;
4054       cast_lhs = NULL_TREE;
4055     }
4056
4057   tree maxval = NULL_TREE;
4058   if (!ovf_use_seen
4059       || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
4060       || (code == PLUS_EXPR
4061           && optab_handler (uaddv4_optab,
4062                             TYPE_MODE (type)) == CODE_FOR_nothing)
4063       || (code == MULT_EXPR
4064           && optab_handler (cast_stmt ? mulv4_optab : umulv4_optab,
4065                             TYPE_MODE (type)) == CODE_FOR_nothing))
4066     {
4067       if (code != PLUS_EXPR)
4068         return false;
4069       if (TREE_CODE (rhs1) != SSA_NAME
4070           || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1)))
4071         return false;
4072       rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1));
4073       tree type1 = TREE_TYPE (rhs1);
4074       if (!INTEGRAL_TYPE_P (type1)
4075           || !TYPE_UNSIGNED (type1)
4076           || TYPE_PRECISION (type1) >= TYPE_PRECISION (type)
4077           || (TYPE_PRECISION (type1)
4078               != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1))))
4079         return false;
4080       if (TREE_CODE (rhs2) == INTEGER_CST)
4081         {
4082           if (wi::ne_p (wi::rshift (wi::to_wide (rhs2),
4083                                     TYPE_PRECISION (type1),
4084                                     UNSIGNED), 0))
4085             return false;
4086           rhs2 = fold_convert (type1, rhs2);
4087         }
4088       else
4089         {
4090           if (TREE_CODE (rhs2) != SSA_NAME
4091               || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2)))
4092             return false;
4093           rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2));
4094           tree type2 = TREE_TYPE (rhs2);
4095           if (!INTEGRAL_TYPE_P (type2)
4096               || !TYPE_UNSIGNED (type2)
4097               || TYPE_PRECISION (type2) >= TYPE_PRECISION (type)
4098               || (TYPE_PRECISION (type2)
4099                   != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2))))
4100             return false;
4101         }
4102       if (TYPE_PRECISION (type1) >= TYPE_PRECISION (TREE_TYPE (rhs2)))
4103         type = type1;
4104       else
4105         type = TREE_TYPE (rhs2);
4106
4107       if (TREE_CODE (type) != INTEGER_TYPE
4108           || optab_handler (uaddv4_optab,
4109                             TYPE_MODE (type)) == CODE_FOR_nothing)
4110         return false;
4111
4112       maxval = wide_int_to_tree (type, wi::max_value (TYPE_PRECISION (type),
4113                                                       UNSIGNED));
4114       ovf_use_seen = false;
4115       use_seen = false;
4116       basic_block use_bb = NULL;
4117       FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4118         {
4119           use_stmt = USE_STMT (use_p);
4120           if (is_gimple_debug (use_stmt))
4121             continue;
4122
4123           if (arith_overflow_check_p (stmt, NULL, use_stmt, maxval, NULL))
4124             {
4125               ovf_use_seen = true;
4126               use_bb = gimple_bb (use_stmt);
4127             }
4128           else
4129             {
4130               if (!gimple_assign_cast_p (use_stmt)
4131                   || gimple_assign_rhs_code (use_stmt) == VIEW_CONVERT_EXPR)
4132                 return false;
4133               tree use_lhs = gimple_assign_lhs (use_stmt);
4134               if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
4135                   || (TYPE_PRECISION (TREE_TYPE (use_lhs))
4136                       > TYPE_PRECISION (type)))
4137                 return false;
4138               use_seen = true;
4139             }
4140         }
4141       if (!ovf_use_seen)
4142         return false;
4143       if (!useless_type_conversion_p (type, TREE_TYPE (rhs1)))
4144         {
4145           if (!use_seen)
4146             return false;
4147           tree new_rhs1 = make_ssa_name (type);
4148           gimple *g = gimple_build_assign (new_rhs1, NOP_EXPR, rhs1);
4149           gsi_insert_before (gsi, g, GSI_SAME_STMT);
4150           rhs1 = new_rhs1;
4151         }
4152       else if (!useless_type_conversion_p (type, TREE_TYPE (rhs2)))
4153         {
4154           if (!use_seen)
4155             return false;
4156           tree new_rhs2 = make_ssa_name (type);
4157           gimple *g = gimple_build_assign (new_rhs2, NOP_EXPR, rhs2);
4158           gsi_insert_before (gsi, g, GSI_SAME_STMT);
4159           rhs2 = new_rhs2;
4160         }
4161       else if (!use_seen)
4162         {
4163           /* If there are no uses of the wider addition, check if
4164              forwprop has not created a narrower addition.
4165              Require it to be in the same bb as the overflow check.  */
4166           FOR_EACH_IMM_USE_FAST (use_p, iter, rhs1)
4167             {
4168               use_stmt = USE_STMT (use_p);
4169               if (is_gimple_debug (use_stmt))
4170                 continue;
4171
4172               if (use_stmt == stmt)
4173                 continue;
4174
4175               if (!is_gimple_assign (use_stmt)
4176                   || gimple_bb (use_stmt) != use_bb
4177                   || gimple_assign_rhs_code (use_stmt) != PLUS_EXPR)
4178                 continue;
4179
4180               if (gimple_assign_rhs1 (use_stmt) == rhs1)
4181                 {
4182                   if (!operand_equal_p (gimple_assign_rhs2 (use_stmt),
4183                                         rhs2, 0))
4184                     continue;
4185                 }
4186               else if (gimple_assign_rhs2 (use_stmt) == rhs1)
4187                 {
4188                   if (gimple_assign_rhs1 (use_stmt) != rhs2)
4189                     continue;
4190                 }
4191               else
4192                 continue;
4193
4194               add_stmt = use_stmt;
4195               break;
4196             }
4197           if (add_stmt == NULL)
4198             return false;
4199
4200           /* If stmt and add_stmt are in the same bb, we need to find out
4201              which one is earlier.  If they are in different bbs, we've
4202              checked add_stmt is in the same bb as one of the uses of the
4203              stmt lhs, so stmt needs to dominate add_stmt too.  */
4204           if (gimple_bb (stmt) == gimple_bb (add_stmt))
4205             {
4206               gimple_stmt_iterator gsif = *gsi;
4207               gimple_stmt_iterator gsib = *gsi;
4208               int i;
4209               /* Search both forward and backward from stmt and have a small
4210                  upper bound.  */
4211               for (i = 0; i < 128; i++)
4212                 {
4213                   if (!gsi_end_p (gsib))
4214                     {
4215                       gsi_prev_nondebug (&gsib);
4216                       if (gsi_stmt (gsib) == add_stmt)
4217                         {
4218                           add_first = true;
4219                           break;
4220                         }
4221                     }
4222                   else if (gsi_end_p (gsif))
4223                     break;
4224                   if (!gsi_end_p (gsif))
4225                     {
4226                       gsi_next_nondebug (&gsif);
4227                       if (gsi_stmt (gsif) == add_stmt)
4228                         break;
4229                     }
4230                 }
4231               if (i == 128)
4232                 return false;
4233               if (add_first)
4234                 *gsi = gsi_for_stmt (add_stmt);
4235             }
4236         }
4237     }
4238
4239   if (code == BIT_NOT_EXPR)
4240     *gsi = gsi_for_stmt (cond_stmt);
4241
4242   auto_vec<gimple *, 8> mul_stmts;
4243   if (code == MULT_EXPR && cast_stmt)
4244     {
4245       type = TREE_TYPE (cast_lhs);
4246       gimple *g = SSA_NAME_DEF_STMT (rhs1);
4247       if (gimple_assign_cast_p (g)
4248           && useless_type_conversion_p (type,
4249                                         TREE_TYPE (gimple_assign_rhs1 (g)))
4250           && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4251         rhs1 = gimple_assign_rhs1 (g);
4252       else
4253         {
4254           g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs1);
4255           gsi_insert_before (gsi, g, GSI_SAME_STMT);
4256           rhs1 = gimple_assign_lhs (g);
4257           mul_stmts.quick_push (g);
4258         }
4259       if (TREE_CODE (rhs2) == INTEGER_CST)
4260         rhs2 = fold_convert (type, rhs2);
4261       else
4262         {
4263           g = SSA_NAME_DEF_STMT (rhs2);
4264           if (gimple_assign_cast_p (g)
4265               && useless_type_conversion_p (type,
4266                                             TREE_TYPE (gimple_assign_rhs1 (g)))
4267               && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4268             rhs2 = gimple_assign_rhs1 (g);
4269           else
4270             {
4271               g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs2);
4272               gsi_insert_before (gsi, g, GSI_SAME_STMT);
4273               rhs2 = gimple_assign_lhs (g);
4274               mul_stmts.quick_push (g);
4275             }
4276         }
4277     }
4278   tree ctype = build_complex_type (type);
4279   gcall *g = gimple_build_call_internal (code == MULT_EXPR
4280                                          ? IFN_MUL_OVERFLOW
4281                                          : code != MINUS_EXPR
4282                                          ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
4283                                          2, rhs1, rhs2);
4284   tree ctmp = make_ssa_name (ctype);
4285   gimple_call_set_lhs (g, ctmp);
4286   gsi_insert_before (gsi, g, GSI_SAME_STMT);
4287   tree new_lhs = (maxval || cast_stmt) ? make_ssa_name (type) : lhs;
4288   gassign *g2;
4289   if (code != BIT_NOT_EXPR)
4290     {
4291       g2 = gimple_build_assign (new_lhs, REALPART_EXPR,
4292                                 build1 (REALPART_EXPR, type, ctmp));
4293       if (maxval || cast_stmt)
4294         {
4295           gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4296           if (add_first)
4297             *gsi = gsi_for_stmt (stmt);
4298         }
4299       else
4300         gsi_replace (gsi, g2, true);
4301       if (code == MULT_EXPR)
4302         {
4303           mul_stmts.quick_push (g);
4304           mul_stmts.quick_push (g2);
4305           if (cast_stmt)
4306             {
4307               g2 = gimple_build_assign (lhs, NOP_EXPR, new_lhs);
4308               gsi_replace (gsi, g2, true);
4309               mul_stmts.quick_push (g2);
4310             }
4311         }
4312     }
4313   tree ovf = make_ssa_name (type);
4314   g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
4315                             build1 (IMAGPART_EXPR, type, ctmp));
4316   if (code != BIT_NOT_EXPR)
4317     gsi_insert_after (gsi, g2, GSI_NEW_STMT);
4318   else
4319     gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4320   if (code == MULT_EXPR)
4321     mul_stmts.quick_push (g2);
4322
4323   FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs)
4324     {
4325       if (is_gimple_debug (use_stmt))
4326         continue;
4327
4328       gimple *orig_use_stmt = use_stmt;
4329       int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4330                                             maxval, NULL);
4331       if (ovf_use == 0)
4332         {
4333           gcc_assert (code != BIT_NOT_EXPR);
4334           if (maxval)
4335             {
4336               tree use_lhs = gimple_assign_lhs (use_stmt);
4337               gimple_assign_set_rhs1 (use_stmt, new_lhs);
4338               if (useless_type_conversion_p (TREE_TYPE (use_lhs),
4339                                              TREE_TYPE (new_lhs)))
4340                 gimple_assign_set_rhs_code (use_stmt, SSA_NAME);
4341               update_stmt (use_stmt);
4342             }
4343           continue;
4344         }
4345       if (gimple_code (use_stmt) == GIMPLE_COND)
4346         {
4347           gcond *cond_stmt = as_a <gcond *> (use_stmt);
4348           gimple_cond_set_lhs (cond_stmt, ovf);
4349           gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
4350           gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4351         }
4352       else
4353         {
4354           gcc_checking_assert (is_gimple_assign (use_stmt));
4355           if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
4356             {
4357               gimple_assign_set_rhs1 (use_stmt, ovf);
4358               gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
4359               gimple_assign_set_rhs_code (use_stmt,
4360                                           ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4361             }
4362           else
4363             {
4364               gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
4365                                    == COND_EXPR);
4366               tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
4367                                   boolean_type_node, ovf,
4368                                   build_int_cst (type, 0));
4369               gimple_assign_set_rhs1 (use_stmt, cond);
4370             }
4371         }
4372       update_stmt (use_stmt);
4373       if (code == MULT_EXPR && use_stmt != orig_use_stmt)
4374         {
4375           gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt);
4376           maybe_optimize_guarding_check (mul_stmts, use_stmt, orig_use_stmt,
4377                                          cfg_changed);
4378           gsi_remove (&gsi2, true);
4379           release_ssa_name (gimple_assign_lhs (orig_use_stmt));
4380         }
4381     }
4382   if (maxval)
4383     {
4384       gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
4385       gsi_remove (&gsi2, true);
4386       if (add_stmt)
4387         {
4388           gimple *g = gimple_build_assign (gimple_assign_lhs (add_stmt),
4389                                            new_lhs);
4390           gsi2 = gsi_for_stmt (add_stmt);
4391           gsi_replace (&gsi2, g, true);
4392         }
4393     }
4394   else if (code == BIT_NOT_EXPR)
4395     {
4396       *gsi = gsi_for_stmt (stmt);
4397       gsi_remove (gsi, true);
4398       release_ssa_name (lhs);
4399       return true;
4400     }
4401   return false;
4402 }
4403
4404 /* Return true if target has support for divmod.  */
4405
4406 static bool
4407 target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
4408 {
4409   /* If target supports hardware divmod insn, use it for divmod.  */
4410   if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
4411     return true;
4412
4413   /* Check if libfunc for divmod is available.  */
4414   rtx libfunc = optab_libfunc (divmod_optab, mode);
4415   if (libfunc != NULL_RTX)
4416     {
4417       /* If optab_handler exists for div_optab, perhaps in a wider mode,
4418          we don't want to use the libfunc even if it exists for given mode.  */
4419       machine_mode div_mode;
4420       FOR_EACH_MODE_FROM (div_mode, mode)
4421         if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
4422           return false;
4423
4424       return targetm.expand_divmod_libfunc != NULL;
4425     }
4426
4427   return false;
4428 }
4429
4430 /* Check if stmt is candidate for divmod transform.  */
4431
4432 static bool
4433 divmod_candidate_p (gassign *stmt)
4434 {
4435   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
4436   machine_mode mode = TYPE_MODE (type);
4437   optab divmod_optab, div_optab;
4438
4439   if (TYPE_UNSIGNED (type))
4440     {
4441       divmod_optab = udivmod_optab;
4442       div_optab = udiv_optab;
4443     }
4444   else
4445     {
4446       divmod_optab = sdivmod_optab;
4447       div_optab = sdiv_optab;
4448     }
4449
4450   tree op1 = gimple_assign_rhs1 (stmt);
4451   tree op2 = gimple_assign_rhs2 (stmt);
4452
4453   /* Disable the transform if either is a constant, since division-by-constant
4454      may have specialized expansion.  */
4455   if (CONSTANT_CLASS_P (op1))
4456     return false;
4457
4458   if (CONSTANT_CLASS_P (op2))
4459     {
4460       if (integer_pow2p (op2))
4461         return false;
4462
4463       if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT
4464           && TYPE_PRECISION (type) <= BITS_PER_WORD)
4465         return false;
4466
4467       /* If the divisor is not power of 2 and the precision wider than
4468          HWI, expand_divmod punts on that, so in that case it is better
4469          to use divmod optab or libfunc.  Similarly if choose_multiplier
4470          might need pre/post shifts of BITS_PER_WORD or more.  */
4471     }
4472
4473   /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
4474      expand using the [su]divv optabs.  */
4475   if (TYPE_OVERFLOW_TRAPS (type))
4476     return false;
4477
4478   if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
4479     return false;
4480
4481   return true;
4482 }
4483
4484 /* This function looks for:
4485    t1 = a TRUNC_DIV_EXPR b;
4486    t2 = a TRUNC_MOD_EXPR b;
4487    and transforms it to the following sequence:
4488    complex_tmp = DIVMOD (a, b);
4489    t1 = REALPART_EXPR(a);
4490    t2 = IMAGPART_EXPR(b);
4491    For conditions enabling the transform see divmod_candidate_p().
4492
4493    The pass has three parts:
4494    1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
4495       other trunc_div_expr and trunc_mod_expr stmts.
4496    2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
4497       to stmts vector.
4498    3) Insert DIVMOD call just before top_stmt and update entries in
4499       stmts vector to use return value of DIMOVD (REALEXPR_PART for div,
4500       IMAGPART_EXPR for mod).  */
4501
4502 static bool
4503 convert_to_divmod (gassign *stmt)
4504 {
4505   if (stmt_can_throw_internal (cfun, stmt)
4506       || !divmod_candidate_p (stmt))
4507     return false;
4508
4509   tree op1 = gimple_assign_rhs1 (stmt);
4510   tree op2 = gimple_assign_rhs2 (stmt);
4511
4512   imm_use_iterator use_iter;
4513   gimple *use_stmt;
4514   auto_vec<gimple *> stmts;
4515
4516   gimple *top_stmt = stmt;
4517   basic_block top_bb = gimple_bb (stmt);
4518
4519   /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
4520      at-least stmt and possibly other trunc_div/trunc_mod stmts
4521      having same operands as stmt.  */
4522
4523   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
4524     {
4525       if (is_gimple_assign (use_stmt)
4526           && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4527               || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4528           && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
4529           && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
4530         {
4531           if (stmt_can_throw_internal (cfun, use_stmt))
4532             continue;
4533
4534           basic_block bb = gimple_bb (use_stmt);
4535
4536           if (bb == top_bb)
4537             {
4538               if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
4539                 top_stmt = use_stmt;
4540             }
4541           else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
4542             {
4543               top_bb = bb;
4544               top_stmt = use_stmt;
4545             }
4546         }
4547     }
4548
4549   tree top_op1 = gimple_assign_rhs1 (top_stmt);
4550   tree top_op2 = gimple_assign_rhs2 (top_stmt);
4551
4552   stmts.safe_push (top_stmt);
4553   bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
4554
4555   /* Part 2: Add all trunc_div/trunc_mod statements domianted by top_bb
4556      to stmts vector. The 2nd loop will always add stmt to stmts vector, since
4557      gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
4558      2nd loop ends up adding at-least single trunc_mod_expr stmt.  */
4559
4560   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
4561     {
4562       if (is_gimple_assign (use_stmt)
4563           && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4564               || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4565           && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
4566           && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
4567         {
4568           if (use_stmt == top_stmt
4569               || stmt_can_throw_internal (cfun, use_stmt)
4570               || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
4571             continue;
4572
4573           stmts.safe_push (use_stmt);
4574           if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
4575             div_seen = true;
4576         }
4577     }
4578
4579   if (!div_seen)
4580     return false;
4581
4582   /* Part 3: Create libcall to internal fn DIVMOD:
4583      divmod_tmp = DIVMOD (op1, op2).  */
4584
4585   gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
4586   tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
4587                                  call_stmt, "divmod_tmp");
4588   gimple_call_set_lhs (call_stmt, res);
4589   /* We rejected throwing statements above.  */
4590   gimple_call_set_nothrow (call_stmt, true);
4591
4592   /* Insert the call before top_stmt.  */
4593   gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
4594   gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
4595
4596   widen_mul_stats.divmod_calls_inserted++;
4597
4598   /* Update all statements in stmts vector:
4599      lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
4600      lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>.  */
4601
4602   for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
4603     {
4604       tree new_rhs;
4605
4606       switch (gimple_assign_rhs_code (use_stmt))
4607         {
4608           case TRUNC_DIV_EXPR:
4609             new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
4610             break;
4611
4612           case TRUNC_MOD_EXPR:
4613             new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
4614             break;
4615
4616           default:
4617             gcc_unreachable ();
4618         }
4619
4620       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
4621       gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
4622       update_stmt (use_stmt);
4623     }
4624
4625   return true;
4626 }
4627
4628 /* Process a single gimple assignment STMT, which has a RSHIFT_EXPR as
4629    its rhs, and try to convert it into a MULT_HIGHPART_EXPR.  The return
4630    value is true iff we converted the statement.  */
4631
4632 static bool
4633 convert_mult_to_highpart (gassign *stmt, gimple_stmt_iterator *gsi)
4634 {
4635   tree lhs = gimple_assign_lhs (stmt);
4636   tree stype = TREE_TYPE (lhs);
4637   tree sarg0 = gimple_assign_rhs1 (stmt);
4638   tree sarg1 = gimple_assign_rhs2 (stmt);
4639
4640   if (TREE_CODE (stype) != INTEGER_TYPE
4641       || TREE_CODE (sarg1) != INTEGER_CST
4642       || TREE_CODE (sarg0) != SSA_NAME
4643       || !tree_fits_uhwi_p (sarg1)
4644       || !has_single_use (sarg0))
4645     return false;
4646
4647   gassign *def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (sarg0));
4648   if (!def)
4649     return false;
4650
4651   enum tree_code mcode = gimple_assign_rhs_code (def);
4652   if (mcode == NOP_EXPR)
4653     {
4654       tree tmp = gimple_assign_rhs1 (def);
4655       if (TREE_CODE (tmp) != SSA_NAME || !has_single_use (tmp))
4656         return false;
4657       def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (tmp));
4658       if (!def)
4659         return false;
4660       mcode = gimple_assign_rhs_code (def);
4661     }
4662
4663   if (mcode != WIDEN_MULT_EXPR
4664       || gimple_bb (def) != gimple_bb (stmt))
4665     return false;
4666   tree mtype = TREE_TYPE (gimple_assign_lhs (def));
4667   if (TREE_CODE (mtype) != INTEGER_TYPE
4668       || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype))
4669     return false;
4670
4671   tree mop1 = gimple_assign_rhs1 (def);
4672   tree mop2 = gimple_assign_rhs2 (def);
4673   tree optype = TREE_TYPE (mop1);
4674   bool unsignedp = TYPE_UNSIGNED (optype);
4675   unsigned int prec = TYPE_PRECISION (optype);
4676
4677   if (unsignedp != TYPE_UNSIGNED (mtype)
4678       || TYPE_PRECISION (mtype) != 2 * prec)
4679     return false;
4680
4681   unsigned HOST_WIDE_INT bits = tree_to_uhwi (sarg1);
4682   if (bits < prec || bits >= 2 * prec)
4683     return false;
4684
4685   /* For the time being, require operands to have the same sign.  */
4686   if (unsignedp != TYPE_UNSIGNED (TREE_TYPE (mop2)))
4687     return false;
4688
4689   machine_mode mode = TYPE_MODE (optype);
4690   optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
4691   if (optab_handler (tab, mode) == CODE_FOR_nothing)
4692     return false;
4693
4694   location_t loc = gimple_location (stmt);
4695   tree highpart1 = build_and_insert_binop (gsi, loc, "highparttmp",
4696                                            MULT_HIGHPART_EXPR, mop1, mop2);
4697   tree highpart2 = highpart1;
4698   tree ntype = optype;
4699
4700   if (TYPE_UNSIGNED (stype) != TYPE_UNSIGNED (optype))
4701     {
4702       ntype = TYPE_UNSIGNED (stype) ? unsigned_type_for (optype)
4703                                     : signed_type_for (optype);
4704       highpart2 = build_and_insert_cast (gsi, loc, ntype, highpart1);
4705     }
4706   if (bits > prec)
4707     highpart2 = build_and_insert_binop (gsi, loc, "highparttmp",
4708                                         RSHIFT_EXPR, highpart2,
4709                                         build_int_cst (ntype, bits - prec));
4710
4711   gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, highpart2);
4712   gsi_replace (gsi, new_stmt, true);
4713
4714   widen_mul_stats.highpart_mults_inserted++;
4715   return true;
4716 }
4717
/* If target has spaceship<MODE>3 expander, pattern recognize
   <bb 2> [local count: 1073741824]:
   if (a_2(D) == b_3(D))
     goto <bb 6>; [34.00%]
   else
     goto <bb 3>; [66.00%]

   <bb 3> [local count: 708669601]:
   if (a_2(D) < b_3(D))
     goto <bb 6>; [1.04%]
   else
     goto <bb 4>; [98.96%]

   <bb 4> [local count: 701299439]:
   if (a_2(D) > b_3(D))
     goto <bb 5>; [48.89%]
   else
     goto <bb 6>; [51.11%]

   <bb 5> [local count: 342865295]:

   <bb 6> [local count: 1073741824]:
   and turn it into:
   <bb 2> [local count: 1073741824]:
   _1 = .SPACESHIP (a_2(D), b_3(D));
   if (_1 == 0)
     goto <bb 6>; [34.00%]
   else
     goto <bb 3>; [66.00%]

   <bb 3> [local count: 708669601]:
   if (_1 == -1)
     goto <bb 6>; [1.04%]
   else
     goto <bb 4>; [98.96%]

   <bb 4> [local count: 701299439]:
   if (_1 == 1)
     goto <bb 5>; [48.89%]
   else
     goto <bb 6>; [51.11%]

   <bb 5> [local count: 342865295]:

   <bb 6> [local count: 1073741824]:
   so that the backend can emit optimal comparison and
   conditional jump sequence.

   STMT is the first comparison (a GIMPLE_COND).  The third comparison is
   optional; when it is not found only the first two are rewritten.  The
   result of .SPACESHIP is given the value range [-1, 2].  */

static void
optimize_spaceship (gimple *stmt)
{
  /* Only an equality or inequality test can start the pattern.  */
  enum tree_code code = gimple_cond_code (stmt);
  if (code != EQ_EXPR && code != NE_EXPR)
    return;
  tree arg1 = gimple_cond_lhs (stmt);
  tree arg2 = gimple_cond_rhs (stmt);
  /* Require scalar floating-point operands, a spaceship expander for
     their mode, and two operands that are not equal to each other.  */
  if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1))
      || optab_handler (spaceship_optab,
                        TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing
      || operand_equal_p (arg1, arg2, 0))
    return;

  /* EM1 and E1 are the edges whose controlling conditions are rewritten
     below to test the result against -1 and 1 respectively.  E2 is the
     remaining edge out of BB2 when a third comparison is found; otherwise
     it aliases EM1 or E1.  */
  basic_block bb0 = gimple_bb (stmt), bb1, bb2 = NULL;
  edge em1 = NULL, e1 = NULL, e2 = NULL;
  /* BB1 is the successor of BB0 reached when the operands compare
     unequal: the false edge of ==, or the true edge of !=.  */
  bb1 = EDGE_SUCC (bb0, 1)->dest;
  if (((EDGE_SUCC (bb0, 0)->flags & EDGE_TRUE_VALUE) != 0) ^ (code == EQ_EXPR))
    bb1 = EDGE_SUCC (bb0, 0)->dest;

  /* BB1 must have a single predecessor, satisfy cond_only_block_p and end
     in a comparison of the same two operands, in either order.  */
  gimple *g = last_stmt (bb1);
  if (g == NULL
      || gimple_code (g) != GIMPLE_COND
      || !single_pred_p (bb1)
      || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
          ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
          : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
             || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
      || !cond_only_block_p (bb1))
    return;

  /* Canonicalize the second comparison as a relation of ARG1 to ARG2:
     LT_EXPR for ARG1 </<= ARG2 and GT_EXPR for ARG1 >/>= ARG2, taking
     a swapped operand order into account.  */
  enum tree_code ccode = (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
                          ? LT_EXPR : GT_EXPR);
  switch (gimple_cond_code (g))
    {
    case LT_EXPR:
    case LE_EXPR:
      break;
    case GT_EXPR:
    case GE_EXPR:
      ccode = ccode == LT_EXPR ? GT_EXPR : LT_EXPR;
      break;
    default:
      return;
    }

  /* Look for an optional third comparison ending one of BB1's
     successors.  */
  for (int i = 0; i < 2; ++i)
    {
      /* With NaNs, </<=/>/>= are false, so we need to look for the
         third comparison on the false edge from whatever non-equality
         comparison the second comparison is.  */
      if (HONOR_NANS (TREE_TYPE (arg1))
          && (EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0)
        continue;

      /* The same requirements as for BB1 apply to BB2; in addition its
         two outgoing edges must lead to different blocks.  */
      bb2 = EDGE_SUCC (bb1, i)->dest;
      g = last_stmt (bb2);
      if (g == NULL
          || gimple_code (g) != GIMPLE_COND
          || !single_pred_p (bb2)
          || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
              ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
              : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
                 || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
          || !cond_only_block_p (bb2)
          || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
        continue;

      /* Canonicalize the third comparison the same way as the second.  */
      enum tree_code ccode2
        = (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR);
      switch (gimple_cond_code (g))
        {
        case LT_EXPR:
        case LE_EXPR:
          break;
        case GT_EXPR:
        case GE_EXPR:
          ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
          break;
        default:
          continue;
        }
      /* When NaNs are honored, a third comparison in the same direction
         as the second one is not accepted.  */
      if (HONOR_NANS (TREE_TYPE (arg1)) && ccode == ccode2)
        continue;

      /* Assign the edges: one of EM1/E1 is the edge of BB1 that does not
         lead to BB2, the other one and E2 are the two edges out of BB2.  */
      if ((ccode == LT_EXPR)
          ^ ((EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0))
        {
          em1 = EDGE_SUCC (bb1, 1 - i);
          e1 = EDGE_SUCC (bb2, 0);
          e2 = EDGE_SUCC (bb2, 1);
          if ((ccode2 == LT_EXPR) ^ ((e1->flags & EDGE_TRUE_VALUE) == 0))
            std::swap (e1, e2);
        }
      else
        {
          e1 = EDGE_SUCC (bb1, 1 - i);
          em1 = EDGE_SUCC (bb2, 0);
          e2 = EDGE_SUCC (bb2, 1);
          if ((ccode2 != LT_EXPR) ^ ((em1->flags & EDGE_TRUE_VALUE) == 0))
            std::swap (em1, e2);
        }
      break;
    }

  /* No third comparison was matched: use only the two edges out of BB1.
     E2 is then whichever of them is not the true edge.  */
  if (em1 == NULL)
    {
      if ((ccode == LT_EXPR)
          ^ ((EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0))
        {
          em1 = EDGE_SUCC (bb1, 1);
          e1 = EDGE_SUCC (bb1, 0);
          e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
        }
      else
        {
          em1 = EDGE_SUCC (bb1, 0);
          e1 = EDGE_SUCC (bb1, 1);
          e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
        }
    }

  /* Insert the .SPACESHIP call right before the first comparison.  */
  g = gimple_build_call_internal (IFN_SPACESHIP, 2, arg1, arg2);
  tree lhs = make_ssa_name (integer_type_node);
  gimple_call_set_lhs (g, lhs);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);

  /* The first comparison keeps its EQ_EXPR/NE_EXPR code but now tests the
     call result against zero.  */
  gcond *cond = as_a <gcond *> (stmt);
  gimple_cond_set_lhs (cond, lhs);
  gimple_cond_set_rhs (cond, integer_zero_node);
  update_stmt (stmt);

  /* Rewrite the comparison ending BB1 to test the result against -1 or 1,
     depending on which of EM1 and E1 leaves BB1.  */
  g = last_stmt (bb1);
  cond = as_a <gcond *> (g);
  gimple_cond_set_lhs (cond, lhs);
  if (em1->src == bb1 && e2 != em1)
    {
      gimple_cond_set_rhs (cond, integer_minus_one_node);
      gimple_cond_set_code (cond, (em1->flags & EDGE_TRUE_VALUE)
                                  ? EQ_EXPR : NE_EXPR);
    }
  else
    {
      gcc_assert (e1->src == bb1 && e2 != e1);
      gimple_cond_set_rhs (cond, integer_one_node);
      gimple_cond_set_code (cond, (e1->flags & EDGE_TRUE_VALUE)
                                  ? EQ_EXPR : NE_EXPR);
    }
  update_stmt (g);

  /* E2 distinct from both E1 and EM1 means a third comparison in BB2 was
     matched; rewrite it as well.  */
  if (e2 != e1 && e2 != em1)
    {
      g = last_stmt (bb2);
      cond = as_a <gcond *> (g);
      gimple_cond_set_lhs (cond, lhs);
      if (em1->src == bb2)
        gimple_cond_set_rhs (cond, integer_minus_one_node);
      else
        {
          gcc_assert (e1->src == bb2);
          gimple_cond_set_rhs (cond, integer_one_node);
        }
      gimple_cond_set_code (cond,
                            (e2->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR);
      update_stmt (g);
    }

  /* Record the value range [-1, 2] for the call result.  */
  wide_int wm1 = wi::minus_one (TYPE_PRECISION (integer_type_node));
  wide_int w2 = wi::two (TYPE_PRECISION (integer_type_node));
  value_range vr (TREE_TYPE (lhs), wm1, w2);
  set_range_info (lhs, vr);
}
4939
4940
4941 /* Find integer multiplications where the operands are extended from
4942    smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
4943    or MULT_HIGHPART_EXPR where appropriate.  */
4944
4945 namespace {
4946
/* Descriptor of the "widening_mul" GIMPLE pass: it requires SSA form and
   requests an SSA update when it finishes.  */
const pass_data pass_data_optimize_widening_mul =
{
  GIMPLE_PASS, /* type */
  "widening_mul", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_WIDEN_MUL, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
4959
4960 class pass_optimize_widening_mul : public gimple_opt_pass
4961 {
4962 public:
4963   pass_optimize_widening_mul (gcc::context *ctxt)
4964     : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
4965   {}
4966
4967   /* opt_pass methods: */
4968   bool gate (function *) final override
4969     {
4970       return flag_expensive_optimizations && optimize;
4971     }
4972
4973   unsigned int execute (function *) final override;
4974
4975 }; // class pass_optimize_widening_mul
4976
4977 /* Walker class to perform the transformation in reverse dominance order. */
4978
4979 class math_opts_dom_walker : public dom_walker
4980 {
4981 public:
4982   /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
4983      if walking modidifes the CFG.  */
4984
4985   math_opts_dom_walker (bool *cfg_changed_p)
4986     : dom_walker (CDI_DOMINATORS), m_last_result_set (),
4987       m_cfg_changed_p (cfg_changed_p) {}
4988
4989   /* The actual actions performed in the walk.  */
4990
4991   void after_dom_children (basic_block) final override;
4992
4993   /* Set of results of chains of multiply and add statement combinations that
4994      were not transformed into FMAs because of active deferring.  */
4995   hash_set<tree> m_last_result_set;
4996
4997   /* Pointer to a flag of the user that needs to be set if CFG has been
4998      modified.  */
4999   bool *m_cfg_changed_p;
5000 };
5001
5002 void
5003 math_opts_dom_walker::after_dom_children (basic_block bb)
5004 {
5005   gimple_stmt_iterator gsi;
5006
5007   fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
5008
5009   for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
5010     {
5011       gimple *stmt = gsi_stmt (gsi);
5012       enum tree_code code;
5013
5014       if (is_gimple_assign (stmt))
5015         {
5016           code = gimple_assign_rhs_code (stmt);
5017           switch (code)
5018             {
5019             case MULT_EXPR:
5020               if (!convert_mult_to_widen (stmt, &gsi)
5021                   && !convert_expand_mult_copysign (stmt, &gsi)
5022                   && convert_mult_to_fma (stmt,
5023                                           gimple_assign_rhs1 (stmt),
5024                                           gimple_assign_rhs2 (stmt),
5025                                           &fma_state))
5026                 {
5027                   gsi_remove (&gsi, true);
5028                   release_defs (stmt);
5029                   continue;
5030                 }
5031               match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
5032               break;
5033
5034             case PLUS_EXPR:
5035             case MINUS_EXPR:
5036               if (!convert_plusminus_to_widen (&gsi, stmt, code))
5037                 match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
5038               break;
5039
5040             case BIT_NOT_EXPR:
5041               if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
5042                 continue;
5043               break;
5044
5045             case TRUNC_MOD_EXPR:
5046               convert_to_divmod (as_a<gassign *> (stmt));
5047               break;
5048
5049             case RSHIFT_EXPR:
5050               convert_mult_to_highpart (as_a<gassign *> (stmt), &gsi);
5051               break;
5052
5053             default:;
5054             }
5055         }
5056       else if (is_gimple_call (stmt))
5057         {
5058           switch (gimple_call_combined_fn (stmt))
5059             {
5060             CASE_CFN_POW:
5061               if (gimple_call_lhs (stmt)
5062                   && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
5063                   && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
5064                                  &dconst2)
5065                   && convert_mult_to_fma (stmt,
5066                                           gimple_call_arg (stmt, 0),
5067                                           gimple_call_arg (stmt, 0),
5068                                           &fma_state))
5069                 {
5070                   unlink_stmt_vdef (stmt);
5071                   if (gsi_remove (&gsi, true)
5072                       && gimple_purge_dead_eh_edges (bb))
5073                     *m_cfg_changed_p = true;
5074                   release_defs (stmt);
5075                   continue;
5076                 }
5077               break;
5078
5079             case CFN_COND_MUL:
5080               if (convert_mult_to_fma (stmt,
5081                                        gimple_call_arg (stmt, 1),
5082                                        gimple_call_arg (stmt, 2),
5083                                        &fma_state,
5084                                        gimple_call_arg (stmt, 0)))
5085
5086                 {
5087                   gsi_remove (&gsi, true);
5088                   release_defs (stmt);
5089                   continue;
5090                 }
5091               break;
5092
5093             case CFN_LAST:
5094               cancel_fma_deferring (&fma_state);
5095               break;
5096
5097             default:
5098               break;
5099             }
5100         }
5101       else if (gimple_code (stmt) == GIMPLE_COND)
5102         optimize_spaceship (stmt);
5103       gsi_next (&gsi);
5104     }
5105   if (fma_state.m_deferring_p
5106       && fma_state.m_initial_phi)
5107     {
5108       gcc_checking_assert (fma_state.m_last_result);
5109       if (!last_fma_candidate_feeds_initial_phi (&fma_state,
5110                                                  &m_last_result_set))
5111         cancel_fma_deferring (&fma_state);
5112       else
5113         m_last_result_set.add (fma_state.m_last_result);
5114     }
5115 }
5116
5117
5118 unsigned int
5119 pass_optimize_widening_mul::execute (function *fun)
5120 {
5121   bool cfg_changed = false;
5122
5123   memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
5124   calculate_dominance_info (CDI_DOMINATORS);
5125   renumber_gimple_stmt_uids (cfun);
5126
5127   math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5128
5129   statistics_counter_event (fun, "widening multiplications inserted",
5130                             widen_mul_stats.widen_mults_inserted);
5131   statistics_counter_event (fun, "widening maccs inserted",
5132                             widen_mul_stats.maccs_inserted);
5133   statistics_counter_event (fun, "fused multiply-adds inserted",
5134                             widen_mul_stats.fmas_inserted);
5135   statistics_counter_event (fun, "divmod calls inserted",
5136                             widen_mul_stats.divmod_calls_inserted);
5137   statistics_counter_event (fun, "highpart multiplications inserted",
5138                             widen_mul_stats.highpart_mults_inserted);
5139
5140   return cfg_changed ? TODO_cleanup_cfg : 0;
5141 }
5142
5143 } // anon namespace
5144
5145 gimple_opt_pass *
5146 make_pass_optimize_widening_mul (gcc::context *ctxt)
5147 {
5148   return new pass_optimize_widening_mul (ctxt);
5149 }