1 /* Global, SSA-based optimizations using mathematical identities.
2 Copyright (C) 2005-2023 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* Currently, the only mini-pass in this file tries to CSE reciprocal
21 operations. These are common in sequences such as this one:
23 modulus = sqrt(x*x + y*y + z*z);
24 x = x / modulus;
25 y = y / modulus;
26 z = z / modulus;
28 that can be optimized to
30 modulus = sqrt(x*x + y*y + z*z);
31 rmodulus = 1.0 / modulus;
32 x = x * rmodulus;
33 y = y * rmodulus;
34 z = z * rmodulus;
36 We do this for loop invariant divisors, and with this pass whenever
37 we notice that a division has the same divisor multiple times.
39 Of course, like in PRE, we don't insert a division if a dominator
40 already has one. However, this cannot be done as an extension of
41 PRE for several reasons.
43 First of all, experiments showed that the
44 transformation is not always useful if there are only two divisions
45 by the same divisor. This is probably because modern processors
46 can pipeline the divisions; on older, in-order processors it should
47 still be effective to optimize two divisions by the same number.
48 We make this a param, and it shall be called N in the remainder of
49 this comment.
51 Second, if trapping math is active, we have less freedom on where
52 to insert divisions: we can only do so in basic blocks that already
53 contain one. (If divisions don't trap, we can instead insert
54 divisions elsewhere, which will be in blocks that are common dominators
55 of those that have the division).
57 We really don't want to compute the reciprocal unless a division will
58 be found. To do this, we won't insert the division in a basic block
59 that has fewer than N divisions *post-dominating* it.
61 The algorithm constructs a subset of the dominator tree, holding the
62 blocks containing the divisions and the common dominators to them,
63 and walks it twice. The first walk is in post-order, and it annotates
64 each block with the number of divisions that post-dominate it: this
65 gives information on where divisions can be inserted profitably.
66 The second walk is in pre-order, and it inserts divisions as explained
67 above, and replaces divisions by multiplications.
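As an illustration (restating the example above in these terms), the
three divisions by "modulus" all sit in one basic block, so the subset
of the dominator tree is a single node that the first walk annotates
with three post-dominating divisions; provided N is at most 3, the
second walk inserts the reciprocal right before the first division and
all three divisions become multiplications.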
69 In the best case, the cost of the pass is O(n_statements). In the
70 worst-case, the cost is due to creating the dominator tree subset,
71 with a cost of O(n_basic_blocks ^ 2); however this can only happen
72 for n_statements / n_basic_blocks statements. So, the amortized cost
73 of creating the dominator tree subset is O(n_basic_blocks) and the
74 worst-case cost of the pass is O(n_statements * n_basic_blocks).
76 More practically, the cost will be small because there are few
77 divisions, and they tend to be in the same basic block, so insert_bb
78 is called very few times.
80 If we did this using domwalk.cc, an efficient implementation would have
81 to work on all the variables in a single pass, because we could not
82 work on just a subset of the dominator tree, as we do now, and the
83 cost would also be something like O(n_statements * n_basic_blocks).
84 The data structures would be more complex in order to work on all the
85 variables in a single pass. */
87 #include "config.h"
88 #include "system.h"
89 #include "coretypes.h"
90 #include "backend.h"
91 #include "target.h"
92 #include "rtl.h"
93 #include "tree.h"
94 #include "gimple.h"
95 #include "predict.h"
96 #include "alloc-pool.h"
97 #include "tree-pass.h"
98 #include "ssa.h"
99 #include "optabs-tree.h"
100 #include "gimple-pretty-print.h"
101 #include "alias.h"
102 #include "fold-const.h"
103 #include "gimple-iterator.h"
104 #include "gimple-fold.h"
105 #include "gimplify.h"
106 #include "gimplify-me.h"
107 #include "stor-layout.h"
108 #include "tree-cfg.h"
109 #include "tree-dfa.h"
110 #include "tree-ssa.h"
111 #include "builtins.h"
112 #include "internal-fn.h"
113 #include "case-cfn-macros.h"
114 #include "optabs-libfuncs.h"
115 #include "tree-eh.h"
116 #include "targhooks.h"
117 #include "domwalk.h"
118 #include "tree-ssa-math-opts.h"
120 /* This structure represents one basic block that either computes a
121 division, or is a common dominator for basic blocks that compute a
122 division. */
123 struct occurrence {
124 /* The basic block represented by this structure. */
125 basic_block bb = basic_block();
127 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
128 inserted in BB. */
129 tree recip_def = tree();
131 /* If non-NULL, the SSA_NAME holding the definition for a squared
132 reciprocal inserted in BB. */
133 tree square_recip_def = tree();
135 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
136 was inserted in BB. */
137 gimple *recip_def_stmt = nullptr;
139 /* Pointer to a list of "struct occurrence"s for blocks dominated
140 by BB. */
141 struct occurrence *children = nullptr;
143 /* Pointer to the next "struct occurrence" in the list of blocks
144 sharing a common dominator. */
145 struct occurrence *next = nullptr;
147 /* The number of divisions that are in BB before compute_merit. The
148 number of divisions that are in BB or post-dominate it after
149 compute_merit. */
150 int num_divisions = 0;
152 /* True if the basic block has a division, false if it is a common
153 dominator for basic blocks that do. If it is false and trapping
154 math is active, BB is not a candidate for inserting a reciprocal. */
155 bool bb_has_division = false;
157 /* Construct a struct occurrence for basic block BB, and whose
158 children list is headed by CHILDREN. */
159 occurrence (basic_block bb, struct occurrence *children)
160 : bb (bb), children (children)
162 bb->aux = this;
165 /* Destroy a struct occurrence and remove it from its basic block. */
166 ~occurrence ()
168 bb->aux = nullptr;
171 /* Allocate memory for a struct occurrence from OCC_POOL. */
172 static void* operator new (size_t);
174 /* Return memory for a struct occurrence to OCC_POOL. */
175 static void operator delete (void*, size_t);
178 static struct
180 /* Number of 1.0/X ops inserted. */
181 int rdivs_inserted;
183 /* Number of 1.0/FUNC ops inserted. */
184 int rfuncs_inserted;
185 } reciprocal_stats;
187 static struct
189 /* Number of cexpi calls inserted. */
190 int inserted;
192 /* Number of conversions removed. */
193 int conv_removed;
195 } sincos_stats;
197 static struct
199 /* Number of widening multiplication ops inserted. */
200 int widen_mults_inserted;
202 /* Number of integer multiply-and-accumulate ops inserted. */
203 int maccs_inserted;
205 /* Number of fp fused multiply-add ops inserted. */
206 int fmas_inserted;
208 /* Number of divmod calls inserted. */
209 int divmod_calls_inserted;
211 /* Number of highpart multiplication ops inserted. */
212 int highpart_mults_inserted;
213 } widen_mul_stats;
215 /* The instance of "struct occurrence" representing the highest
216 interesting block in the dominator tree. */
217 static struct occurrence *occ_head;
219 /* Allocation pool for getting instances of "struct occurrence". */
220 static object_allocator<occurrence> *occ_pool;
222 void* occurrence::operator new (size_t n)
224 gcc_assert (n == sizeof(occurrence));
225 return occ_pool->allocate_raw ();
228 void occurrence::operator delete (void *occ, size_t n)
230 gcc_assert (n == sizeof(occurrence));
231 occ_pool->remove_raw (occ);
234 /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
235 list of "struct occurrence"s, one per basic block, having IDOM as
236 their common dominator.
238 We try to insert NEW_OCC as deep as possible in the tree, and we also
239 insert any other block that is a common dominator for BB and one
240 block already in the tree. */
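/* For example (illustrative only): if the tree currently holds an
   occurrence for a block B1 and we insert one for a block B2 such that
   neither block dominates the other but both are strictly dominated by
   some block D other than IDOM, a new occurrence for D is created on
   the fly and the occurrences for B1 and B2 both become its children.  */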
242 static void
243 insert_bb (struct occurrence *new_occ, basic_block idom,
244 struct occurrence **p_head)
246 struct occurrence *occ, **p_occ;
248 for (p_occ = p_head; (occ = *p_occ) != NULL; )
250 basic_block bb = new_occ->bb, occ_bb = occ->bb;
251 basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
252 if (dom == bb)
254 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
255 from its list. */
256 *p_occ = occ->next;
257 occ->next = new_occ->children;
258 new_occ->children = occ;
260 /* Try the next block (it may as well be dominated by BB). */
263 else if (dom == occ_bb)
265 /* OCC_BB dominates BB. Tail recurse to look deeper. */
266 insert_bb (new_occ, dom, &occ->children);
267 return;
270 else if (dom != idom)
272 gcc_assert (!dom->aux);
274 /* There is a dominator between IDOM and BB, add it and make
275 two children out of NEW_OCC and OCC. First, remove OCC from
276 its list. */
277 *p_occ = occ->next;
278 new_occ->next = occ;
279 occ->next = NULL;
281 /* None of the previous blocks has DOM as a dominator: if we tail
282 recursed, we would reexamine them uselessly. Just switch BB with
283 DOM, and go on looking for blocks dominated by DOM. */
284 new_occ = new occurrence (dom, new_occ);
287 else
289 /* Nothing special, go on with the next element. */
290 p_occ = &occ->next;
294 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
295 new_occ->next = *p_head;
296 *p_head = new_occ;
299 /* Register that we found a division in BB.
300 IMPORTANCE is a measure of how much weighting to give
301 that division. Use IMPORTANCE = 2 to register a single
302 division. If the division is going to be found multiple
303 times use 1 (as it is with squares). */
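/* For example, a use "a / x" is registered once with IMPORTANCE 2, whereas
   a use "a / (x * x)" is encountered twice while walking the uses of x and
   is therefore registered twice with IMPORTANCE 1, so both kinds of
   division end up with the same total weight.  */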
305 static inline void
306 register_division_in (basic_block bb, int importance)
308 struct occurrence *occ;
310 occ = (struct occurrence *) bb->aux;
311 if (!occ)
313 occ = new occurrence (bb, NULL);
314 insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
317 occ->bb_has_division = true;
318 occ->num_divisions += importance;
322 /* Compute the number of divisions that postdominate each block in OCC and
323 its children. */
325 static void
326 compute_merit (struct occurrence *occ)
328 struct occurrence *occ_child;
329 basic_block dom = occ->bb;
331 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
333 basic_block bb;
334 if (occ_child->children)
335 compute_merit (occ_child);
337 if (flag_exceptions)
338 bb = single_noncomplex_succ (dom);
339 else
340 bb = dom;
342 if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
343 occ->num_divisions += occ_child->num_divisions;
348 /* Return whether USE_STMT is a floating-point division by DEF. */
349 static inline bool
350 is_division_by (gimple *use_stmt, tree def)
352 return is_gimple_assign (use_stmt)
353 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
354 && gimple_assign_rhs2 (use_stmt) == def
355 /* Do not recognize x / x as valid division, as we are getting
356 confused later by replacing all immediate uses x in such
357 a stmt. */
358 && gimple_assign_rhs1 (use_stmt) != def
359 && !stmt_can_throw_internal (cfun, use_stmt);
362 /* Return TRUE if USE_STMT is a multiplication of DEF by A. */
363 static inline bool
364 is_mult_by (gimple *use_stmt, tree def, tree a)
366 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
367 && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
369 tree op0 = gimple_assign_rhs1 (use_stmt);
370 tree op1 = gimple_assign_rhs2 (use_stmt);
372 return (op0 == def && op1 == a)
373 || (op0 == a && op1 == def);
375 return 0;
378 /* Return whether USE_STMT is DEF * DEF. */
379 static inline bool
380 is_square_of (gimple *use_stmt, tree def)
382 return is_mult_by (use_stmt, def, def);
385 /* Return whether USE_STMT is a floating-point division by
386 DEF * DEF. */
387 static inline bool
388 is_division_by_square (gimple *use_stmt, tree def)
390 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
391 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
392 && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
393 && !stmt_can_throw_internal (cfun, use_stmt))
395 tree denominator = gimple_assign_rhs2 (use_stmt);
396 if (TREE_CODE (denominator) == SSA_NAME)
397 return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
399 return 0;
402 /* Walk the subset of the dominator tree rooted at OCC, setting the
403 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
404 the given basic block. The field may be left NULL, of course,
405 if it is not possible or profitable to do the optimization.
407 DEF_BSI is an iterator pointing at the statement defining DEF.
408 If RECIP_DEF is set, a dominator already has a computation that can
409 be used.
411 If should_insert_square_recip is set, then this also inserts
412 the square of the reciprocal immediately after the definition
413 of the reciprocal. */
415 static void
416 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
417 tree def, tree recip_def, tree square_recip_def,
418 int should_insert_square_recip, int threshold)
420 tree type;
421 gassign *new_stmt, *new_square_stmt;
422 gimple_stmt_iterator gsi;
423 struct occurrence *occ_child;
425 if (!recip_def
426 && (occ->bb_has_division || !flag_trapping_math)
427 /* Divide by two as all divisions are counted twice in
428 the costing loop. */
429 && occ->num_divisions / 2 >= threshold)
431 /* Make a variable with the replacement and substitute it. */
432 type = TREE_TYPE (def);
433 recip_def = create_tmp_reg (type, "reciptmp");
434 new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
435 build_one_cst (type), def);
437 if (should_insert_square_recip)
439 square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
440 new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
441 recip_def, recip_def);
444 if (occ->bb_has_division)
446 /* Case 1: insert before an existing division. */
447 gsi = gsi_after_labels (occ->bb);
448 while (!gsi_end_p (gsi)
449 && (!is_division_by (gsi_stmt (gsi), def))
450 && (!is_division_by_square (gsi_stmt (gsi), def)))
451 gsi_next (&gsi);
453 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
454 if (should_insert_square_recip)
455 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
457 else if (def_gsi && occ->bb == gsi_bb (*def_gsi))
459 /* Case 2: insert right after the definition. Note that this will
460 never happen if the definition statement can throw, because in
461 that case the sole successor of the statement's basic block will
462 dominate all the uses as well. */
463 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
464 if (should_insert_square_recip)
465 gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
467 else
469 /* Case 3: insert in a basic block not containing defs/uses. */
470 gsi = gsi_after_labels (occ->bb);
471 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
472 if (should_insert_square_recip)
473 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
476 reciprocal_stats.rdivs_inserted++;
478 occ->recip_def_stmt = new_stmt;
481 occ->recip_def = recip_def;
482 occ->square_recip_def = square_recip_def;
483 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
484 insert_reciprocals (def_gsi, occ_child, def, recip_def,
485 square_recip_def, should_insert_square_recip,
486 threshold);
489 /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
490 Take as argument the use for (x * x). */
491 static inline void
492 replace_reciprocal_squares (use_operand_p use_p)
494 gimple *use_stmt = USE_STMT (use_p);
495 basic_block bb = gimple_bb (use_stmt);
496 struct occurrence *occ = (struct occurrence *) bb->aux;
498 if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
499 && occ->recip_def)
501 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
502 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
503 gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
504 SET_USE (use_p, occ->square_recip_def);
505 fold_stmt_inplace (&gsi);
506 update_stmt (use_stmt);
511 /* Replace the division at USE_P with a multiplication by the reciprocal, if
512 possible. */
514 static inline void
515 replace_reciprocal (use_operand_p use_p)
517 gimple *use_stmt = USE_STMT (use_p);
518 basic_block bb = gimple_bb (use_stmt);
519 struct occurrence *occ = (struct occurrence *) bb->aux;
521 if (optimize_bb_for_speed_p (bb)
522 && occ->recip_def && use_stmt != occ->recip_def_stmt)
524 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
525 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
526 SET_USE (use_p, occ->recip_def);
527 fold_stmt_inplace (&gsi);
528 update_stmt (use_stmt);
533 /* Free OCC and return one more "struct occurrence" to be freed. */
535 static struct occurrence *
536 free_bb (struct occurrence *occ)
538 struct occurrence *child, *next;
540 /* First get the two pointers hanging off OCC. */
541 next = occ->next;
542 child = occ->children;
543 delete occ;
545 /* Now ensure that we don't recurse unless it is necessary. */
546 if (!child)
547 return next;
548 else
550 while (next)
551 next = free_bb (next);
553 return child;
557 /* Transform sequences like
558 t = sqrt (a)
559 x = 1.0 / t;
560 r1 = x * x;
561 r2 = a * x;
562 into:
563 t = sqrt (a)
564 r1 = 1.0 / a;
565 r2 = t;
566 x = r1 * r2;
567 depending on the uses of x, r1, r2. This removes one multiplication and
568 allows the sqrt and division operations to execute in parallel.
569 DEF_GSI is the gsi of the initial division by sqrt that defines
570 DEF (x in the example above). */
572 static void
573 optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
575 gimple *use_stmt;
576 imm_use_iterator use_iter;
577 gimple *stmt = gsi_stmt (*def_gsi);
578 tree x = def;
579 tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
580 tree div_rhs1 = gimple_assign_rhs1 (stmt);
582 if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
583 || TREE_CODE (div_rhs1) != REAL_CST
584 || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
585 return;
587 gcall *sqrt_stmt
588 = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
590 if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
591 return;
593 switch (gimple_call_combined_fn (sqrt_stmt))
595 CASE_CFN_SQRT:
596 CASE_CFN_SQRT_FN:
597 break;
599 default:
600 return;
602 tree a = gimple_call_arg (sqrt_stmt, 0);
604 /* We have 'a' and 'x'. Now analyze the uses of 'x'. */
606 /* Statements that use x in x * x. */
607 auto_vec<gimple *> sqr_stmts;
608 /* Statements that use x in a * x. */
609 auto_vec<gimple *> mult_stmts;
610 bool has_other_use = false;
611 bool mult_on_main_path = false;
613 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
615 if (is_gimple_debug (use_stmt))
616 continue;
617 if (is_square_of (use_stmt, x))
619 sqr_stmts.safe_push (use_stmt);
620 if (gimple_bb (use_stmt) == gimple_bb (stmt))
621 mult_on_main_path = true;
623 else if (is_mult_by (use_stmt, x, a))
625 mult_stmts.safe_push (use_stmt);
626 if (gimple_bb (use_stmt) == gimple_bb (stmt))
627 mult_on_main_path = true;
629 else
630 has_other_use = true;
633 /* In the x * x and a * x cases we just rewire stmt operands or
634 remove multiplications. In the has_other_use case we introduce
635 a multiplication so make sure we don't introduce a multiplication
636 on a path where there was none. */
637 if (has_other_use && !mult_on_main_path)
638 return;
640 if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
641 return;
643 /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
644 to be able to compose it from the sqr and mult cases. */
645 if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
646 return;
648 if (dump_file)
650 fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
651 print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
652 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
653 fprintf (dump_file, "\n");
656 bool delete_div = !has_other_use;
657 tree sqr_ssa_name = NULL_TREE;
658 if (!sqr_stmts.is_empty ())
660 /* r1 = x * x. Transform the original
661 x = 1.0 / t
662 into
663 tmp1 = 1.0 / a
664 r1 = tmp1. */
666 sqr_ssa_name
667 = make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");
669 if (dump_file)
671 fprintf (dump_file, "Replacing original division\n");
672 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
673 fprintf (dump_file, "with new division\n");
675 stmt
676 = gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
677 gimple_assign_rhs1 (stmt), a);
678 gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
679 gsi_remove (def_gsi, true);
680 *def_gsi = gsi_for_stmt (stmt);
681 fold_stmt_inplace (def_gsi);
682 update_stmt (stmt);
684 if (dump_file)
685 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
687 delete_div = false;
688 gimple *sqr_stmt;
689 unsigned int i;
690 FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
692 gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
693 gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
694 update_stmt (sqr_stmt);
697 if (!mult_stmts.is_empty ())
699 /* r2 = a * x. Transform this into:
700 r2 = t (The original sqrt (a)). */
701 unsigned int i;
702 gimple *mult_stmt = NULL;
703 FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
705 gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);
707 if (dump_file)
709 fprintf (dump_file, "Replacing squaring multiplication\n");
710 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
711 fprintf (dump_file, "with assignment\n");
713 gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
714 fold_stmt_inplace (&gsi2);
715 update_stmt (mult_stmt);
716 if (dump_file)
717 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
721 if (has_other_use)
723 /* Using the two temporaries tmp1, tmp2 from above
724 the original x is now:
725 x = tmp1 * tmp2. */
726 gcc_assert (orig_sqrt_ssa_name);
727 gcc_assert (sqr_ssa_name);
729 gimple *new_stmt
730 = gimple_build_assign (x, MULT_EXPR,
731 orig_sqrt_ssa_name, sqr_ssa_name);
732 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
733 update_stmt (stmt);
735 else if (delete_div)
737 /* Remove the original division. */
738 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
739 gsi_remove (&gsi2, true);
740 release_defs (stmt);
742 else
743 release_ssa_name (x);
746 /* Look for floating-point divisions among DEF's uses, and try to
747 replace them by multiplications with the reciprocal. Add
748 as many statements computing the reciprocal as needed.
750 DEF must be a GIMPLE register of a floating-point type. */
752 static void
753 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
755 use_operand_p use_p, square_use_p;
756 imm_use_iterator use_iter, square_use_iter;
757 tree square_def;
758 struct occurrence *occ;
759 int count = 0;
760 int threshold;
761 int square_recip_count = 0;
762 int sqrt_recip_count = 0;
764 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
765 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
767 /* If DEF is a square (x * x), count the number of divisions by x.
768 If there are more divisions by x than by (DEF * DEF), prefer to optimize
769 the reciprocal of x instead of DEF. This improves cases like:
770 def = x * x
771 t0 = a / def
772 t1 = b / def
773 t2 = c / x
774 Reciprocal optimization of x results in 1 division rather than 2 or 3. */
775 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
777 if (is_gimple_assign (def_stmt)
778 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
779 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
780 && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
782 tree op0 = gimple_assign_rhs1 (def_stmt);
784 FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
786 gimple *use_stmt = USE_STMT (use_p);
787 if (is_division_by (use_stmt, op0))
788 sqrt_recip_count++;
792 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
794 gimple *use_stmt = USE_STMT (use_p);
795 if (is_division_by (use_stmt, def))
797 register_division_in (gimple_bb (use_stmt), 2);
798 count++;
801 if (is_square_of (use_stmt, def))
803 square_def = gimple_assign_lhs (use_stmt);
804 FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
806 gimple *square_use_stmt = USE_STMT (square_use_p);
807 if (is_division_by (square_use_stmt, square_def))
809 /* This is executed twice for each division by a square. */
810 register_division_in (gimple_bb (square_use_stmt), 1);
811 square_recip_count++;
817 /* Square reciprocals were counted twice above. */
818 square_recip_count /= 2;
820 /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x). */
821 if (sqrt_recip_count > square_recip_count)
822 goto out;
824 /* Do the expensive part only if we can hope to optimize something. */
825 if (count + square_recip_count >= threshold && count >= 1)
827 gimple *use_stmt;
828 for (occ = occ_head; occ; occ = occ->next)
830 compute_merit (occ);
831 insert_reciprocals (def_gsi, occ, def, NULL, NULL,
832 square_recip_count, threshold);
835 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
837 if (is_division_by (use_stmt, def))
839 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
840 replace_reciprocal (use_p);
842 else if (square_recip_count > 0 && is_square_of (use_stmt, def))
844 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
846 /* Find all uses of the square that are divisions and
847 * replace them by multiplications with the inverse. */
848 imm_use_iterator square_iterator;
849 gimple *powmult_use_stmt = USE_STMT (use_p);
850 tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
852 FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
853 square_iterator, powmult_def_name)
854 FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
856 gimple *powmult_use_stmt = USE_STMT (square_use_p);
857 if (is_division_by (powmult_use_stmt, powmult_def_name))
858 replace_reciprocal_squares (square_use_p);
865 out:
866 for (occ = occ_head; occ; )
867 occ = free_bb (occ);
869 occ_head = NULL;
872 /* Return an internal function that implements the reciprocal of CALL,
873 or IFN_LAST if there is no such function that the target supports. */
875 internal_fn
876 internal_fn_reciprocal (gcall *call)
878 internal_fn ifn;
880 switch (gimple_call_combined_fn (call))
882 CASE_CFN_SQRT:
883 CASE_CFN_SQRT_FN:
884 ifn = IFN_RSQRT;
885 break;
887 default:
888 return IFN_LAST;
891 tree_pair types = direct_internal_fn_types (ifn, call);
892 if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
893 return IFN_LAST;
895 return ifn;
898 /* Go through all the floating-point SSA_NAMEs, and call
899 execute_cse_reciprocals_1 on each of them. */
900 namespace {
902 const pass_data pass_data_cse_reciprocals =
904 GIMPLE_PASS, /* type */
905 "recip", /* name */
906 OPTGROUP_NONE, /* optinfo_flags */
907 TV_TREE_RECIP, /* tv_id */
908 PROP_ssa, /* properties_required */
909 0, /* properties_provided */
910 0, /* properties_destroyed */
911 0, /* todo_flags_start */
912 TODO_update_ssa, /* todo_flags_finish */
915 class pass_cse_reciprocals : public gimple_opt_pass
917 public:
918 pass_cse_reciprocals (gcc::context *ctxt)
919 : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
922 /* opt_pass methods: */
923 bool gate (function *) final override
925 return optimize && flag_reciprocal_math;
927 unsigned int execute (function *) final override;
929 }; // class pass_cse_reciprocals
931 unsigned int
932 pass_cse_reciprocals::execute (function *fun)
934 basic_block bb;
935 tree arg;
937 occ_pool = new object_allocator<occurrence> ("dominators for recip");
939 memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
940 calculate_dominance_info (CDI_DOMINATORS);
941 calculate_dominance_info (CDI_POST_DOMINATORS);
943 if (flag_checking)
944 FOR_EACH_BB_FN (bb, fun)
945 gcc_assert (!bb->aux);
947 for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
948 if (FLOAT_TYPE_P (TREE_TYPE (arg))
949 && is_gimple_reg (arg))
951 tree name = ssa_default_def (fun, arg);
952 if (name)
953 execute_cse_reciprocals_1 (NULL, name);
956 FOR_EACH_BB_FN (bb, fun)
958 tree def;
960 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
961 gsi_next (&gsi))
963 gphi *phi = gsi.phi ();
964 def = PHI_RESULT (phi);
965 if (! virtual_operand_p (def)
966 && FLOAT_TYPE_P (TREE_TYPE (def)))
967 execute_cse_reciprocals_1 (NULL, def);
970 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
971 gsi_next (&gsi))
973 gimple *stmt = gsi_stmt (gsi);
975 if (gimple_has_lhs (stmt)
976 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
977 && FLOAT_TYPE_P (TREE_TYPE (def))
978 && TREE_CODE (def) == SSA_NAME)
980 execute_cse_reciprocals_1 (&gsi, def);
981 stmt = gsi_stmt (gsi);
982 if (flag_unsafe_math_optimizations
983 && is_gimple_assign (stmt)
984 && gimple_assign_lhs (stmt) == def
985 && !stmt_can_throw_internal (cfun, stmt)
986 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
987 optimize_recip_sqrt (&gsi, def);
991 if (optimize_bb_for_size_p (bb))
992 continue;
994 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
995 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
996 gsi_next (&gsi))
998 gimple *stmt = gsi_stmt (gsi);
1000 if (is_gimple_assign (stmt)
1001 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
1003 tree arg1 = gimple_assign_rhs2 (stmt);
1004 gimple *stmt1;
1006 if (TREE_CODE (arg1) != SSA_NAME)
1007 continue;
1009 stmt1 = SSA_NAME_DEF_STMT (arg1);
1011 if (is_gimple_call (stmt1)
1012 && gimple_call_lhs (stmt1))
1014 bool fail;
1015 imm_use_iterator ui;
1016 use_operand_p use_p;
1017 tree fndecl = NULL_TREE;
1019 gcall *call = as_a <gcall *> (stmt1);
1020 internal_fn ifn = internal_fn_reciprocal (call);
1021 if (ifn == IFN_LAST)
1023 fndecl = gimple_call_fndecl (call);
1024 if (!fndecl
1025 || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
1026 continue;
1027 fndecl = targetm.builtin_reciprocal (fndecl);
1028 if (!fndecl)
1029 continue;
1032 /* Check that all uses of the SSA name are divisions,
1033 otherwise replacing the defining statement will do
1034 the wrong thing. */
1035 fail = false;
1036 FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
1038 gimple *stmt2 = USE_STMT (use_p);
1039 if (is_gimple_debug (stmt2))
1040 continue;
1041 if (!is_gimple_assign (stmt2)
1042 || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
1043 || gimple_assign_rhs1 (stmt2) == arg1
1044 || gimple_assign_rhs2 (stmt2) != arg1)
1046 fail = true;
1047 break;
1050 if (fail)
1051 continue;
1053 gimple_replace_ssa_lhs (call, arg1);
1054 if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
1056 auto_vec<tree, 4> args;
1057 for (unsigned int i = 0;
1058 i < gimple_call_num_args (call); i++)
1059 args.safe_push (gimple_call_arg (call, i));
1060 gcall *stmt2;
1061 if (ifn == IFN_LAST)
1062 stmt2 = gimple_build_call_vec (fndecl, args);
1063 else
1064 stmt2 = gimple_build_call_internal_vec (ifn, args);
1065 gimple_call_set_lhs (stmt2, arg1);
1066 gimple_move_vops (stmt2, call);
1067 gimple_call_set_nothrow (stmt2,
1068 gimple_call_nothrow_p (call));
1069 gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
1070 gsi_replace (&gsi2, stmt2, true);
1072 else
1074 if (ifn == IFN_LAST)
1075 gimple_call_set_fndecl (call, fndecl);
1076 else
1077 gimple_call_set_internal_fn (call, ifn);
1078 update_stmt (call);
1080 reciprocal_stats.rfuncs_inserted++;
1082 FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
1084 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1085 gimple_assign_set_rhs_code (stmt, MULT_EXPR);
1086 fold_stmt_inplace (&gsi);
1087 update_stmt (stmt);
1094 statistics_counter_event (fun, "reciprocal divs inserted",
1095 reciprocal_stats.rdivs_inserted);
1096 statistics_counter_event (fun, "reciprocal functions inserted",
1097 reciprocal_stats.rfuncs_inserted);
1099 free_dominance_info (CDI_DOMINATORS);
1100 free_dominance_info (CDI_POST_DOMINATORS);
1101 delete occ_pool;
1102 return 0;
1105 } // anon namespace
1107 gimple_opt_pass *
1108 make_pass_cse_reciprocals (gcc::context *ctxt)
1110 return new pass_cse_reciprocals (ctxt);
1113 /* If NAME is the result of a type conversion, look for other
1114 equivalent dominating or dominated conversions, and replace all
1115 uses with the earliest dominating name, removing the redundant
1116 conversions. Return the prevailing name. */
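/* For example (illustrative only), given

     a_1 = (double) i_3;
     ...
     b_2 = (double) i_3;

   where the definition of a_1 dominates that of b_2, every use of b_2 is
   replaced by a_1 and the redundant second conversion is removed.  */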
1118 static tree
1119 execute_cse_conv_1 (tree name, bool *cfg_changed)
1121 if (SSA_NAME_IS_DEFAULT_DEF (name)
1122 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1123 return name;
1125 gimple *def_stmt = SSA_NAME_DEF_STMT (name);
1127 if (!gimple_assign_cast_p (def_stmt))
1128 return name;
1130 tree src = gimple_assign_rhs1 (def_stmt);
1132 if (TREE_CODE (src) != SSA_NAME)
1133 return name;
1135 imm_use_iterator use_iter;
1136 gimple *use_stmt;
1138 /* Find the earliest dominating def. */
1139 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1141 if (use_stmt == def_stmt
1142 || !gimple_assign_cast_p (use_stmt))
1143 continue;
1145 tree lhs = gimple_assign_lhs (use_stmt);
1147 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1148 || (gimple_assign_rhs1 (use_stmt)
1149 != gimple_assign_rhs1 (def_stmt))
1150 || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1151 continue;
1153 bool use_dominates;
1154 if (gimple_bb (def_stmt) == gimple_bb (use_stmt))
1156 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1157 while (!gsi_end_p (gsi) && gsi_stmt (gsi) != def_stmt)
1158 gsi_next (&gsi);
1159 use_dominates = !gsi_end_p (gsi);
1161 else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
1162 gimple_bb (def_stmt)))
1163 use_dominates = false;
1164 else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (def_stmt),
1165 gimple_bb (use_stmt)))
1166 use_dominates = true;
1167 else
1168 continue;
1170 if (use_dominates)
1172 std::swap (name, lhs);
1173 std::swap (def_stmt, use_stmt);
1177 /* Now go through all uses of SRC again, replacing the equivalent
1178 dominated conversions. We may replace defs that were not
1179 dominated by the then-prevailing defs when we first visited
1180 them. */
1181 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1183 if (use_stmt == def_stmt
1184 || !gimple_assign_cast_p (use_stmt))
1185 continue;
1187 tree lhs = gimple_assign_lhs (use_stmt);
1189 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1190 || (gimple_assign_rhs1 (use_stmt)
1191 != gimple_assign_rhs1 (def_stmt))
1192 || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1193 continue;
1195 basic_block use_bb = gimple_bb (use_stmt);
1196 if (gimple_bb (def_stmt) == use_bb
1197 || dominated_by_p (CDI_DOMINATORS, use_bb, gimple_bb (def_stmt)))
1199 sincos_stats.conv_removed++;
1201 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1202 replace_uses_by (lhs, name);
1203 if (gsi_remove (&gsi, true)
1204 && gimple_purge_dead_eh_edges (use_bb))
1205 *cfg_changed = true;
1206 release_defs (use_stmt);
1210 return name;
1213 /* Records an occurrence at statement USE_STMT in the vector of statements
1214 STMTS if it is dominated by *TOP_BB, dominates it, or if *TOP_BB is
1215 not yet initialized. Returns true if the occurrence was pushed on
1216 the vector. Adjusts *TOP_BB to be the basic block dominating all
1217 statements in the vector. */
1219 static bool
1220 maybe_record_sincos (vec<gimple *> *stmts,
1221 basic_block *top_bb, gimple *use_stmt)
1223 basic_block use_bb = gimple_bb (use_stmt);
1224 if (*top_bb
1225 && (*top_bb == use_bb
1226 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
1227 stmts->safe_push (use_stmt);
1228 else if (!*top_bb
1229 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
1231 stmts->safe_push (use_stmt);
1232 *top_bb = use_bb;
1234 else
1235 return false;
1237 return true;
1240 /* Look for sin, cos and cexpi calls with the same argument NAME and
1241 create a single call to cexpi CSEing the result in this case.
1242 We first walk over all immediate uses of the argument collecting
1243 statements that we can CSE in a vector and in a second pass replace
1244 the statement rhs with a REALPART or IMAGPART expression on the
1245 result of the cexpi call we insert before the use statement that
1246 dominates all other candidates. */
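/* For example (illustrative only):

     s_1 = __builtin_sin (x_2);
     c_3 = __builtin_cos (x_2);

   becomes

     sincostmp_4 = __builtin_cexpi (x_2);
     s_1 = IMAGPART_EXPR <sincostmp_4>;
     c_3 = REALPART_EXPR <sincostmp_4>;  */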
1248 static bool
1249 execute_cse_sincos_1 (tree name)
1251 gimple_stmt_iterator gsi;
1252 imm_use_iterator use_iter;
1253 tree fndecl, res, type = NULL_TREE;
1254 gimple *def_stmt, *use_stmt, *stmt;
1255 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
1256 auto_vec<gimple *> stmts;
1257 basic_block top_bb = NULL;
1258 int i;
1259 bool cfg_changed = false;
1261 name = execute_cse_conv_1 (name, &cfg_changed);
1263 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
1265 if (gimple_code (use_stmt) != GIMPLE_CALL
1266 || !gimple_call_lhs (use_stmt))
1267 continue;
1269 switch (gimple_call_combined_fn (use_stmt))
1271 CASE_CFN_COS:
1272 seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1273 break;
1275 CASE_CFN_SIN:
1276 seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1277 break;
1279 CASE_CFN_CEXPI:
1280 seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1281 break;
1283 default:;
1284 continue;
1287 tree t = mathfn_built_in_type (gimple_call_combined_fn (use_stmt));
1288 if (!type)
1290 type = t;
1291 t = TREE_TYPE (name);
1293 /* This checks that NAME has the right type in the first round,
1294 and, in subsequent rounds, that the built_in type is the same
1295 type, or a compatible type. */
1296 if (type != t && !types_compatible_p (type, t))
1297 return false;
1299 if (seen_cos + seen_sin + seen_cexpi <= 1)
1300 return false;
1302 /* Simply insert cexpi at the beginning of top_bb but not earlier than
1303 the name def statement. */
1304 fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
1305 if (!fndecl)
1306 return false;
1307 stmt = gimple_build_call (fndecl, 1, name);
1308 res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
1309 gimple_call_set_lhs (stmt, res);
1311 def_stmt = SSA_NAME_DEF_STMT (name);
1312 if (!SSA_NAME_IS_DEFAULT_DEF (name)
1313 && gimple_code (def_stmt) != GIMPLE_PHI
1314 && gimple_bb (def_stmt) == top_bb)
1316 gsi = gsi_for_stmt (def_stmt);
1317 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1319 else
1321 gsi = gsi_after_labels (top_bb);
1322 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1324 sincos_stats.inserted++;
1326 /* And adjust the recorded old call sites. */
1327 for (i = 0; stmts.iterate (i, &use_stmt); ++i)
1329 tree rhs = NULL;
1331 switch (gimple_call_combined_fn (use_stmt))
1333 CASE_CFN_COS:
1334 rhs = fold_build1 (REALPART_EXPR, type, res);
1335 break;
1337 CASE_CFN_SIN:
1338 rhs = fold_build1 (IMAGPART_EXPR, type, res);
1339 break;
1341 CASE_CFN_CEXPI:
1342 rhs = res;
1343 break;
1345 default:;
1346 gcc_unreachable ();
1349 /* Replace call with a copy. */
1350 stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
1352 gsi = gsi_for_stmt (use_stmt);
1353 gsi_replace (&gsi, stmt, true);
1354 if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
1355 cfg_changed = true;
1358 return cfg_changed;
1361 /* To evaluate powi(x,n), the floating point value x raised to the
1362 constant integer exponent n, we use a hybrid algorithm that
1363 combines the "window method" with look-up tables. For an
1364 introduction to exponentiation algorithms and "addition chains",
1365 see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1366 "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1367 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1368 Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
1370 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
1371 multiplications to inline before calling the system library's pow
1372 function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1373 so this default never requires calling pow, powf or powl. */
1375 #ifndef POWI_MAX_MULTS
1376 #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
1377 #endif
1379 /* The size of the "optimal power tree" lookup table. All
1380 exponents less than this value are simply looked up in the
1381 powi_table below. This threshold is also used to size the
1382 cache of pseudo registers that hold intermediate results. */
1383 #define POWI_TABLE_SIZE 256
1385 /* The size, in bits of the window, used in the "window method"
1386 exponentiation algorithm. This is equivalent to a radix of
1387 (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
1388 #define POWI_WINDOW_SIZE 3
1390 /* The following table is an efficient representation of an
1391 "optimal power tree". For each value, i, the corresponding
1392 value, j, in the table states that an optimal evaluation
1393 sequence for calculating pow(x,i) can be found by evaluating
1394 pow(x,j)*pow(x,i-j). An optimal power tree for the first
1395 100 integers is given in Knuth's "Seminumerical algorithms". */
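/* As a worked example (illustrative only): powi_table[7] is 4, so x**7 is
   evaluated as x**4 * x**3; recursively x**4 = x**2 * x**2 and
   x**3 = x**2 * x, for a total of four multiplications.  For exponents of
   POWI_TABLE_SIZE or more, the window method first peels off the low
   POWI_WINDOW_SIZE bits, e.g. x**259 = x**256 * x**3 and
   x**256 = x**128 * x**128, after which the table takes over.  */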
1397 static const unsigned char powi_table[POWI_TABLE_SIZE] =
1399 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
1400 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
1401 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
1402 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
1403 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
1404 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
1405 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
1406 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
1407 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
1408 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
1409 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
1410 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
1411 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
1412 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
1413 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
1414 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
1415 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
1416 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
1417 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
1418 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
1419 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
1420 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
1421 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
1422 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
1423 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
1424 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
1425 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
1426 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
1427 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
1428 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
1429 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
1430 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
1434 /* Return the number of multiplications required to calculate
1435 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
1436 subroutine of powi_cost. CACHE is an array indicating
1437 which exponents have already been calculated. */
1439 static int
1440 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1442 /* If we've already calculated this exponent, then this evaluation
1443 doesn't require any additional multiplications. */
1444 if (cache[n])
1445 return 0;
1447 cache[n] = true;
1448 return powi_lookup_cost (n - powi_table[n], cache)
1449 + powi_lookup_cost (powi_table[n], cache) + 1;
1452 /* Return the number of multiplications required to calculate
1453 powi(x,n) for an arbitrary x, given the exponent N. This
1454 function needs to be kept in sync with powi_as_mults below. */
1456 static int
1457 powi_cost (HOST_WIDE_INT n)
1459 bool cache[POWI_TABLE_SIZE];
1460 unsigned HOST_WIDE_INT digit;
1461 unsigned HOST_WIDE_INT val;
1462 int result;
1464 if (n == 0)
1465 return 0;
1467 /* Ignore the reciprocal when calculating the cost. */
1468 val = absu_hwi (n);
1470 /* Initialize the exponent cache. */
1471 memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
1472 cache[1] = true;
1474 result = 0;
1476 while (val >= POWI_TABLE_SIZE)
1478 if (val & 1)
1480 digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
1481 result += powi_lookup_cost (digit, cache)
1482 + POWI_WINDOW_SIZE + 1;
1483 val >>= POWI_WINDOW_SIZE;
1485 else
1487 val >>= 1;
1488 result++;
1492 return result + powi_lookup_cost (val, cache);
1495 /* Recursive subroutine of powi_as_mults. This function takes the
1496 array, CACHE, of already calculated exponents and an exponent N and
1497 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1499 static tree
1500 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1501 unsigned HOST_WIDE_INT n, tree *cache)
1503 tree op0, op1, ssa_target;
1504 unsigned HOST_WIDE_INT digit;
1505 gassign *mult_stmt;
1507 if (n < POWI_TABLE_SIZE && cache[n])
1508 return cache[n];
1510 ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1512 if (n < POWI_TABLE_SIZE)
1514 cache[n] = ssa_target;
1515 op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1516 op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1518 else if (n & 1)
1520 digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1521 op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1522 op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1524 else
1526 op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1527 op1 = op0;
1530 mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1531 gimple_set_location (mult_stmt, loc);
1532 gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1534 return ssa_target;
1537 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1538 This function needs to be kept in sync with powi_cost above. */
1540 tree
1541 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1542 tree arg0, HOST_WIDE_INT n)
1544 tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1545 gassign *div_stmt;
1546 tree target;
1548 if (n == 0)
1549 return build_one_cst (type);
1551 memset (cache, 0, sizeof (cache));
1552 cache[1] = arg0;
1554 result = powi_as_mults_1 (gsi, loc, type, absu_hwi (n), cache);
1555 if (n >= 0)
1556 return result;
1558 /* If the original exponent was negative, reciprocate the result. */
1559 target = make_temp_ssa_name (type, NULL, "powmult");
1560 div_stmt = gimple_build_assign (target, RDIV_EXPR,
1561 build_real (type, dconst1), result);
1562 gimple_set_location (div_stmt, loc);
1563 gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1565 return target;
1568 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1569 location info LOC. If the arguments are appropriate, create an
1570 equivalent sequence of statements prior to GSI using an optimal
1571 number of multiplications, and return an expression holding the
1572 result. */
1574 static tree
1575 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1576 tree arg0, HOST_WIDE_INT n)
1578 if ((n >= -1 && n <= 2)
1579 || (optimize_function_for_speed_p (cfun)
1580 && powi_cost (n) <= POWI_MAX_MULTS))
1581 return powi_as_mults (gsi, loc, arg0, n);
1583 return NULL_TREE;
1586 /* Build a gimple call statement that calls FN with argument ARG.
1587 Set the lhs of the call statement to a fresh SSA name. Insert the
1588 statement prior to GSI's current position, and return the fresh
1589 SSA name. */
1591 static tree
1592 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1593 tree fn, tree arg)
1595 gcall *call_stmt;
1596 tree ssa_target;
1598 call_stmt = gimple_build_call (fn, 1, arg);
1599 ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1600 gimple_set_lhs (call_stmt, ssa_target);
1601 gimple_set_location (call_stmt, loc);
1602 gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1604 return ssa_target;
1607 /* Build a gimple binary operation with the given CODE and arguments
1608 ARG0, ARG1, assigning the result to a new SSA name for variable
1609 TARGET. Insert the statement prior to GSI's current position, and
1610 return the fresh SSA name. */
1612 static tree
1613 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1614 const char *name, enum tree_code code,
1615 tree arg0, tree arg1)
1617 tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1618 gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1619 gimple_set_location (stmt, loc);
1620 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1621 return result;
1624 /* Build a gimple reference operation with the given CODE and argument
1625 ARG, assigning the result to a new SSA name of TYPE with NAME.
1626 Insert the statement prior to GSI's current position, and return
1627 the fresh SSA name. */
1629 static inline tree
1630 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1631 const char *name, enum tree_code code, tree arg0)
1633 tree result = make_temp_ssa_name (type, NULL, name);
1634 gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1635 gimple_set_location (stmt, loc);
1636 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1637 return result;
1640 /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1641 prior to GSI's current position, and return the fresh SSA name. */
1643 static tree
1644 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1645 tree type, tree val)
1647 tree result = make_ssa_name (type);
1648 gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1649 gimple_set_location (stmt, loc);
1650 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1651 return result;
1654 struct pow_synth_sqrt_info
1656 bool *factors;
1657 unsigned int deepest;
1658 unsigned int num_mults;
1661 /* Return true iff the real value C can be represented as a
1662 sum of powers of 0.5 up to N. That is:
1663 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1664 Record in INFO the various parameters of the synthesis algorithm such
1665 as the factors a[i], the maximum 0.5 power and the number of
1666 multiplications that will be required. */
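/* For example (illustrative only), C == 0.625 is representable for N >= 3:
   0.625 == 0.5**1 + 0.5**3, i.e. a[1] == a[3] == 1 and a[2] == 0, so INFO
   records deepest == 3 and num_mults == 1, and pow (x, 0.625) can be
   synthesized as sqrt (x) * sqrt (sqrt (sqrt (x))).  */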
1668 bool
1669 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1670 struct pow_synth_sqrt_info *info)
1672 REAL_VALUE_TYPE factor = dconsthalf;
1673 REAL_VALUE_TYPE remainder = c;
1675 info->deepest = 0;
1676 info->num_mults = 0;
1677 memset (info->factors, 0, n * sizeof (bool));
1679 for (unsigned i = 0; i < n; i++)
1681 REAL_VALUE_TYPE res;
1683 /* If something inexact happened bail out now. */
1684 if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1685 return false;
1687 /* We have hit zero. The number is representable as a sum
1688 of powers of 0.5. */
1689 if (real_equal (&res, &dconst0))
1691 info->factors[i] = true;
1692 info->deepest = i + 1;
1693 return true;
1695 else if (!REAL_VALUE_NEGATIVE (res))
1697 remainder = res;
1698 info->factors[i] = true;
1699 info->num_mults++;
1701 else
1702 info->factors[i] = false;
1704 real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1706 return false;
1709 /* Return the tree corresponding to FN being applied
1710 to ARG N times at GSI and LOC.
1711 Look up previous results from CACHE if need be.
1712 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1714 static tree
1715 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1716 tree fn, location_t loc, tree *cache)
1718 tree res = cache[n];
1719 if (!res)
1721 tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1722 res = build_and_insert_call (gsi, loc, fn, prev);
1723 cache[n] = res;
1726 return res;
1729 /* Print to STREAM the repeated application of function FNAME to ARG
1730 N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1731 "foo (foo (x))". */
1733 static void
1734 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1735 unsigned int n)
1737 if (n == 0)
1738 fprintf (stream, "%s", arg);
1739 else
1741 fprintf (stream, "%s (", fname);
1742 print_nested_fn (stream, fname, arg, n - 1);
1743 fprintf (stream, ")");
1747 /* Print to STREAM the fractional sequence of sqrt chains
1748 applied to ARG, described by INFO. Used for the dump file. */
1750 static void
1751 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1752 struct pow_synth_sqrt_info *info)
1754 for (unsigned int i = 0; i < info->deepest; i++)
1756 bool is_set = info->factors[i];
1757 if (is_set)
1759 print_nested_fn (stream, "sqrt", arg, i + 1);
1760 if (i != info->deepest - 1)
1761 fprintf (stream, " * ");
1766 /* Print to STREAM a representation of raising ARG to an integer
1767 power N. Used for the dump file. */
1769 static void
1770 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1772 if (n > 1)
1773 fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1774 else if (n == 1)
1775 fprintf (stream, "%s", arg);
1778 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1779 square roots. Place at GSI and LOC. Limit the maximum depth
1780 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1781 result of the expanded sequence or NULL_TREE if the expansion failed.
1783 This routine assumes that ARG1 is a real number with a fractional part
1784 (the integer exponent case will have been handled earlier in
1785 gimple_expand_builtin_pow).
1787 For ARG1 > 0.0:
1788 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1789 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1790 FRAC_PART == ARG1 - WHOLE_PART:
1791 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1792 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1793 if it can be expressed as such, that is if FRAC_PART satisfies:
1794 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1795 where integer a[i] is either 0 or 1.
1797 Example:
1798 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1799 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1801 For ARG1 < 0.0 there are two approaches:
1802 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1803 is calculated as above.
1805 Example:
1806 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1807 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1809 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1810 FRAC_PART := ARG1 - WHOLE_PART
1811 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1812 Example:
1813 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1814 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1816 For ARG1 < 0.0 we choose between (A) and (B) depending on
1817 how many multiplications we'd have to do.
1818 So, for the example in (B): POW (x, -5.875), if we were to
1819 follow algorithm (A) we would produce:
1820 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1821 which contains more multiplications than approach (B).
1823 Hopefully, this approach will eliminate potentially expensive POW library
1824 calls when unsafe floating point math is enabled and allow the compiler to
1825 further optimise the multiplies, square roots and divides produced by this
1826 function. */
1828 static tree
1829 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1830 tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1832 tree type = TREE_TYPE (arg0);
1833 machine_mode mode = TYPE_MODE (type);
1834 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1835 bool one_over = true;
1837 if (!sqrtfn)
1838 return NULL_TREE;
1840 if (TREE_CODE (arg1) != REAL_CST)
1841 return NULL_TREE;
1843 REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1845 gcc_assert (max_depth > 0);
1846 tree *cache = XALLOCAVEC (tree, max_depth + 1);
1848 struct pow_synth_sqrt_info synth_info;
1849 synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1850 synth_info.deepest = 0;
1851 synth_info.num_mults = 0;
1853 bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1854 REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1856 /* The whole and fractional parts of exp. */
1857 REAL_VALUE_TYPE whole_part;
1858 REAL_VALUE_TYPE frac_part;
1860 real_floor (&whole_part, mode, &exp);
1861 real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1864 REAL_VALUE_TYPE ceil_whole = dconst0;
1865 REAL_VALUE_TYPE ceil_fract = dconst0;
1867 if (neg_exp)
1869 real_ceil (&ceil_whole, mode, &exp);
1870 real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1873 if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1874 return NULL_TREE;
1876 /* Check whether it's more profitable to not use 1.0 / ... */
1877 if (neg_exp)
1879 struct pow_synth_sqrt_info alt_synth_info;
1880 alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1881 alt_synth_info.deepest = 0;
1882 alt_synth_info.num_mults = 0;
1884 if (representable_as_half_series_p (ceil_fract, max_depth,
1885 &alt_synth_info)
1886 && alt_synth_info.deepest <= synth_info.deepest
1887 && alt_synth_info.num_mults < synth_info.num_mults)
1889 whole_part = ceil_whole;
1890 frac_part = ceil_fract;
1891 synth_info.deepest = alt_synth_info.deepest;
1892 synth_info.num_mults = alt_synth_info.num_mults;
1893 memcpy (synth_info.factors, alt_synth_info.factors,
1894 (max_depth + 1) * sizeof (bool));
1895 one_over = false;
1899 HOST_WIDE_INT n = real_to_integer (&whole_part);
1900 REAL_VALUE_TYPE cint;
1901 real_from_integer (&cint, VOIDmode, n, SIGNED);
1903 if (!real_identical (&whole_part, &cint))
1904 return NULL_TREE;
1906 if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1907 return NULL_TREE;
1909 memset (cache, 0, (max_depth + 1) * sizeof (tree));
1911 tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1913 /* Calculate the integer part of the exponent. */
1914 if (n > 1)
1916 integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1917 if (!integer_res)
1918 return NULL_TREE;
1921 if (dump_file)
1923 char string[64];
1925 real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1926 fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1928 if (neg_exp)
1930 if (one_over)
1932 fprintf (dump_file, "1.0 / (");
1933 dump_integer_part (dump_file, "x", n);
1934 if (n > 0)
1935 fprintf (dump_file, " * ");
1936 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1937 fprintf (dump_file, ")");
1939 else
1941 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1942 fprintf (dump_file, " / (");
1943 dump_integer_part (dump_file, "x", n);
1944 fprintf (dump_file, ")");
1947 else
1949 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1950 if (n > 0)
1951 fprintf (dump_file, " * ");
1952 dump_integer_part (dump_file, "x", n);
1955 fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1959 tree fract_res = NULL_TREE;
1960 cache[0] = arg0;
1962 /* Calculate the fractional part of the exponent. */
1963 for (unsigned i = 0; i < synth_info.deepest; i++)
1965 if (synth_info.factors[i])
1967 tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1969 if (!fract_res)
1970 fract_res = sqrt_chain;
1972 else
1973 fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1974 fract_res, sqrt_chain);
1978 tree res = NULL_TREE;
1980 if (neg_exp)
1982 if (one_over)
1984 if (n > 0)
1985 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1986 fract_res, integer_res);
1987 else
1988 res = fract_res;
1990 res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1991 build_real (type, dconst1), res);
1993 else
1995 res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1996 fract_res, integer_res);
1999 else
2000 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2001 fract_res, integer_res);
2002 return res;
2005 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
2006 with location info LOC. If possible, create an equivalent and
2007 less expensive sequence of statements prior to GSI, and return an
2008    expression holding the result. */
2010 static tree
2011 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
2012 tree arg0, tree arg1)
2014 REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
2015 REAL_VALUE_TYPE c2, dconst3;
2016 HOST_WIDE_INT n;
2017 tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
2018 machine_mode mode;
2019 bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
2020 bool hw_sqrt_exists, c_is_int, c2_is_int;
2022 dconst1_4 = dconst1;
2023 SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
2025 /* If the exponent isn't a constant, there's nothing of interest
2026 to be done. */
2027 if (TREE_CODE (arg1) != REAL_CST)
2028 return NULL_TREE;
2030 /* Don't perform the operation if flag_signaling_nans is on
2031 and the operand is a signaling NaN. */
2032 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
2033 && ((TREE_CODE (arg0) == REAL_CST
2034 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
2035 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
2036 return NULL_TREE;
2038 /* If the exponent is equivalent to an integer, expand to an optimal
2039 multiplication sequence when profitable. */
2040 c = TREE_REAL_CST (arg1);
2041 n = real_to_integer (&c);
2042 real_from_integer (&cint, VOIDmode, n, SIGNED);
2043 c_is_int = real_identical (&c, &cint);
2045 if (c_is_int
2046 && ((n >= -1 && n <= 2)
2047 || (flag_unsafe_math_optimizations
2048 && speed_p
2049 && powi_cost (n) <= POWI_MAX_MULTS)))
2050 return gimple_expand_builtin_powi (gsi, loc, arg0, n);
2052 /* Attempt various optimizations using sqrt and cbrt. */
2053 type = TREE_TYPE (arg0);
2054 mode = TYPE_MODE (type);
2055 sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2057 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
2058 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
2059 sqrt(-0) = -0. */
2060 if (sqrtfn
2061 && real_equal (&c, &dconsthalf)
2062 && !HONOR_SIGNED_ZEROS (mode))
2063 return build_and_insert_call (gsi, loc, sqrtfn, arg0);
2065 hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
2067 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
2068 optimizations since 1./3. is not exactly representable. If x
2069 is negative and finite, the correct value of pow(x,1./3.) is
2070 a NaN with the "invalid" exception raised, because the value
2071 of 1./3. actually has an even denominator. The correct value
2072 of cbrt(x) is a negative real value. */
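   /* For instance, pow (-8.0, 1./3.) is a NaN because the rounded exponent
      is not exactly one third, whereas cbrt (-8.0) == -2.0; this is why the
      replacement below also requires that NaNs need not be honored or that
      ARG0 is known to be nonnegative.  */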
2073 cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
2074 dconst1_3 = real_value_truncate (mode, dconst_third ());
2076 if (flag_unsafe_math_optimizations
2077 && cbrtfn
2078 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2079 && real_equal (&c, &dconst1_3))
2080 return build_and_insert_call (gsi, loc, cbrtfn, arg0);
2082 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
2083 if we don't have a hardware sqrt insn. */
2084 dconst1_6 = dconst1_3;
2085 SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
2087 if (flag_unsafe_math_optimizations
2088 && sqrtfn
2089 && cbrtfn
2090 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2091 && speed_p
2092 && hw_sqrt_exists
2093 && real_equal (&c, &dconst1_6))
2095 /* sqrt(x) */
2096 sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
2098 /* cbrt(sqrt(x)) */
2099 return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
2103 /* Attempt to expand the POW as a product of square root chains.
2104     Expand the 0.25 case even when optimising for size. */
2105 if (flag_unsafe_math_optimizations
2106 && sqrtfn
2107 && hw_sqrt_exists
2108 && (speed_p || real_equal (&c, &dconst1_4))
2109 && !HONOR_SIGNED_ZEROS (mode))
2111 unsigned int max_depth = speed_p
2112 ? param_max_pow_sqrt_depth
2113 : 2;
2115 tree expand_with_sqrts
2116 = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
2118 if (expand_with_sqrts)
2119 return expand_with_sqrts;
2122 real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
2123 n = real_to_integer (&c2);
2124 real_from_integer (&cint, VOIDmode, n, SIGNED);
2125 c2_is_int = real_identical (&c2, &cint);
2127 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2129 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
2130 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
2132 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
2133 different from pow(x, 1./3.) due to rounding and behavior with
2134 negative x, we need to constrain this transformation to unsafe
2135 math and positive x or finite math. */
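   /* When this transformation applies, e.g. pow (x, 4./3.) has n == 4
      and becomes powi (x, 4/3) * powi (cbrt (x), 4%3), i.e. x * cbrt (x).  */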
2136 real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
2137 real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
2138 real_round (&c2, mode, &c2);
2139 n = real_to_integer (&c2);
2140 real_from_integer (&cint, VOIDmode, n, SIGNED);
2141 real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
2142 real_convert (&c2, mode, &c2);
2144 if (flag_unsafe_math_optimizations
2145 && cbrtfn
2146 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2147 && real_identical (&c2, &c)
2148 && !c2_is_int
2149 && optimize_function_for_speed_p (cfun)
2150 && powi_cost (n / 3) <= POWI_MAX_MULTS)
2152 tree powi_x_ndiv3 = NULL_TREE;
2154 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
2155 possible or profitable, give up. Skip the degenerate case when
2156 abs(n) < 3, where the result is always 1. */
2157 if (absu_hwi (n) >= 3)
2159 powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
2160 abs_hwi (n / 3));
2161 if (!powi_x_ndiv3)
2162 return NULL_TREE;
2165 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
2166 as that creates an unnecessary variable. Instead, just produce
2167 either cbrt(x) or cbrt(x) * cbrt(x). */
2168 cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
2170 if (absu_hwi (n) % 3 == 1)
2171 powi_cbrt_x = cbrt_x;
2172 else
2173 powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2174 cbrt_x, cbrt_x);
2176 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
2177 if (absu_hwi (n) < 3)
2178 result = powi_cbrt_x;
2179 else
2180 result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2181 powi_x_ndiv3, powi_cbrt_x);
2183 /* If n is negative, reciprocate the result. */
2184 if (n < 0)
2185 result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
2186 build_real (type, dconst1), result);
2188 return result;
2191 /* No optimizations succeeded. */
2192 return NULL_TREE;
2195 /* ARG is the argument to a cabs builtin call in GSI with location info
2196 LOC. Create a sequence of statements prior to GSI that calculates
2197 sqrt(R*R + I*I), where R and I are the real and imaginary components
2198 of ARG, respectively. Return an expression holding the result. */
2200 static tree
2201 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
2203 tree real_part, imag_part, addend1, addend2, sum, result;
2204 tree type = TREE_TYPE (TREE_TYPE (arg));
2205 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2206 machine_mode mode = TYPE_MODE (type);
2208 if (!flag_unsafe_math_optimizations
2209 || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
2210 || !sqrtfn
2211 || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
2212 return NULL_TREE;
2214 real_part = build_and_insert_ref (gsi, loc, type, "cabs",
2215 REALPART_EXPR, arg);
2216 addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2217 real_part, real_part);
2218 imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
2219 IMAGPART_EXPR, arg);
2220 addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2221 imag_part, imag_part);
2222 sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
2223 result = build_and_insert_call (gsi, loc, sqrtfn, sum);
2225 return result;
2228 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2229 on the SSA_NAME argument of each of them. */
2231 namespace {
2233 const pass_data pass_data_cse_sincos =
2235 GIMPLE_PASS, /* type */
2236 "sincos", /* name */
2237 OPTGROUP_NONE, /* optinfo_flags */
2238 TV_TREE_SINCOS, /* tv_id */
2239 PROP_ssa, /* properties_required */
2240 0, /* properties_provided */
2241 0, /* properties_destroyed */
2242 0, /* todo_flags_start */
2243 TODO_update_ssa, /* todo_flags_finish */
2246 class pass_cse_sincos : public gimple_opt_pass
2248 public:
2249 pass_cse_sincos (gcc::context *ctxt)
2250 : gimple_opt_pass (pass_data_cse_sincos, ctxt)
2253 /* opt_pass methods: */
2254 bool gate (function *) final override
2256 return optimize;
2259 unsigned int execute (function *) final override;
2261 }; // class pass_cse_sincos
2263 unsigned int
2264 pass_cse_sincos::execute (function *fun)
2266 basic_block bb;
2267 bool cfg_changed = false;
2269 calculate_dominance_info (CDI_DOMINATORS);
2270 memset (&sincos_stats, 0, sizeof (sincos_stats));
2272 FOR_EACH_BB_FN (bb, fun)
2274 gimple_stmt_iterator gsi;
2276 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2278 gimple *stmt = gsi_stmt (gsi);
2280 if (is_gimple_call (stmt)
2281 && gimple_call_lhs (stmt))
2283 tree arg;
2284 switch (gimple_call_combined_fn (stmt))
2286 CASE_CFN_COS:
2287 CASE_CFN_SIN:
2288 CASE_CFN_CEXPI:
2289 arg = gimple_call_arg (stmt, 0);
2290 /* Make sure we have either sincos or cexp. */
2291 if (!targetm.libc_has_function (function_c99_math_complex,
2292 TREE_TYPE (arg))
2293 && !targetm.libc_has_function (function_sincos,
2294 TREE_TYPE (arg)))
2295 break;
2297 if (TREE_CODE (arg) == SSA_NAME)
2298 cfg_changed |= execute_cse_sincos_1 (arg);
2299 break;
2300 default:
2301 break;
2307 statistics_counter_event (fun, "sincos statements inserted",
2308 sincos_stats.inserted);
2309 statistics_counter_event (fun, "conv statements removed",
2310 sincos_stats.conv_removed);
2312 return cfg_changed ? TODO_cleanup_cfg : 0;
2315 } // anon namespace
2317 gimple_opt_pass *
2318 make_pass_cse_sincos (gcc::context *ctxt)
2320 return new pass_cse_sincos (ctxt);
2323 /* Expand powi(x,n) into an optimal number of multiplies, when n is a constant.
2324 Also expand CABS. */
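/* (For instance, powi (x, 5) needs only three multiplications,
    e.g. x2 = x*x; x4 = x2*x2; x4*x, rather than four.)  */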
2325 namespace {
2327 const pass_data pass_data_expand_powcabs =
2329 GIMPLE_PASS, /* type */
2330 "powcabs", /* name */
2331 OPTGROUP_NONE, /* optinfo_flags */
2332 TV_TREE_POWCABS, /* tv_id */
2333 PROP_ssa, /* properties_required */
2334 PROP_gimple_opt_math, /* properties_provided */
2335 0, /* properties_destroyed */
2336 0, /* todo_flags_start */
2337 TODO_update_ssa, /* todo_flags_finish */
2340 class pass_expand_powcabs : public gimple_opt_pass
2342 public:
2343 pass_expand_powcabs (gcc::context *ctxt)
2344 : gimple_opt_pass (pass_data_expand_powcabs, ctxt)
2347 /* opt_pass methods: */
2348 bool gate (function *) final override
2350 return optimize;
2353 unsigned int execute (function *) final override;
2355 }; // class pass_expand_powcabs
2357 unsigned int
2358 pass_expand_powcabs::execute (function *fun)
2360 basic_block bb;
2361 bool cfg_changed = false;
2363 calculate_dominance_info (CDI_DOMINATORS);
2365 FOR_EACH_BB_FN (bb, fun)
2367 gimple_stmt_iterator gsi;
2368 bool cleanup_eh = false;
2370 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2372 gimple *stmt = gsi_stmt (gsi);
2374 	  /* Only the last stmt in a bb could throw; no need to call
2375 gimple_purge_dead_eh_edges if we change something in the middle
2376 of a basic block. */
2377 cleanup_eh = false;
2379 if (is_gimple_call (stmt)
2380 && gimple_call_lhs (stmt))
2382 tree arg0, arg1, result;
2383 HOST_WIDE_INT n;
2384 location_t loc;
2386 switch (gimple_call_combined_fn (stmt))
2388 CASE_CFN_POW:
2389 arg0 = gimple_call_arg (stmt, 0);
2390 arg1 = gimple_call_arg (stmt, 1);
2392 loc = gimple_location (stmt);
2393 result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
2395 if (result)
2397 tree lhs = gimple_get_lhs (stmt);
2398 gassign *new_stmt = gimple_build_assign (lhs, result);
2399 gimple_set_location (new_stmt, loc);
2400 unlink_stmt_vdef (stmt);
2401 gsi_replace (&gsi, new_stmt, true);
2402 cleanup_eh = true;
2403 if (gimple_vdef (stmt))
2404 release_ssa_name (gimple_vdef (stmt));
2406 break;
2408 CASE_CFN_POWI:
2409 arg0 = gimple_call_arg (stmt, 0);
2410 arg1 = gimple_call_arg (stmt, 1);
2411 loc = gimple_location (stmt);
2413 if (real_minus_onep (arg0))
2415 tree t0, t1, cond, one, minus_one;
2416 gassign *stmt;
2418 t0 = TREE_TYPE (arg0);
2419 t1 = TREE_TYPE (arg1);
2420 one = build_real (t0, dconst1);
2421 minus_one = build_real (t0, dconstm1);
2423 cond = make_temp_ssa_name (t1, NULL, "powi_cond");
2424 stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2425 arg1, build_int_cst (t1, 1));
2426 gimple_set_location (stmt, loc);
2427 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2429 result = make_temp_ssa_name (t0, NULL, "powi");
2430 stmt = gimple_build_assign (result, COND_EXPR, cond,
2431 minus_one, one);
2432 gimple_set_location (stmt, loc);
2433 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2435 else
2437 if (!tree_fits_shwi_p (arg1))
2438 break;
2440 n = tree_to_shwi (arg1);
2441 result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
2444 if (result)
2446 tree lhs = gimple_get_lhs (stmt);
2447 gassign *new_stmt = gimple_build_assign (lhs, result);
2448 gimple_set_location (new_stmt, loc);
2449 unlink_stmt_vdef (stmt);
2450 gsi_replace (&gsi, new_stmt, true);
2451 cleanup_eh = true;
2452 if (gimple_vdef (stmt))
2453 release_ssa_name (gimple_vdef (stmt));
2455 break;
2457 CASE_CFN_CABS:
2458 arg0 = gimple_call_arg (stmt, 0);
2459 loc = gimple_location (stmt);
2460 result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
2462 if (result)
2464 tree lhs = gimple_get_lhs (stmt);
2465 gassign *new_stmt = gimple_build_assign (lhs, result);
2466 gimple_set_location (new_stmt, loc);
2467 unlink_stmt_vdef (stmt);
2468 gsi_replace (&gsi, new_stmt, true);
2469 cleanup_eh = true;
2470 if (gimple_vdef (stmt))
2471 release_ssa_name (gimple_vdef (stmt));
2473 break;
2475 default:;
2479 if (cleanup_eh)
2480 cfg_changed |= gimple_purge_dead_eh_edges (bb);
2483 return cfg_changed ? TODO_cleanup_cfg : 0;
2486 } // anon namespace
2488 gimple_opt_pass *
2489 make_pass_expand_powcabs (gcc::context *ctxt)
2491 return new pass_expand_powcabs (ctxt);
2494 /* Return true if stmt is a type conversion operation that can be stripped
2495 when used in a widening multiply operation. */
2496 static bool
2497 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2499 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2501 if (TREE_CODE (result_type) == INTEGER_TYPE)
2503 tree op_type;
2504 tree inner_op_type;
2506 if (!CONVERT_EXPR_CODE_P (rhs_code))
2507 return false;
2509 op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2511 /* If the type of OP has the same precision as the result, then
2512 we can strip this conversion. The multiply operation will be
2513 selected to create the correct extension as a by-product. */
2514 if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2515 return true;
2517 /* We can also strip a conversion if it preserves the signed-ness of
2518 the operation and doesn't narrow the range. */
2519 inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2521 /* If the inner-most type is unsigned, then we can strip any
2522 intermediate widening operation. If it's signed, then the
2523 intermediate widening operation must also be signed. */
2524 if ((TYPE_UNSIGNED (inner_op_type)
2525 || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2526 && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2527 return true;
2529 return false;
2532 return rhs_code == FIXED_CONVERT_EXPR;
2535 /* Return true if RHS is a suitable operand for a widening multiplication,
2536 assuming a target type of TYPE.
2537 There are two cases:
2539 - RHS makes some value at least twice as wide. Store that value
2540 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2542 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2543 but leave *TYPE_OUT untouched. */
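/* For example, if TYPE is a 32-bit int and RHS is defined by widening a
   16-bit short to int, the first case applies: the unwidened short value
   is stored in *NEW_RHS_OUT and its 16-bit type in *TYPE_OUT.  */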
2545 static bool
2546 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2547 tree *new_rhs_out)
2549 gimple *stmt;
2550 tree type1, rhs1;
2552 if (TREE_CODE (rhs) == SSA_NAME)
2554 stmt = SSA_NAME_DEF_STMT (rhs);
2555 if (is_gimple_assign (stmt))
2557 if (! widening_mult_conversion_strippable_p (type, stmt))
2558 rhs1 = rhs;
2559 else
2561 rhs1 = gimple_assign_rhs1 (stmt);
2563 if (TREE_CODE (rhs1) == INTEGER_CST)
2565 *new_rhs_out = rhs1;
2566 *type_out = NULL;
2567 return true;
2571 else
2572 rhs1 = rhs;
2574 type1 = TREE_TYPE (rhs1);
2576 if (TREE_CODE (type1) != TREE_CODE (type)
2577 || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2578 return false;
2580 *new_rhs_out = rhs1;
2581 *type_out = type1;
2582 return true;
2585 if (TREE_CODE (rhs) == INTEGER_CST)
2587 *new_rhs_out = rhs;
2588 *type_out = NULL;
2589 return true;
2592 return false;
2595 /* Return true if STMT performs a widening multiplication, assuming the
2596 output type is TYPE. If so, store the unwidened types of the operands
2597 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2598 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2599 and *TYPE2_OUT would give the operands of the multiplication. */
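/* For example, for
     res = (long long) a * (long long) b;
   with 32-bit int operands a and b and a 64-bit result, *TYPE1_OUT and
   *TYPE2_OUT are both the 32-bit int type and *RHS1_OUT and *RHS2_OUT
   are a and b.  */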
2601 static bool
2602 is_widening_mult_p (gimple *stmt,
2603 tree *type1_out, tree *rhs1_out,
2604 tree *type2_out, tree *rhs2_out)
2606 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2608 if (TREE_CODE (type) == INTEGER_TYPE)
2610 if (TYPE_OVERFLOW_TRAPS (type))
2611 return false;
2613 else if (TREE_CODE (type) != FIXED_POINT_TYPE)
2614 return false;
2616 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2617 rhs1_out))
2618 return false;
2620 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2621 rhs2_out))
2622 return false;
2624 if (*type1_out == NULL)
2626 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2627 return false;
2628 *type1_out = *type2_out;
2631 if (*type2_out == NULL)
2633 if (!int_fits_type_p (*rhs2_out, *type1_out))
2634 return false;
2635 *type2_out = *type1_out;
2638 /* Ensure that the larger of the two operands comes first. */
2639 if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2641 std::swap (*type1_out, *type2_out);
2642 std::swap (*rhs1_out, *rhs2_out);
2645 return true;
2648 /* Check to see if the CALL statement is an invocation of copysign
2649 with 1. being the first argument. */
2650 static bool
2651 is_copysign_call_with_1 (gimple *call)
2653 gcall *c = dyn_cast <gcall *> (call);
2654 if (! c)
2655 return false;
2657 enum combined_fn code = gimple_call_combined_fn (c);
2659 if (code == CFN_LAST)
2660 return false;
2662 if (builtin_fn_p (code))
2664 switch (as_builtin_fn (code))
2666 CASE_FLT_FN (BUILT_IN_COPYSIGN):
2667 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2668 return real_onep (gimple_call_arg (c, 0));
2669 default:
2670 return false;
2674 if (internal_fn_p (code))
2676 switch (as_internal_fn (code))
2678 case IFN_COPYSIGN:
2679 return real_onep (gimple_call_arg (c, 0));
2680 default:
2681 return false;
2685 return false;
2688 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2689    This only happens when the xorsign optab is defined; if the
2690    pattern is not a xorsign pattern or if expansion fails, FALSE is
2691 returned, otherwise TRUE is returned. */
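/* For example,
     t = __builtin_copysign (1.0, y);
     z = x * t;
   becomes z = .XORSIGN (x, y) when the target provides a xorsign optab
   for the mode of the operands.  */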
2692 static bool
2693 convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2695 tree treeop0, treeop1, lhs, type;
2696 location_t loc = gimple_location (stmt);
2697 lhs = gimple_assign_lhs (stmt);
2698 treeop0 = gimple_assign_rhs1 (stmt);
2699 treeop1 = gimple_assign_rhs2 (stmt);
2700 type = TREE_TYPE (lhs);
2701 machine_mode mode = TYPE_MODE (type);
2703 if (HONOR_SNANS (type))
2704 return false;
2706 if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2708 gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2709 if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
2711 call0 = SSA_NAME_DEF_STMT (treeop1);
2712 if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2713 return false;
2715 treeop1 = treeop0;
2717 if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2718 return false;
2720 gcall *c = as_a<gcall*> (call0);
2721 treeop0 = gimple_call_arg (c, 1);
2723 gcall *call_stmt
2724 = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2725 gimple_set_lhs (call_stmt, lhs);
2726 gimple_set_location (call_stmt, loc);
2727 gsi_replace (gsi, call_stmt, true);
2728 return true;
2731 return false;
2734 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2735 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2736 value is true iff we converted the statement. */
2738 static bool
2739 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2741 tree lhs, rhs1, rhs2, type, type1, type2;
2742 enum insn_code handler;
2743 scalar_int_mode to_mode, from_mode, actual_mode;
2744 optab op;
2745 int actual_precision;
2746 location_t loc = gimple_location (stmt);
2747 bool from_unsigned1, from_unsigned2;
2749 lhs = gimple_assign_lhs (stmt);
2750 type = TREE_TYPE (lhs);
2751 if (TREE_CODE (type) != INTEGER_TYPE)
2752 return false;
2754 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2755 return false;
2757 to_mode = SCALAR_INT_TYPE_MODE (type);
2758 from_mode = SCALAR_INT_TYPE_MODE (type1);
2759 if (to_mode == from_mode)
2760 return false;
2762 from_unsigned1 = TYPE_UNSIGNED (type1);
2763 from_unsigned2 = TYPE_UNSIGNED (type2);
2765 if (from_unsigned1 && from_unsigned2)
2766 op = umul_widen_optab;
2767 else if (!from_unsigned1 && !from_unsigned2)
2768 op = smul_widen_optab;
2769 else
2770 op = usmul_widen_optab;
2772 handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2773 &actual_mode);
2775 if (handler == CODE_FOR_nothing)
2777 if (op != smul_widen_optab)
2779 /* We can use a signed multiply with unsigned types as long as
2780 there is a wider mode to use, or it is the smaller of the two
2781 types that is unsigned. Note that type1 >= type2, always. */
2782 if ((TYPE_UNSIGNED (type1)
2783 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2784 || (TYPE_UNSIGNED (type2)
2785 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2787 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2788 || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2789 return false;
2792 op = smul_widen_optab;
2793 handler = find_widening_optab_handler_and_mode (op, to_mode,
2794 from_mode,
2795 &actual_mode);
2797 if (handler == CODE_FOR_nothing)
2798 return false;
2800 from_unsigned1 = from_unsigned2 = false;
2802 else
2804 /* Expand can synthesize smul_widen_optab if the target
2805 supports umul_widen_optab. */
2806 op = umul_widen_optab;
2807 handler = find_widening_optab_handler_and_mode (op, to_mode,
2808 from_mode,
2809 &actual_mode);
2810 if (handler == CODE_FOR_nothing)
2811 return false;
2815   /* Ensure that the inputs to the handler are in the correct precision
2816 for the opcode. This will be the full mode size. */
2817 actual_precision = GET_MODE_PRECISION (actual_mode);
2818 if (2 * actual_precision > TYPE_PRECISION (type))
2819 return false;
2820 if (actual_precision != TYPE_PRECISION (type1)
2821 || from_unsigned1 != TYPE_UNSIGNED (type1))
2822 rhs1 = build_and_insert_cast (gsi, loc,
2823 build_nonstandard_integer_type
2824 (actual_precision, from_unsigned1), rhs1);
2825 if (actual_precision != TYPE_PRECISION (type2)
2826 || from_unsigned2 != TYPE_UNSIGNED (type2))
2827 rhs2 = build_and_insert_cast (gsi, loc,
2828 build_nonstandard_integer_type
2829 (actual_precision, from_unsigned2), rhs2);
2831 /* Handle constants. */
2832 if (TREE_CODE (rhs1) == INTEGER_CST)
2833 rhs1 = fold_convert (type1, rhs1);
2834 if (TREE_CODE (rhs2) == INTEGER_CST)
2835 rhs2 = fold_convert (type2, rhs2);
2837 gimple_assign_set_rhs1 (stmt, rhs1);
2838 gimple_assign_set_rhs2 (stmt, rhs2);
2839 gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2840 update_stmt (stmt);
2841 widen_mul_stats.widen_mults_inserted++;
2842 return true;
2845 /* Process a single gimple statement STMT, which is found at the
2846    iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
2847 rhs (given by CODE), and try to convert it into a
2848 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
2849 is true iff we converted the statement. */
2851 static bool
2852 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2853 enum tree_code code)
2855 gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2856 gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2857 tree type, type1, type2, optype;
2858 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2859 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2860 optab this_optab;
2861 enum tree_code wmult_code;
2862 enum insn_code handler;
2863 scalar_mode to_mode, from_mode, actual_mode;
2864 location_t loc = gimple_location (stmt);
2865 int actual_precision;
2866 bool from_unsigned1, from_unsigned2;
2868 lhs = gimple_assign_lhs (stmt);
2869 type = TREE_TYPE (lhs);
2870 if (TREE_CODE (type) != INTEGER_TYPE
2871 && TREE_CODE (type) != FIXED_POINT_TYPE)
2872 return false;
2874 if (code == MINUS_EXPR)
2875 wmult_code = WIDEN_MULT_MINUS_EXPR;
2876 else
2877 wmult_code = WIDEN_MULT_PLUS_EXPR;
2879 rhs1 = gimple_assign_rhs1 (stmt);
2880 rhs2 = gimple_assign_rhs2 (stmt);
2882 if (TREE_CODE (rhs1) == SSA_NAME)
2884 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2885 if (is_gimple_assign (rhs1_stmt))
2886 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2889 if (TREE_CODE (rhs2) == SSA_NAME)
2891 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2892 if (is_gimple_assign (rhs2_stmt))
2893 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2896 /* Allow for one conversion statement between the multiply
2897      and addition/subtraction statement.  If there are multiple
2898      conversions then we assume they would invalidate this
2899 transformation. If that's not the case then they should have
2900 been folded before now. */
2901 if (CONVERT_EXPR_CODE_P (rhs1_code))
2903 conv1_stmt = rhs1_stmt;
2904 rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2905 if (TREE_CODE (rhs1) == SSA_NAME)
2907 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2908 if (is_gimple_assign (rhs1_stmt))
2909 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2911 else
2912 return false;
2914 if (CONVERT_EXPR_CODE_P (rhs2_code))
2916 conv2_stmt = rhs2_stmt;
2917 rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2918 if (TREE_CODE (rhs2) == SSA_NAME)
2920 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2921 if (is_gimple_assign (rhs2_stmt))
2922 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2924 else
2925 return false;
2928 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2929      is_widening_mult_p, but we still need the rhs values it returns.
2931 It might also appear that it would be sufficient to use the existing
2932 operands of the widening multiply, but that would limit the choice of
2933 multiply-and-accumulate instructions.
2935      If the widened-multiplication result has more than one use, it is
2936      probably wiser not to do the conversion.  Also restrict this operation
2937      to a single basic block to avoid moving the multiply to a different block
2938 with a higher execution frequency. */
2939 if (code == PLUS_EXPR
2940 && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2942 if (!has_single_use (rhs1)
2943 || gimple_bb (rhs1_stmt) != gimple_bb (stmt)
2944 || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2945 &type2, &mult_rhs2))
2946 return false;
2947 add_rhs = rhs2;
2948 conv_stmt = conv1_stmt;
2950 else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2952 if (!has_single_use (rhs2)
2953 || gimple_bb (rhs2_stmt) != gimple_bb (stmt)
2954 || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2955 &type2, &mult_rhs2))
2956 return false;
2957 add_rhs = rhs1;
2958 conv_stmt = conv2_stmt;
2960 else
2961 return false;
2963 to_mode = SCALAR_TYPE_MODE (type);
2964 from_mode = SCALAR_TYPE_MODE (type1);
2965 if (to_mode == from_mode)
2966 return false;
2968 from_unsigned1 = TYPE_UNSIGNED (type1);
2969 from_unsigned2 = TYPE_UNSIGNED (type2);
2970 optype = type1;
2972 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
2973 if (from_unsigned1 != from_unsigned2)
2975 if (!INTEGRAL_TYPE_P (type))
2976 return false;
2977 /* We can use a signed multiply with unsigned types as long as
2978 there is a wider mode to use, or it is the smaller of the two
2979 types that is unsigned. Note that type1 >= type2, always. */
2980 if ((from_unsigned1
2981 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2982 || (from_unsigned2
2983 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2985 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2986 || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2987 return false;
2990 from_unsigned1 = from_unsigned2 = false;
2991 optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2992 false);
2995 /* If there was a conversion between the multiply and addition
2996 then we need to make sure it fits a multiply-and-accumulate.
2997      There should be a single mode change which does not change the
2998 value. */
2999 if (conv_stmt)
3001 /* We use the original, unmodified data types for this. */
3002 tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
3003 tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
3004 int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
3005 bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
3007 if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
3009 /* Conversion is a truncate. */
3010 if (TYPE_PRECISION (to_type) < data_size)
3011 return false;
3013 else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
3015 /* Conversion is an extend. Check it's the right sort. */
3016 if (TYPE_UNSIGNED (from_type) != is_unsigned
3017 && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
3018 return false;
3020 /* else convert is a no-op for our purposes. */
3023 /* Verify that the machine can perform a widening multiply
3024 accumulate in this mode/signedness combination, otherwise
3025 this transformation is likely to pessimize code. */
3026 this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
3027 handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
3028 from_mode, &actual_mode);
3030 if (handler == CODE_FOR_nothing)
3031 return false;
3033   /* Ensure that the inputs to the handler are in the correct precision
3034 for the opcode. This will be the full mode size. */
3035 actual_precision = GET_MODE_PRECISION (actual_mode);
3036 if (actual_precision != TYPE_PRECISION (type1)
3037 || from_unsigned1 != TYPE_UNSIGNED (type1))
3038 mult_rhs1 = build_and_insert_cast (gsi, loc,
3039 build_nonstandard_integer_type
3040 (actual_precision, from_unsigned1),
3041 mult_rhs1);
3042 if (actual_precision != TYPE_PRECISION (type2)
3043 || from_unsigned2 != TYPE_UNSIGNED (type2))
3044 mult_rhs2 = build_and_insert_cast (gsi, loc,
3045 build_nonstandard_integer_type
3046 (actual_precision, from_unsigned2),
3047 mult_rhs2);
3049 if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
3050 add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
3052 /* Handle constants. */
3053 if (TREE_CODE (mult_rhs1) == INTEGER_CST)
3054 mult_rhs1 = fold_convert (type1, mult_rhs1);
3055 if (TREE_CODE (mult_rhs2) == INTEGER_CST)
3056 mult_rhs2 = fold_convert (type2, mult_rhs2);
3058 gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
3059 add_rhs);
3060 update_stmt (gsi_stmt (*gsi));
3061 widen_mul_stats.maccs_inserted++;
3062 return true;
3065 /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
3066 OP2 and which we know is used in statements that can be, together with the
3067 multiplication, converted to FMAs, perform the transformation. */
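/* For example, an unconditional use such as
     t = mul_result + c;
   is replaced here by t = .FMA (op1, op2, c); negations of the
   multiplication or of the addend are subsequently folded into .FMS,
   .FNMA or .FNMS.  */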
3069 static void
3070 convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
3072 tree type = TREE_TYPE (mul_result);
3073 gimple *use_stmt;
3074 imm_use_iterator imm_iter;
3075 gcall *fma_stmt;
3077 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3079 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3080 tree addop, mulop1 = op1, result = mul_result;
3081 bool negate_p = false;
3082 gimple_seq seq = NULL;
3084 if (is_gimple_debug (use_stmt))
3085 continue;
3087 if (is_gimple_assign (use_stmt)
3088 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3090 result = gimple_assign_lhs (use_stmt);
3091 use_operand_p use_p;
3092 gimple *neguse_stmt;
3093 single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3094 gsi_remove (&gsi, true);
3095 release_defs (use_stmt);
3097 use_stmt = neguse_stmt;
3098 gsi = gsi_for_stmt (use_stmt);
3099 negate_p = true;
3102 tree cond, else_value, ops[3], len, bias;
3103 tree_code code;
3104 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
3105 ops, &else_value,
3106 &len, &bias))
3107 gcc_unreachable ();
3108 addop = ops[0] == result ? ops[1] : ops[0];
3110 if (code == MINUS_EXPR)
3112 if (ops[0] == result)
3113 /* a * b - c -> a * b + (-c) */
3114 addop = gimple_build (&seq, NEGATE_EXPR, type, addop);
3115 else
3116 /* a - b * c -> (-b) * c + a */
3117 negate_p = !negate_p;
3120 if (negate_p)
3121 mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1);
3123 if (seq)
3124 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
3126 if (len)
3127 fma_stmt
3128 = gimple_build_call_internal (IFN_COND_LEN_FMA, 7, cond, mulop1, op2,
3129 addop, else_value, len, bias);
3130 else if (cond)
3131 fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
3132 op2, addop, else_value);
3133 else
3134 fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
3135 gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
3136 gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun,
3137 use_stmt));
3138 gsi_replace (&gsi, fma_stmt, true);
3139 /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
3140 regardless of where the negation occurs. */
3141 gimple *orig_stmt = gsi_stmt (gsi);
3142 if (fold_stmt (&gsi, follow_all_ssa_edges))
3144 if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
3145 gcc_unreachable ();
3146 update_stmt (gsi_stmt (gsi));
3149 if (dump_file && (dump_flags & TDF_DETAILS))
3151 fprintf (dump_file, "Generated FMA ");
3152 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3153 fprintf (dump_file, "\n");
3156 /* If the FMA result is negated in a single use, fold the negation
3157 too. */
3158 orig_stmt = gsi_stmt (gsi);
3159 use_operand_p use_p;
3160 gimple *neg_stmt;
3161 if (is_gimple_call (orig_stmt)
3162 && gimple_call_internal_p (orig_stmt)
3163 && gimple_call_lhs (orig_stmt)
3164 && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME
3165 && single_imm_use (gimple_call_lhs (orig_stmt), &use_p, &neg_stmt)
3166 && is_gimple_assign (neg_stmt)
3167 && gimple_assign_rhs_code (neg_stmt) == NEGATE_EXPR
3168 && !stmt_could_throw_p (cfun, neg_stmt))
3170 gsi = gsi_for_stmt (neg_stmt);
3171 if (fold_stmt (&gsi, follow_all_ssa_edges))
3173 if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (gsi)))
3174 gcc_unreachable ();
3175 update_stmt (gsi_stmt (gsi));
3176 if (dump_file && (dump_flags & TDF_DETAILS))
3178 fprintf (dump_file, "Folded FMA negation ");
3179 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3180 fprintf (dump_file, "\n");
3185 widen_mul_stats.fmas_inserted++;
3189 /* Data necessary to perform the actual transformation from a multiplication
3190    and an addition to an FMA once it has been decided to do so, and to
3191 then delete the multiplication statement from the function IL. */
3193 struct fma_transformation_info
3195 gimple *mul_stmt;
3196 tree mul_result;
3197 tree op1;
3198 tree op2;
3201 /* Structure containing the current state of FMA deferring, i.e. whether we are
3202 deferring, whether to continue deferring, and all data necessary to come
3203 back and perform all deferred transformations. */
3205 class fma_deferring_state
3207 public:
3208 /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually
3209 do any deferring. */
3211 fma_deferring_state (bool perform_deferring)
3212 : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
3213 m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
3215   /* List of FMA candidates for which the transformation has been determined
3216      to be possible but which, at this point in the BB analysis, we do not
3217      consider beneficial.  */
3218 auto_vec<fma_transformation_info, 8> m_candidates;
3220   /* Set of results of multiplication that are part of already deferred FMA
3221 candidates. */
3222 hash_set<tree> m_mul_result_set;
3224   /* The PHI that supposedly feeds back the result of one FMA to another over
3225      the loop boundary.  */
3226 gphi *m_initial_phi;
3228 /* Result of the last produced FMA candidate or NULL if there has not been
3229 one. */
3230 tree m_last_result;
3232 /* If true, deferring might still be profitable. If false, transform all
3233 candidates and no longer defer. */
3234 bool m_deferring_p;
3237 /* Transform all deferred FMA candidates and mark STATE as no longer
3238 deferring. */
3240 static void
3241 cancel_fma_deferring (fma_deferring_state *state)
3243 if (!state->m_deferring_p)
3244 return;
3246 for (unsigned i = 0; i < state->m_candidates.length (); i++)
3248 if (dump_file && (dump_flags & TDF_DETAILS))
3249 fprintf (dump_file, "Generating deferred FMA\n");
3251 const fma_transformation_info &fti = state->m_candidates[i];
3252 convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
3254 gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
3255 gsi_remove (&gsi, true);
3256 release_defs (fti.mul_stmt);
3258 state->m_deferring_p = false;
3261 /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3262 Otherwise return NULL. */
3264 static gphi *
3265 result_of_phi (tree op)
3267 if (TREE_CODE (op) != SSA_NAME)
3268 return NULL;
3270 return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3273 /* After processing statements of a BB and recording STATE, return true if the
3274    initial phi is fed by the last FMA candidate result or one such result from
3275 previously processed BBs marked in LAST_RESULT_SET. */
3277 static bool
3278 last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3279 hash_set<tree> *last_result_set)
3281 ssa_op_iter iter;
3282 use_operand_p use;
3283 FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3285 tree t = USE_FROM_PTR (use);
3286 if (t == state->m_last_result
3287 || last_result_set->contains (t))
3288 return true;
3291 return false;
3294 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3295 with uses in additions and subtractions to form fused multiply-add
3296 operations. Returns true if successful and MUL_STMT should be removed.
3297 If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3298 on MUL_COND, otherwise it is unconditional.
3300 If STATE indicates that we are deferring FMA transformation, that means
3301 that we do not produce FMAs for basic blocks which look like:
3303 <bb 6>
3304 # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3305 _65 = _14 * _16;
3306 accumulator_66 = _65 + accumulator_111;
3308    or its unrolled version, i.e. with several FMA candidates that feed the result
3309 of one into the addend of another. Instead, we add them to a list in STATE
3310 and if we later discover an FMA candidate that is not part of such a chain,
3311 we go back and perform all deferred past candidates. */
3313 static bool
3314 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3315 fma_deferring_state *state, tree mul_cond = NULL_TREE,
3316 tree mul_len = NULL_TREE, tree mul_bias = NULL_TREE)
3318 tree mul_result = gimple_get_lhs (mul_stmt);
3319 /* If there isn't a LHS then this can't be an FMA. There can be no LHS
3320 if the statement was left just for the side-effects. */
3321 if (!mul_result)
3322 return false;
3323 tree type = TREE_TYPE (mul_result);
3324 gimple *use_stmt, *neguse_stmt;
3325 use_operand_p use_p;
3326 imm_use_iterator imm_iter;
3328 if (FLOAT_TYPE_P (type)
3329 && flag_fp_contract_mode != FP_CONTRACT_FAST)
3330 return false;
3332 /* We don't want to do bitfield reduction ops. */
3333 if (INTEGRAL_TYPE_P (type)
3334 && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
3335 return false;
3337 /* If the target doesn't support it, don't generate it. We assume that
3338 if fma isn't available then fms, fnma or fnms are not either. */
3339 optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt));
3340 if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
3341 return false;
3343 /* If the multiplication has zero uses, it is kept around probably because
3344 of -fnon-call-exceptions. Don't optimize it away in that case,
3345      it is DCE's job.  */
3346 if (has_zero_uses (mul_result))
3347 return false;
3349 bool check_defer
3350 = (state->m_deferring_p
3351 && maybe_le (tree_to_poly_int64 (TYPE_SIZE (type)),
3352 param_avoid_fma_max_bits));
3353 bool defer = check_defer;
3354 bool seen_negate_p = false;
3356 /* There is no numerical difference between fused and unfused integer FMAs,
3357 and the assumption below that FMA is as cheap as addition is unlikely
3358 to be true, especially if the multiplication occurs multiple times on
3359 the same chain. E.g., for something like:
3361 (((a * b) + c) >> 1) + (a * b)
3363 we do not want to duplicate the a * b into two additions, not least
3364 because the result is not a natural FMA chain. */
3365 if (ANY_INTEGRAL_TYPE_P (type)
3366 && !has_single_use (mul_result))
3367 return false;
3369 /* Make sure that the multiplication statement becomes dead after
3370 the transformation, thus that all uses are transformed to FMAs.
3371 This means we assume that an FMA operation has the same cost
3372 as an addition. */
3373 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3375 tree result = mul_result;
3376 bool negate_p = false;
3378 use_stmt = USE_STMT (use_p);
3380 if (is_gimple_debug (use_stmt))
3381 continue;
3383       /* For now restrict this operation to single basic blocks.  In theory
3384 we would want to support sinking the multiplication in
3385 m = a*b;
3386 if ()
3387 ma = m + c;
3388 else
3389 d = m;
3390 to form a fma in the then block and sink the multiplication to the
3391 else block. */
3392 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3393 return false;
3395 /* A negate on the multiplication leads to FNMA. */
3396 if (is_gimple_assign (use_stmt)
3397 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3399 ssa_op_iter iter;
3400 use_operand_p usep;
3402 /* If (due to earlier missed optimizations) we have two
3403 negates of the same value, treat them as equivalent
3404 to a single negate with multiple uses. */
3405 if (seen_negate_p)
3406 return false;
3408 result = gimple_assign_lhs (use_stmt);
3410 /* Make sure the negate statement becomes dead with this
3411 single transformation. */
3412 if (!single_imm_use (gimple_assign_lhs (use_stmt),
3413 &use_p, &neguse_stmt))
3414 return false;
3416 /* Make sure the multiplication isn't also used on that stmt. */
3417 FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3418 if (USE_FROM_PTR (usep) == mul_result)
3419 return false;
3421 /* Re-validate. */
3422 use_stmt = neguse_stmt;
3423 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3424 return false;
3426 negate_p = seen_negate_p = true;
3429 tree cond, else_value, ops[3], len, bias;
3430 tree_code code;
3431 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3432 &else_value, &len, &bias))
3433 return false;
3435 switch (code)
3437 case MINUS_EXPR:
3438 if (ops[1] == result)
3439 negate_p = !negate_p;
3440 break;
3441 case PLUS_EXPR:
3442 break;
3443 default:
3444 /* FMA can only be formed from PLUS and MINUS. */
3445 return false;
3448 if (len)
3450 	  /* For COND_LEN_* operations, we may have a dummy mask which is
3451 	     the all-true mask.  In that case mul_cond and cond may be
3452 	     different trees, but we still consider them equal.  */
3453 if (mul_cond && cond != mul_cond
3454 && !(integer_truep (mul_cond) && integer_truep (cond)))
3455 return false;
3457 if (else_value == result)
3458 return false;
3460 if (!direct_internal_fn_supported_p (IFN_COND_LEN_FMA, type,
3461 opt_type))
3462 return false;
3464 if (mul_len)
3466 poly_int64 mul_value, value;
3467 if (poly_int_tree_p (mul_len, &mul_value)
3468 && poly_int_tree_p (len, &value)
3469 && maybe_ne (mul_value, value))
3470 return false;
3471 else if (mul_len != len)
3472 return false;
3474 if (wi::to_widest (mul_bias) != wi::to_widest (bias))
3475 return false;
3478 else
3480 if (mul_cond && cond != mul_cond)
3481 return false;
3483 if (cond)
3485 if (cond == result || else_value == result)
3486 return false;
3487 if (!direct_internal_fn_supported_p (IFN_COND_FMA, type,
3488 opt_type))
3489 return false;
3493 /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3494 we'll visit later, we might be able to get a more profitable
3495 match with fnma.
3496 OTOH, if we don't, a negate / fma pair has likely lower latency
3497 	 than a mult / subtract pair.  */
3498 if (code == MINUS_EXPR
3499 && !negate_p
3500 && ops[0] == result
3501 && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
3502 && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3503 && TREE_CODE (ops[1]) == SSA_NAME
3504 && has_single_use (ops[1]))
3506 gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3507 if (is_gimple_assign (stmt2)
3508 && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3509 return false;
3512 /* We can't handle a * b + a * b. */
3513 if (ops[0] == ops[1])
3514 return false;
3515 /* If deferring, make sure we are not looking at an instruction that
3516 wouldn't have existed if we were not. */
3517 if (state->m_deferring_p
3518 && (state->m_mul_result_set.contains (ops[0])
3519 || state->m_mul_result_set.contains (ops[1])))
3520 return false;
3522 if (check_defer)
3524 tree use_lhs = gimple_get_lhs (use_stmt);
3525 if (state->m_last_result)
3527 if (ops[1] == state->m_last_result
3528 || ops[0] == state->m_last_result)
3529 defer = true;
3530 else
3531 defer = false;
3533 else
3535 gcc_checking_assert (!state->m_initial_phi);
3536 gphi *phi;
3537 if (ops[0] == result)
3538 phi = result_of_phi (ops[1]);
3539 else
3541 gcc_assert (ops[1] == result);
3542 phi = result_of_phi (ops[0]);
3545 if (phi)
3547 state->m_initial_phi = phi;
3548 defer = true;
3550 else
3551 defer = false;
3554 state->m_last_result = use_lhs;
3555 check_defer = false;
3557 else
3558 defer = false;
3560 /* While it is possible to validate whether or not the exact form that
3561 we've recognized is available in the backend, the assumption is that
3562 if the deferring logic above did not trigger, the transformation is
3563 never a loss. For instance, suppose the target only has the plain FMA
3564 pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged
3565 MUL+SUB for FMA+NEG, which is still two operations. Consider
3566 -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3567 form the two NEGs are independent and could be run in parallel. */
3570 if (defer)
3572 fma_transformation_info fti;
3573 fti.mul_stmt = mul_stmt;
3574 fti.mul_result = mul_result;
3575 fti.op1 = op1;
3576 fti.op2 = op2;
3577 state->m_candidates.safe_push (fti);
3578 state->m_mul_result_set.add (mul_result);
3580 if (dump_file && (dump_flags & TDF_DETAILS))
3582 fprintf (dump_file, "Deferred generating FMA for multiplication ");
3583 print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
3584 fprintf (dump_file, "\n");
3587 return false;
3589 else
3591 if (state->m_deferring_p)
3592 cancel_fma_deferring (state);
3593 convert_mult_to_fma_1 (mul_result, op1, op2);
3594 return true;
3599 /* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have
3600 a check for non-zero like:
3601 _1 = x_4(D) * y_5(D);
3602 *res_7(D) = _1;
3603 if (x_4(D) != 0)
3604 goto <bb 3>; [50.00%]
3605 else
3606 goto <bb 4>; [50.00%]
3608 <bb 3> [local count: 536870913]:
3609 _2 = _1 / x_4(D);
3610 _9 = _2 != y_5(D);
3611 _10 = (int) _9;
3613 <bb 4> [local count: 1073741824]:
3614 # iftmp.0_3 = PHI <_10(3), 0(2)>
3615 then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
3616 optimize the x_4(D) != 0 condition to 1. */
3618 static void
3619 maybe_optimize_guarding_check (vec<gimple *> &mul_stmts, gimple *cond_stmt,
3620 gimple *div_stmt, bool *cfg_changed)
3622 basic_block bb = gimple_bb (cond_stmt);
3623 if (gimple_bb (div_stmt) != bb || !single_pred_p (bb))
3624 return;
3625 edge pred_edge = single_pred_edge (bb);
3626 basic_block pred_bb = pred_edge->src;
3627 if (EDGE_COUNT (pred_bb->succs) != 2)
3628 return;
3629 edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge);
3630 edge other_succ_edge = NULL;
3631 if (gimple_code (cond_stmt) == GIMPLE_COND)
3633 if (EDGE_COUNT (bb->succs) != 2)
3634 return;
3635 other_succ_edge = EDGE_SUCC (bb, 0);
3636 if (gimple_cond_code (cond_stmt) == NE_EXPR)
3638 if (other_succ_edge->flags & EDGE_TRUE_VALUE)
3639 other_succ_edge = EDGE_SUCC (bb, 1);
3641 else if (other_succ_edge->flags & EDGE_FALSE_VALUE)
3642 other_succ_edge = EDGE_SUCC (bb, 0);
3643 if (other_edge->dest != other_succ_edge->dest)
3644 return;
3646 else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb))
3647 return;
3648 gcond *zero_cond = safe_dyn_cast <gcond *> (*gsi_last_bb (pred_bb));
3649 if (zero_cond == NULL
3650 || (gimple_cond_code (zero_cond)
3651 != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR))
3652 || !integer_zerop (gimple_cond_rhs (zero_cond)))
3653 return;
3654 tree zero_cond_lhs = gimple_cond_lhs (zero_cond);
3655 if (TREE_CODE (zero_cond_lhs) != SSA_NAME)
3656 return;
3657 if (gimple_assign_rhs2 (div_stmt) != zero_cond_lhs)
3659       /* Allow the divisor to be the result of a same-precision cast
3660 from zero_cond_lhs. */
3661 tree rhs2 = gimple_assign_rhs2 (div_stmt);
3662 if (TREE_CODE (rhs2) != SSA_NAME)
3663 return;
3664 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3665 if (!gimple_assign_cast_p (g)
3666 || gimple_assign_rhs1 (g) != gimple_cond_lhs (zero_cond)
3667 || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs))
3668 || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs))
3669 != TYPE_PRECISION (TREE_TYPE (rhs2))))
3670 return;
3672 gimple_stmt_iterator gsi = gsi_after_labels (bb);
3673 mul_stmts.quick_push (div_stmt);
3674 if (is_gimple_debug (gsi_stmt (gsi)))
3675 gsi_next_nondebug (&gsi);
3676 unsigned cast_count = 0;
3677 while (gsi_stmt (gsi) != cond_stmt)
3679 /* If original mul_stmt has a single use, allow it in the same bb,
3680 we are looking then just at __builtin_mul_overflow_p.
3681 Though, in that case the original mul_stmt will be replaced
3682 by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts. */
3683 gimple *mul_stmt;
3684 unsigned int i;
3685 bool ok = false;
3686 FOR_EACH_VEC_ELT (mul_stmts, i, mul_stmt)
3688 if (gsi_stmt (gsi) == mul_stmt)
3690 ok = true;
3691 break;
3694 if (!ok && gimple_assign_cast_p (gsi_stmt (gsi)) && ++cast_count < 4)
3695 ok = true;
3696 if (!ok)
3697 return;
3698 gsi_next_nondebug (&gsi);
3700 if (gimple_code (cond_stmt) == GIMPLE_COND)
3702 basic_block succ_bb = other_edge->dest;
3703 for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (gpi);
3704 gsi_next (&gpi))
3706 gphi *phi = gpi.phi ();
3707 tree v1 = gimple_phi_arg_def (phi, other_edge->dest_idx);
3708 tree v2 = gimple_phi_arg_def (phi, other_succ_edge->dest_idx);
3709 if (!operand_equal_p (v1, v2, 0))
3710 return;
3713 else
3715 tree lhs = gimple_assign_lhs (cond_stmt);
3716 if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
3717 return;
3718 gsi_next_nondebug (&gsi);
3719 if (!gsi_end_p (gsi))
3721 if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3722 return;
3723 gimple *cast_stmt = gsi_stmt (gsi);
3724 if (!gimple_assign_cast_p (cast_stmt))
3725 return;
3726 tree new_lhs = gimple_assign_lhs (cast_stmt);
3727 gsi_next_nondebug (&gsi);
3728 if (!gsi_end_p (gsi)
3729 || !new_lhs
3730 || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs))
3731 || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1)
3732 return;
3733 lhs = new_lhs;
3735 edge succ_edge = single_succ_edge (bb);
3736 basic_block succ_bb = succ_edge->dest;
3737 gsi = gsi_start_phis (succ_bb);
3738 if (gsi_end_p (gsi))
3739 return;
3740 gphi *phi = as_a <gphi *> (gsi_stmt (gsi));
3741 gsi_next (&gsi);
3742 if (!gsi_end_p (gsi))
3743 return;
3744 if (gimple_phi_arg_def (phi, succ_edge->dest_idx) != lhs)
3745 return;
3746 tree other_val = gimple_phi_arg_def (phi, other_edge->dest_idx);
3747 if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3749 tree cond = gimple_assign_rhs1 (cond_stmt);
3750 if (TREE_CODE (cond) == NE_EXPR)
3752 if (!operand_equal_p (other_val,
3753 gimple_assign_rhs3 (cond_stmt), 0))
3754 return;
3756 else if (!operand_equal_p (other_val,
3757 gimple_assign_rhs2 (cond_stmt), 0))
3758 return;
3760 else if (gimple_assign_rhs_code (cond_stmt) == NE_EXPR)
3762 if (!integer_zerop (other_val))
3763 return;
3765 else if (!integer_onep (other_val))
3766 return;
3768 if (pred_edge->flags & EDGE_TRUE_VALUE)
3769 gimple_cond_make_true (zero_cond);
3770 else
3771 gimple_cond_make_false (zero_cond);
3772 update_stmt (zero_cond);
3773 *cfg_changed = true;
3776 /* Helper function for arith_overflow_check_p. Return true
3777 if VAL1 is equal to VAL2 cast to corresponding integral type
3778 with other signedness or vice versa. */
3780 static bool
3781 arith_cast_equal_p (tree val1, tree val2)
3783 if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
3784 return wi::eq_p (wi::to_wide (val1), wi::to_wide (val2));
3785 else if (TREE_CODE (val1) != SSA_NAME || TREE_CODE (val2) != SSA_NAME)
3786 return false;
3787 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1))
3788 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1)) == val2)
3789 return true;
3790 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2))
3791 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2)) == val1)
3792 return true;
3793 return false;
3796 /* Helper function of match_arith_overflow. Return 1
3797    if USE_STMT is an unsigned overflow check ovf != 0 for
3798    STMT, -1 if USE_STMT is an unsigned overflow check ovf == 0,
3799 and 0 otherwise. */
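     For example, for STMT  x_1 = y_2 - z_3;  a use like  if (x_1 > y_2)
     is recognized as the overflow check and yields 1, while  if (x_1 <= y_2)
     yields -1; any unrelated use yields 0.  (Illustrative SSA names only.)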
3801 static int
3802 arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
3803 tree maxval, tree *other)
3805 enum tree_code ccode = ERROR_MARK;
3806 tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3807 enum tree_code code = gimple_assign_rhs_code (stmt);
3808 tree lhs = gimple_assign_lhs (cast_stmt ? cast_stmt : stmt);
3809 tree rhs1 = gimple_assign_rhs1 (stmt);
3810 tree rhs2 = gimple_assign_rhs2 (stmt);
3811 tree multop = NULL_TREE, divlhs = NULL_TREE;
3812 gimple *cur_use_stmt = use_stmt;
3814 if (code == MULT_EXPR)
3816 if (!is_gimple_assign (use_stmt))
3817 return 0;
3818 if (gimple_assign_rhs_code (use_stmt) != TRUNC_DIV_EXPR)
3819 return 0;
3820 if (gimple_assign_rhs1 (use_stmt) != lhs)
3821 return 0;
3822 if (cast_stmt)
3824 if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs1))
3825 multop = rhs2;
3826 else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs2))
3827 multop = rhs1;
3828 else
3829 return 0;
3831 else if (gimple_assign_rhs2 (use_stmt) == rhs1)
3832 multop = rhs2;
3833 else if (operand_equal_p (gimple_assign_rhs2 (use_stmt), rhs2, 0))
3834 multop = rhs1;
3835 else
3836 return 0;
3837 if (stmt_ends_bb_p (use_stmt))
3838 return 0;
3839 divlhs = gimple_assign_lhs (use_stmt);
3840 if (!divlhs)
3841 return 0;
3842 use_operand_p use;
3843 if (!single_imm_use (divlhs, &use, &cur_use_stmt))
3844 return 0;
3845 if (cast_stmt && gimple_assign_cast_p (cur_use_stmt))
3847 tree cast_lhs = gimple_assign_lhs (cur_use_stmt);
3848 if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
3849 && TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
3850 && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
3851 == TYPE_PRECISION (TREE_TYPE (divlhs)))
3852 && single_imm_use (cast_lhs, &use, &cur_use_stmt))
3854 cast_stmt = NULL;
3855 divlhs = cast_lhs;
3857 else
3858 return 0;
3861 if (gimple_code (cur_use_stmt) == GIMPLE_COND)
3863 ccode = gimple_cond_code (cur_use_stmt);
3864 crhs1 = gimple_cond_lhs (cur_use_stmt);
3865 crhs2 = gimple_cond_rhs (cur_use_stmt);
3867 else if (is_gimple_assign (cur_use_stmt))
3869 if (gimple_assign_rhs_class (cur_use_stmt) == GIMPLE_BINARY_RHS)
3871 ccode = gimple_assign_rhs_code (cur_use_stmt);
3872 crhs1 = gimple_assign_rhs1 (cur_use_stmt);
3873 crhs2 = gimple_assign_rhs2 (cur_use_stmt);
3875 else if (gimple_assign_rhs_code (cur_use_stmt) == COND_EXPR)
3877 tree cond = gimple_assign_rhs1 (cur_use_stmt);
3878 if (COMPARISON_CLASS_P (cond))
3880 ccode = TREE_CODE (cond);
3881 crhs1 = TREE_OPERAND (cond, 0);
3882 crhs2 = TREE_OPERAND (cond, 1);
3884 else
3885 return 0;
3887 else
3888 return 0;
3890 else
3891 return 0;
3893 if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3894 return 0;
3896 switch (ccode)
3898 case GT_EXPR:
3899 case LE_EXPR:
3900 if (maxval)
3902 /* r = a + b; r > maxval or r <= maxval */
3903 if (crhs1 == lhs
3904 && TREE_CODE (crhs2) == INTEGER_CST
3905 && tree_int_cst_equal (crhs2, maxval))
3906 return ccode == GT_EXPR ? 1 : -1;
3907 break;
3909 /* r = a - b; r > a or r <= a
3910 r = a + b; a > r or a <= r or b > r or b <= r. */
3911 if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3912 || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3913 && crhs2 == lhs))
3914 return ccode == GT_EXPR ? 1 : -1;
3915 /* r = ~a; b > r or b <= r. */
3916 if (code == BIT_NOT_EXPR && crhs2 == lhs)
3918 if (other)
3919 *other = crhs1;
3920 return ccode == GT_EXPR ? 1 : -1;
3922 break;
3923 case LT_EXPR:
3924 case GE_EXPR:
3925 if (maxval)
3926 break;
3927 /* r = a - b; a < r or a >= r
3928 r = a + b; r < a or r >= a or r < b or r >= b. */
3929 if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3930 || (code == PLUS_EXPR && crhs1 == lhs
3931 && (crhs2 == rhs1 || crhs2 == rhs2)))
3932 return ccode == LT_EXPR ? 1 : -1;
3933 /* r = ~a; r < b or r >= b. */
3934 if (code == BIT_NOT_EXPR && crhs1 == lhs)
3936 if (other)
3937 *other = crhs2;
3938 return ccode == LT_EXPR ? 1 : -1;
3940 break;
3941 case EQ_EXPR:
3942 case NE_EXPR:
3943 /* r = a * b; _1 = r / a; _1 == b
3944 r = a * b; _1 = r / b; _1 == a
3945 r = a * b; _1 = r / a; _1 != b
3946 r = a * b; _1 = r / b; _1 != a. */
3947 if (code == MULT_EXPR)
3949 if (cast_stmt)
3951 if ((crhs1 == divlhs && arith_cast_equal_p (crhs2, multop))
3952 || (crhs2 == divlhs && arith_cast_equal_p (crhs1, multop)))
3954 use_stmt = cur_use_stmt;
3955 return ccode == NE_EXPR ? 1 : -1;
3958 else if ((crhs1 == divlhs && operand_equal_p (crhs2, multop, 0))
3959 || (crhs2 == divlhs && crhs1 == multop))
3961 use_stmt = cur_use_stmt;
3962 return ccode == NE_EXPR ? 1 : -1;
3965 break;
3966 default:
3967 break;
3969 return 0;
3972 /* Recognize for unsigned x
3973 x = y - z;
3974 if (x > y)
3975 where there are other uses of x and replace it with
3976 _7 = .SUB_OVERFLOW (y, z);
3977 x = REALPART_EXPR <_7>;
3978 _8 = IMAGPART_EXPR <_7>;
3979 if (_8)
3980 and similarly for addition.
3982 Also recognize:
3983 yc = (type) y;
3984 zc = (type) z;
3985 x = yc + zc;
3986 if (x > max)
3987 where y and z have unsigned types with maximum max
3988 and there are other uses of x and all of those cast x
3989 back to that unsigned type and again replace it with
3990 _7 = .ADD_OVERFLOW (y, z);
3991 _9 = REALPART_EXPR <_7>;
3992 _8 = IMAGPART_EXPR <_7>;
3993 if (_8)
3994 and replace (utype) x with _9.
3996 Also recognize:
3997 x = ~z;
3998 if (y > x)
3999 and replace it with
4000 _7 = .ADD_OVERFLOW (y, z);
4001 _8 = IMAGPART_EXPR <_7>;
4002 if (_8)
4004 And also recognize:
4005 z = x * y;
4006 if (x != 0)
4007 goto <bb 3>; [50.00%]
4008 else
4009 goto <bb 4>; [50.00%]
4011 <bb 3> [local count: 536870913]:
4012 _2 = z / x;
4013 _9 = _2 != y;
4014 _10 = (int) _9;
4016 <bb 4> [local count: 1073741824]:
4017 # iftmp.0_3 = PHI <_10(3), 0(2)>
4018 and replace it with
4019 _7 = .MUL_OVERFLOW (x, y);
4020 z = IMAGPART_EXPR <_7>;
4021 _8 = IMAGPART_EXPR <_7>;
4022 _9 = _8 != 0;
4023 iftmp.0_3 = (int) _9; */
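 
 /* Illustrative only (not part of the pass itself): a minimal C-source sketch,
    with hypothetical function names, of the unsigned wrap-around checks the
    matcher below targets.  Assuming optimization is enabled and the target
    provides the uaddv4/usubv4 (or mulv4/umulv4) patterns, the comparison
    against an operand is expected to be rewritten into a test of the
    IMAGPART_EXPR of an .ADD_OVERFLOW/.SUB_OVERFLOW call as described above.
 
      unsigned int
      add_check (unsigned int y, unsigned int z, int *ovf)
      {
        unsigned int x = y + z;
        *ovf = x < y;         // wrap-around test on unsigned addition
        return x;
      }
 
      unsigned int
      sub_check (unsigned int y, unsigned int z, int *ovf)
      {
        unsigned int x = y - z;
        *ovf = x > y;         // wrap-around test on unsigned subtraction
        return x;
      }  */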
4025 static bool
4026 match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
4027 enum tree_code code, bool *cfg_changed)
4029 tree lhs = gimple_assign_lhs (stmt);
4030 tree type = TREE_TYPE (lhs);
4031 use_operand_p use_p;
4032 imm_use_iterator iter;
4033 bool use_seen = false;
4034 bool ovf_use_seen = false;
4035 gimple *use_stmt;
4036 gimple *add_stmt = NULL;
4037 bool add_first = false;
4038 gimple *cond_stmt = NULL;
4039 gimple *cast_stmt = NULL;
4040 tree cast_lhs = NULL_TREE;
4042 gcc_checking_assert (code == PLUS_EXPR
4043 || code == MINUS_EXPR
4044 || code == MULT_EXPR
4045 || code == BIT_NOT_EXPR);
4046 if (!INTEGRAL_TYPE_P (type)
4047 || !TYPE_UNSIGNED (type)
4048 || has_zero_uses (lhs)
4049 || (code != PLUS_EXPR
4050 && code != MULT_EXPR
4051 && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
4052 TYPE_MODE (type)) == CODE_FOR_nothing))
4053 return false;
4055 tree rhs1 = gimple_assign_rhs1 (stmt);
4056 tree rhs2 = gimple_assign_rhs2 (stmt);
4057 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4059 use_stmt = USE_STMT (use_p);
4060 if (is_gimple_debug (use_stmt))
4061 continue;
4063 tree other = NULL_TREE;
4064 if (arith_overflow_check_p (stmt, NULL, use_stmt, NULL_TREE, &other))
4066 if (code == BIT_NOT_EXPR)
4068 gcc_assert (other);
4069 if (TREE_CODE (other) != SSA_NAME)
4070 return false;
4071 if (rhs2 == NULL)
4072 rhs2 = other;
4073 else
4074 return false;
4075 cond_stmt = use_stmt;
4077 ovf_use_seen = true;
4079 else
4081 use_seen = true;
4082 if (code == MULT_EXPR
4083 && cast_stmt == NULL
4084 && gimple_assign_cast_p (use_stmt))
4086 cast_lhs = gimple_assign_lhs (use_stmt);
4087 if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
4088 && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
4089 && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
4090 == TYPE_PRECISION (TREE_TYPE (lhs))))
4091 cast_stmt = use_stmt;
4092 else
4093 cast_lhs = NULL_TREE;
4096 if (ovf_use_seen && use_seen)
4097 break;
4100 if (!ovf_use_seen
4101 && code == MULT_EXPR
4102 && cast_stmt)
4104 if (TREE_CODE (rhs1) != SSA_NAME
4105 || (TREE_CODE (rhs2) != SSA_NAME && TREE_CODE (rhs2) != INTEGER_CST))
4106 return false;
4107 FOR_EACH_IMM_USE_FAST (use_p, iter, cast_lhs)
4109 use_stmt = USE_STMT (use_p);
4110 if (is_gimple_debug (use_stmt))
4111 continue;
4113 if (arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4114 NULL_TREE, NULL))
4115 ovf_use_seen = true;
4118 else
4120 cast_stmt = NULL;
4121 cast_lhs = NULL_TREE;
4124 tree maxval = NULL_TREE;
4125 if (!ovf_use_seen
4126 || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
4127 || (code == PLUS_EXPR
4128 && optab_handler (uaddv4_optab,
4129 TYPE_MODE (type)) == CODE_FOR_nothing)
4130 || (code == MULT_EXPR
4131 && optab_handler (cast_stmt ? mulv4_optab : umulv4_optab,
4132 TYPE_MODE (type)) == CODE_FOR_nothing
4133 && (use_seen
4134 || cast_stmt
4135 || !can_mult_highpart_p (TYPE_MODE (type), true))))
4137 if (code != PLUS_EXPR)
4138 return false;
4139 if (TREE_CODE (rhs1) != SSA_NAME
4140 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1)))
4141 return false;
4142 rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1));
4143 tree type1 = TREE_TYPE (rhs1);
4144 if (!INTEGRAL_TYPE_P (type1)
4145 || !TYPE_UNSIGNED (type1)
4146 || TYPE_PRECISION (type1) >= TYPE_PRECISION (type)
4147 || (TYPE_PRECISION (type1)
4148 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1))))
4149 return false;
4150 if (TREE_CODE (rhs2) == INTEGER_CST)
4152 if (wi::ne_p (wi::rshift (wi::to_wide (rhs2),
4153 TYPE_PRECISION (type1),
4154 UNSIGNED), 0))
4155 return false;
4156 rhs2 = fold_convert (type1, rhs2);
4158 else
4160 if (TREE_CODE (rhs2) != SSA_NAME
4161 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2)))
4162 return false;
4163 rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2));
4164 tree type2 = TREE_TYPE (rhs2);
4165 if (!INTEGRAL_TYPE_P (type2)
4166 || !TYPE_UNSIGNED (type2)
4167 || TYPE_PRECISION (type2) >= TYPE_PRECISION (type)
4168 || (TYPE_PRECISION (type2)
4169 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2))))
4170 return false;
4172 if (TYPE_PRECISION (type1) >= TYPE_PRECISION (TREE_TYPE (rhs2)))
4173 type = type1;
4174 else
4175 type = TREE_TYPE (rhs2);
4177 if (TREE_CODE (type) != INTEGER_TYPE
4178 || optab_handler (uaddv4_optab,
4179 TYPE_MODE (type)) == CODE_FOR_nothing)
4180 return false;
4182 maxval = wide_int_to_tree (type, wi::max_value (TYPE_PRECISION (type),
4183 UNSIGNED));
4184 ovf_use_seen = false;
4185 use_seen = false;
4186 basic_block use_bb = NULL;
4187 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4189 use_stmt = USE_STMT (use_p);
4190 if (is_gimple_debug (use_stmt))
4191 continue;
4193 if (arith_overflow_check_p (stmt, NULL, use_stmt, maxval, NULL))
4195 ovf_use_seen = true;
4196 use_bb = gimple_bb (use_stmt);
4198 else
4200 if (!gimple_assign_cast_p (use_stmt)
4201 || gimple_assign_rhs_code (use_stmt) == VIEW_CONVERT_EXPR)
4202 return false;
4203 tree use_lhs = gimple_assign_lhs (use_stmt);
4204 if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
4205 || (TYPE_PRECISION (TREE_TYPE (use_lhs))
4206 > TYPE_PRECISION (type)))
4207 return false;
4208 use_seen = true;
4211 if (!ovf_use_seen)
4212 return false;
4213 if (!useless_type_conversion_p (type, TREE_TYPE (rhs1)))
4215 if (!use_seen)
4216 return false;
4217 tree new_rhs1 = make_ssa_name (type);
4218 gimple *g = gimple_build_assign (new_rhs1, NOP_EXPR, rhs1);
4219 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4220 rhs1 = new_rhs1;
4222 else if (!useless_type_conversion_p (type, TREE_TYPE (rhs2)))
4224 if (!use_seen)
4225 return false;
4226 tree new_rhs2 = make_ssa_name (type);
4227 gimple *g = gimple_build_assign (new_rhs2, NOP_EXPR, rhs2);
4228 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4229 rhs2 = new_rhs2;
4231 else if (!use_seen)
4233 /* If there are no uses of the wider addition, check if
4234 forwprop has not created a narrower addition.
4235 Require it to be in the same bb as the overflow check. */
4236 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs1)
4238 use_stmt = USE_STMT (use_p);
4239 if (is_gimple_debug (use_stmt))
4240 continue;
4242 if (use_stmt == stmt)
4243 continue;
4245 if (!is_gimple_assign (use_stmt)
4246 || gimple_bb (use_stmt) != use_bb
4247 || gimple_assign_rhs_code (use_stmt) != PLUS_EXPR)
4248 continue;
4250 if (gimple_assign_rhs1 (use_stmt) == rhs1)
4252 if (!operand_equal_p (gimple_assign_rhs2 (use_stmt),
4253 rhs2, 0))
4254 continue;
4256 else if (gimple_assign_rhs2 (use_stmt) == rhs1)
4258 if (gimple_assign_rhs1 (use_stmt) != rhs2)
4259 continue;
4261 else
4262 continue;
4264 add_stmt = use_stmt;
4265 break;
4267 if (add_stmt == NULL)
4268 return false;
4270 /* If stmt and add_stmt are in the same bb, we need to find out
4271 which one is earlier. If they are in different bbs, we've
4272 checked add_stmt is in the same bb as one of the uses of the
4273 stmt lhs, so stmt needs to dominate add_stmt too. */
4274 if (gimple_bb (stmt) == gimple_bb (add_stmt))
4276 gimple_stmt_iterator gsif = *gsi;
4277 gimple_stmt_iterator gsib = *gsi;
4278 int i;
4279 /* Search both forward and backward from stmt and have a small
4280 upper bound. */
4281 for (i = 0; i < 128; i++)
4283 if (!gsi_end_p (gsib))
4285 gsi_prev_nondebug (&gsib);
4286 if (gsi_stmt (gsib) == add_stmt)
4288 add_first = true;
4289 break;
4292 else if (gsi_end_p (gsif))
4293 break;
4294 if (!gsi_end_p (gsif))
4296 gsi_next_nondebug (&gsif);
4297 if (gsi_stmt (gsif) == add_stmt)
4298 break;
4301 if (i == 128)
4302 return false;
4303 if (add_first)
4304 *gsi = gsi_for_stmt (add_stmt);
4309 if (code == BIT_NOT_EXPR)
4310 *gsi = gsi_for_stmt (cond_stmt);
4312 auto_vec<gimple *, 8> mul_stmts;
4313 if (code == MULT_EXPR && cast_stmt)
4315 type = TREE_TYPE (cast_lhs);
4316 gimple *g = SSA_NAME_DEF_STMT (rhs1);
4317 if (gimple_assign_cast_p (g)
4318 && useless_type_conversion_p (type,
4319 TREE_TYPE (gimple_assign_rhs1 (g)))
4320 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4321 rhs1 = gimple_assign_rhs1 (g);
4322 else
4324 g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs1);
4325 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4326 rhs1 = gimple_assign_lhs (g);
4327 mul_stmts.quick_push (g);
4329 if (TREE_CODE (rhs2) == INTEGER_CST)
4330 rhs2 = fold_convert (type, rhs2);
4331 else
4333 g = SSA_NAME_DEF_STMT (rhs2);
4334 if (gimple_assign_cast_p (g)
4335 && useless_type_conversion_p (type,
4336 TREE_TYPE (gimple_assign_rhs1 (g)))
4337 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4338 rhs2 = gimple_assign_rhs1 (g);
4339 else
4341 g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs2);
4342 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4343 rhs2 = gimple_assign_lhs (g);
4344 mul_stmts.quick_push (g);
4348 tree ctype = build_complex_type (type);
4349 gcall *g = gimple_build_call_internal (code == MULT_EXPR
4350 ? IFN_MUL_OVERFLOW
4351 : code != MINUS_EXPR
4352 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
4353 2, rhs1, rhs2);
4354 tree ctmp = make_ssa_name (ctype);
4355 gimple_call_set_lhs (g, ctmp);
4356 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4357 tree new_lhs = (maxval || cast_stmt) ? make_ssa_name (type) : lhs;
4358 gassign *g2;
4359 if (code != BIT_NOT_EXPR)
4361 g2 = gimple_build_assign (new_lhs, REALPART_EXPR,
4362 build1 (REALPART_EXPR, type, ctmp));
4363 if (maxval || cast_stmt)
4365 gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4366 if (add_first)
4367 *gsi = gsi_for_stmt (stmt);
4369 else
4370 gsi_replace (gsi, g2, true);
4371 if (code == MULT_EXPR)
4373 mul_stmts.quick_push (g);
4374 mul_stmts.quick_push (g2);
4375 if (cast_stmt)
4377 g2 = gimple_build_assign (lhs, NOP_EXPR, new_lhs);
4378 gsi_replace (gsi, g2, true);
4379 mul_stmts.quick_push (g2);
4383 tree ovf = make_ssa_name (type);
4384 g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
4385 build1 (IMAGPART_EXPR, type, ctmp));
4386 if (code != BIT_NOT_EXPR)
4387 gsi_insert_after (gsi, g2, GSI_NEW_STMT);
4388 else
4389 gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4390 if (code == MULT_EXPR)
4391 mul_stmts.quick_push (g2);
4393 FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs)
4395 if (is_gimple_debug (use_stmt))
4396 continue;
4398 gimple *orig_use_stmt = use_stmt;
4399 int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4400 maxval, NULL);
4401 if (ovf_use == 0)
4403 gcc_assert (code != BIT_NOT_EXPR);
4404 if (maxval)
4406 tree use_lhs = gimple_assign_lhs (use_stmt);
4407 gimple_assign_set_rhs1 (use_stmt, new_lhs);
4408 if (useless_type_conversion_p (TREE_TYPE (use_lhs),
4409 TREE_TYPE (new_lhs)))
4410 gimple_assign_set_rhs_code (use_stmt, SSA_NAME);
4411 update_stmt (use_stmt);
4413 continue;
4415 if (gimple_code (use_stmt) == GIMPLE_COND)
4417 gcond *cond_stmt = as_a <gcond *> (use_stmt);
4418 gimple_cond_set_lhs (cond_stmt, ovf);
4419 gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
4420 gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4422 else
4424 gcc_checking_assert (is_gimple_assign (use_stmt));
4425 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
4427 gimple_assign_set_rhs1 (use_stmt, ovf);
4428 gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
4429 gimple_assign_set_rhs_code (use_stmt,
4430 ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4432 else
4434 gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
4435 == COND_EXPR);
4436 tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
4437 boolean_type_node, ovf,
4438 build_int_cst (type, 0));
4439 gimple_assign_set_rhs1 (use_stmt, cond);
4442 update_stmt (use_stmt);
4443 if (code == MULT_EXPR && use_stmt != orig_use_stmt)
4445 gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt);
4446 maybe_optimize_guarding_check (mul_stmts, use_stmt, orig_use_stmt,
4447 cfg_changed);
4448 use_operand_p use;
4449 gimple *cast_stmt;
4450 if (single_imm_use (gimple_assign_lhs (orig_use_stmt), &use,
4451 &cast_stmt)
4452 && gimple_assign_cast_p (cast_stmt))
4454 gimple_stmt_iterator gsi3 = gsi_for_stmt (cast_stmt);
4455 gsi_remove (&gsi3, true);
4456 release_ssa_name (gimple_assign_lhs (cast_stmt));
4458 gsi_remove (&gsi2, true);
4459 release_ssa_name (gimple_assign_lhs (orig_use_stmt));
4462 if (maxval)
4464 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
4465 gsi_remove (&gsi2, true);
4466 if (add_stmt)
4468 gimple *g = gimple_build_assign (gimple_assign_lhs (add_stmt),
4469 new_lhs);
4470 gsi2 = gsi_for_stmt (add_stmt);
4471 gsi_replace (&gsi2, g, true);
4474 else if (code == BIT_NOT_EXPR)
4476 *gsi = gsi_for_stmt (stmt);
4477 gsi_remove (gsi, true);
4478 release_ssa_name (lhs);
4479 return true;
4481 return false;
4484 /* Helper of match_uaddc_usubc. Look through an integral cast
4485    which should preserve a [0, 1] range value (unless the source has
4486    a 1-bit signed type) and the cast has a single use.  */
4488 static gimple *
4489 uaddc_cast (gimple *g)
4491 if (!gimple_assign_cast_p (g))
4492 return g;
4493 tree op = gimple_assign_rhs1 (g);
4494 if (TREE_CODE (op) == SSA_NAME
4495 && INTEGRAL_TYPE_P (TREE_TYPE (op))
4496 && (TYPE_PRECISION (TREE_TYPE (op)) > 1
4497 || TYPE_UNSIGNED (TREE_TYPE (op)))
4498 && has_single_use (gimple_assign_lhs (g)))
4499 return SSA_NAME_DEF_STMT (op);
4500 return g;
4503 /* Helper of match_uaddc_usubc. Look through a NE_EXPR
4504 comparison with 0 which also preserves [0, 1] value range. */
4506 static gimple *
4507 uaddc_ne0 (gimple *g)
4509 if (is_gimple_assign (g)
4510 && gimple_assign_rhs_code (g) == NE_EXPR
4511 && integer_zerop (gimple_assign_rhs2 (g))
4512 && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
4513 && has_single_use (gimple_assign_lhs (g)))
4514 return SSA_NAME_DEF_STMT (gimple_assign_rhs1 (g));
4515 return g;
4518 /* Return true if G is {REAL,IMAG}PART_EXPR PART with SSA_NAME
4519 operand. */
4521 static bool
4522 uaddc_is_cplxpart (gimple *g, tree_code part)
4524 return (is_gimple_assign (g)
4525 && gimple_assign_rhs_code (g) == part
4526 && TREE_CODE (TREE_OPERAND (gimple_assign_rhs1 (g), 0)) == SSA_NAME);
4529 /* Try to match e.g.
4530 _29 = .ADD_OVERFLOW (_3, _4);
4531 _30 = REALPART_EXPR <_29>;
4532 _31 = IMAGPART_EXPR <_29>;
4533 _32 = .ADD_OVERFLOW (_30, _38);
4534 _33 = REALPART_EXPR <_32>;
4535 _34 = IMAGPART_EXPR <_32>;
4536 _35 = _31 + _34;
4538 _36 = .UADDC (_3, _4, _38);
4539 _33 = REALPART_EXPR <_36>;
4540 _35 = IMAGPART_EXPR <_36>;
4542 _22 = .SUB_OVERFLOW (_6, _5);
4543 _23 = REALPART_EXPR <_22>;
4544 _24 = IMAGPART_EXPR <_22>;
4545 _25 = .SUB_OVERFLOW (_23, _37);
4546 _26 = REALPART_EXPR <_25>;
4547 _27 = IMAGPART_EXPR <_25>;
4548 _28 = _24 | _27;
4550 _29 = .USUBC (_6, _5, _37);
4551 _26 = REALPART_EXPR <_29>;
4552 _288 = IMAGPART_EXPR <_29>;
4553 provided _38 or _37 above have [0, 1] range
4554 and _3, _4 and _30 or _6, _5 and _23 are unsigned
4555 integral types with the same precision. Whether + or | or ^ is
4556    used on the IMAGPART_EXPR results doesn't matter; with one of the
4557    added or subtracted operands in the [0, 1] range, at most one
4558 .ADD_OVERFLOW or .SUB_OVERFLOW will indicate overflow. */
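 
 /* Illustrative only: a C-source sketch, with hypothetical names, of a
    carry-propagating limb addition that this matcher targets.
    __builtin_add_overflow already expands to .ADD_OVERFLOW, so with a
    uaddc5 pattern for the mode the two overflow calls and the combined
    carry below are expected to be merged into a single .UADDC call.
    CARRY_IN must be known to be 0 or 1 for the match to apply.
 
      unsigned long
      limb_addc (unsigned long a, unsigned long b, _Bool carry_in,
                 _Bool *carry_out)
      {
        unsigned long s1, s2;
        _Bool c1 = __builtin_add_overflow (a, b, &s1);
        _Bool c2 = __builtin_add_overflow (s1, (unsigned long) carry_in, &s2);
        *carry_out = c1 | c2;  // at most one of the two additions wraps
        return s2;
      }  */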
4560 static bool
4561 match_uaddc_usubc (gimple_stmt_iterator *gsi, gimple *stmt, tree_code code)
4563 tree rhs[4];
4564 rhs[0] = gimple_assign_rhs1 (stmt);
4565 rhs[1] = gimple_assign_rhs2 (stmt);
4566 rhs[2] = NULL_TREE;
4567 rhs[3] = NULL_TREE;
4568 tree type = TREE_TYPE (rhs[0]);
4569 if (!INTEGRAL_TYPE_P (type) || !TYPE_UNSIGNED (type))
4570 return false;
4572 if (code != BIT_IOR_EXPR && code != BIT_XOR_EXPR)
4574 /* If overflow flag is ignored on the MSB limb, we can end up with
4575 the most significant limb handled as r = op1 + op2 + ovf1 + ovf2;
4576 or r = op1 - op2 - ovf1 - ovf2; or various equivalent expressions
4577 thereof. Handle those like the ovf = ovf1 + ovf2; case to recognize
4578 the limb below the MSB, but also create another .UADDC/.USUBC call
4579 for the last limb.
4581 First look through assignments with the same rhs code as CODE,
4582 with the exception that subtraction of a constant is canonicalized
4583 into addition of its negation. rhs[0] will be minuend for
4584 subtractions and one of addends for addition, all other assigned
4585 rhs[i] operands will be subtrahends or other addends. */
4586 while (TREE_CODE (rhs[0]) == SSA_NAME && !rhs[3])
4588 gimple *g = SSA_NAME_DEF_STMT (rhs[0]);
4589 if (has_single_use (rhs[0])
4590 && is_gimple_assign (g)
4591 && (gimple_assign_rhs_code (g) == code
4592 || (code == MINUS_EXPR
4593 && gimple_assign_rhs_code (g) == PLUS_EXPR
4594 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST)))
4596 tree r2 = gimple_assign_rhs2 (g);
4597 if (gimple_assign_rhs_code (g) != code)
4599 r2 = const_unop (NEGATE_EXPR, TREE_TYPE (r2), r2);
4600 if (!r2)
4601 break;
4603 rhs[0] = gimple_assign_rhs1 (g);
4604 tree &r = rhs[2] ? rhs[3] : rhs[2];
4605 r = r2;
4607 else
4608 break;
4610 while (TREE_CODE (rhs[1]) == SSA_NAME && !rhs[3])
4612 gimple *g = SSA_NAME_DEF_STMT (rhs[1]);
4613 if (has_single_use (rhs[1])
4614 && is_gimple_assign (g)
4615 && gimple_assign_rhs_code (g) == PLUS_EXPR)
4617 rhs[1] = gimple_assign_rhs1 (g);
4618 if (rhs[2])
4619 rhs[3] = gimple_assign_rhs2 (g);
4620 else
4621 rhs[2] = gimple_assign_rhs2 (g);
4623 else
4624 break;
4626 /* If there are just 3 addends or one minuend and two subtrahends,
4627 check for UADDC or USUBC being pattern recognized earlier.
4628 Say r = op1 + op2 + ovf1 + ovf2; where the (ovf1 + ovf2) part
4629 got pattern matched earlier as __imag__ .UADDC (arg1, arg2, arg3)
4630 etc. */
4631 if (rhs[2] && !rhs[3])
4633 for (int i = (code == MINUS_EXPR ? 1 : 0); i < 3; ++i)
4634 if (TREE_CODE (rhs[i]) == SSA_NAME)
4636 gimple *im = uaddc_cast (SSA_NAME_DEF_STMT (rhs[i]));
4637 im = uaddc_ne0 (im);
4638 if (uaddc_is_cplxpart (im, IMAGPART_EXPR))
4640 /* We found one of the 3 addends or 2 subtrahends to be
4641 __imag__ of something, verify it is .UADDC/.USUBC. */
4642 tree rhs1 = gimple_assign_rhs1 (im);
4643 gimple *ovf = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs1, 0));
4644 if (gimple_call_internal_p (ovf, code == PLUS_EXPR
4645 ? IFN_UADDC : IFN_USUBC)
4646 && (optab_handler (code == PLUS_EXPR
4647 ? uaddc5_optab : usubc5_optab,
4648 TYPE_MODE (type))
4649 != CODE_FOR_nothing))
4651 /* And in that case build another .UADDC/.USUBC
4652                      call for the most significant limb addition.
4653 Overflow bit is ignored here. */
4654 if (i != 2)
4655 std::swap (rhs[i], rhs[2]);
4656 gimple *g
4657 = gimple_build_call_internal (code == PLUS_EXPR
4658 ? IFN_UADDC
4659 : IFN_USUBC,
4660 3, rhs[0], rhs[1],
4661 rhs[2]);
4662 tree nlhs = make_ssa_name (build_complex_type (type));
4663 gimple_call_set_lhs (g, nlhs);
4664 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4665 tree ilhs = gimple_assign_lhs (stmt);
4666 g = gimple_build_assign (ilhs, REALPART_EXPR,
4667 build1 (REALPART_EXPR,
4668 TREE_TYPE (ilhs),
4669 nlhs));
4670 gsi_replace (gsi, g, true);
4671 return true;
4675 return false;
4677 if (code == MINUS_EXPR && !rhs[2])
4678 return false;
4679 if (code == MINUS_EXPR)
4680 /* Code below expects rhs[0] and rhs[1] to have the IMAGPART_EXPRs.
4681 So, for MINUS_EXPR swap the single added rhs operand (others are
4682 subtracted) to rhs[3]. */
4683 std::swap (rhs[0], rhs[3]);
4685 /* Walk from both operands of STMT (for +/- even sometimes from
4686 all the 4 addends or 3 subtrahends), see through casts and != 0
4687 statements which would preserve [0, 1] range of values and
4688 check which is initialized from __imag__. */
4689 gimple *im1 = NULL, *im2 = NULL;
4690 for (int i = 0; i < (code == MINUS_EXPR ? 3 : 4); i++)
4691 if (rhs[i] && TREE_CODE (rhs[i]) == SSA_NAME)
4693 gimple *im = uaddc_cast (SSA_NAME_DEF_STMT (rhs[i]));
4694 im = uaddc_ne0 (im);
4695 if (uaddc_is_cplxpart (im, IMAGPART_EXPR))
4697 if (im1 == NULL)
4699 im1 = im;
4700 if (i != 0)
4701 std::swap (rhs[0], rhs[i]);
4703 else
4705 im2 = im;
4706 if (i != 1)
4707 std::swap (rhs[1], rhs[i]);
4708 break;
4712 /* If we don't find at least two, punt. */
4713 if (!im2)
4714 return false;
4715 /* Check they are __imag__ of .ADD_OVERFLOW or .SUB_OVERFLOW call results,
4716 either both .ADD_OVERFLOW or both .SUB_OVERFLOW and that we have
4717 uaddc5/usubc5 named pattern for the corresponding mode. */
4718 gimple *ovf1
4719 = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im1), 0));
4720 gimple *ovf2
4721 = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im2), 0));
4722 internal_fn ifn;
4723 if (!is_gimple_call (ovf1)
4724 || !gimple_call_internal_p (ovf1)
4725 || ((ifn = gimple_call_internal_fn (ovf1)) != IFN_ADD_OVERFLOW
4726 && ifn != IFN_SUB_OVERFLOW)
4727 || !gimple_call_internal_p (ovf2, ifn)
4728 || optab_handler (ifn == IFN_ADD_OVERFLOW ? uaddc5_optab : usubc5_optab,
4729 TYPE_MODE (type)) == CODE_FOR_nothing
4730 || (rhs[2]
4731 && optab_handler (code == PLUS_EXPR ? uaddc5_optab : usubc5_optab,
4732 TYPE_MODE (type)) == CODE_FOR_nothing))
4733 return false;
4734 tree arg1, arg2, arg3 = NULL_TREE;
4735 gimple *re1 = NULL, *re2 = NULL;
4736 /* On one of the two calls, one of the .ADD_OVERFLOW/.SUB_OVERFLOW arguments
4737 should be initialized from __real__ of the other of the two calls.
4738 Though, for .SUB_OVERFLOW, it has to be the first argument, not the
4739 second one. */
4740 for (int i = (ifn == IFN_ADD_OVERFLOW ? 1 : 0); i >= 0; --i)
4741 for (gimple *ovf = ovf1; ovf; ovf = (ovf == ovf1 ? ovf2 : NULL))
4743 tree arg = gimple_call_arg (ovf, i);
4744 if (TREE_CODE (arg) != SSA_NAME)
4745 continue;
4746 re1 = SSA_NAME_DEF_STMT (arg);
4747 if (uaddc_is_cplxpart (re1, REALPART_EXPR)
4748 && (SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (re1), 0))
4749 == (ovf == ovf1 ? ovf2 : ovf1)))
4751 if (ovf == ovf1)
4753 /* Make sure ovf2 is the .*_OVERFLOW call with argument
4754 initialized from __real__ of ovf1. */
4755 std::swap (rhs[0], rhs[1]);
4756 std::swap (im1, im2);
4757 std::swap (ovf1, ovf2);
4759 arg3 = gimple_call_arg (ovf, 1 - i);
4760 i = -1;
4761 break;
4764 if (!arg3)
4765 return false;
4766 arg1 = gimple_call_arg (ovf1, 0);
4767 arg2 = gimple_call_arg (ovf1, 1);
4768 if (!types_compatible_p (type, TREE_TYPE (arg1)))
4769 return false;
4770 int kind[2] = { 0, 0 };
4771 tree arg_im[2] = { NULL_TREE, NULL_TREE };
4772 /* At least one of arg2 and arg3 should have type compatible
4773 with arg1/rhs[0], and the other one should have value in [0, 1]
4774 range. If both are in [0, 1] range and type compatible with
4775 arg1/rhs[0], try harder to find after looking through casts,
4776 != 0 comparisons which one is initialized to __imag__ of
4777 .{ADD,SUB}_OVERFLOW or .U{ADD,SUB}C call results. */
4778 for (int i = 0; i < 2; ++i)
4780 tree arg = i == 0 ? arg2 : arg3;
4781 if (types_compatible_p (type, TREE_TYPE (arg)))
4782 kind[i] = 1;
4783 if (!INTEGRAL_TYPE_P (TREE_TYPE (arg))
4784 || (TYPE_PRECISION (TREE_TYPE (arg)) == 1
4785 && !TYPE_UNSIGNED (TREE_TYPE (arg))))
4786 continue;
4787 if (tree_zero_one_valued_p (arg))
4788 kind[i] |= 2;
4789 if (TREE_CODE (arg) == SSA_NAME)
4791 gimple *g = SSA_NAME_DEF_STMT (arg);
4792 if (gimple_assign_cast_p (g))
4794 tree op = gimple_assign_rhs1 (g);
4795 if (TREE_CODE (op) == SSA_NAME
4796 && INTEGRAL_TYPE_P (TREE_TYPE (op)))
4797 g = SSA_NAME_DEF_STMT (op);
4799 g = uaddc_ne0 (g);
4800 if (!uaddc_is_cplxpart (g, IMAGPART_EXPR))
4801 continue;
4802 arg_im[i] = gimple_assign_lhs (g);
4803 g = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (g), 0));
4804 if (!is_gimple_call (g) || !gimple_call_internal_p (g))
4805 continue;
4806 switch (gimple_call_internal_fn (g))
4808 case IFN_ADD_OVERFLOW:
4809 case IFN_SUB_OVERFLOW:
4810 case IFN_UADDC:
4811 case IFN_USUBC:
4812 break;
4813 default:
4814 continue;
4816 kind[i] |= 4;
4819 /* Make arg2 the one with compatible type and arg3 the one
4820      with [0, 1] range.  If both are true for both operands,
4821 prefer as arg3 result of __imag__ of some ifn. */
4822 if ((kind[0] & 1) == 0 || ((kind[1] & 1) != 0 && kind[0] > kind[1]))
4824 std::swap (arg2, arg3);
4825 std::swap (kind[0], kind[1]);
4826 std::swap (arg_im[0], arg_im[1]);
4828 if ((kind[0] & 1) == 0 || (kind[1] & 6) == 0)
4829 return false;
4830 if (!has_single_use (gimple_assign_lhs (im1))
4831 || !has_single_use (gimple_assign_lhs (im2))
4832 || !has_single_use (gimple_assign_lhs (re1))
4833 || num_imm_uses (gimple_call_lhs (ovf1)) != 2)
4834 return false;
4835 /* Check that ovf2's result is used in __real__ and set re2
4836 to that statement. */
4837 use_operand_p use_p;
4838 imm_use_iterator iter;
4839 tree lhs = gimple_call_lhs (ovf2);
4840 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4842 gimple *use_stmt = USE_STMT (use_p);
4843 if (is_gimple_debug (use_stmt))
4844 continue;
4845 if (use_stmt == im2)
4846 continue;
4847 if (re2)
4848 return false;
4849 if (!uaddc_is_cplxpart (use_stmt, REALPART_EXPR))
4850 return false;
4851 re2 = use_stmt;
4853 /* Build .UADDC/.USUBC call which will be placed before the stmt. */
4854 gimple_stmt_iterator gsi2 = gsi_for_stmt (ovf2);
4855 gimple *g;
4856 if ((kind[1] & 4) != 0 && types_compatible_p (type, TREE_TYPE (arg_im[1])))
4857 arg3 = arg_im[1];
4858 if ((kind[1] & 1) == 0)
4860 if (TREE_CODE (arg3) == INTEGER_CST)
4861 arg3 = fold_convert (type, arg3);
4862 else
4864 g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, arg3);
4865 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4866 arg3 = gimple_assign_lhs (g);
4869 g = gimple_build_call_internal (ifn == IFN_ADD_OVERFLOW
4870 ? IFN_UADDC : IFN_USUBC,
4871 3, arg1, arg2, arg3);
4872 tree nlhs = make_ssa_name (TREE_TYPE (lhs));
4873 gimple_call_set_lhs (g, nlhs);
4874 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4875 /* In the case where stmt is | or ^ of two overflow flags
4876 or addition of those, replace stmt with __imag__ of the above
4877 added call. In case of arg1 + arg2 + (ovf1 + ovf2) or
4878 arg1 - arg2 - (ovf1 + ovf2) just emit it before stmt. */
4879 tree ilhs = rhs[2] ? make_ssa_name (type) : gimple_assign_lhs (stmt);
4880 g = gimple_build_assign (ilhs, IMAGPART_EXPR,
4881 build1 (IMAGPART_EXPR, TREE_TYPE (ilhs), nlhs));
4882 if (rhs[2])
4883 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4884 else
4885 gsi_replace (gsi, g, true);
4886 /* Remove some statements which can't be kept in the IL because they
4887 use SSA_NAME whose setter is going to be removed too. */
4888 tree rhs1 = rhs[1];
4889 for (int i = 0; i < 2; i++)
4890 if (rhs1 == gimple_assign_lhs (im2))
4891 break;
4892 else
4894 g = SSA_NAME_DEF_STMT (rhs1);
4895 rhs1 = gimple_assign_rhs1 (g);
4896 gsi2 = gsi_for_stmt (g);
4897 gsi_remove (&gsi2, true);
4899 gcc_checking_assert (rhs1 == gimple_assign_lhs (im2));
4900 gsi2 = gsi_for_stmt (im2);
4901 gsi_remove (&gsi2, true);
4902 /* Replace the re2 statement with __real__ of the newly added
4903 .UADDC/.USUBC call. */
4904 if (re2)
4906 gsi2 = gsi_for_stmt (re2);
4907 tree rlhs = gimple_assign_lhs (re2);
4908 g = gimple_build_assign (rlhs, REALPART_EXPR,
4909 build1 (REALPART_EXPR, TREE_TYPE (rlhs), nlhs));
4910 gsi_replace (&gsi2, g, true);
4912 if (rhs[2])
4914 /* If this is the arg1 + arg2 + (ovf1 + ovf2) or
4915 arg1 - arg2 - (ovf1 + ovf2) case for the most significant limb,
4916 replace stmt with __real__ of another .UADDC/.USUBC call which
4917 handles the most significant limb. Overflow flag from this is
4918 ignored. */
4919 g = gimple_build_call_internal (code == PLUS_EXPR
4920 ? IFN_UADDC : IFN_USUBC,
4921 3, rhs[3], rhs[2], ilhs);
4922 nlhs = make_ssa_name (TREE_TYPE (lhs));
4923 gimple_call_set_lhs (g, nlhs);
4924 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4925 ilhs = gimple_assign_lhs (stmt);
4926 g = gimple_build_assign (ilhs, REALPART_EXPR,
4927 build1 (REALPART_EXPR, TREE_TYPE (ilhs), nlhs));
4928 gsi_replace (gsi, g, true);
4930 if (TREE_CODE (arg3) == SSA_NAME)
4932 /* When pattern recognizing the second least significant limb
4933 above (i.e. first pair of .{ADD,SUB}_OVERFLOW calls for one limb),
4934 check if the [0, 1] range argument (i.e. carry in) isn't the
4935 result of another .{ADD,SUB}_OVERFLOW call (one handling the
4936 least significant limb). Again look through casts and != 0. */
4937 gimple *im3 = SSA_NAME_DEF_STMT (arg3);
4938 for (int i = 0; i < 2; ++i)
4940 gimple *im4 = uaddc_cast (im3);
4941 if (im4 == im3)
4942 break;
4943 else
4944 im3 = im4;
4946 im3 = uaddc_ne0 (im3);
4947 if (uaddc_is_cplxpart (im3, IMAGPART_EXPR))
4949 gimple *ovf3
4950 = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im3), 0));
4951 if (gimple_call_internal_p (ovf3, ifn))
4953 lhs = gimple_call_lhs (ovf3);
4954 arg1 = gimple_call_arg (ovf3, 0);
4955 arg2 = gimple_call_arg (ovf3, 1);
4956 if (types_compatible_p (type, TREE_TYPE (TREE_TYPE (lhs)))
4957 && types_compatible_p (type, TREE_TYPE (arg1))
4958 && types_compatible_p (type, TREE_TYPE (arg2)))
4960 /* And if it is initialized from result of __imag__
4961 of .{ADD,SUB}_OVERFLOW call, replace that
4962 call with .U{ADD,SUB}C call with the same arguments,
4963 just 0 added as third argument. This isn't strictly
4964 necessary, .ADD_OVERFLOW (x, y) and .UADDC (x, y, 0)
4965 produce the same result, but may result in better
4966 generated code on some targets where the backend can
4967                 better prepare for how the result will be used.  */
4968 g = gimple_build_call_internal (ifn == IFN_ADD_OVERFLOW
4969 ? IFN_UADDC : IFN_USUBC,
4970 3, arg1, arg2,
4971 build_zero_cst (type));
4972 gimple_call_set_lhs (g, lhs);
4973 gsi2 = gsi_for_stmt (ovf3);
4974 gsi_replace (&gsi2, g, true);
4979 return true;
4982 /* Return true if target has support for divmod. */
4984 static bool
4985 target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
4987 /* If target supports hardware divmod insn, use it for divmod. */
4988 if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
4989 return true;
4991 /* Check if libfunc for divmod is available. */
4992 rtx libfunc = optab_libfunc (divmod_optab, mode);
4993 if (libfunc != NULL_RTX)
4995 /* If optab_handler exists for div_optab, perhaps in a wider mode,
4996 we don't want to use the libfunc even if it exists for given mode. */
4997 machine_mode div_mode;
4998 FOR_EACH_MODE_FROM (div_mode, mode)
4999 if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
5000 return false;
5002 return targetm.expand_divmod_libfunc != NULL;
5005 return false;
5008 /* Check if stmt is candidate for divmod transform. */
5010 static bool
5011 divmod_candidate_p (gassign *stmt)
5013 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
5014 machine_mode mode = TYPE_MODE (type);
5015 optab divmod_optab, div_optab;
5017 if (TYPE_UNSIGNED (type))
5019 divmod_optab = udivmod_optab;
5020 div_optab = udiv_optab;
5022 else
5024 divmod_optab = sdivmod_optab;
5025 div_optab = sdiv_optab;
5028 tree op1 = gimple_assign_rhs1 (stmt);
5029 tree op2 = gimple_assign_rhs2 (stmt);
5031 /* Disable the transform if either is a constant, since division-by-constant
5032 may have specialized expansion. */
5033 if (CONSTANT_CLASS_P (op1))
5034 return false;
5036 if (CONSTANT_CLASS_P (op2))
5038 if (integer_pow2p (op2))
5039 return false;
5041 if (element_precision (type) <= HOST_BITS_PER_WIDE_INT
5042 && element_precision (type) <= BITS_PER_WORD)
5043 return false;
5045       /* If the divisor is not a power of 2 and the precision is wider than
5046 HWI, expand_divmod punts on that, so in that case it is better
5047 to use divmod optab or libfunc. Similarly if choose_multiplier
5048 might need pre/post shifts of BITS_PER_WORD or more. */
5051 /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
5052 expand using the [su]divv optabs. */
5053 if (TYPE_OVERFLOW_TRAPS (type))
5054 return false;
5056 if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
5057 return false;
5059 return true;
5062 /* This function looks for:
5063 t1 = a TRUNC_DIV_EXPR b;
5064 t2 = a TRUNC_MOD_EXPR b;
5065 and transforms it to the following sequence:
5066 complex_tmp = DIVMOD (a, b);
5067         t1 = REALPART_EXPR(complex_tmp);
5068         t2 = IMAGPART_EXPR(complex_tmp);
5069 For conditions enabling the transform see divmod_candidate_p().
5071 The pass has three parts:
5072 1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
5073 other trunc_div_expr and trunc_mod_expr stmts.
5074 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
5075 to stmts vector.
5076 3) Insert DIVMOD call just before top_stmt and update entries in
5077         stmts vector to use the return value of DIVMOD (REALPART_EXPR for div,
5078 IMAGPART_EXPR for mod). */
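 
 /* Illustrative only: a C-source sketch, with hypothetical names, of the
    pattern described above.  Assuming the target or its runtime provides a
    divmod implementation for the mode (see target_supports_divmod_p), the
    division and the modulo below are expected to share one DIVMOD call.
 
      void
      quot_rem (int a, int b, int *q, int *r)
      {
        *q = a / b;    // TRUNC_DIV_EXPR
        *r = a % b;    // TRUNC_MOD_EXPR
      }  */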
5080 static bool
5081 convert_to_divmod (gassign *stmt)
5083 if (stmt_can_throw_internal (cfun, stmt)
5084 || !divmod_candidate_p (stmt))
5085 return false;
5087 tree op1 = gimple_assign_rhs1 (stmt);
5088 tree op2 = gimple_assign_rhs2 (stmt);
5090 imm_use_iterator use_iter;
5091 gimple *use_stmt;
5092 auto_vec<gimple *> stmts;
5094 gimple *top_stmt = stmt;
5095 basic_block top_bb = gimple_bb (stmt);
5097 /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
5098      at least stmt and possibly other trunc_div/trunc_mod stmts
5099 having same operands as stmt. */
5101 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
5103 if (is_gimple_assign (use_stmt)
5104 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
5105 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
5106 && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
5107 && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
5109 if (stmt_can_throw_internal (cfun, use_stmt))
5110 continue;
5112 basic_block bb = gimple_bb (use_stmt);
5114 if (bb == top_bb)
5116 if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
5117 top_stmt = use_stmt;
5119 else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
5121 top_bb = bb;
5122 top_stmt = use_stmt;
5127 tree top_op1 = gimple_assign_rhs1 (top_stmt);
5128 tree top_op2 = gimple_assign_rhs2 (top_stmt);
5130 stmts.safe_push (top_stmt);
5131 bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
5133    /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
5134 to stmts vector. The 2nd loop will always add stmt to stmts vector, since
5135 gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
5136      2nd loop ends up adding at least a single trunc_mod_expr stmt.  */
5138 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
5140 if (is_gimple_assign (use_stmt)
5141 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
5142 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
5143 && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
5144 && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
5146 if (use_stmt == top_stmt
5147 || stmt_can_throw_internal (cfun, use_stmt)
5148 || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
5149 continue;
5151 stmts.safe_push (use_stmt);
5152 if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
5153 div_seen = true;
5157 if (!div_seen)
5158 return false;
5160 /* Part 3: Create libcall to internal fn DIVMOD:
5161 divmod_tmp = DIVMOD (op1, op2). */
5163 gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
5164 tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
5165 call_stmt, "divmod_tmp");
5166 gimple_call_set_lhs (call_stmt, res);
5167 /* We rejected throwing statements above. */
5168 gimple_call_set_nothrow (call_stmt, true);
5170 /* Insert the call before top_stmt. */
5171 gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
5172 gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
5174 widen_mul_stats.divmod_calls_inserted++;
5176 /* Update all statements in stmts vector:
5177 lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
5178 lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */
5180 for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
5182 tree new_rhs;
5184 switch (gimple_assign_rhs_code (use_stmt))
5186 case TRUNC_DIV_EXPR:
5187 new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
5188 break;
5190 case TRUNC_MOD_EXPR:
5191 new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
5192 break;
5194 default:
5195 gcc_unreachable ();
5198 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
5199 gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
5200 update_stmt (use_stmt);
5203 return true;
5206 /* Process a single gimple assignment STMT, which has a RSHIFT_EXPR as
5207 its rhs, and try to convert it into a MULT_HIGHPART_EXPR. The return
5208 value is true iff we converted the statement. */
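 
 /* Illustrative only: a C-source sketch, with a hypothetical name, of the
    shifted widening multiply this function targets.  Assuming the target has
    a [su]mul_highpart pattern for the mode, the widening multiply plus the
    right shift by the operand precision is expected to become a single
    MULT_HIGHPART_EXPR.
 
      unsigned int
      mul_high (unsigned int a, unsigned int b)
      {
        return ((unsigned long long) a * b) >> 32;  // high half of the product
      }  */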
5210 static bool
5211 convert_mult_to_highpart (gassign *stmt, gimple_stmt_iterator *gsi)
5213 tree lhs = gimple_assign_lhs (stmt);
5214 tree stype = TREE_TYPE (lhs);
5215 tree sarg0 = gimple_assign_rhs1 (stmt);
5216 tree sarg1 = gimple_assign_rhs2 (stmt);
5218 if (TREE_CODE (stype) != INTEGER_TYPE
5219 || TREE_CODE (sarg1) != INTEGER_CST
5220 || TREE_CODE (sarg0) != SSA_NAME
5221 || !tree_fits_uhwi_p (sarg1)
5222 || !has_single_use (sarg0))
5223 return false;
5225 gassign *def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (sarg0));
5226 if (!def)
5227 return false;
5229 enum tree_code mcode = gimple_assign_rhs_code (def);
5230 if (mcode == NOP_EXPR)
5232 tree tmp = gimple_assign_rhs1 (def);
5233 if (TREE_CODE (tmp) != SSA_NAME || !has_single_use (tmp))
5234 return false;
5235 def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (tmp));
5236 if (!def)
5237 return false;
5238 mcode = gimple_assign_rhs_code (def);
5241 if (mcode != WIDEN_MULT_EXPR
5242 || gimple_bb (def) != gimple_bb (stmt))
5243 return false;
5244 tree mtype = TREE_TYPE (gimple_assign_lhs (def));
5245 if (TREE_CODE (mtype) != INTEGER_TYPE
5246 || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype))
5247 return false;
5249 tree mop1 = gimple_assign_rhs1 (def);
5250 tree mop2 = gimple_assign_rhs2 (def);
5251 tree optype = TREE_TYPE (mop1);
5252 bool unsignedp = TYPE_UNSIGNED (optype);
5253 unsigned int prec = TYPE_PRECISION (optype);
5255 if (unsignedp != TYPE_UNSIGNED (mtype)
5256 || TYPE_PRECISION (mtype) != 2 * prec)
5257 return false;
5259 unsigned HOST_WIDE_INT bits = tree_to_uhwi (sarg1);
5260 if (bits < prec || bits >= 2 * prec)
5261 return false;
5263 /* For the time being, require operands to have the same sign. */
5264 if (unsignedp != TYPE_UNSIGNED (TREE_TYPE (mop2)))
5265 return false;
5267 machine_mode mode = TYPE_MODE (optype);
5268 optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
5269 if (optab_handler (tab, mode) == CODE_FOR_nothing)
5270 return false;
5272 location_t loc = gimple_location (stmt);
5273 tree highpart1 = build_and_insert_binop (gsi, loc, "highparttmp",
5274 MULT_HIGHPART_EXPR, mop1, mop2);
5275 tree highpart2 = highpart1;
5276 tree ntype = optype;
5278 if (TYPE_UNSIGNED (stype) != TYPE_UNSIGNED (optype))
5280 ntype = TYPE_UNSIGNED (stype) ? unsigned_type_for (optype)
5281 : signed_type_for (optype);
5282 highpart2 = build_and_insert_cast (gsi, loc, ntype, highpart1);
5284 if (bits > prec)
5285 highpart2 = build_and_insert_binop (gsi, loc, "highparttmp",
5286 RSHIFT_EXPR, highpart2,
5287 build_int_cst (ntype, bits - prec));
5289 gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, highpart2);
5290 gsi_replace (gsi, new_stmt, true);
5292 widen_mul_stats.highpart_mults_inserted++;
5293 return true;
5296 /* If target has spaceship<MODE>3 expander, pattern recognize
5297 <bb 2> [local count: 1073741824]:
5298 if (a_2(D) == b_3(D))
5299 goto <bb 6>; [34.00%]
5300 else
5301 goto <bb 3>; [66.00%]
5303 <bb 3> [local count: 708669601]:
5304 if (a_2(D) < b_3(D))
5305 goto <bb 6>; [1.04%]
5306 else
5307 goto <bb 4>; [98.96%]
5309 <bb 4> [local count: 701299439]:
5310 if (a_2(D) > b_3(D))
5311 goto <bb 5>; [48.89%]
5312 else
5313 goto <bb 6>; [51.11%]
5315 <bb 5> [local count: 342865295]:
5317 <bb 6> [local count: 1073741824]:
5318 and turn it into:
5319 <bb 2> [local count: 1073741824]:
5320 _1 = .SPACESHIP (a_2(D), b_3(D));
5321 if (_1 == 0)
5322 goto <bb 6>; [34.00%]
5323 else
5324 goto <bb 3>; [66.00%]
5326 <bb 3> [local count: 708669601]:
5327 if (_1 == -1)
5328 goto <bb 6>; [1.04%]
5329 else
5330 goto <bb 4>; [98.96%]
5332 <bb 4> [local count: 701299439]:
5333 if (_1 == 1)
5334 goto <bb 5>; [48.89%]
5335 else
5336 goto <bb 6>; [51.11%]
5338 <bb 5> [local count: 342865295]:
5340 <bb 6> [local count: 1073741824]:
5341 so that the backend can emit optimal comparison and
5342 conditional jump sequence. */
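 
 /* Illustrative only: a C++-source sketch, with a hypothetical name, that
    typically produces the comparison cascade shown above for floating-point
    operands; given a spaceship<MODE>3 expander, the three conditional jumps
    are expected to be rewritten to test the result of one .SPACESHIP call.
 
      #include <compare>
 
      std::partial_ordering
      cmp (double a, double b)
      {
        return a <=> b;  // lowered to ==, <, > tests on the same operands
      }  */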
5344 static void
5345 optimize_spaceship (gcond *stmt)
5347 enum tree_code code = gimple_cond_code (stmt);
5348 if (code != EQ_EXPR && code != NE_EXPR)
5349 return;
5350 tree arg1 = gimple_cond_lhs (stmt);
5351 tree arg2 = gimple_cond_rhs (stmt);
5352 if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1))
5353 || optab_handler (spaceship_optab,
5354 TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing
5355 || operand_equal_p (arg1, arg2, 0))
5356 return;
5358 basic_block bb0 = gimple_bb (stmt), bb1, bb2 = NULL;
5359 edge em1 = NULL, e1 = NULL, e2 = NULL;
5360 bb1 = EDGE_SUCC (bb0, 1)->dest;
5361 if (((EDGE_SUCC (bb0, 0)->flags & EDGE_TRUE_VALUE) != 0) ^ (code == EQ_EXPR))
5362 bb1 = EDGE_SUCC (bb0, 0)->dest;
5364 gcond *g = safe_dyn_cast <gcond *> (*gsi_last_bb (bb1));
5365 if (g == NULL
5366 || !single_pred_p (bb1)
5367 || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
5368 ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
5369 : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
5370 || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
5371 || !cond_only_block_p (bb1))
5372 return;
5374 enum tree_code ccode = (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
5375 ? LT_EXPR : GT_EXPR);
5376 switch (gimple_cond_code (g))
5378 case LT_EXPR:
5379 case LE_EXPR:
5380 break;
5381 case GT_EXPR:
5382 case GE_EXPR:
5383 ccode = ccode == LT_EXPR ? GT_EXPR : LT_EXPR;
5384 break;
5385 default:
5386 return;
5389 for (int i = 0; i < 2; ++i)
5391 /* With NaNs, </<=/>/>= are false, so we need to look for the
5392 third comparison on the false edge from whatever non-equality
5393 comparison the second comparison is. */
5394 if (HONOR_NANS (TREE_TYPE (arg1))
5395 && (EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0)
5396 continue;
5398 bb2 = EDGE_SUCC (bb1, i)->dest;
5399 g = safe_dyn_cast <gcond *> (*gsi_last_bb (bb2));
5400 if (g == NULL
5401 || !single_pred_p (bb2)
5402 || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
5403 ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
5404 : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
5405 || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
5406 || !cond_only_block_p (bb2)
5407 || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
5408 continue;
5410 enum tree_code ccode2
5411 = (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR);
5412 switch (gimple_cond_code (g))
5414 case LT_EXPR:
5415 case LE_EXPR:
5416 break;
5417 case GT_EXPR:
5418 case GE_EXPR:
5419 ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
5420 break;
5421 default:
5422 continue;
5424 if (HONOR_NANS (TREE_TYPE (arg1)) && ccode == ccode2)
5425 continue;
5427 if ((ccode == LT_EXPR)
5428 ^ ((EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0))
5430 em1 = EDGE_SUCC (bb1, 1 - i);
5431 e1 = EDGE_SUCC (bb2, 0);
5432 e2 = EDGE_SUCC (bb2, 1);
5433 if ((ccode2 == LT_EXPR) ^ ((e1->flags & EDGE_TRUE_VALUE) == 0))
5434 std::swap (e1, e2);
5436 else
5438 e1 = EDGE_SUCC (bb1, 1 - i);
5439 em1 = EDGE_SUCC (bb2, 0);
5440 e2 = EDGE_SUCC (bb2, 1);
5441 if ((ccode2 != LT_EXPR) ^ ((em1->flags & EDGE_TRUE_VALUE) == 0))
5442 std::swap (em1, e2);
5444 break;
5447 if (em1 == NULL)
5449 if ((ccode == LT_EXPR)
5450 ^ ((EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0))
5452 em1 = EDGE_SUCC (bb1, 1);
5453 e1 = EDGE_SUCC (bb1, 0);
5454 e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
5456 else
5458 em1 = EDGE_SUCC (bb1, 0);
5459 e1 = EDGE_SUCC (bb1, 1);
5460 e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
5464 gcall *gc = gimple_build_call_internal (IFN_SPACESHIP, 2, arg1, arg2);
5465 tree lhs = make_ssa_name (integer_type_node);
5466 gimple_call_set_lhs (gc, lhs);
5467 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5468 gsi_insert_before (&gsi, gc, GSI_SAME_STMT);
5470 gimple_cond_set_lhs (stmt, lhs);
5471 gimple_cond_set_rhs (stmt, integer_zero_node);
5472 update_stmt (stmt);
5474 gcond *cond = as_a <gcond *> (*gsi_last_bb (bb1));
5475 gimple_cond_set_lhs (cond, lhs);
5476 if (em1->src == bb1 && e2 != em1)
5478 gimple_cond_set_rhs (cond, integer_minus_one_node);
5479 gimple_cond_set_code (cond, (em1->flags & EDGE_TRUE_VALUE)
5480 ? EQ_EXPR : NE_EXPR);
5482 else
5484 gcc_assert (e1->src == bb1 && e2 != e1);
5485 gimple_cond_set_rhs (cond, integer_one_node);
5486 gimple_cond_set_code (cond, (e1->flags & EDGE_TRUE_VALUE)
5487 ? EQ_EXPR : NE_EXPR);
5489 update_stmt (cond);
5491 if (e2 != e1 && e2 != em1)
5493 cond = as_a <gcond *> (*gsi_last_bb (bb2));
5494 gimple_cond_set_lhs (cond, lhs);
5495 if (em1->src == bb2)
5496 gimple_cond_set_rhs (cond, integer_minus_one_node);
5497 else
5499 gcc_assert (e1->src == bb2);
5500 gimple_cond_set_rhs (cond, integer_one_node);
5502 gimple_cond_set_code (cond,
5503 (e2->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR);
5504 update_stmt (cond);
5507 wide_int wm1 = wi::minus_one (TYPE_PRECISION (integer_type_node));
5508 wide_int w2 = wi::two (TYPE_PRECISION (integer_type_node));
5509 value_range vr (TREE_TYPE (lhs), wm1, w2);
5510 set_range_info (lhs, vr);
5514 /* Find integer multiplications where the operands are extended from
5515 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
5516 or MULT_HIGHPART_EXPR where appropriate. */
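 
 /* Illustrative only: the canonical widening-multiply source pattern, with a
    hypothetical name, handled by this pass.  Assuming the target provides a
    widening multiply pattern for the mode pair, the two sign extensions plus
    the MULT_EXPR below are expected to become a single WIDEN_MULT_EXPR.
 
      long long
      widen_mul (int a, int b)
      {
        return (long long) a * b;  // operands extended from int
      }  */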
5518 namespace {
5520 const pass_data pass_data_optimize_widening_mul =
5522 GIMPLE_PASS, /* type */
5523 "widening_mul", /* name */
5524 OPTGROUP_NONE, /* optinfo_flags */
5525 TV_TREE_WIDEN_MUL, /* tv_id */
5526 PROP_ssa, /* properties_required */
5527 0, /* properties_provided */
5528 0, /* properties_destroyed */
5529 0, /* todo_flags_start */
5530 TODO_update_ssa, /* todo_flags_finish */
5533 class pass_optimize_widening_mul : public gimple_opt_pass
5535 public:
5536 pass_optimize_widening_mul (gcc::context *ctxt)
5537 : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
5540 /* opt_pass methods: */
5541 bool gate (function *) final override
5543 return flag_expensive_optimizations && optimize;
5546 unsigned int execute (function *) final override;
5548 }; // class pass_optimize_widening_mul
5550 /* Walker class to perform the transformation in reverse dominance order. */
5552 class math_opts_dom_walker : public dom_walker
5554 public:
5555 /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
5556      if walking modifies the CFG.  */
5558 math_opts_dom_walker (bool *cfg_changed_p)
5559 : dom_walker (CDI_DOMINATORS), m_last_result_set (),
5560 m_cfg_changed_p (cfg_changed_p) {}
5562 /* The actual actions performed in the walk. */
5564 void after_dom_children (basic_block) final override;
5566 /* Set of results of chains of multiply and add statement combinations that
5567 were not transformed into FMAs because of active deferring. */
5568 hash_set<tree> m_last_result_set;
5570 /* Pointer to a flag of the user that needs to be set if CFG has been
5571 modified. */
5572 bool *m_cfg_changed_p;
5575 void
5576 math_opts_dom_walker::after_dom_children (basic_block bb)
5578 gimple_stmt_iterator gsi;
5580 fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
5582 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
5584 gimple *stmt = gsi_stmt (gsi);
5585 enum tree_code code;
5587 if (is_gimple_assign (stmt))
5589 code = gimple_assign_rhs_code (stmt);
5590 switch (code)
5592 case MULT_EXPR:
5593 if (!convert_mult_to_widen (stmt, &gsi)
5594 && !convert_expand_mult_copysign (stmt, &gsi)
5595 && convert_mult_to_fma (stmt,
5596 gimple_assign_rhs1 (stmt),
5597 gimple_assign_rhs2 (stmt),
5598 &fma_state))
5600 gsi_remove (&gsi, true);
5601 release_defs (stmt);
5602 continue;
5604 match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
5605 break;
5607 case PLUS_EXPR:
5608 case MINUS_EXPR:
5609 if (!convert_plusminus_to_widen (&gsi, stmt, code))
5611 match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
5612 if (gsi_stmt (gsi) == stmt)
5613 match_uaddc_usubc (&gsi, stmt, code);
5615 break;
5617 case BIT_NOT_EXPR:
5618 if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
5619 continue;
5620 break;
5622 case TRUNC_MOD_EXPR:
5623 convert_to_divmod (as_a<gassign *> (stmt));
5624 break;
5626 case RSHIFT_EXPR:
5627 convert_mult_to_highpart (as_a<gassign *> (stmt), &gsi);
5628 break;
5630 case BIT_IOR_EXPR:
5631 case BIT_XOR_EXPR:
5632 match_uaddc_usubc (&gsi, stmt, code);
5633 break;
5635 default:;
5638 else if (is_gimple_call (stmt))
5640 switch (gimple_call_combined_fn (stmt))
5642 CASE_CFN_POW:
5643 if (gimple_call_lhs (stmt)
5644 && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
5645 && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
5646 &dconst2)
5647 && convert_mult_to_fma (stmt,
5648 gimple_call_arg (stmt, 0),
5649 gimple_call_arg (stmt, 0),
5650 &fma_state))
5652 unlink_stmt_vdef (stmt);
5653 if (gsi_remove (&gsi, true)
5654 && gimple_purge_dead_eh_edges (bb))
5655 *m_cfg_changed_p = true;
5656 release_defs (stmt);
5657 continue;
5659 break;
5661 case CFN_COND_MUL:
5662 if (convert_mult_to_fma (stmt,
5663 gimple_call_arg (stmt, 1),
5664 gimple_call_arg (stmt, 2),
5665 &fma_state,
5666 gimple_call_arg (stmt, 0)))
5669 gsi_remove (&gsi, true);
5670 release_defs (stmt);
5671 continue;
5673 break;
5675 case CFN_COND_LEN_MUL:
5676 if (convert_mult_to_fma (stmt,
5677 gimple_call_arg (stmt, 1),
5678 gimple_call_arg (stmt, 2),
5679 &fma_state,
5680 gimple_call_arg (stmt, 0),
5681 gimple_call_arg (stmt, 4),
5682 gimple_call_arg (stmt, 5)))
5685 gsi_remove (&gsi, true);
5686 release_defs (stmt);
5687 continue;
5689 break;
5691 case CFN_LAST:
5692 cancel_fma_deferring (&fma_state);
5693 break;
5695 default:
5696 break;
5699 else if (gimple_code (stmt) == GIMPLE_COND)
5700 optimize_spaceship (as_a <gcond *> (stmt));
5701 gsi_next (&gsi);
5703 if (fma_state.m_deferring_p
5704 && fma_state.m_initial_phi)
5706 gcc_checking_assert (fma_state.m_last_result);
5707 if (!last_fma_candidate_feeds_initial_phi (&fma_state,
5708 &m_last_result_set))
5709 cancel_fma_deferring (&fma_state);
5710 else
5711 m_last_result_set.add (fma_state.m_last_result);
5716 unsigned int
5717 pass_optimize_widening_mul::execute (function *fun)
5719 bool cfg_changed = false;
5721 memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
5722 calculate_dominance_info (CDI_DOMINATORS);
5723 renumber_gimple_stmt_uids (cfun);
5725 math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5727 statistics_counter_event (fun, "widening multiplications inserted",
5728 widen_mul_stats.widen_mults_inserted);
5729 statistics_counter_event (fun, "widening maccs inserted",
5730 widen_mul_stats.maccs_inserted);
5731 statistics_counter_event (fun, "fused multiply-adds inserted",
5732 widen_mul_stats.fmas_inserted);
5733 statistics_counter_event (fun, "divmod calls inserted",
5734 widen_mul_stats.divmod_calls_inserted);
5735 statistics_counter_event (fun, "highpart multiplications inserted",
5736 widen_mul_stats.highpart_mults_inserted);
5738 return cfg_changed ? TODO_cleanup_cfg : 0;
5741 } // anon namespace
5743 gimple_opt_pass *
5744 make_pass_optimize_widening_mul (gcc::context *ctxt)
5746 return new pass_optimize_widening_mul (ctxt);