gcc/tree-ssa-math-opts.c

   1 /* Global, SSA-based optimizations using mathematical identities.
   2    Copyright (C) 2005-2021 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* Currently, the only mini-pass in this file tries to CSE reciprocal
  21    operations.  These are common in sequences such as this one:
  22
  23         modulus = sqrt(x*x + y*y + z*z);
  24         x = x / modulus;
  25         y = y / modulus;
  26         z = z / modulus;
  27
  28    that can be optimized to
  29
  30         modulus = sqrt(x*x + y*y + z*z);
  31         rmodulus = 1.0 / modulus;
  32         x = x * rmodulus;
  33         y = y * rmodulus;
  34         z = z * rmodulus;
  35
  36    We do this for loop invariant divisors, and with this pass whenever
  37    we notice that a division has the same divisor multiple times.
  38
  39    Of course, like in PRE, we don't insert a division if a dominator
  40    already has one.  However, this cannot be done as an extension of
  41    PRE for several reasons.
  42
  43    First of all, with some experiments it was found out that the
  44    transformation is not always useful if there are only two divisions
  45    by the same divisor.  This is probably because modern processors
  46    can pipeline the divisions; on older, in-order processors it should
  47    still be effective to optimize two divisions by the same number.
  48    We make this a param, and it shall be called N in the remainder of
  49    this comment.
  50
  51    Second, if trapping math is active, we have less freedom on where
  52    to insert divisions: we can only do so in basic blocks that already
  53    contain one.  (If divisions don't trap, instead, we can insert
  54    divisions elsewhere, which will be in blocks that are common dominators
  55    of those that have the division).
  56
  57    We really don't want to compute the reciprocal unless a division will
  58    be found.  To do this, we won't insert the division in a basic block
  59    that has less than N divisions *post-dominating* it.
  60
  61    The algorithm constructs a subset of the dominator tree, holding the
  62    blocks containing the divisions and the common dominators to them,
  63    and walk it twice.  The first walk is in post-order, and it annotates
  64    each block with the number of divisions that post-dominate it: this
  65    gives information on where divisions can be inserted profitably.
  66    The second walk is in pre-order, and it inserts divisions as explained
  67    above, and replaces divisions by multiplications.
  68
  69    In the best case, the cost of the pass is O(n_statements).  In the
  70    worst-case, the cost is due to creating the dominator tree subset,
  71    with a cost of O(n_basic_blocks ^ 2); however this can only happen
  72    for n_statements / n_basic_blocks statements.  So, the amortized cost
  73    of creating the dominator tree subset is O(n_basic_blocks) and the
  74    worst-case cost of the pass is O(n_statements * n_basic_blocks).
  75
  76    More practically, the cost will be small because there are few
  77    divisions, and they tend to be in the same basic block, so insert_bb
  78    is called very few times.
  79
  80    If we did this using domwalk.c, an efficient implementation would have
  81    to work on all the variables in a single pass, because we could not
  82    work on just a subset of the dominator tree, as we do now, and the
  83    cost would also be something like O(n_statements * n_basic_blocks).
  84    The data structures would be more complex in order to work on all the
  85    variables in a single pass.  */
  86
  87 #include "config.h"
  88 #include "system.h"
  89 #include "coretypes.h"
  90 #include "backend.h"
  91 #include "target.h"
  92 #include "rtl.h"
  93 #include "tree.h"
  94 #include "gimple.h"
  95 #include "predict.h"
  96 #include "alloc-pool.h"
  97 #include "tree-pass.h"
  98 #include "ssa.h"
  99 #include "optabs-tree.h"
 100 #include "gimple-pretty-print.h"
 101 #include "alias.h"
 102 #include "fold-const.h"
 103 #include "gimple-fold.h"
 104 #include "gimple-iterator.h"
 105 #include "gimplify.h"
 106 #include "gimplify-me.h"
 107 #include "stor-layout.h"
 108 #include "tree-cfg.h"
 109 #include "tree-dfa.h"
 110 #include "tree-ssa.h"
 111 #include "builtins.h"
 112 #include "internal-fn.h"
 113 #include "case-cfn-macros.h"
 114 #include "optabs-libfuncs.h"
 115 #include "tree-eh.h"
 116 #include "targhooks.h"
 117 #include "domwalk.h"
 118 #include "tree-ssa-math-opts.h"
 119
 120 /* This structure represents one basic block that either computes a
 121    division, or is a common dominator for basic block that compute a
 122    division.  */
 123 struct occurrence {
 124   /* The basic block represented by this structure.  */
 125   basic_block bb = basic_block();
 126
 127   /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
 128      inserted in BB.  */
 129   tree recip_def = tree();
 130
 131   /* If non-NULL, the SSA_NAME holding the definition for a squared
 132      reciprocal inserted in BB.  */
 133   tree square_recip_def = tree();
 134
 135   /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
 136      was inserted in BB.  */
 137   gimple *recip_def_stmt = nullptr;
 138
 139   /* Pointer to a list of "struct occurrence"s for blocks dominated
 140      by BB.  */
 141   struct occurrence *children = nullptr;
 142
 143   /* Pointer to the next "struct occurrence"s in the list of blocks
 144      sharing a common dominator.  */
 145   struct occurrence *next = nullptr;
 146
 147   /* The number of divisions that are in BB before compute_merit.  The
 148      number of divisions that are in BB or post-dominate it after
 149      compute_merit.  */
 150   int num_divisions = 0;
 151
 152   /* True if the basic block has a division, false if it is a common
 153      dominator for basic blocks that do.  If it is false and trapping
 154      math is active, BB is not a candidate for inserting a reciprocal.  */
 155   bool bb_has_division = false;
 156
 157   /* Construct a struct occurrence for basic block BB, and whose
 158      children list is headed by CHILDREN.  */
 159   occurrence (basic_block bb, struct occurrence *children)
 160   : bb (bb), children (children)
 161   {
 162     bb->aux = this;
 163   }
 164
 165   /* Destroy a struct occurrence and remove it from its basic block.  */
 166   ~occurrence ()
 167   {
 168     bb->aux = nullptr;
 169   }
 170
 171   /* Allocate memory for a struct occurrence from OCC_POOL.  */
 172   static void* operator new (size_t);
 173
 174   /* Return memory for a struct occurrence to OCC_POOL.  */
 175   static void operator delete (void*, size_t);
 176 };
 177
 178 static struct
 179 {
 180   /* Number of 1.0/X ops inserted.  */
 181   int rdivs_inserted;
 182
 183   /* Number of 1.0/FUNC ops inserted.  */
 184   int rfuncs_inserted;
 185 } reciprocal_stats;
 186
 187 static struct
 188 {
 189   /* Number of cexpi calls inserted.  */
 190   int inserted;
 191
 192   /* Number of conversions removed.  */
 193   int conv_removed;
 194
 195 } sincos_stats;
 196
 197 static struct
 198 {
 199   /* Number of widening multiplication ops inserted.  */
 200   int widen_mults_inserted;
 201
 202   /* Number of integer multiply-and-accumulate ops inserted.  */
 203   int maccs_inserted;
 204
 205   /* Number of fp fused multiply-add ops inserted.  */
 206   int fmas_inserted;
 207
 208   /* Number of divmod calls inserted.  */
 209   int divmod_calls_inserted;
 210 } widen_mul_stats;
 211
 212 /* The instance of "struct occurrence" representing the highest
 213    interesting block in the dominator tree.  */
 214 static struct occurrence *occ_head;
 215
 216 /* Allocation pool for getting instances of "struct occurrence".  */
 217 static object_allocator<occurrence> *occ_pool;
 218
 219 void* occurrence::operator new (size_t n)
 220 {
 221   gcc_assert (n == sizeof(occurrence));
 222   return occ_pool->allocate_raw ();
 223 }
 224
 225 void occurrence::operator delete (void *occ, size_t n)
 226 {
 227   gcc_assert (n == sizeof(occurrence));
 228   occ_pool->remove_raw (occ);
 229 }
 230
 231 /* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
 232    list of "struct occurrence"s, one per basic block, having IDOM as
 233    their common dominator.
 234
 235    We try to insert NEW_OCC as deep as possible in the tree, and we also
 236    insert any other block that is a common dominator for BB and one
 237    block already in the tree.  */
 238
 239 static void
 240 insert_bb (struct occurrence *new_occ, basic_block idom,
 241            struct occurrence **p_head)
 242 {
 243   struct occurrence *occ, **p_occ;
 244
 245   for (p_occ = p_head; (occ = *p_occ) != NULL; )
 246     {
 247       basic_block bb = new_occ->bb, occ_bb = occ->bb;
 248       basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
 249       if (dom == bb)
 250         {
 251           /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
 252              from its list.  */
 253           *p_occ = occ->next;
 254           occ->next = new_occ->children;
 255           new_occ->children = occ;
 256
 257           /* Try the next block (it may as well be dominated by BB).  */
 258         }
 259
 260       else if (dom == occ_bb)
 261         {
 262           /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
 263           insert_bb (new_occ, dom, &occ->children);
 264           return;
 265         }
 266
 267       else if (dom != idom)
 268         {
 269           gcc_assert (!dom->aux);
 270
 271           /* There is a dominator between IDOM and BB, add it and make
 272              two children out of NEW_OCC and OCC.  First, remove OCC from
 273              its list.  */
 274           *p_occ = occ->next;
 275           new_occ->next = occ;
 276           occ->next = NULL;
 277
 278           /* None of the previous blocks has DOM as a dominator: if we tail
 279              recursed, we would reexamine them uselessly. Just switch BB with
 280              DOM, and go on looking for blocks dominated by DOM.  */
 281           new_occ = new occurrence (dom, new_occ);
 282         }
 283
 284       else
 285         {
 286           /* Nothing special, go on with the next element.  */
 287           p_occ = &occ->next;
 288         }
 289     }
 290
 291   /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
 292   new_occ->next = *p_head;
 293   *p_head = new_occ;
 294 }
 295
 296 /* Register that we found a division in BB.
 297    IMPORTANCE is a measure of how much weighting to give
 298    that division.  Use IMPORTANCE = 2 to register a single
 299    division.  If the division is going to be found multiple
 300    times use 1 (as it is with squares).  */
 301
 302 static inline void
 303 register_division_in (basic_block bb, int importance)
 304 {
 305   struct occurrence *occ;
 306
 307   occ = (struct occurrence *) bb->aux;
 308   if (!occ)
 309     {
 310       occ = new occurrence (bb, NULL);
 311       insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
 312     }
 313
 314   occ->bb_has_division = true;
 315   occ->num_divisions += importance;
 316 }
 317
 318
 319 /* Compute the number of divisions that postdominate each block in OCC and
 320    its children.  */
 321
 322 static void
 323 compute_merit (struct occurrence *occ)
 324 {
 325   struct occurrence *occ_child;
 326   basic_block dom = occ->bb;
 327
 328   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
 329     {
 330       basic_block bb;
 331       if (occ_child->children)
 332         compute_merit (occ_child);
 333
 334       if (flag_exceptions)
 335         bb = single_noncomplex_succ (dom);
 336       else
 337         bb = dom;
 338
 339       if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
 340         occ->num_divisions += occ_child->num_divisions;
 341     }
 342 }
 343
 344
 345 /* Return whether USE_STMT is a floating-point division by DEF.  */
 346 static inline bool
 347 is_division_by (gimple *use_stmt, tree def)
 348 {
 349   return is_gimple_assign (use_stmt)
 350          && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
 351          && gimple_assign_rhs2 (use_stmt) == def
 352          /* Do not recognize x / x as valid division, as we are getting
 353             confused later by replacing all immediate uses x in such
 354             a stmt.  */
 355          && gimple_assign_rhs1 (use_stmt) != def
 356          && !stmt_can_throw_internal (cfun, use_stmt);
 357 }
 358
 359 /* Return TRUE if USE_STMT is a multiplication of DEF by A.  */
 360 static inline bool
 361 is_mult_by (gimple *use_stmt, tree def, tree a)
 362 {
 363   if (gimple_code (use_stmt) == GIMPLE_ASSIGN
 364       && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
 365     {
 366       tree op0 = gimple_assign_rhs1 (use_stmt);
 367       tree op1 = gimple_assign_rhs2 (use_stmt);
 368
 369       return (op0 == def && op1 == a)
 370               || (op0 == a && op1 == def);
 371     }
 372   return 0;
 373 }
 374
 375 /* Return whether USE_STMT is DEF * DEF.  */
 376 static inline bool
 377 is_square_of (gimple *use_stmt, tree def)
 378 {
 379   return is_mult_by (use_stmt, def, def);
 380 }
 381
 382 /* Return whether USE_STMT is a floating-point division by
 383    DEF * DEF.  */
 384 static inline bool
 385 is_division_by_square (gimple *use_stmt, tree def)
 386 {
 387   if (gimple_code (use_stmt) == GIMPLE_ASSIGN
 388       && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
 389       && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
 390       && !stmt_can_throw_internal (cfun, use_stmt))
 391     {
 392       tree denominator = gimple_assign_rhs2 (use_stmt);
 393       if (TREE_CODE (denominator) == SSA_NAME)
 394         return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
 395     }
 396   return 0;
 397 }
 398
 399 /* Walk the subset of the dominator tree rooted at OCC, setting the
 400    RECIP_DEF field to a definition of 1.0 / DEF that can be used in
 401    the given basic block.  The field may be left NULL, of course,
 402    if it is not possible or profitable to do the optimization.
 403
 404    DEF_BSI is an iterator pointing at the statement defining DEF.
 405    If RECIP_DEF is set, a dominator already has a computation that can
 406    be used.
 407
 408    If should_insert_square_recip is set, then this also inserts
 409    the square of the reciprocal immediately after the definition
 410    of the reciprocal.  */
 411
 412 static void
 413 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
 414                     tree def, tree recip_def, tree square_recip_def,
 415                     int should_insert_square_recip, int threshold)
 416 {
 417   tree type;
 418   gassign *new_stmt, *new_square_stmt;
 419   gimple_stmt_iterator gsi;
 420   struct occurrence *occ_child;
 421
 422   if (!recip_def
 423       && (occ->bb_has_division || !flag_trapping_math)
 424       /* Divide by two as all divisions are counted twice in
 425          the costing loop.  */
 426       && occ->num_divisions / 2 >= threshold)
 427     {
 428       /* Make a variable with the replacement and substitute it.  */
 429       type = TREE_TYPE (def);
 430       recip_def = create_tmp_reg (type, "reciptmp");
 431       new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
 432                                       build_one_cst (type), def);
 433
 434       if (should_insert_square_recip)
 435         {
 436           square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
 437           new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
 438                                                  recip_def, recip_def);
 439         }
 440
 441       if (occ->bb_has_division)
 442         {
 443           /* Case 1: insert before an existing division.  */
 444           gsi = gsi_after_labels (occ->bb);
 445           while (!gsi_end_p (gsi)
 446                  && (!is_division_by (gsi_stmt (gsi), def))
 447                  && (!is_division_by_square (gsi_stmt (gsi), def)))
 448             gsi_next (&gsi);
 449
 450           gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
 451           if (should_insert_square_recip)
 452             gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
 453         }
 454       else if (def_gsi && occ->bb == gsi_bb (*def_gsi))
 455         {
 456           /* Case 2: insert right after the definition.  Note that this will
 457              never happen if the definition statement can throw, because in
 458              that case the sole successor of the statement's basic block will
 459              dominate all the uses as well.  */
 460           gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
 461           if (should_insert_square_recip)
 462             gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
 463         }
 464       else
 465         {
 466           /* Case 3: insert in a basic block not containing defs/uses.  */
 467           gsi = gsi_after_labels (occ->bb);
 468           gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
 469           if (should_insert_square_recip)
 470             gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
 471         }
 472
 473       reciprocal_stats.rdivs_inserted++;
 474
 475       occ->recip_def_stmt = new_stmt;
 476     }
 477
 478   occ->recip_def = recip_def;
 479   occ->square_recip_def = square_recip_def;
 480   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
 481     insert_reciprocals (def_gsi, occ_child, def, recip_def,
 482                         square_recip_def, should_insert_square_recip,
 483                         threshold);
 484 }
 485
 486 /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
 487    Take as argument the use for (x * x).  */
 488 static inline void
 489 replace_reciprocal_squares (use_operand_p use_p)
 490 {
 491   gimple *use_stmt = USE_STMT (use_p);
 492   basic_block bb = gimple_bb (use_stmt);
 493   struct occurrence *occ = (struct occurrence *) bb->aux;
 494
 495   if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
 496       && occ->recip_def)
 497     {
 498       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
 499       gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
 500       gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
 501       SET_USE (use_p, occ->square_recip_def);
 502       fold_stmt_inplace (&gsi);
 503       update_stmt (use_stmt);
 504     }
 505 }
 506
 507
 508 /* Replace the division at USE_P with a multiplication by the reciprocal, if
 509    possible.  */
 510
 511 static inline void
 512 replace_reciprocal (use_operand_p use_p)
 513 {
 514   gimple *use_stmt = USE_STMT (use_p);
 515   basic_block bb = gimple_bb (use_stmt);
 516   struct occurrence *occ = (struct occurrence *) bb->aux;
 517
 518   if (optimize_bb_for_speed_p (bb)
 519       && occ->recip_def && use_stmt != occ->recip_def_stmt)
 520     {
 521       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
 522       gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
 523       SET_USE (use_p, occ->recip_def);
 524       fold_stmt_inplace (&gsi);
 525       update_stmt (use_stmt);
 526     }
 527 }
 528
 529
 530 /* Free OCC and return one more "struct occurrence" to be freed.  */
 531
 532 static struct occurrence *
 533 free_bb (struct occurrence *occ)
 534 {
 535   struct occurrence *child, *next;
 536
 537   /* First get the two pointers hanging off OCC.  */
 538   next = occ->next;
 539   child = occ->children;
 540   delete occ;
 541
 542   /* Now ensure that we don't recurse unless it is necessary.  */
 543   if (!child)
 544     return next;
 545   else
 546     {
 547       while (next)
 548         next = free_bb (next);
 549
 550       return child;
 551     }
 552 }
 553
 554 /* Transform sequences like
 555    t = sqrt (a)
 556    x = 1.0 / t;
 557    r1 = x * x;
 558    r2 = a * x;
 559    into:
 560    t = sqrt (a)
 561    r1 = 1.0 / a;
 562    r2 = t;
 563    x = r1 * r2;
 564    depending on the uses of x, r1, r2.  This removes one multiplication and
 565    allows the sqrt and division operations to execute in parallel.
 566    DEF_GSI is the gsi of the initial division by sqrt that defines
 567    DEF (x in the example above).  */
 568
 569 static void
 570 optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
 571 {
 572   gimple *use_stmt;
 573   imm_use_iterator use_iter;
 574   gimple *stmt = gsi_stmt (*def_gsi);
 575   tree x = def;
 576   tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
 577   tree div_rhs1 = gimple_assign_rhs1 (stmt);
 578
 579   if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
 580       || TREE_CODE (div_rhs1) != REAL_CST
 581       || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
 582     return;
 583
 584   gcall *sqrt_stmt
 585     = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
 586
 587   if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
 588     return;
 589
 590   switch (gimple_call_combined_fn (sqrt_stmt))
 591     {
 592     CASE_CFN_SQRT:
 593     CASE_CFN_SQRT_FN:
 594       break;
 595
 596     default:
 597       return;
 598     }
 599   tree a = gimple_call_arg (sqrt_stmt, 0);
 600
 601   /* We have 'a' and 'x'.  Now analyze the uses of 'x'.  */
 602
 603   /* Statements that use x in x * x.  */
 604   auto_vec<gimple *> sqr_stmts;
 605   /* Statements that use x in a * x.  */
 606   auto_vec<gimple *> mult_stmts;
 607   bool has_other_use = false;
 608   bool mult_on_main_path = false;
 609
 610   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
 611     {
 612       if (is_gimple_debug (use_stmt))
 613         continue;
 614       if (is_square_of (use_stmt, x))
 615         {
 616           sqr_stmts.safe_push (use_stmt);
 617           if (gimple_bb (use_stmt) == gimple_bb (stmt))
 618             mult_on_main_path = true;
 619         }
 620       else if (is_mult_by (use_stmt, x, a))
 621         {
 622           mult_stmts.safe_push (use_stmt);
 623           if (gimple_bb (use_stmt) == gimple_bb (stmt))
 624             mult_on_main_path = true;
 625         }
 626       else
 627         has_other_use = true;
 628     }
 629
 630   /* In the x * x and a * x cases we just rewire stmt operands or
 631      remove multiplications.  In the has_other_use case we introduce
 632      a multiplication so make sure we don't introduce a multiplication
 633      on a path where there was none.  */
 634   if (has_other_use && !mult_on_main_path)
 635     return;
 636
 637   if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
 638     return;
 639
 640   /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
 641      to be able to compose it from the sqr and mult cases.  */
 642   if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
 643     return;
 644
 645   if (dump_file)
 646     {
 647       fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
 648       print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
 649       print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
 650       fprintf (dump_file, "\n");
 651     }
 652
 653   bool delete_div = !has_other_use;
 654   tree sqr_ssa_name = NULL_TREE;
 655   if (!sqr_stmts.is_empty ())
 656     {
 657       /* r1 = x * x.  Transform the original
 658          x = 1.0 / t
 659          into
 660          tmp1 = 1.0 / a
 661          r1 = tmp1.  */
 662
 663       sqr_ssa_name
 664         = make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");
 665
 666       if (dump_file)
 667         {
 668           fprintf (dump_file, "Replacing original division\n");
 669           print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
 670           fprintf (dump_file, "with new division\n");
 671         }
 672       stmt
 673         = gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
 674                                gimple_assign_rhs1 (stmt), a);
 675       gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
 676       gsi_remove (def_gsi, true);
 677       *def_gsi = gsi_for_stmt (stmt);
 678       fold_stmt_inplace (def_gsi);
 679       update_stmt (stmt);
 680
 681       if (dump_file)
 682         print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
 683
 684       delete_div = false;
 685       gimple *sqr_stmt;
 686       unsigned int i;
 687       FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
 688         {
 689           gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
 690           gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
 691           update_stmt (sqr_stmt);
 692         }
 693     }
 694   if (!mult_stmts.is_empty ())
 695     {
 696       /* r2 = a * x.  Transform this into:
 697          r2 = t (The original sqrt (a)).  */
 698       unsigned int i;
 699       gimple *mult_stmt = NULL;
 700       FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
 701         {
 702           gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);
 703
 704           if (dump_file)
 705             {
 706               fprintf (dump_file, "Replacing squaring multiplication\n");
 707               print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
 708               fprintf (dump_file, "with assignment\n");
 709             }
 710           gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
 711           fold_stmt_inplace (&gsi2);
 712           update_stmt (mult_stmt);
 713           if (dump_file)
 714             print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
 715       }
 716     }
 717
 718   if (has_other_use)
 719     {
 720       /* Using the two temporaries tmp1, tmp2 from above
 721          the original x is now:
 722          x = tmp1 * tmp2.  */
 723       gcc_assert (orig_sqrt_ssa_name);
 724       gcc_assert (sqr_ssa_name);
 725
 726       gimple *new_stmt
 727         = gimple_build_assign (x, MULT_EXPR,
 728                                orig_sqrt_ssa_name, sqr_ssa_name);
 729       gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
 730       update_stmt (stmt);
 731     }
 732   else if (delete_div)
 733     {
 734       /* Remove the original division.  */
 735       gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
 736       gsi_remove (&gsi2, true);
 737       release_defs (stmt);
 738     }
 739   else
 740     release_ssa_name (x);
 741 }
 742
 743 /* Look for floating-point divisions among DEF's uses, and try to
 744    replace them by multiplications with the reciprocal.  Add
 745    as many statements computing the reciprocal as needed.
 746
 747    DEF must be a GIMPLE register of a floating-point type.  */
 748
 749 static void
 750 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
 751 {
 752   use_operand_p use_p, square_use_p;
 753   imm_use_iterator use_iter, square_use_iter;
 754   tree square_def;
 755   struct occurrence *occ;
 756   int count = 0;
 757   int threshold;
 758   int square_recip_count = 0;
 759   int sqrt_recip_count = 0;
 760
 761   gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
 762   threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
 763
 764   /* If DEF is a square (x * x), count the number of divisions by x.
 765      If there are more divisions by x than by (DEF * DEF), prefer to optimize
 766      the reciprocal of x instead of DEF.  This improves cases like:
 767        def = x * x
 768        t0 = a / def
 769        t1 = b / def
 770        t2 = c / x
 771      Reciprocal optimization of x results in 1 division rather than 2 or 3.  */
 772   gimple *def_stmt = SSA_NAME_DEF_STMT (def);
 773
 774   if (is_gimple_assign (def_stmt)
 775       && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
 776       && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
 777       && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
 778     {
 779       tree op0 = gimple_assign_rhs1 (def_stmt);
 780
 781       FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
 782         {
 783           gimple *use_stmt = USE_STMT (use_p);
 784           if (is_division_by (use_stmt, op0))
 785             sqrt_recip_count++;
 786         }
 787     }
 788
 789   FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
 790     {
 791       gimple *use_stmt = USE_STMT (use_p);
 792       if (is_division_by (use_stmt, def))
 793         {
 794           register_division_in (gimple_bb (use_stmt), 2);
 795           count++;
 796         }
 797
 798       if (is_square_of (use_stmt, def))
 799         {
 800           square_def = gimple_assign_lhs (use_stmt);
 801           FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
 802             {
 803               gimple *square_use_stmt = USE_STMT (square_use_p);
 804               if (is_division_by (square_use_stmt, square_def))
 805                 {
 806                   /* This is executed twice for each division by a square.  */
 807                   register_division_in (gimple_bb (square_use_stmt), 1);
 808                   square_recip_count++;
 809                 }
 810             }
 811         }
 812     }
 813
 814   /* Square reciprocals were counted twice above.  */
 815   square_recip_count /= 2;
 816
 817   /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x).  */
 818   if (sqrt_recip_count > square_recip_count)
 819     goto out;
 820
 821   /* Do the expensive part only if we can hope to optimize something.  */
 822   if (count + square_recip_count >= threshold && count >= 1)
 823     {
 824       gimple *use_stmt;
 825       for (occ = occ_head; occ; occ = occ->next)
 826         {
 827           compute_merit (occ);
 828           insert_reciprocals (def_gsi, occ, def, NULL, NULL,
 829                               square_recip_count, threshold);
 830         }
 831
 832       FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
 833         {
 834           if (is_division_by (use_stmt, def))
 835             {
 836               FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
 837                 replace_reciprocal (use_p);
 838             }
 839           else if (square_recip_count > 0 && is_square_of (use_stmt, def))
 840             {
 841               FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
 842                 {
 843                   /* Find all uses of the square that are divisions and
 844                    * replace them by multiplications with the inverse.  */
 845                   imm_use_iterator square_iterator;
 846                   gimple *powmult_use_stmt = USE_STMT (use_p);
 847                   tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
 848
 849                   FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
 850                                          square_iterator, powmult_def_name)
 851                     FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
 852                       {
 853                         gimple *powmult_use_stmt = USE_STMT (square_use_p);
 854                         if (is_division_by (powmult_use_stmt, powmult_def_name))
 855                           replace_reciprocal_squares (square_use_p);
 856                       }
 857                 }
 858             }
 859         }
 860     }
 861
 862 out:
 863   for (occ = occ_head; occ; )
 864     occ = free_bb (occ);
 865
 866   occ_head = NULL;
 867 }
 868
 869 /* Return an internal function that implements the reciprocal of CALL,
 870    or IFN_LAST if there is no such function that the target supports.  */
 871
 872 internal_fn
 873 internal_fn_reciprocal (gcall *call)
 874 {
 875   internal_fn ifn;
 876
 877   switch (gimple_call_combined_fn (call))
 878     {
 879     CASE_CFN_SQRT:
 880     CASE_CFN_SQRT_FN:
 881       ifn = IFN_RSQRT;
 882       break;
 883
 884     default:
 885       return IFN_LAST;
 886     }
 887
 888   tree_pair types = direct_internal_fn_types (ifn, call);
 889   if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
 890     return IFN_LAST;
 891
 892   return ifn;
 893 }
 894
 895 /* Go through all the floating-point SSA_NAMEs, and call
 896    execute_cse_reciprocals_1 on each of them.  */
 897 namespace {
 898
 899 const pass_data pass_data_cse_reciprocals =
 900 {
 901   GIMPLE_PASS, /* type */
 902   "recip", /* name */
 903   OPTGROUP_NONE, /* optinfo_flags */
 904   TV_TREE_RECIP, /* tv_id */
 905   PROP_ssa, /* properties_required */
 906   0, /* properties_provided */
 907   0, /* properties_destroyed */
 908   0, /* todo_flags_start */
 909   TODO_update_ssa, /* todo_flags_finish */
 910 };
 911
 912 class pass_cse_reciprocals : public gimple_opt_pass
 913 {
 914 public:
 915   pass_cse_reciprocals (gcc::context *ctxt)
 916     : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
 917   {}
 918
 919   /* opt_pass methods: */
 920   virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
 921   virtual unsigned int execute (function *);
 922
 923 }; // class pass_cse_reciprocals
 924
 925 unsigned int
 926 pass_cse_reciprocals::execute (function *fun)
 927 {
 928   basic_block bb;
 929   tree arg;
 930
 931   occ_pool = new object_allocator<occurrence> ("dominators for recip");
 932
 933   memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
 934   calculate_dominance_info (CDI_DOMINATORS);
 935   calculate_dominance_info (CDI_POST_DOMINATORS);
 936
 937   if (flag_checking)
 938     FOR_EACH_BB_FN (bb, fun)
 939       gcc_assert (!bb->aux);
 940
 941   for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
 942     if (FLOAT_TYPE_P (TREE_TYPE (arg))
 943         && is_gimple_reg (arg))
 944       {
 945         tree name = ssa_default_def (fun, arg);
 946         if (name)
 947           execute_cse_reciprocals_1 (NULL, name);
 948       }
 949
 950   FOR_EACH_BB_FN (bb, fun)
 951     {
 952       tree def;
 953
 954       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
 955            gsi_next (&gsi))
 956         {
 957           gphi *phi = gsi.phi ();
 958           def = PHI_RESULT (phi);
 959           if (! virtual_operand_p (def)
 960               && FLOAT_TYPE_P (TREE_TYPE (def)))
 961             execute_cse_reciprocals_1 (NULL, def);
 962         }
 963
 964       for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
 965            gsi_next (&gsi))
 966         {
 967           gimple *stmt = gsi_stmt (gsi);
 968
 969           if (gimple_has_lhs (stmt)
 970               && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
 971               && FLOAT_TYPE_P (TREE_TYPE (def))
 972               && TREE_CODE (def) == SSA_NAME)
 973             {
 974               execute_cse_reciprocals_1 (&gsi, def);
 975               stmt = gsi_stmt (gsi);
 976               if (flag_unsafe_math_optimizations
 977                   && is_gimple_assign (stmt)
 978                   && gimple_assign_lhs (stmt) == def
 979                   && !stmt_can_throw_internal (cfun, stmt)
 980                   && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
 981                 optimize_recip_sqrt (&gsi, def);
 982             }
 983         }
 984
 985       if (optimize_bb_for_size_p (bb))
 986         continue;
 987
 988       /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
 989       for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
 990            gsi_next (&gsi))
 991         {
 992           gimple *stmt = gsi_stmt (gsi);
 993
 994           if (is_gimple_assign (stmt)
 995               && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
 996             {
 997               tree arg1 = gimple_assign_rhs2 (stmt);
 998               gimple *stmt1;
 999
1000               if (TREE_CODE (arg1) != SSA_NAME)
1001                 continue;
1002
1003               stmt1 = SSA_NAME_DEF_STMT (arg1);
1004
1005               if (is_gimple_call (stmt1)
1006                   && gimple_call_lhs (stmt1))
1007                 {
1008                   bool fail;
1009                   imm_use_iterator ui;
1010                   use_operand_p use_p;
1011                   tree fndecl = NULL_TREE;
1012
1013                   gcall *call = as_a <gcall *> (stmt1);
1014                   internal_fn ifn = internal_fn_reciprocal (call);
1015                   if (ifn == IFN_LAST)
1016                     {
1017                       fndecl = gimple_call_fndecl (call);
1018                       if (!fndecl
1019                           || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
1020                         continue;
1021                       fndecl = targetm.builtin_reciprocal (fndecl);
1022                       if (!fndecl)
1023                         continue;
1024                     }
1025
1026                   /* Check that all uses of the SSA name are divisions,
1027                      otherwise replacing the defining statement will do
1028                      the wrong thing.  */
1029                   fail = false;
1030                   FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
1031                     {
1032                       gimple *stmt2 = USE_STMT (use_p);
1033                       if (is_gimple_debug (stmt2))
1034                         continue;
1035                       if (!is_gimple_assign (stmt2)
1036                           || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
1037                           || gimple_assign_rhs1 (stmt2) == arg1
1038                           || gimple_assign_rhs2 (stmt2) != arg1)
1039                         {
1040                           fail = true;
1041                           break;
1042                         }
1043                     }
1044                   if (fail)
1045                     continue;
1046
1047                   gimple_replace_ssa_lhs (call, arg1);
1048                   if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
1049                     {
1050                       auto_vec<tree, 4> args;
1051                       for (unsigned int i = 0;
1052                            i < gimple_call_num_args (call); i++)
1053                         args.safe_push (gimple_call_arg (call, i));
1054                       gcall *stmt2;
1055                       if (ifn == IFN_LAST)
1056                         stmt2 = gimple_build_call_vec (fndecl, args);
1057                       else
1058                         stmt2 = gimple_build_call_internal_vec (ifn, args);
1059                       gimple_call_set_lhs (stmt2, arg1);
1060                       gimple_move_vops (stmt2, call);
1061                       gimple_call_set_nothrow (stmt2,
1062                                                gimple_call_nothrow_p (call));
1063                       gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
1064                       gsi_replace (&gsi2, stmt2, true);
1065                     }
1066                   else
1067                     {
1068                       if (ifn == IFN_LAST)
1069                         gimple_call_set_fndecl (call, fndecl);
1070                       else
1071                         gimple_call_set_internal_fn (call, ifn);
1072                       update_stmt (call);
1073                     }
1074                   reciprocal_stats.rfuncs_inserted++;
1075
1076                   FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
1077                     {
1078                       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1079                       gimple_assign_set_rhs_code (stmt, MULT_EXPR);
1080                       fold_stmt_inplace (&gsi);
1081                       update_stmt (stmt);
1082                     }
1083                 }
1084             }
1085         }
1086     }
1087
1088   statistics_counter_event (fun, "reciprocal divs inserted",
1089                             reciprocal_stats.rdivs_inserted);
1090   statistics_counter_event (fun, "reciprocal functions inserted",
1091                             reciprocal_stats.rfuncs_inserted);
1092
1093   free_dominance_info (CDI_DOMINATORS);
1094   free_dominance_info (CDI_POST_DOMINATORS);
1095   delete occ_pool;
1096   return 0;
1097 }
1098
1099 } // anon namespace
1100
1101 gimple_opt_pass *
1102 make_pass_cse_reciprocals (gcc::context *ctxt)
1103 {
1104   return new pass_cse_reciprocals (ctxt);
1105 }
1106
1107 /* If NAME is the result of a type conversion, look for other
1108    equivalent dominating or dominated conversions, and replace all
1109    uses with the earliest dominating name, removing the redundant
1110    conversions.  Return the prevailing name.  */
1111
1112 static tree
1113 execute_cse_conv_1 (tree name)
1114 {
1115   if (SSA_NAME_IS_DEFAULT_DEF (name)
1116       || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1117     return name;
1118
1119   gimple *def_stmt = SSA_NAME_DEF_STMT (name);
1120
1121   if (!gimple_assign_cast_p (def_stmt))
1122     return name;
1123
1124   tree src = gimple_assign_rhs1 (def_stmt);
1125
1126   if (TREE_CODE (src) != SSA_NAME)
1127     return name;
1128
1129   imm_use_iterator use_iter;
1130   gimple *use_stmt;
1131
1132   /* Find the earliest dominating def.    */
1133   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1134     {
1135       if (use_stmt == def_stmt
1136           || !gimple_assign_cast_p (use_stmt))
1137         continue;
1138
1139       tree lhs = gimple_assign_lhs (use_stmt);
1140
1141       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1142           || (gimple_assign_rhs1 (use_stmt)
1143               != gimple_assign_rhs1 (def_stmt))
1144           || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1145         continue;
1146
1147       bool use_dominates;
1148       if (gimple_bb (def_stmt) == gimple_bb (use_stmt))
1149         {
1150           gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1151           while (!gsi_end_p (gsi) && gsi_stmt (gsi) != def_stmt)
1152             gsi_next (&gsi);
1153           use_dominates = !gsi_end_p (gsi);
1154         }
1155       else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
1156                                gimple_bb (def_stmt)))
1157         use_dominates = false;
1158       else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (def_stmt),
1159                                gimple_bb (use_stmt)))
1160         use_dominates = true;
1161       else
1162         continue;
1163
1164       if (use_dominates)
1165         {
1166           std::swap (name, lhs);
1167           std::swap (def_stmt, use_stmt);
1168         }
1169     }
1170
1171   /* Now go through all uses of SRC again, replacing the equivalent
1172      dominated conversions.  We may replace defs that were not
1173      dominated by the then-prevailing defs when we first visited
1174      them.  */
1175   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1176     {
1177       if (use_stmt == def_stmt
1178           || !gimple_assign_cast_p (use_stmt))
1179         continue;
1180
1181       tree lhs = gimple_assign_lhs (use_stmt);
1182
1183       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1184           || (gimple_assign_rhs1 (use_stmt)
1185               != gimple_assign_rhs1 (def_stmt))
1186           || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1187         continue;
1188
1189       if (gimple_bb (def_stmt) == gimple_bb (use_stmt)
1190           || dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
1191                              gimple_bb (def_stmt)))
1192         {
1193           sincos_stats.conv_removed++;
1194
1195           gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1196           replace_uses_by (lhs, name);
1197           gsi_remove (&gsi, true);
1198         }
1199     }
1200
1201   return name;
1202 }
1203
1204 /* Records an occurrence at statement USE_STMT in the vector of trees
1205    STMTS if it is dominated by *TOP_BB or dominates it or this basic block
1206    is not yet initialized.  Returns true if the occurrence was pushed on
1207    the vector.  Adjusts *TOP_BB to be the basic block dominating all
1208    statements in the vector.  */
1209
1210 static bool
1211 maybe_record_sincos (vec<gimple *> *stmts,
1212                      basic_block *top_bb, gimple *use_stmt)
1213 {
1214   basic_block use_bb = gimple_bb (use_stmt);
1215   if (*top_bb
1216       && (*top_bb == use_bb
1217           || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
1218     stmts->safe_push (use_stmt);
1219   else if (!*top_bb
1220            || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
1221     {
1222       stmts->safe_push (use_stmt);
1223       *top_bb = use_bb;
1224     }
1225   else
1226     return false;
1227
1228   return true;
1229 }
1230
1231 /* Look for sin, cos and cexpi calls with the same argument NAME and
1232    create a single call to cexpi CSEing the result in this case.
1233    We first walk over all immediate uses of the argument collecting
1234    statements that we can CSE in a vector and in a second pass replace
1235    the statement rhs with a REALPART or IMAGPART expression on the
1236    result of the cexpi call we insert before the use statement that
1237    dominates all other candidates.  */
1238
1239 static bool
1240 execute_cse_sincos_1 (tree name)
1241 {
1242   gimple_stmt_iterator gsi;
1243   imm_use_iterator use_iter;
1244   tree fndecl, res, type = NULL_TREE;
1245   gimple *def_stmt, *use_stmt, *stmt;
1246   int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
1247   auto_vec<gimple *> stmts;
1248   basic_block top_bb = NULL;
1249   int i;
1250   bool cfg_changed = false;
1251
1252   name = execute_cse_conv_1 (name);
1253
1254   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
1255     {
1256       if (gimple_code (use_stmt) != GIMPLE_CALL
1257           || !gimple_call_lhs (use_stmt))
1258         continue;
1259
1260       switch (gimple_call_combined_fn (use_stmt))
1261         {
1262         CASE_CFN_COS:
1263           seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1264           break;
1265
1266         CASE_CFN_SIN:
1267           seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1268           break;
1269
1270         CASE_CFN_CEXPI:
1271           seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1272           break;
1273
1274         default:;
1275           continue;
1276         }
1277
1278       tree t = mathfn_built_in_type (gimple_call_combined_fn (use_stmt));
1279       if (!type)
1280         {
1281           type = t;
1282           t = TREE_TYPE (name);
1283         }
1284       /* This checks that NAME has the right type in the first round,
1285          and, in subsequent rounds, that the built_in type is the same
1286          type, or a compatible type.  */
1287       if (type != t && !types_compatible_p (type, t))
1288         return false;
1289     }
1290   if (seen_cos + seen_sin + seen_cexpi <= 1)
1291     return false;
1292
1293   /* Simply insert cexpi at the beginning of top_bb but not earlier than
1294      the name def statement.  */
1295   fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
1296   if (!fndecl)
1297     return false;
1298   stmt = gimple_build_call (fndecl, 1, name);
1299   res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
1300   gimple_call_set_lhs (stmt, res);
1301
1302   def_stmt = SSA_NAME_DEF_STMT (name);
1303   if (!SSA_NAME_IS_DEFAULT_DEF (name)
1304       && gimple_code (def_stmt) != GIMPLE_PHI
1305       && gimple_bb (def_stmt) == top_bb)
1306     {
1307       gsi = gsi_for_stmt (def_stmt);
1308       gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1309     }
1310   else
1311     {
1312       gsi = gsi_after_labels (top_bb);
1313       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1314     }
1315   sincos_stats.inserted++;
1316
1317   /* And adjust the recorded old call sites.  */
1318   for (i = 0; stmts.iterate (i, &use_stmt); ++i)
1319     {
1320       tree rhs = NULL;
1321
1322       switch (gimple_call_combined_fn (use_stmt))
1323         {
1324         CASE_CFN_COS:
1325           rhs = fold_build1 (REALPART_EXPR, type, res);
1326           break;
1327
1328         CASE_CFN_SIN:
1329           rhs = fold_build1 (IMAGPART_EXPR, type, res);
1330           break;
1331
1332         CASE_CFN_CEXPI:
1333           rhs = res;
1334           break;
1335
1336         default:;
1337           gcc_unreachable ();
1338         }
1339
1340         /* Replace call with a copy.  */
1341         stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
1342
1343         gsi = gsi_for_stmt (use_stmt);
1344         gsi_replace (&gsi, stmt, true);
1345         if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
1346           cfg_changed = true;
1347     }
1348
1349   return cfg_changed;
1350 }
1351
1352 /* To evaluate powi(x,n), the floating point value x raised to the
1353    constant integer exponent n, we use a hybrid algorithm that
1354    combines the "window method" with look-up tables.  For an
1355    introduction to exponentiation algorithms and "addition chains",
1356    see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1357    "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1358    3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1359    Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */
1360
1361 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
1362    multiplications to inline before calling the system library's pow
1363    function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1364    so this default never requires calling pow, powf or powl.  */
1365
1366 #ifndef POWI_MAX_MULTS
1367 #define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
1368 #endif
1369
1370 /* The size of the "optimal power tree" lookup table.  All
1371    exponents less than this value are simply looked up in the
1372    powi_table below.  This threshold is also used to size the
1373    cache of pseudo registers that hold intermediate results.  */
1374 #define POWI_TABLE_SIZE 256
1375
1376 /* The size, in bits of the window, used in the "window method"
1377    exponentiation algorithm.  This is equivalent to a radix of
1378    (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
1379 #define POWI_WINDOW_SIZE 3
1380
1381 /* The following table is an efficient representation of an
1382    "optimal power tree".  For each value, i, the corresponding
1383    value, j, in the table states than an optimal evaluation
1384    sequence for calculating pow(x,i) can be found by evaluating
1385    pow(x,j)*pow(x,i-j).  An optimal power tree for the first
1386    100 integers is given in Knuth's "Seminumerical algorithms".  */
1387
1388 static const unsigned char powi_table[POWI_TABLE_SIZE] =
1389   {
1390       0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
1391       4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
1392       8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
1393      12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
1394      16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
1395      20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
1396      24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
1397      28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
1398      32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
1399      36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
1400      40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
1401      44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
1402      48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
1403      52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
1404      56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
1405      60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
1406      64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
1407      68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
1408      72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
1409      76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
1410      80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
1411      84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
1412      88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
1413      92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
1414      96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
1415     100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
1416     104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
1417     108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
1418     112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
1419     116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
1420     120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
1421     124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
1422   };
1423
1424
1425 /* Return the number of multiplications required to calculate
1426    powi(x,n) where n is less than POWI_TABLE_SIZE.  This is a
1427    subroutine of powi_cost.  CACHE is an array indicating
1428    which exponents have already been calculated.  */
1429
1430 static int
1431 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1432 {
1433   /* If we've already calculated this exponent, then this evaluation
1434      doesn't require any additional multiplications.  */
1435   if (cache[n])
1436     return 0;
1437
1438   cache[n] = true;
1439   return powi_lookup_cost (n - powi_table[n], cache)
1440          + powi_lookup_cost (powi_table[n], cache) + 1;
1441 }
1442
1443 /* Return the number of multiplications required to calculate
1444    powi(x,n) for an arbitrary x, given the exponent N.  This
1445    function needs to be kept in sync with powi_as_mults below.  */
1446
1447 static int
1448 powi_cost (HOST_WIDE_INT n)
1449 {
1450   bool cache[POWI_TABLE_SIZE];
1451   unsigned HOST_WIDE_INT digit;
1452   unsigned HOST_WIDE_INT val;
1453   int result;
1454
1455   if (n == 0)
1456     return 0;
1457
1458   /* Ignore the reciprocal when calculating the cost.  */
1459   val = (n < 0) ? -n : n;
1460
1461   /* Initialize the exponent cache.  */
1462   memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
1463   cache[1] = true;
1464
1465   result = 0;
1466
1467   while (val >= POWI_TABLE_SIZE)
1468     {
1469       if (val & 1)
1470         {
1471           digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
1472           result += powi_lookup_cost (digit, cache)
1473                     + POWI_WINDOW_SIZE + 1;
1474           val >>= POWI_WINDOW_SIZE;
1475         }
1476       else
1477         {
1478           val >>= 1;
1479           result++;
1480         }
1481     }
1482
1483   return result + powi_lookup_cost (val, cache);
1484 }
1485
1486 /* Recursive subroutine of powi_as_mults.  This function takes the
1487    array, CACHE, of already calculated exponents and an exponent N and
1488    returns a tree that corresponds to CACHE[1]**N, with type TYPE.  */
1489
1490 static tree
1491 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1492                  HOST_WIDE_INT n, tree *cache)
1493 {
1494   tree op0, op1, ssa_target;
1495   unsigned HOST_WIDE_INT digit;
1496   gassign *mult_stmt;
1497
1498   if (n < POWI_TABLE_SIZE && cache[n])
1499     return cache[n];
1500
1501   ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1502
1503   if (n < POWI_TABLE_SIZE)
1504     {
1505       cache[n] = ssa_target;
1506       op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1507       op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1508     }
1509   else if (n & 1)
1510     {
1511       digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1512       op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1513       op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1514     }
1515   else
1516     {
1517       op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1518       op1 = op0;
1519     }
1520
1521   mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1522   gimple_set_location (mult_stmt, loc);
1523   gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1524
1525   return ssa_target;
1526 }
1527
1528 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1529    This function needs to be kept in sync with powi_cost above.  */
1530
1531 tree
1532 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1533                tree arg0, HOST_WIDE_INT n)
1534 {
1535   tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1536   gassign *div_stmt;
1537   tree target;
1538
1539   if (n == 0)
1540     return build_one_cst (type);
1541
1542   memset (cache, 0, sizeof (cache));
1543   cache[1] = arg0;
1544
1545   result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
1546   if (n >= 0)
1547     return result;
1548
1549   /* If the original exponent was negative, reciprocate the result.  */
1550   target = make_temp_ssa_name (type, NULL, "powmult");
1551   div_stmt = gimple_build_assign (target, RDIV_EXPR,
1552                                   build_real (type, dconst1), result);
1553   gimple_set_location (div_stmt, loc);
1554   gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1555
1556   return target;
1557 }
1558
1559 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1560    location info LOC.  If the arguments are appropriate, create an
1561    equivalent sequence of statements prior to GSI using an optimal
1562    number of multiplications, and return an expession holding the
1563    result.  */
1564
1565 static tree
1566 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1567                             tree arg0, HOST_WIDE_INT n)
1568 {
1569   /* Avoid largest negative number.  */
1570   if (n != -n
1571       && ((n >= -1 && n <= 2)
1572           || (optimize_function_for_speed_p (cfun)
1573               && powi_cost (n) <= POWI_MAX_MULTS)))
1574     return powi_as_mults (gsi, loc, arg0, n);
1575
1576   return NULL_TREE;
1577 }
1578
1579 /* Build a gimple call statement that calls FN with argument ARG.
1580    Set the lhs of the call statement to a fresh SSA name.  Insert the
1581    statement prior to GSI's current position, and return the fresh
1582    SSA name.  */
1583
1584 static tree
1585 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1586                        tree fn, tree arg)
1587 {
1588   gcall *call_stmt;
1589   tree ssa_target;
1590
1591   call_stmt = gimple_build_call (fn, 1, arg);
1592   ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1593   gimple_set_lhs (call_stmt, ssa_target);
1594   gimple_set_location (call_stmt, loc);
1595   gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1596
1597   return ssa_target;
1598 }
1599
1600 /* Build a gimple binary operation with the given CODE and arguments
1601    ARG0, ARG1, assigning the result to a new SSA name for variable
1602    TARGET.  Insert the statement prior to GSI's current position, and
1603    return the fresh SSA name.*/
1604
1605 static tree
1606 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1607                         const char *name, enum tree_code code,
1608                         tree arg0, tree arg1)
1609 {
1610   tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1611   gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1612   gimple_set_location (stmt, loc);
1613   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1614   return result;
1615 }
1616
1617 /* Build a gimple reference operation with the given CODE and argument
1618    ARG, assigning the result to a new SSA name of TYPE with NAME.
1619    Insert the statement prior to GSI's current position, and return
1620    the fresh SSA name.  */
1621
1622 static inline tree
1623 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1624                       const char *name, enum tree_code code, tree arg0)
1625 {
1626   tree result = make_temp_ssa_name (type, NULL, name);
1627   gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1628   gimple_set_location (stmt, loc);
1629   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1630   return result;
1631 }
1632
1633 /* Build a gimple assignment to cast VAL to TYPE.  Insert the statement
1634    prior to GSI's current position, and return the fresh SSA name.  */
1635
1636 static tree
1637 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1638                        tree type, tree val)
1639 {
1640   tree result = make_ssa_name (type);
1641   gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1642   gimple_set_location (stmt, loc);
1643   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1644   return result;
1645 }
1646
1647 struct pow_synth_sqrt_info
1648 {
1649   bool *factors;
1650   unsigned int deepest;
1651   unsigned int num_mults;
1652 };
1653
1654 /* Return true iff the real value C can be represented as a
1655    sum of powers of 0.5 up to N.  That is:
1656    C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1657    Record in INFO the various parameters of the synthesis algorithm such
1658    as the factors a[i], the maximum 0.5 power and the number of
1659    multiplications that will be required.  */
1660
1661 bool
1662 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1663                                  struct pow_synth_sqrt_info *info)
1664 {
1665   REAL_VALUE_TYPE factor = dconsthalf;
1666   REAL_VALUE_TYPE remainder = c;
1667
1668   info->deepest = 0;
1669   info->num_mults = 0;
1670   memset (info->factors, 0, n * sizeof (bool));
1671
1672   for (unsigned i = 0; i < n; i++)
1673     {
1674       REAL_VALUE_TYPE res;
1675
1676       /* If something inexact happened bail out now.  */
1677       if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1678         return false;
1679
1680       /* We have hit zero.  The number is representable as a sum
1681          of powers of 0.5.  */
1682       if (real_equal (&res, &dconst0))
1683         {
1684           info->factors[i] = true;
1685           info->deepest = i + 1;
1686           return true;
1687         }
1688       else if (!REAL_VALUE_NEGATIVE (res))
1689         {
1690           remainder = res;
1691           info->factors[i] = true;
1692           info->num_mults++;
1693         }
1694       else
1695         info->factors[i] = false;
1696
1697       real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1698     }
1699   return false;
1700 }
1701
1702 /* Return the tree corresponding to FN being applied
1703    to ARG N times at GSI and LOC.
1704    Look up previous results from CACHE if need be.
1705    cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times.  */
1706
1707 static tree
1708 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1709               tree fn, location_t loc, tree *cache)
1710 {
1711   tree res = cache[n];
1712   if (!res)
1713     {
1714       tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1715       res = build_and_insert_call (gsi, loc, fn, prev);
1716       cache[n] = res;
1717     }
1718
1719   return res;
1720 }
1721
1722 /* Print to STREAM the repeated application of function FNAME to ARG
1723    N times.  So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1724    "foo (foo (x))".  */
1725
1726 static void
1727 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1728                  unsigned int n)
1729 {
1730   if (n == 0)
1731     fprintf (stream, "%s", arg);
1732   else
1733     {
1734       fprintf (stream, "%s (", fname);
1735       print_nested_fn (stream, fname, arg, n - 1);
1736       fprintf (stream, ")");
1737     }
1738 }
1739
1740 /* Print to STREAM the fractional sequence of sqrt chains
1741    applied to ARG, described by INFO.  Used for the dump file.  */
1742
1743 static void
1744 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1745                                 struct pow_synth_sqrt_info *info)
1746 {
1747   for (unsigned int i = 0; i < info->deepest; i++)
1748     {
1749       bool is_set = info->factors[i];
1750       if (is_set)
1751         {
1752           print_nested_fn (stream, "sqrt", arg, i + 1);
1753           if (i != info->deepest - 1)
1754             fprintf (stream, " * ");
1755         }
1756     }
1757 }
1758
1759 /* Print to STREAM a representation of raising ARG to an integer
1760    power N.  Used for the dump file.  */
1761
1762 static void
1763 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1764 {
1765   if (n > 1)
1766     fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1767   else if (n == 1)
1768     fprintf (stream, "%s", arg);
1769 }
1770
1771 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1772    square roots.  Place at GSI and LOC.  Limit the maximum depth
1773    of the sqrt chains to MAX_DEPTH.  Return the tree holding the
1774    result of the expanded sequence or NULL_TREE if the expansion failed.
1775
1776    This routine assumes that ARG1 is a real number with a fractional part
1777    (the integer exponent case will have been handled earlier in
1778    gimple_expand_builtin_pow).
1779
1780    For ARG1 > 0.0:
1781    * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1782      FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1783                     FRAC_PART == ARG1 - WHOLE_PART:
1784      Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1785      POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1786      if it can be expressed as such, that is if FRAC_PART satisfies:
1787      FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1788      where integer a[i] is either 0 or 1.
1789
1790      Example:
1791      POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1792        --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1793
1794    For ARG1 < 0.0 there are two approaches:
1795    * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1796          is calculated as above.
1797
1798      Example:
1799      POW (x, -5.625) == 1.0 / POW (x, 5.625)
1800        -->  1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1801
1802    * (B) : WHOLE_PART := - ceil (abs (ARG1))
1803            FRAC_PART  := ARG1 - WHOLE_PART
1804      and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1805      Example:
1806      POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1807        --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1808
1809    For ARG1 < 0.0 we choose between (A) and (B) depending on
1810    how many multiplications we'd have to do.
1811    So, for the example in (B): POW (x, -5.875), if we were to
1812    follow algorithm (A) we would produce:
1813    1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1814    which contains more multiplications than approach (B).
1815
1816    Hopefully, this approach will eliminate potentially expensive POW library
1817    calls when unsafe floating point math is enabled and allow the compiler to
1818    further optimise the multiplies, square roots and divides produced by this
1819    function.  */
1820
1821 static tree
1822 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1823                      tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1824 {
1825   tree type = TREE_TYPE (arg0);
1826   machine_mode mode = TYPE_MODE (type);
1827   tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1828   bool one_over = true;
1829
1830   if (!sqrtfn)
1831     return NULL_TREE;
1832
1833   if (TREE_CODE (arg1) != REAL_CST)
1834     return NULL_TREE;
1835
1836   REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1837
1838   gcc_assert (max_depth > 0);
1839   tree *cache = XALLOCAVEC (tree, max_depth + 1);
1840
1841   struct pow_synth_sqrt_info synth_info;
1842   synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1843   synth_info.deepest = 0;
1844   synth_info.num_mults = 0;
1845
1846   bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1847   REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1848
1849   /* The whole and fractional parts of exp.  */
1850   REAL_VALUE_TYPE whole_part;
1851   REAL_VALUE_TYPE frac_part;
1852
1853   real_floor (&whole_part, mode, &exp);
1854   real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1855
1856
1857   REAL_VALUE_TYPE ceil_whole = dconst0;
1858   REAL_VALUE_TYPE ceil_fract = dconst0;
1859
1860   if (neg_exp)
1861     {
1862       real_ceil (&ceil_whole, mode, &exp);
1863       real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1864     }
1865
1866   if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1867     return NULL_TREE;
1868
1869   /* Check whether it's more profitable to not use 1.0 / ...  */
1870   if (neg_exp)
1871     {
1872       struct pow_synth_sqrt_info alt_synth_info;
1873       alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1874       alt_synth_info.deepest = 0;
1875       alt_synth_info.num_mults = 0;
1876
1877       if (representable_as_half_series_p (ceil_fract, max_depth,
1878                                            &alt_synth_info)
1879           && alt_synth_info.deepest <= synth_info.deepest
1880           && alt_synth_info.num_mults < synth_info.num_mults)
1881         {
1882           whole_part = ceil_whole;
1883           frac_part = ceil_fract;
1884           synth_info.deepest = alt_synth_info.deepest;
1885           synth_info.num_mults = alt_synth_info.num_mults;
1886           memcpy (synth_info.factors, alt_synth_info.factors,
1887                   (max_depth + 1) * sizeof (bool));
1888           one_over = false;
1889         }
1890     }
1891
1892   HOST_WIDE_INT n = real_to_integer (&whole_part);
1893   REAL_VALUE_TYPE cint;
1894   real_from_integer (&cint, VOIDmode, n, SIGNED);
1895
1896   if (!real_identical (&whole_part, &cint))
1897     return NULL_TREE;
1898
1899   if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1900     return NULL_TREE;
1901
1902   memset (cache, 0, (max_depth + 1) * sizeof (tree));
1903
1904   tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1905
1906   /* Calculate the integer part of the exponent.  */
1907   if (n > 1)
1908     {
1909       integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1910       if (!integer_res)
1911         return NULL_TREE;
1912     }
1913
1914   if (dump_file)
1915     {
1916       char string[64];
1917
1918       real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1919       fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1920
1921       if (neg_exp)
1922         {
1923           if (one_over)
1924             {
1925               fprintf (dump_file, "1.0 / (");
1926               dump_integer_part (dump_file, "x", n);
1927               if (n > 0)
1928                 fprintf (dump_file, " * ");
1929               dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1930               fprintf (dump_file, ")");
1931             }
1932           else
1933             {
1934               dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1935               fprintf (dump_file, " / (");
1936               dump_integer_part (dump_file, "x", n);
1937               fprintf (dump_file, ")");
1938             }
1939         }
1940       else
1941         {
1942           dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1943           if (n > 0)
1944             fprintf (dump_file, " * ");
1945           dump_integer_part (dump_file, "x", n);
1946         }
1947
1948       fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1949     }
1950
1951
1952   tree fract_res = NULL_TREE;
1953   cache[0] = arg0;
1954
1955   /* Calculate the fractional part of the exponent.  */
1956   for (unsigned i = 0; i < synth_info.deepest; i++)
1957     {
1958       if (synth_info.factors[i])
1959         {
1960           tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1961
1962           if (!fract_res)
1963               fract_res = sqrt_chain;
1964
1965           else
1966             fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1967                                            fract_res, sqrt_chain);
1968         }
1969     }
1970
1971   tree res = NULL_TREE;
1972
1973   if (neg_exp)
1974     {
1975       if (one_over)
1976         {
1977           if (n > 0)
1978             res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1979                                            fract_res, integer_res);
1980           else
1981             res = fract_res;
1982
1983           res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1984                                           build_real (type, dconst1), res);
1985         }
1986       else
1987         {
1988           res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1989                                          fract_res, integer_res);
1990         }
1991     }
1992   else
1993     res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1994                                    fract_res, integer_res);
1995   return res;
1996 }
1997
1998 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1999    with location info LOC.  If possible, create an equivalent and
2000    less expensive sequence of statements prior to GSI, and return an
2001    expession holding the result.  */
2002
2003 static tree
2004 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
2005                            tree arg0, tree arg1)
2006 {
2007   REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
2008   REAL_VALUE_TYPE c2, dconst3;
2009   HOST_WIDE_INT n;
2010   tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
2011   machine_mode mode;
2012   bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
2013   bool hw_sqrt_exists, c_is_int, c2_is_int;
2014
2015   dconst1_4 = dconst1;
2016   SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
2017
2018   /* If the exponent isn't a constant, there's nothing of interest
2019      to be done.  */
2020   if (TREE_CODE (arg1) != REAL_CST)
2021     return NULL_TREE;
2022
2023   /* Don't perform the operation if flag_signaling_nans is on
2024      and the operand is a signaling NaN.  */
2025   if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
2026       && ((TREE_CODE (arg0) == REAL_CST
2027            && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
2028           || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
2029     return NULL_TREE;
2030
2031   /* If the exponent is equivalent to an integer, expand to an optimal
2032      multiplication sequence when profitable.  */
2033   c = TREE_REAL_CST (arg1);
2034   n = real_to_integer (&c);
2035   real_from_integer (&cint, VOIDmode, n, SIGNED);
2036   c_is_int = real_identical (&c, &cint);
2037
2038   if (c_is_int
2039       && ((n >= -1 && n <= 2)
2040           || (flag_unsafe_math_optimizations
2041               && speed_p
2042               && powi_cost (n) <= POWI_MAX_MULTS)))
2043     return gimple_expand_builtin_powi (gsi, loc, arg0, n);
2044
2045   /* Attempt various optimizations using sqrt and cbrt.  */
2046   type = TREE_TYPE (arg0);
2047   mode = TYPE_MODE (type);
2048   sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2049
2050   /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
2051      unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
2052      sqrt(-0) = -0.  */
2053   if (sqrtfn
2054       && real_equal (&c, &dconsthalf)
2055       && !HONOR_SIGNED_ZEROS (mode))
2056     return build_and_insert_call (gsi, loc, sqrtfn, arg0);
2057
2058   hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
2059
2060   /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
2061      optimizations since 1./3. is not exactly representable.  If x
2062      is negative and finite, the correct value of pow(x,1./3.) is
2063      a NaN with the "invalid" exception raised, because the value
2064      of 1./3. actually has an even denominator.  The correct value
2065      of cbrt(x) is a negative real value.  */
2066   cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
2067   dconst1_3 = real_value_truncate (mode, dconst_third ());
2068
2069   if (flag_unsafe_math_optimizations
2070       && cbrtfn
2071       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2072       && real_equal (&c, &dconst1_3))
2073     return build_and_insert_call (gsi, loc, cbrtfn, arg0);
2074
2075   /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
2076      if we don't have a hardware sqrt insn.  */
2077   dconst1_6 = dconst1_3;
2078   SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
2079
2080   if (flag_unsafe_math_optimizations
2081       && sqrtfn
2082       && cbrtfn
2083       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2084       && speed_p
2085       && hw_sqrt_exists
2086       && real_equal (&c, &dconst1_6))
2087     {
2088       /* sqrt(x)  */
2089       sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
2090
2091       /* cbrt(sqrt(x))  */
2092       return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
2093     }
2094
2095
2096   /* Attempt to expand the POW as a product of square root chains.
2097      Expand the 0.25 case even when otpimising for size.  */
2098   if (flag_unsafe_math_optimizations
2099       && sqrtfn
2100       && hw_sqrt_exists
2101       && (speed_p || real_equal (&c, &dconst1_4))
2102       && !HONOR_SIGNED_ZEROS (mode))
2103     {
2104       unsigned int max_depth = speed_p
2105                                 ? param_max_pow_sqrt_depth
2106                                 : 2;
2107
2108       tree expand_with_sqrts
2109         = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
2110
2111       if (expand_with_sqrts)
2112         return expand_with_sqrts;
2113     }
2114
2115   real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
2116   n = real_to_integer (&c2);
2117   real_from_integer (&cint, VOIDmode, n, SIGNED);
2118   c2_is_int = real_identical (&c2, &cint);
2119
2120   /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2121
2122      powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
2123      1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.
2124
2125      Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
2126      different from pow(x, 1./3.) due to rounding and behavior with
2127      negative x, we need to constrain this transformation to unsafe
2128      math and positive x or finite math.  */
2129   real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
2130   real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
2131   real_round (&c2, mode, &c2);
2132   n = real_to_integer (&c2);
2133   real_from_integer (&cint, VOIDmode, n, SIGNED);
2134   real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
2135   real_convert (&c2, mode, &c2);
2136
2137   if (flag_unsafe_math_optimizations
2138       && cbrtfn
2139       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2140       && real_identical (&c2, &c)
2141       && !c2_is_int
2142       && optimize_function_for_speed_p (cfun)
2143       && powi_cost (n / 3) <= POWI_MAX_MULTS)
2144     {
2145       tree powi_x_ndiv3 = NULL_TREE;
2146
2147       /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
2148          possible or profitable, give up.  Skip the degenerate case when
2149          abs(n) < 3, where the result is always 1.  */
2150       if (absu_hwi (n) >= 3)
2151         {
2152           powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
2153                                                      abs_hwi (n / 3));
2154           if (!powi_x_ndiv3)
2155             return NULL_TREE;
2156         }
2157
2158       /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
2159          as that creates an unnecessary variable.  Instead, just produce
2160          either cbrt(x) or cbrt(x) * cbrt(x).  */
2161       cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
2162
2163       if (absu_hwi (n) % 3 == 1)
2164         powi_cbrt_x = cbrt_x;
2165       else
2166         powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2167                                               cbrt_x, cbrt_x);
2168
2169       /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
2170       if (absu_hwi (n) < 3)
2171         result = powi_cbrt_x;
2172       else
2173         result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2174                                          powi_x_ndiv3, powi_cbrt_x);
2175
2176       /* If n is negative, reciprocate the result.  */
2177       if (n < 0)
2178         result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
2179                                          build_real (type, dconst1), result);
2180
2181       return result;
2182     }
2183
2184   /* No optimizations succeeded.  */
2185   return NULL_TREE;
2186 }
2187
2188 /* ARG is the argument to a cabs builtin call in GSI with location info
2189    LOC.  Create a sequence of statements prior to GSI that calculates
2190    sqrt(R*R + I*I), where R and I are the real and imaginary components
2191    of ARG, respectively.  Return an expression holding the result.  */
2192
2193 static tree
2194 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
2195 {
2196   tree real_part, imag_part, addend1, addend2, sum, result;
2197   tree type = TREE_TYPE (TREE_TYPE (arg));
2198   tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2199   machine_mode mode = TYPE_MODE (type);
2200
2201   if (!flag_unsafe_math_optimizations
2202       || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
2203       || !sqrtfn
2204       || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
2205     return NULL_TREE;
2206
2207   real_part = build_and_insert_ref (gsi, loc, type, "cabs",
2208                                     REALPART_EXPR, arg);
2209   addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2210                                     real_part, real_part);
2211   imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
2212                                     IMAGPART_EXPR, arg);
2213   addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2214                                     imag_part, imag_part);
2215   sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
2216   result = build_and_insert_call (gsi, loc, sqrtfn, sum);
2217
2218   return result;
2219 }
2220
2221 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2222    on the SSA_NAME argument of each of them.  Also expand powi(x,n) into
2223    an optimal number of multiplies, when n is a constant.  */
2224
2225 namespace {
2226
2227 const pass_data pass_data_cse_sincos =
2228 {
2229   GIMPLE_PASS, /* type */
2230   "sincos", /* name */
2231   OPTGROUP_NONE, /* optinfo_flags */
2232   TV_TREE_SINCOS, /* tv_id */
2233   PROP_ssa, /* properties_required */
2234   PROP_gimple_opt_math, /* properties_provided */
2235   0, /* properties_destroyed */
2236   0, /* todo_flags_start */
2237   TODO_update_ssa, /* todo_flags_finish */
2238 };
2239
2240 class pass_cse_sincos : public gimple_opt_pass
2241 {
2242 public:
2243   pass_cse_sincos (gcc::context *ctxt)
2244     : gimple_opt_pass (pass_data_cse_sincos, ctxt)
2245   {}
2246
2247   /* opt_pass methods: */
2248   virtual bool gate (function *)
2249     {
2250       /* We no longer require either sincos or cexp, since powi expansion
2251          piggybacks on this pass.  */
2252       return optimize;
2253     }
2254
2255   virtual unsigned int execute (function *);
2256
2257 }; // class pass_cse_sincos
2258
2259 unsigned int
2260 pass_cse_sincos::execute (function *fun)
2261 {
2262   basic_block bb;
2263   bool cfg_changed = false;
2264
2265   calculate_dominance_info (CDI_DOMINATORS);
2266   memset (&sincos_stats, 0, sizeof (sincos_stats));
2267
2268   FOR_EACH_BB_FN (bb, fun)
2269     {
2270       gimple_stmt_iterator gsi;
2271       bool cleanup_eh = false;
2272
2273       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2274         {
2275           gimple *stmt = gsi_stmt (gsi);
2276
2277           /* Only the last stmt in a bb could throw, no need to call
2278              gimple_purge_dead_eh_edges if we change something in the middle
2279              of a basic block.  */
2280           cleanup_eh = false;
2281
2282           if (is_gimple_call (stmt)
2283               && gimple_call_lhs (stmt))
2284             {
2285               tree arg, arg0, arg1, result;
2286               HOST_WIDE_INT n;
2287               location_t loc;
2288
2289               switch (gimple_call_combined_fn (stmt))
2290                 {
2291                 CASE_CFN_COS:
2292                 CASE_CFN_SIN:
2293                 CASE_CFN_CEXPI:
2294                   arg = gimple_call_arg (stmt, 0);
2295                   /* Make sure we have either sincos or cexp.  */
2296                   if (!targetm.libc_has_function (function_c99_math_complex,
2297                                                   TREE_TYPE (arg))
2298                       && !targetm.libc_has_function (function_sincos,
2299                                                      TREE_TYPE (arg)))
2300                     break;
2301
2302                   if (TREE_CODE (arg) == SSA_NAME)
2303                     cfg_changed |= execute_cse_sincos_1 (arg);
2304                   break;
2305
2306                 CASE_CFN_POW:
2307                   arg0 = gimple_call_arg (stmt, 0);
2308                   arg1 = gimple_call_arg (stmt, 1);
2309
2310                   loc = gimple_location (stmt);
2311                   result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
2312
2313                   if (result)
2314                     {
2315                       tree lhs = gimple_get_lhs (stmt);
2316                       gassign *new_stmt = gimple_build_assign (lhs, result);
2317                       gimple_set_location (new_stmt, loc);
2318                       unlink_stmt_vdef (stmt);
2319                       gsi_replace (&gsi, new_stmt, true);
2320                       cleanup_eh = true;
2321                       if (gimple_vdef (stmt))
2322                         release_ssa_name (gimple_vdef (stmt));
2323                     }
2324                   break;
2325
2326                 CASE_CFN_POWI:
2327                   arg0 = gimple_call_arg (stmt, 0);
2328                   arg1 = gimple_call_arg (stmt, 1);
2329                   loc = gimple_location (stmt);
2330
2331                   if (real_minus_onep (arg0))
2332                     {
2333                       tree t0, t1, cond, one, minus_one;
2334                       gassign *stmt;
2335
2336                       t0 = TREE_TYPE (arg0);
2337                       t1 = TREE_TYPE (arg1);
2338                       one = build_real (t0, dconst1);
2339                       minus_one = build_real (t0, dconstm1);
2340
2341                       cond = make_temp_ssa_name (t1, NULL, "powi_cond");
2342                       stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2343                                                   arg1, build_int_cst (t1, 1));
2344                       gimple_set_location (stmt, loc);
2345                       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2346
2347                       result = make_temp_ssa_name (t0, NULL, "powi");
2348                       stmt = gimple_build_assign (result, COND_EXPR, cond,
2349                                                   minus_one, one);
2350                       gimple_set_location (stmt, loc);
2351                       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2352                     }
2353                   else
2354                     {
2355                       if (!tree_fits_shwi_p (arg1))
2356                         break;
2357
2358                       n = tree_to_shwi (arg1);
2359                       result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
2360                     }
2361
2362                   if (result)
2363                     {
2364                       tree lhs = gimple_get_lhs (stmt);
2365                       gassign *new_stmt = gimple_build_assign (lhs, result);
2366                       gimple_set_location (new_stmt, loc);
2367                       unlink_stmt_vdef (stmt);
2368                       gsi_replace (&gsi, new_stmt, true);
2369                       cleanup_eh = true;
2370                       if (gimple_vdef (stmt))
2371                         release_ssa_name (gimple_vdef (stmt));
2372                     }
2373                   break;
2374
2375                 CASE_CFN_CABS:
2376                   arg0 = gimple_call_arg (stmt, 0);
2377                   loc = gimple_location (stmt);
2378                   result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
2379
2380                   if (result)
2381                     {
2382                       tree lhs = gimple_get_lhs (stmt);
2383                       gassign *new_stmt = gimple_build_assign (lhs, result);
2384                       gimple_set_location (new_stmt, loc);
2385                       unlink_stmt_vdef (stmt);
2386                       gsi_replace (&gsi, new_stmt, true);
2387                       cleanup_eh = true;
2388                       if (gimple_vdef (stmt))
2389                         release_ssa_name (gimple_vdef (stmt));
2390                     }
2391                   break;
2392
2393                 default:;
2394                 }
2395             }
2396         }
2397       if (cleanup_eh)
2398         cfg_changed |= gimple_purge_dead_eh_edges (bb);
2399     }
2400
2401   statistics_counter_event (fun, "sincos statements inserted",
2402                             sincos_stats.inserted);
2403   statistics_counter_event (fun, "conv statements removed",
2404                             sincos_stats.conv_removed);
2405
2406   return cfg_changed ? TODO_cleanup_cfg : 0;
2407 }
2408
2409 } // anon namespace
2410
2411 gimple_opt_pass *
2412 make_pass_cse_sincos (gcc::context *ctxt)
2413 {
2414   return new pass_cse_sincos (ctxt);
2415 }
2416
2417 /* Return true if stmt is a type conversion operation that can be stripped
2418    when used in a widening multiply operation.  */
2419 static bool
2420 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2421 {
2422   enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2423
2424   if (TREE_CODE (result_type) == INTEGER_TYPE)
2425     {
2426       tree op_type;
2427       tree inner_op_type;
2428
2429       if (!CONVERT_EXPR_CODE_P (rhs_code))
2430         return false;
2431
2432       op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2433
2434       /* If the type of OP has the same precision as the result, then
2435          we can strip this conversion.  The multiply operation will be
2436          selected to create the correct extension as a by-product.  */
2437       if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2438         return true;
2439
2440       /* We can also strip a conversion if it preserves the signed-ness of
2441          the operation and doesn't narrow the range.  */
2442       inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2443
2444       /* If the inner-most type is unsigned, then we can strip any
2445          intermediate widening operation.  If it's signed, then the
2446          intermediate widening operation must also be signed.  */
2447       if ((TYPE_UNSIGNED (inner_op_type)
2448            || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2449           && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2450         return true;
2451
2452       return false;
2453     }
2454
2455   return rhs_code == FIXED_CONVERT_EXPR;
2456 }
2457
2458 /* Return true if RHS is a suitable operand for a widening multiplication,
2459    assuming a target type of TYPE.
2460    There are two cases:
2461
2462      - RHS makes some value at least twice as wide.  Store that value
2463        in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2464
2465      - RHS is an integer constant.  Store that value in *NEW_RHS_OUT if so,
2466        but leave *TYPE_OUT untouched.  */
2467
2468 static bool
2469 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2470                         tree *new_rhs_out)
2471 {
2472   gimple *stmt;
2473   tree type1, rhs1;
2474
2475   if (TREE_CODE (rhs) == SSA_NAME)
2476     {
2477       stmt = SSA_NAME_DEF_STMT (rhs);
2478       if (is_gimple_assign (stmt))
2479         {
2480           if (! widening_mult_conversion_strippable_p (type, stmt))
2481             rhs1 = rhs;
2482           else
2483             {
2484               rhs1 = gimple_assign_rhs1 (stmt);
2485
2486               if (TREE_CODE (rhs1) == INTEGER_CST)
2487                 {
2488                   *new_rhs_out = rhs1;
2489                   *type_out = NULL;
2490                   return true;
2491                 }
2492             }
2493         }
2494       else
2495         rhs1 = rhs;
2496
2497       type1 = TREE_TYPE (rhs1);
2498
2499       if (TREE_CODE (type1) != TREE_CODE (type)
2500           || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2501         return false;
2502
2503       *new_rhs_out = rhs1;
2504       *type_out = type1;
2505       return true;
2506     }
2507
2508   if (TREE_CODE (rhs) == INTEGER_CST)
2509     {
2510       *new_rhs_out = rhs;
2511       *type_out = NULL;
2512       return true;
2513     }
2514
2515   return false;
2516 }
2517
2518 /* Return true if STMT performs a widening multiplication, assuming the
2519    output type is TYPE.  If so, store the unwidened types of the operands
2520    in *TYPE1_OUT and *TYPE2_OUT respectively.  Also fill *RHS1_OUT and
2521    *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2522    and *TYPE2_OUT would give the operands of the multiplication.  */
2523
2524 static bool
2525 is_widening_mult_p (gimple *stmt,
2526                     tree *type1_out, tree *rhs1_out,
2527                     tree *type2_out, tree *rhs2_out)
2528 {
2529   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2530
2531   if (TREE_CODE (type) == INTEGER_TYPE)
2532     {
2533       if (TYPE_OVERFLOW_TRAPS (type))
2534         return false;
2535     }
2536   else if (TREE_CODE (type) != FIXED_POINT_TYPE)
2537     return false;
2538
2539   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2540                                rhs1_out))
2541     return false;
2542
2543   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2544                                rhs2_out))
2545     return false;
2546
2547   if (*type1_out == NULL)
2548     {
2549       if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2550         return false;
2551       *type1_out = *type2_out;
2552     }
2553
2554   if (*type2_out == NULL)
2555     {
2556       if (!int_fits_type_p (*rhs2_out, *type1_out))
2557         return false;
2558       *type2_out = *type1_out;
2559     }
2560
2561   /* Ensure that the larger of the two operands comes first. */
2562   if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2563     {
2564       std::swap (*type1_out, *type2_out);
2565       std::swap (*rhs1_out, *rhs2_out);
2566     }
2567
2568   return true;
2569 }
2570
2571 /* Check to see if the CALL statement is an invocation of copysign
2572    with 1. being the first argument.  */
2573 static bool
2574 is_copysign_call_with_1 (gimple *call)
2575 {
2576   gcall *c = dyn_cast <gcall *> (call);
2577   if (! c)
2578     return false;
2579
2580   enum combined_fn code = gimple_call_combined_fn (c);
2581
2582   if (code == CFN_LAST)
2583     return false;
2584
2585   if (builtin_fn_p (code))
2586     {
2587       switch (as_builtin_fn (code))
2588         {
2589         CASE_FLT_FN (BUILT_IN_COPYSIGN):
2590         CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2591           return real_onep (gimple_call_arg (c, 0));
2592         default:
2593           return false;
2594         }
2595     }
2596
2597   if (internal_fn_p (code))
2598     {
2599       switch (as_internal_fn (code))
2600         {
2601         case IFN_COPYSIGN:
2602           return real_onep (gimple_call_arg (c, 0));
2603         default:
2604           return false;
2605         }
2606     }
2607
2608    return false;
2609 }
2610
2611 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2612    This only happens when the xorsign optab is defined, if the
2613    pattern is not a xorsign pattern or if expansion fails FALSE is
2614    returned, otherwise TRUE is returned.  */
2615 static bool
2616 convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2617 {
2618   tree treeop0, treeop1, lhs, type;
2619   location_t loc = gimple_location (stmt);
2620   lhs = gimple_assign_lhs (stmt);
2621   treeop0 = gimple_assign_rhs1 (stmt);
2622   treeop1 = gimple_assign_rhs2 (stmt);
2623   type = TREE_TYPE (lhs);
2624   machine_mode mode = TYPE_MODE (type);
2625
2626   if (HONOR_SNANS (type))
2627     return false;
2628
2629   if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2630     {
2631       gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2632       if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
2633         {
2634           call0 = SSA_NAME_DEF_STMT (treeop1);
2635           if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2636             return false;
2637
2638           treeop1 = treeop0;
2639         }
2640         if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2641           return false;
2642
2643         gcall *c = as_a<gcall*> (call0);
2644         treeop0 = gimple_call_arg (c, 1);
2645
2646         gcall *call_stmt
2647           = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2648         gimple_set_lhs (call_stmt, lhs);
2649         gimple_set_location (call_stmt, loc);
2650         gsi_replace (gsi, call_stmt, true);
2651         return true;
2652     }
2653
2654   return false;
2655 }
2656
2657 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2658    its rhs, and try to convert it into a WIDEN_MULT_EXPR.  The return
2659    value is true iff we converted the statement.  */
2660
2661 static bool
2662 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2663 {
2664   tree lhs, rhs1, rhs2, type, type1, type2;
2665   enum insn_code handler;
2666   scalar_int_mode to_mode, from_mode, actual_mode;
2667   optab op;
2668   int actual_precision;
2669   location_t loc = gimple_location (stmt);
2670   bool from_unsigned1, from_unsigned2;
2671
2672   lhs = gimple_assign_lhs (stmt);
2673   type = TREE_TYPE (lhs);
2674   if (TREE_CODE (type) != INTEGER_TYPE)
2675     return false;
2676
2677   if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2678     return false;
2679
2680   to_mode = SCALAR_INT_TYPE_MODE (type);
2681   from_mode = SCALAR_INT_TYPE_MODE (type1);
2682   if (to_mode == from_mode)
2683     return false;
2684
2685   from_unsigned1 = TYPE_UNSIGNED (type1);
2686   from_unsigned2 = TYPE_UNSIGNED (type2);
2687
2688   if (from_unsigned1 && from_unsigned2)
2689     op = umul_widen_optab;
2690   else if (!from_unsigned1 && !from_unsigned2)
2691     op = smul_widen_optab;
2692   else
2693     op = usmul_widen_optab;
2694
2695   handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2696                                                   &actual_mode);
2697
2698   if (handler == CODE_FOR_nothing)
2699     {
2700       if (op != smul_widen_optab)
2701         {
2702           /* We can use a signed multiply with unsigned types as long as
2703              there is a wider mode to use, or it is the smaller of the two
2704              types that is unsigned.  Note that type1 >= type2, always.  */
2705           if ((TYPE_UNSIGNED (type1)
2706                && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2707               || (TYPE_UNSIGNED (type2)
2708                   && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2709             {
2710               if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2711                   || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2712                 return false;
2713             }
2714
2715           op = smul_widen_optab;
2716           handler = find_widening_optab_handler_and_mode (op, to_mode,
2717                                                           from_mode,
2718                                                           &actual_mode);
2719
2720           if (handler == CODE_FOR_nothing)
2721             return false;
2722
2723           from_unsigned1 = from_unsigned2 = false;
2724         }
2725       else
2726         return false;
2727     }
2728
2729   /* Ensure that the inputs to the handler are in the correct precison
2730      for the opcode.  This will be the full mode size.  */
2731   actual_precision = GET_MODE_PRECISION (actual_mode);
2732   if (2 * actual_precision > TYPE_PRECISION (type))
2733     return false;
2734   if (actual_precision != TYPE_PRECISION (type1)
2735       || from_unsigned1 != TYPE_UNSIGNED (type1))
2736     rhs1 = build_and_insert_cast (gsi, loc,
2737                                   build_nonstandard_integer_type
2738                                     (actual_precision, from_unsigned1), rhs1);
2739   if (actual_precision != TYPE_PRECISION (type2)
2740       || from_unsigned2 != TYPE_UNSIGNED (type2))
2741     rhs2 = build_and_insert_cast (gsi, loc,
2742                                   build_nonstandard_integer_type
2743                                     (actual_precision, from_unsigned2), rhs2);
2744
2745   /* Handle constants.  */
2746   if (TREE_CODE (rhs1) == INTEGER_CST)
2747     rhs1 = fold_convert (type1, rhs1);
2748   if (TREE_CODE (rhs2) == INTEGER_CST)
2749     rhs2 = fold_convert (type2, rhs2);
2750
2751   gimple_assign_set_rhs1 (stmt, rhs1);
2752   gimple_assign_set_rhs2 (stmt, rhs2);
2753   gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2754   update_stmt (stmt);
2755   widen_mul_stats.widen_mults_inserted++;
2756   return true;
2757 }
2758
2759 /* Process a single gimple statement STMT, which is found at the
2760    iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
2761    rhs (given by CODE), and try to convert it into a
2762    WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
2763    is true iff we converted the statement.  */
2764
2765 static bool
2766 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2767                             enum tree_code code)
2768 {
2769   gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2770   gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2771   tree type, type1, type2, optype;
2772   tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2773   enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2774   optab this_optab;
2775   enum tree_code wmult_code;
2776   enum insn_code handler;
2777   scalar_mode to_mode, from_mode, actual_mode;
2778   location_t loc = gimple_location (stmt);
2779   int actual_precision;
2780   bool from_unsigned1, from_unsigned2;
2781
2782   lhs = gimple_assign_lhs (stmt);
2783   type = TREE_TYPE (lhs);
2784   if (TREE_CODE (type) != INTEGER_TYPE
2785       && TREE_CODE (type) != FIXED_POINT_TYPE)
2786     return false;
2787
2788   if (code == MINUS_EXPR)
2789     wmult_code = WIDEN_MULT_MINUS_EXPR;
2790   else
2791     wmult_code = WIDEN_MULT_PLUS_EXPR;
2792
2793   rhs1 = gimple_assign_rhs1 (stmt);
2794   rhs2 = gimple_assign_rhs2 (stmt);
2795
2796   if (TREE_CODE (rhs1) == SSA_NAME)
2797     {
2798       rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2799       if (is_gimple_assign (rhs1_stmt))
2800         rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2801     }
2802
2803   if (TREE_CODE (rhs2) == SSA_NAME)
2804     {
2805       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2806       if (is_gimple_assign (rhs2_stmt))
2807         rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2808     }
2809
2810   /* Allow for one conversion statement between the multiply
2811      and addition/subtraction statement.  If there are more than
2812      one conversions then we assume they would invalidate this
2813      transformation.  If that's not the case then they should have
2814      been folded before now.  */
2815   if (CONVERT_EXPR_CODE_P (rhs1_code))
2816     {
2817       conv1_stmt = rhs1_stmt;
2818       rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2819       if (TREE_CODE (rhs1) == SSA_NAME)
2820         {
2821           rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2822           if (is_gimple_assign (rhs1_stmt))
2823             rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2824         }
2825       else
2826         return false;
2827     }
2828   if (CONVERT_EXPR_CODE_P (rhs2_code))
2829     {
2830       conv2_stmt = rhs2_stmt;
2831       rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2832       if (TREE_CODE (rhs2) == SSA_NAME)
2833         {
2834           rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2835           if (is_gimple_assign (rhs2_stmt))
2836             rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2837         }
2838       else
2839         return false;
2840     }
2841
2842   /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2843      is_widening_mult_p, but we still need the rhs returns.
2844
2845      It might also appear that it would be sufficient to use the existing
2846      operands of the widening multiply, but that would limit the choice of
2847      multiply-and-accumulate instructions.
2848
2849      If the widened-multiplication result has more than one uses, it is
2850      probably wiser not to do the conversion.  Also restrict this operation
2851      to single basic block to avoid moving the multiply to a different block
2852      with a higher execution frequency.  */
2853   if (code == PLUS_EXPR
2854       && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2855     {
2856       if (!has_single_use (rhs1)
2857           || gimple_bb (rhs1_stmt) != gimple_bb (stmt)
2858           || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2859                                   &type2, &mult_rhs2))
2860         return false;
2861       add_rhs = rhs2;
2862       conv_stmt = conv1_stmt;
2863     }
2864   else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2865     {
2866       if (!has_single_use (rhs2)
2867           || gimple_bb (rhs2_stmt) != gimple_bb (stmt)
2868           || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2869                                   &type2, &mult_rhs2))
2870         return false;
2871       add_rhs = rhs1;
2872       conv_stmt = conv2_stmt;
2873     }
2874   else
2875     return false;
2876
2877   to_mode = SCALAR_TYPE_MODE (type);
2878   from_mode = SCALAR_TYPE_MODE (type1);
2879   if (to_mode == from_mode)
2880     return false;
2881
2882   from_unsigned1 = TYPE_UNSIGNED (type1);
2883   from_unsigned2 = TYPE_UNSIGNED (type2);
2884   optype = type1;
2885
2886   /* There's no such thing as a mixed sign madd yet, so use a wider mode.  */
2887   if (from_unsigned1 != from_unsigned2)
2888     {
2889       if (!INTEGRAL_TYPE_P (type))
2890         return false;
2891       /* We can use a signed multiply with unsigned types as long as
2892          there is a wider mode to use, or it is the smaller of the two
2893          types that is unsigned.  Note that type1 >= type2, always.  */
2894       if ((from_unsigned1
2895            && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2896           || (from_unsigned2
2897               && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2898         {
2899           if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2900               || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2901             return false;
2902         }
2903
2904       from_unsigned1 = from_unsigned2 = false;
2905       optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2906                                                false);
2907     }
2908
2909   /* If there was a conversion between the multiply and addition
2910      then we need to make sure it fits a multiply-and-accumulate.
2911      The should be a single mode change which does not change the
2912      value.  */
2913   if (conv_stmt)
2914     {
2915       /* We use the original, unmodified data types for this.  */
2916       tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
2917       tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
2918       int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
2919       bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
2920
2921       if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
2922         {
2923           /* Conversion is a truncate.  */
2924           if (TYPE_PRECISION (to_type) < data_size)
2925             return false;
2926         }
2927       else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
2928         {
2929           /* Conversion is an extend.  Check it's the right sort.  */
2930           if (TYPE_UNSIGNED (from_type) != is_unsigned
2931               && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
2932             return false;
2933         }
2934       /* else convert is a no-op for our purposes.  */
2935     }
2936
2937   /* Verify that the machine can perform a widening multiply
2938      accumulate in this mode/signedness combination, otherwise
2939      this transformation is likely to pessimize code.  */
2940   this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
2941   handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
2942                                                   from_mode, &actual_mode);
2943
2944   if (handler == CODE_FOR_nothing)
2945     return false;
2946
2947   /* Ensure that the inputs to the handler are in the correct precison
2948      for the opcode.  This will be the full mode size.  */
2949   actual_precision = GET_MODE_PRECISION (actual_mode);
2950   if (actual_precision != TYPE_PRECISION (type1)
2951       || from_unsigned1 != TYPE_UNSIGNED (type1))
2952     mult_rhs1 = build_and_insert_cast (gsi, loc,
2953                                        build_nonstandard_integer_type
2954                                          (actual_precision, from_unsigned1),
2955                                        mult_rhs1);
2956   if (actual_precision != TYPE_PRECISION (type2)
2957       || from_unsigned2 != TYPE_UNSIGNED (type2))
2958     mult_rhs2 = build_and_insert_cast (gsi, loc,
2959                                        build_nonstandard_integer_type
2960                                          (actual_precision, from_unsigned2),
2961                                        mult_rhs2);
2962
2963   if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
2964     add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
2965
2966   /* Handle constants.  */
2967   if (TREE_CODE (mult_rhs1) == INTEGER_CST)
2968     mult_rhs1 = fold_convert (type1, mult_rhs1);
2969   if (TREE_CODE (mult_rhs2) == INTEGER_CST)
2970     mult_rhs2 = fold_convert (type2, mult_rhs2);
2971
2972   gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
2973                                   add_rhs);
2974   update_stmt (gsi_stmt (*gsi));
2975   widen_mul_stats.maccs_inserted++;
2976   return true;
2977 }
2978
2979 /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
2980    OP2 and which we know is used in statements that can be, together with the
2981    multiplication, converted to FMAs, perform the transformation.  */
2982
2983 static void
2984 convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
2985 {
2986   tree type = TREE_TYPE (mul_result);
2987   gimple *use_stmt;
2988   imm_use_iterator imm_iter;
2989   gcall *fma_stmt;
2990
2991   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
2992     {
2993       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
2994       tree addop, mulop1 = op1, result = mul_result;
2995       bool negate_p = false;
2996       gimple_seq seq = NULL;
2997
2998       if (is_gimple_debug (use_stmt))
2999         continue;
3000
3001       if (is_gimple_assign (use_stmt)
3002           && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3003         {
3004           result = gimple_assign_lhs (use_stmt);
3005           use_operand_p use_p;
3006           gimple *neguse_stmt;
3007           single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3008           gsi_remove (&gsi, true);
3009           release_defs (use_stmt);
3010
3011           use_stmt = neguse_stmt;
3012           gsi = gsi_for_stmt (use_stmt);
3013           negate_p = true;
3014         }
3015
3016       tree cond, else_value, ops[3];
3017       tree_code code;
3018       if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
3019                                               ops, &else_value))
3020         gcc_unreachable ();
3021       addop = ops[0] == result ? ops[1] : ops[0];
3022
3023       if (code == MINUS_EXPR)
3024         {
3025           if (ops[0] == result)
3026             /* a * b - c -> a * b + (-c)  */
3027             addop = gimple_build (&seq, NEGATE_EXPR, type, addop);
3028           else
3029             /* a - b * c -> (-b) * c + a */
3030             negate_p = !negate_p;
3031         }
3032
3033       if (negate_p)
3034         mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1);
3035
3036       if (seq)
3037         gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
3038
3039       if (cond)
3040         fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
3041                                                op2, addop, else_value);
3042       else
3043         fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
3044       gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
3045       gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun,
3046                                                                    use_stmt));
3047       gsi_replace (&gsi, fma_stmt, true);
3048       /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
3049          regardless of where the negation occurs.  */
3050       gimple *orig_stmt = gsi_stmt (gsi);
3051       if (fold_stmt (&gsi, follow_all_ssa_edges))
3052         {
3053           if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
3054             gcc_unreachable ();
3055           update_stmt (gsi_stmt (gsi));
3056         }
3057
3058       if (dump_file && (dump_flags & TDF_DETAILS))
3059         {
3060           fprintf (dump_file, "Generated FMA ");
3061           print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3062           fprintf (dump_file, "\n");
3063         }
3064
3065       /* If the FMA result is negated in a single use, fold the negation
3066          too.  */
3067       orig_stmt = gsi_stmt (gsi);
3068       use_operand_p use_p;
3069       gimple *neg_stmt;
3070       if (is_gimple_call (orig_stmt)
3071           && gimple_call_internal_p (orig_stmt)
3072           && gimple_call_lhs (orig_stmt)
3073           && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME
3074           && single_imm_use (gimple_call_lhs (orig_stmt), &use_p, &neg_stmt)
3075           && is_gimple_assign (neg_stmt)
3076           && gimple_assign_rhs_code (neg_stmt) == NEGATE_EXPR
3077           && !stmt_could_throw_p (cfun, neg_stmt))
3078         {
3079           gsi = gsi_for_stmt (neg_stmt);
3080           if (fold_stmt (&gsi, follow_all_ssa_edges))
3081             {
3082               if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (gsi)))
3083                 gcc_unreachable ();
3084               update_stmt (gsi_stmt (gsi));
3085               if (dump_file && (dump_flags & TDF_DETAILS))
3086                 {
3087                   fprintf (dump_file, "Folded FMA negation ");
3088                   print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3089                   fprintf (dump_file, "\n");
3090                 }
3091             }
3092         }
3093
3094       widen_mul_stats.fmas_inserted++;
3095     }
3096 }
3097
3098 /* Data necessary to perform the actual transformation from a multiplication
3099    and an addition to an FMA after decision is taken it should be done and to
3100    then delete the multiplication statement from the function IL.  */
3101
3102 struct fma_transformation_info
3103 {
3104   gimple *mul_stmt;
3105   tree mul_result;
3106   tree op1;
3107   tree op2;
3108 };
3109
3110 /* Structure containing the current state of FMA deferring, i.e. whether we are
3111    deferring, whether to continue deferring, and all data necessary to come
3112    back and perform all deferred transformations.  */
3113
3114 class fma_deferring_state
3115 {
3116 public:
3117   /* Class constructor.  Pass true as PERFORM_DEFERRING in order to actually
3118      do any deferring.  */
3119
3120   fma_deferring_state (bool perform_deferring)
3121     : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
3122       m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
3123
3124   /* List of FMA candidates for which we the transformation has been determined
3125      possible but we at this point in BB analysis we do not consider them
3126      beneficial.  */
3127   auto_vec<fma_transformation_info, 8> m_candidates;
3128
3129   /* Set of results of multiplication that are part of an already deferred FMA
3130      candidates.  */
3131   hash_set<tree> m_mul_result_set;
3132
3133   /* The PHI that supposedly feeds back result of a FMA to another over loop
3134      boundary.  */
3135   gphi *m_initial_phi;
3136
3137   /* Result of the last produced FMA candidate or NULL if there has not been
3138      one.  */
3139   tree m_last_result;
3140
3141   /* If true, deferring might still be profitable.  If false, transform all
3142      candidates and no longer defer.  */
3143   bool m_deferring_p;
3144 };
3145
3146 /* Transform all deferred FMA candidates and mark STATE as no longer
3147    deferring.  */
3148
3149 static void
3150 cancel_fma_deferring (fma_deferring_state *state)
3151 {
3152   if (!state->m_deferring_p)
3153     return;
3154
3155   for (unsigned i = 0; i < state->m_candidates.length (); i++)
3156     {
3157       if (dump_file && (dump_flags & TDF_DETAILS))
3158         fprintf (dump_file, "Generating deferred FMA\n");
3159
3160       const fma_transformation_info &fti = state->m_candidates[i];
3161       convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
3162
3163       gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
3164       gsi_remove (&gsi, true);
3165       release_defs (fti.mul_stmt);
3166     }
3167   state->m_deferring_p = false;
3168 }
3169
3170 /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3171    Otherwise return NULL.  */
3172
3173 static gphi *
3174 result_of_phi (tree op)
3175 {
3176   if (TREE_CODE (op) != SSA_NAME)
3177     return NULL;
3178
3179   return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3180 }
3181
3182 /* After processing statements of a BB and recording STATE, return true if the
3183    initial phi is fed by the last FMA candidate result ore one such result from
3184    previously processed BBs marked in LAST_RESULT_SET.  */
3185
3186 static bool
3187 last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3188                                       hash_set<tree> *last_result_set)
3189 {
3190   ssa_op_iter iter;
3191   use_operand_p use;
3192   FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3193     {
3194       tree t = USE_FROM_PTR (use);
3195       if (t == state->m_last_result
3196           || last_result_set->contains (t))
3197         return true;
3198     }
3199
3200   return false;
3201 }
3202
3203 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3204    with uses in additions and subtractions to form fused multiply-add
3205    operations.  Returns true if successful and MUL_STMT should be removed.
3206    If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3207    on MUL_COND, otherwise it is unconditional.
3208
3209    If STATE indicates that we are deferring FMA transformation, that means
3210    that we do not produce FMAs for basic blocks which look like:
3211
3212     <bb 6>
3213     # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3214     _65 = _14 * _16;
3215     accumulator_66 = _65 + accumulator_111;
3216
3217   or its unrolled version, i.e. with several FMA candidates that feed result
3218   of one into the addend of another.  Instead, we add them to a list in STATE
3219   and if we later discover an FMA candidate that is not part of such a chain,
3220   we go back and perform all deferred past candidates.  */
3221
3222 static bool
3223 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3224                      fma_deferring_state *state, tree mul_cond = NULL_TREE)
3225 {
3226   tree mul_result = gimple_get_lhs (mul_stmt);
3227   tree type = TREE_TYPE (mul_result);
3228   gimple *use_stmt, *neguse_stmt;
3229   use_operand_p use_p;
3230   imm_use_iterator imm_iter;
3231
3232   if (FLOAT_TYPE_P (type)
3233       && flag_fp_contract_mode == FP_CONTRACT_OFF)
3234     return false;
3235
3236   /* We don't want to do bitfield reduction ops.  */
3237   if (INTEGRAL_TYPE_P (type)
3238       && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
3239     return false;
3240
3241   /* If the target doesn't support it, don't generate it.  We assume that
3242      if fma isn't available then fms, fnma or fnms are not either.  */
3243   optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt));
3244   if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
3245     return false;
3246
3247   /* If the multiplication has zero uses, it is kept around probably because
3248      of -fnon-call-exceptions.  Don't optimize it away in that case,
3249      it is DCE job.  */
3250   if (has_zero_uses (mul_result))
3251     return false;
3252
3253   bool check_defer
3254     = (state->m_deferring_p
3255        && maybe_le (tree_to_poly_int64 (TYPE_SIZE (type)),
3256                     param_avoid_fma_max_bits));
3257   bool defer = check_defer;
3258   bool seen_negate_p = false;
3259   /* Make sure that the multiplication statement becomes dead after
3260      the transformation, thus that all uses are transformed to FMAs.
3261      This means we assume that an FMA operation has the same cost
3262      as an addition.  */
3263   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3264     {
3265       tree result = mul_result;
3266       bool negate_p = false;
3267
3268       use_stmt = USE_STMT (use_p);
3269
3270       if (is_gimple_debug (use_stmt))
3271         continue;
3272
3273       /* For now restrict this operations to single basic blocks.  In theory
3274          we would want to support sinking the multiplication in
3275          m = a*b;
3276          if ()
3277            ma = m + c;
3278          else
3279            d = m;
3280          to form a fma in the then block and sink the multiplication to the
3281          else block.  */
3282       if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3283         return false;
3284
3285       /* A negate on the multiplication leads to FNMA.  */
3286       if (is_gimple_assign (use_stmt)
3287           && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3288         {
3289           ssa_op_iter iter;
3290           use_operand_p usep;
3291
3292           /* If (due to earlier missed optimizations) we have two
3293              negates of the same value, treat them as equivalent
3294              to a single negate with multiple uses.  */
3295           if (seen_negate_p)
3296             return false;
3297
3298           result = gimple_assign_lhs (use_stmt);
3299
3300           /* Make sure the negate statement becomes dead with this
3301              single transformation.  */
3302           if (!single_imm_use (gimple_assign_lhs (use_stmt),
3303                                &use_p, &neguse_stmt))
3304             return false;
3305
3306           /* Make sure the multiplication isn't also used on that stmt.  */
3307           FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3308             if (USE_FROM_PTR (usep) == mul_result)
3309               return false;
3310
3311           /* Re-validate.  */
3312           use_stmt = neguse_stmt;
3313           if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3314             return false;
3315
3316           negate_p = seen_negate_p = true;
3317         }
3318
3319       tree cond, else_value, ops[3];
3320       tree_code code;
3321       if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3322                                               &else_value))
3323         return false;
3324
3325       switch (code)
3326         {
3327         case MINUS_EXPR:
3328           if (ops[1] == result)
3329             negate_p = !negate_p;
3330           break;
3331         case PLUS_EXPR:
3332           break;
3333         default:
3334           /* FMA can only be formed from PLUS and MINUS.  */
3335           return false;
3336         }
3337
3338       if (mul_cond && cond != mul_cond)
3339         return false;
3340
3341       if (cond)
3342         {
3343           if (cond == result || else_value == result)
3344             return false;
3345           if (!direct_internal_fn_supported_p (IFN_COND_FMA, type, opt_type))
3346             return false;
3347         }
3348
3349       /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3350          we'll visit later, we might be able to get a more profitable
3351          match with fnma.
3352          OTOH, if we don't, a negate / fma pair has likely lower latency
3353          that a mult / subtract pair.  */
3354       if (code == MINUS_EXPR
3355           && !negate_p
3356           && ops[0] == result
3357           && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
3358           && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3359           && TREE_CODE (ops[1]) == SSA_NAME
3360           && has_single_use (ops[1]))
3361         {
3362           gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3363           if (is_gimple_assign (stmt2)
3364               && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3365             return false;
3366         }
3367
3368       /* We can't handle a * b + a * b.  */
3369       if (ops[0] == ops[1])
3370         return false;
3371       /* If deferring, make sure we are not looking at an instruction that
3372          wouldn't have existed if we were not.  */
3373       if (state->m_deferring_p
3374           && (state->m_mul_result_set.contains (ops[0])
3375               || state->m_mul_result_set.contains (ops[1])))
3376         return false;
3377
3378       if (check_defer)
3379         {
3380           tree use_lhs = gimple_get_lhs (use_stmt);
3381           if (state->m_last_result)
3382             {
3383               if (ops[1] == state->m_last_result
3384                   || ops[0] == state->m_last_result)
3385                 defer = true;
3386               else
3387                 defer = false;
3388             }
3389           else
3390             {
3391               gcc_checking_assert (!state->m_initial_phi);
3392               gphi *phi;
3393               if (ops[0] == result)
3394                 phi = result_of_phi (ops[1]);
3395               else
3396                 {
3397                   gcc_assert (ops[1] == result);
3398                   phi = result_of_phi (ops[0]);
3399                 }
3400
3401               if (phi)
3402                 {
3403                   state->m_initial_phi = phi;
3404                   defer = true;
3405                 }
3406               else
3407                 defer = false;
3408             }
3409
3410           state->m_last_result = use_lhs;
3411           check_defer = false;
3412         }
3413       else
3414         defer = false;
3415
3416       /* While it is possible to validate whether or not the exact form that
3417          we've recognized is available in the backend, the assumption is that
3418          if the deferring logic above did not trigger, the transformation is
3419          never a loss.  For instance, suppose the target only has the plain FMA
3420          pattern available.  Consider a*b-c -> fma(a,b,-c): we've exchanged
3421          MUL+SUB for FMA+NEG, which is still two operations.  Consider
3422          -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3423          form the two NEGs are independent and could be run in parallel.  */
3424     }
3425
3426   if (defer)
3427     {
3428       fma_transformation_info fti;
3429       fti.mul_stmt = mul_stmt;
3430       fti.mul_result = mul_result;
3431       fti.op1 = op1;
3432       fti.op2 = op2;
3433       state->m_candidates.safe_push (fti);
3434       state->m_mul_result_set.add (mul_result);
3435
3436       if (dump_file && (dump_flags & TDF_DETAILS))
3437         {
3438           fprintf (dump_file, "Deferred generating FMA for multiplication ");
3439           print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
3440           fprintf (dump_file, "\n");
3441         }
3442
3443       return false;
3444     }
3445   else
3446     {
3447       if (state->m_deferring_p)
3448         cancel_fma_deferring (state);
3449       convert_mult_to_fma_1 (mul_result, op1, op2);
3450       return true;
3451     }
3452 }
3453
3454
3455 /* Helper function of match_arith_overflow.  For MUL_OVERFLOW, if we have
3456    a check for non-zero like:
3457    _1 = x_4(D) * y_5(D);
3458    *res_7(D) = _1;
3459    if (x_4(D) != 0)
3460      goto <bb 3>; [50.00%]
3461    else
3462      goto <bb 4>; [50.00%]
3463
3464    <bb 3> [local count: 536870913]:
3465    _2 = _1 / x_4(D);
3466    _9 = _2 != y_5(D);
3467    _10 = (int) _9;
3468
3469    <bb 4> [local count: 1073741824]:
3470    # iftmp.0_3 = PHI <_10(3), 0(2)>
3471    then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
3472    optimize the x_4(D) != 0 condition to 1.  */
3473
3474 static void
3475 maybe_optimize_guarding_check (vec<gimple *> &mul_stmts, gimple *cond_stmt,
3476                                gimple *div_stmt, bool *cfg_changed)
3477 {
3478   basic_block bb = gimple_bb (cond_stmt);
3479   if (gimple_bb (div_stmt) != bb || !single_pred_p (bb))
3480     return;
3481   edge pred_edge = single_pred_edge (bb);
3482   basic_block pred_bb = pred_edge->src;
3483   if (EDGE_COUNT (pred_bb->succs) != 2)
3484     return;
3485   edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge);
3486   edge other_succ_edge = NULL;
3487   if (gimple_code (cond_stmt) == GIMPLE_COND)
3488     {
3489       if (EDGE_COUNT (bb->succs) != 2)
3490         return;
3491       other_succ_edge = EDGE_SUCC (bb, 0);
3492       if (gimple_cond_code (cond_stmt) == NE_EXPR)
3493         {
3494           if (other_succ_edge->flags & EDGE_TRUE_VALUE)
3495             other_succ_edge = EDGE_SUCC (bb, 1);
3496         }
3497       else if (other_succ_edge->flags & EDGE_FALSE_VALUE)
3498         other_succ_edge = EDGE_SUCC (bb, 0);
3499       if (other_edge->dest != other_succ_edge->dest)
3500         return;
3501     }
3502   else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb))
3503     return;
3504   gimple *zero_cond = last_stmt (pred_bb);
3505   if (zero_cond == NULL
3506       || gimple_code (zero_cond) != GIMPLE_COND
3507       || (gimple_cond_code (zero_cond)
3508           != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR))
3509       || !integer_zerop (gimple_cond_rhs (zero_cond)))
3510     return;
3511   tree zero_cond_lhs = gimple_cond_lhs (zero_cond);
3512   if (TREE_CODE (zero_cond_lhs) != SSA_NAME)
3513     return;
3514   if (gimple_assign_rhs2 (div_stmt) != zero_cond_lhs)
3515     {
3516       /* Allow the divisor to be result of a same precision cast
3517          from zero_cond_lhs.  */
3518       tree rhs2 = gimple_assign_rhs2 (div_stmt);
3519       if (TREE_CODE (rhs2) != SSA_NAME)
3520         return;
3521       gimple *g = SSA_NAME_DEF_STMT (rhs2);
3522       if (!gimple_assign_cast_p (g)
3523           || gimple_assign_rhs1 (g) != gimple_cond_lhs (zero_cond)
3524           || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs))
3525           || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs))
3526               != TYPE_PRECISION (TREE_TYPE (rhs2))))
3527         return;
3528     }
3529   gimple_stmt_iterator gsi = gsi_after_labels (bb);
3530   mul_stmts.quick_push (div_stmt);
3531   if (is_gimple_debug (gsi_stmt (gsi)))
3532     gsi_next_nondebug (&gsi);
3533   unsigned cast_count = 0;
3534   while (gsi_stmt (gsi) != cond_stmt)
3535     {
3536       /* If original mul_stmt has a single use, allow it in the same bb,
3537          we are looking then just at __builtin_mul_overflow_p.
3538          Though, in that case the original mul_stmt will be replaced
3539          by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts.  */
3540       gimple *mul_stmt;
3541       unsigned int i;
3542       bool ok = false;
3543       FOR_EACH_VEC_ELT (mul_stmts, i, mul_stmt)
3544         {
3545           if (gsi_stmt (gsi) == mul_stmt)
3546             {
3547               ok = true;
3548               break;
3549             }
3550         }
3551       if (!ok && gimple_assign_cast_p (gsi_stmt (gsi)) && ++cast_count < 4)
3552         ok = true;
3553       if (!ok)
3554         return;
3555       gsi_next_nondebug (&gsi);
3556     }
3557   if (gimple_code (cond_stmt) == GIMPLE_COND)
3558     {
3559       basic_block succ_bb = other_edge->dest;
3560       for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (gpi);
3561            gsi_next (&gpi))
3562         {
3563           gphi *phi = gpi.phi ();
3564           tree v1 = gimple_phi_arg_def (phi, other_edge->dest_idx);
3565           tree v2 = gimple_phi_arg_def (phi, other_succ_edge->dest_idx);
3566           if (!operand_equal_p (v1, v2, 0))
3567             return;
3568         }
3569     }
3570   else
3571     {
3572       tree lhs = gimple_assign_lhs (cond_stmt);
3573       if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
3574         return;
3575       gsi_next_nondebug (&gsi);
3576       if (!gsi_end_p (gsi))
3577         {
3578           if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3579             return;
3580           gimple *cast_stmt = gsi_stmt (gsi);
3581           if (!gimple_assign_cast_p (cast_stmt))
3582             return;
3583           tree new_lhs = gimple_assign_lhs (cast_stmt);
3584           gsi_next_nondebug (&gsi);
3585           if (!gsi_end_p (gsi)
3586               || !new_lhs
3587               || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs))
3588               || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1)
3589             return;
3590           lhs = new_lhs;
3591         }
3592       edge succ_edge = single_succ_edge (bb);
3593       basic_block succ_bb = succ_edge->dest;
3594       gsi = gsi_start_phis (succ_bb);
3595       if (gsi_end_p (gsi))
3596         return;
3597       gphi *phi = as_a <gphi *> (gsi_stmt (gsi));
3598       gsi_next (&gsi);
3599       if (!gsi_end_p (gsi))
3600         return;
3601       if (gimple_phi_arg_def (phi, succ_edge->dest_idx) != lhs)
3602         return;
3603       tree other_val = gimple_phi_arg_def (phi, other_edge->dest_idx);
3604       if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3605         {
3606           tree cond = gimple_assign_rhs1 (cond_stmt);
3607           if (TREE_CODE (cond) == NE_EXPR)
3608             {
3609               if (!operand_equal_p (other_val,
3610                                     gimple_assign_rhs3 (cond_stmt), 0))
3611                 return;
3612             }
3613           else if (!operand_equal_p (other_val,
3614                                      gimple_assign_rhs2 (cond_stmt), 0))
3615             return;
3616         }
3617       else if (gimple_assign_rhs_code (cond_stmt) == NE_EXPR)
3618         {
3619           if (!integer_zerop (other_val))
3620             return;
3621         }
3622       else if (!integer_onep (other_val))
3623         return;
3624     }
3625   gcond *zero_gcond = as_a <gcond *> (zero_cond);
3626   if (pred_edge->flags & EDGE_TRUE_VALUE)
3627     gimple_cond_make_true (zero_gcond);
3628   else
3629     gimple_cond_make_false (zero_gcond);
3630   update_stmt (zero_cond);
3631   *cfg_changed = true;
3632 }
3633
3634 /* Helper function for arith_overflow_check_p.  Return true
3635    if VAL1 is equal to VAL2 cast to corresponding integral type
3636    with other signedness or vice versa.  */
3637
3638 static bool
3639 arith_cast_equal_p (tree val1, tree val2)
3640 {
3641   if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
3642     return wi::eq_p (wi::to_wide (val1), wi::to_wide (val2));
3643   else if (TREE_CODE (val1) != SSA_NAME || TREE_CODE (val2) != SSA_NAME)
3644     return false;
3645   if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1))
3646       && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1)) == val2)
3647     return true;
3648   if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2))
3649       && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2)) == val1)
3650     return true;
3651   return false;
3652 }
3653
3654 /* Helper function of match_arith_overflow.  Return 1
3655    if USE_STMT is unsigned overflow check ovf != 0 for
3656    STMT, -1 if USE_STMT is unsigned overflow check ovf == 0
3657    and 0 otherwise.  */
3658
3659 static int
3660 arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
3661                         tree maxval, tree *other)
3662 {
3663   enum tree_code ccode = ERROR_MARK;
3664   tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3665   enum tree_code code = gimple_assign_rhs_code (stmt);
3666   tree lhs = gimple_assign_lhs (cast_stmt ? cast_stmt : stmt);
3667   tree rhs1 = gimple_assign_rhs1 (stmt);
3668   tree rhs2 = gimple_assign_rhs2 (stmt);
3669   tree multop = NULL_TREE, divlhs = NULL_TREE;
3670   gimple *cur_use_stmt = use_stmt;
3671
3672   if (code == MULT_EXPR)
3673     {
3674       if (!is_gimple_assign (use_stmt))
3675         return 0;
3676       if (gimple_assign_rhs_code (use_stmt) != TRUNC_DIV_EXPR)
3677         return 0;
3678       if (gimple_assign_rhs1 (use_stmt) != lhs)
3679         return 0;
3680       if (cast_stmt)
3681         {
3682           if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs1))
3683             multop = rhs2;
3684           else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs2))
3685             multop = rhs1;
3686           else
3687             return 0;
3688         }
3689       else if (gimple_assign_rhs2 (use_stmt) == rhs1)
3690         multop = rhs2;
3691       else if (operand_equal_p (gimple_assign_rhs2 (use_stmt), rhs2, 0))
3692         multop = rhs1;
3693       else
3694         return 0;
3695       if (stmt_ends_bb_p (use_stmt))
3696         return 0;
3697       divlhs = gimple_assign_lhs (use_stmt);
3698       if (!divlhs)
3699         return 0;
3700       use_operand_p use;
3701       if (!single_imm_use (divlhs, &use, &cur_use_stmt))
3702         return 0;
3703     }
3704   if (gimple_code (cur_use_stmt) == GIMPLE_COND)
3705     {
3706       ccode = gimple_cond_code (cur_use_stmt);
3707       crhs1 = gimple_cond_lhs (cur_use_stmt);
3708       crhs2 = gimple_cond_rhs (cur_use_stmt);
3709     }
3710   else if (is_gimple_assign (cur_use_stmt))
3711     {
3712       if (gimple_assign_rhs_class (cur_use_stmt) == GIMPLE_BINARY_RHS)
3713         {
3714           ccode = gimple_assign_rhs_code (cur_use_stmt);
3715           crhs1 = gimple_assign_rhs1 (cur_use_stmt);
3716           crhs2 = gimple_assign_rhs2 (cur_use_stmt);
3717         }
3718       else if (gimple_assign_rhs_code (cur_use_stmt) == COND_EXPR)
3719         {
3720           tree cond = gimple_assign_rhs1 (cur_use_stmt);
3721           if (COMPARISON_CLASS_P (cond))
3722             {
3723               ccode = TREE_CODE (cond);
3724               crhs1 = TREE_OPERAND (cond, 0);
3725               crhs2 = TREE_OPERAND (cond, 1);
3726             }
3727           else
3728             return 0;
3729         }
3730       else
3731         return 0;
3732     }
3733   else
3734     return 0;
3735
3736   if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3737     return 0;
3738
3739   switch (ccode)
3740     {
3741     case GT_EXPR:
3742     case LE_EXPR:
3743       if (maxval)
3744         {
3745           /* r = a + b; r > maxval or r <= maxval  */
3746           if (crhs1 == lhs
3747               && TREE_CODE (crhs2) == INTEGER_CST
3748               && tree_int_cst_equal (crhs2, maxval))
3749             return ccode == GT_EXPR ? 1 : -1;
3750           break;
3751         }
3752       /* r = a - b; r > a or r <= a
3753          r = a + b; a > r or a <= r or b > r or b <= r.  */
3754       if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3755           || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3756               && crhs2 == lhs))
3757         return ccode == GT_EXPR ? 1 : -1;
3758       /* r = ~a; b > r or b <= r.  */
3759       if (code == BIT_NOT_EXPR && crhs2 == lhs)
3760         {
3761           if (other)
3762             *other = crhs1;
3763           return ccode == GT_EXPR ? 1 : -1;
3764         }
3765       break;
3766     case LT_EXPR:
3767     case GE_EXPR:
3768       if (maxval)
3769         break;
3770       /* r = a - b; a < r or a >= r
3771          r = a + b; r < a or r >= a or r < b or r >= b.  */
3772       if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3773           || (code == PLUS_EXPR && crhs1 == lhs
3774               && (crhs2 == rhs1 || crhs2 == rhs2)))
3775         return ccode == LT_EXPR ? 1 : -1;
3776       /* r = ~a; r < b or r >= b.  */
3777       if (code == BIT_NOT_EXPR && crhs1 == lhs)
3778         {
3779           if (other)
3780             *other = crhs2;
3781           return ccode == LT_EXPR ? 1 : -1;
3782         }
3783       break;
3784     case EQ_EXPR:
3785     case NE_EXPR:
3786       /* r = a * b; _1 = r / a; _1 == b
3787          r = a * b; _1 = r / b; _1 == a
3788          r = a * b; _1 = r / a; _1 != b
3789          r = a * b; _1 = r / b; _1 != a.  */
3790       if (code == MULT_EXPR)
3791         {
3792           if (cast_stmt)
3793             {
3794               if ((crhs1 == divlhs && arith_cast_equal_p (crhs2, multop))
3795                   || (crhs2 == divlhs && arith_cast_equal_p (crhs1, multop)))
3796                 {
3797                   use_stmt = cur_use_stmt;
3798                   return ccode == NE_EXPR ? 1 : -1;
3799                 }
3800             }
3801           else if ((crhs1 == divlhs && operand_equal_p (crhs2, multop, 0))
3802                    || (crhs2 == divlhs && crhs1 == multop))
3803             {
3804               use_stmt = cur_use_stmt;
3805               return ccode == NE_EXPR ? 1 : -1;
3806             }
3807         }
3808       break;
3809     default:
3810       break;
3811     }
3812   return 0;
3813 }
3814
3815 /* Recognize for unsigned x
3816    x = y - z;
3817    if (x > y)
3818    where there are other uses of x and replace it with
3819    _7 = .SUB_OVERFLOW (y, z);
3820    x = REALPART_EXPR <_7>;
3821    _8 = IMAGPART_EXPR <_7>;
3822    if (_8)
3823    and similarly for addition.
3824
3825    Also recognize:
3826    yc = (type) y;
3827    zc = (type) z;
3828    x = yc + zc;
3829    if (x > max)
3830    where y and z have unsigned types with maximum max
3831    and there are other uses of x and all of those cast x
3832    back to that unsigned type and again replace it with
3833    _7 = .ADD_OVERFLOW (y, z);
3834    _9 = REALPART_EXPR <_7>;
3835    _8 = IMAGPART_EXPR <_7>;
3836    if (_8)
3837    and replace (utype) x with _9.
3838
3839    Also recognize:
3840    x = ~z;
3841    if (y > x)
3842    and replace it with
3843    _7 = .ADD_OVERFLOW (y, z);
3844    _8 = IMAGPART_EXPR <_7>;
3845    if (_8)
3846
3847    And also recognize:
3848    z = x * y;
3849    if (x != 0)
3850      goto <bb 3>; [50.00%]
3851    else
3852      goto <bb 4>; [50.00%]
3853
3854    <bb 3> [local count: 536870913]:
3855    _2 = z / x;
3856    _9 = _2 != y;
3857    _10 = (int) _9;
3858
3859    <bb 4> [local count: 1073741824]:
3860    # iftmp.0_3 = PHI <_10(3), 0(2)>
3861    and replace it with
3862    _7 = .MUL_OVERFLOW (x, y);
3863    z = IMAGPART_EXPR <_7>;
3864    _8 = IMAGPART_EXPR <_7>;
3865    _9 = _8 != 0;
3866    iftmp.0_3 = (int) _9;  */
3867
3868 static bool
3869 match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
3870                       enum tree_code code, bool *cfg_changed)
3871 {
3872   tree lhs = gimple_assign_lhs (stmt);
3873   tree type = TREE_TYPE (lhs);
3874   use_operand_p use_p;
3875   imm_use_iterator iter;
3876   bool use_seen = false;
3877   bool ovf_use_seen = false;
3878   gimple *use_stmt;
3879   gimple *add_stmt = NULL;
3880   bool add_first = false;
3881   gimple *cond_stmt = NULL;
3882   gimple *cast_stmt = NULL;
3883   tree cast_lhs = NULL_TREE;
3884
3885   gcc_checking_assert (code == PLUS_EXPR
3886                        || code == MINUS_EXPR
3887                        || code == MULT_EXPR
3888                        || code == BIT_NOT_EXPR);
3889   if (!INTEGRAL_TYPE_P (type)
3890       || !TYPE_UNSIGNED (type)
3891       || has_zero_uses (lhs)
3892       || (code != PLUS_EXPR
3893           && code != MULT_EXPR
3894           && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
3895                             TYPE_MODE (type)) == CODE_FOR_nothing))
3896     return false;
3897
3898   tree rhs1 = gimple_assign_rhs1 (stmt);
3899   tree rhs2 = gimple_assign_rhs2 (stmt);
3900   FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
3901     {
3902       use_stmt = USE_STMT (use_p);
3903       if (is_gimple_debug (use_stmt))
3904         continue;
3905
3906       tree other = NULL_TREE;
3907       if (arith_overflow_check_p (stmt, NULL, use_stmt, NULL_TREE, &other))
3908         {
3909           if (code == BIT_NOT_EXPR)
3910             {
3911               gcc_assert (other);
3912               if (TREE_CODE (other) != SSA_NAME)
3913                 return false;
3914               if (rhs2 == NULL)
3915                 rhs2 = other;
3916               else
3917                 return false;
3918               cond_stmt = use_stmt;
3919             }
3920           ovf_use_seen = true;
3921         }
3922       else
3923         {
3924           use_seen = true;
3925           if (code == MULT_EXPR
3926               && cast_stmt == NULL
3927               && gimple_assign_cast_p (use_stmt))
3928             {
3929               cast_lhs = gimple_assign_lhs (use_stmt);
3930               if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
3931                   && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
3932                   && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
3933                       == TYPE_PRECISION (TREE_TYPE (lhs))))
3934                 cast_stmt = use_stmt;
3935               else
3936                 cast_lhs = NULL_TREE;
3937             }
3938         }
3939       if (ovf_use_seen && use_seen)
3940         break;
3941     }
3942
3943   if (!ovf_use_seen
3944       && code == MULT_EXPR
3945       && cast_stmt)
3946     {
3947       if (TREE_CODE (rhs1) != SSA_NAME
3948           || (TREE_CODE (rhs2) != SSA_NAME && TREE_CODE (rhs2) != INTEGER_CST))
3949         return false;
3950       FOR_EACH_IMM_USE_FAST (use_p, iter, cast_lhs)
3951         {
3952           use_stmt = USE_STMT (use_p);
3953           if (is_gimple_debug (use_stmt))
3954             continue;
3955
3956           if (arith_overflow_check_p (stmt, cast_stmt, use_stmt,
3957                                       NULL_TREE, NULL))
3958             ovf_use_seen = true;
3959         }
3960     }
3961   else
3962     {
3963       cast_stmt = NULL;
3964       cast_lhs = NULL_TREE;
3965     }
3966
3967   tree maxval = NULL_TREE;
3968   if (!ovf_use_seen
3969       || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
3970       || (code == PLUS_EXPR
3971           && optab_handler (uaddv4_optab,
3972                             TYPE_MODE (type)) == CODE_FOR_nothing)
3973       || (code == MULT_EXPR
3974           && optab_handler (cast_stmt ? mulv4_optab : umulv4_optab,
3975                             TYPE_MODE (type)) == CODE_FOR_nothing))
3976     {
3977       if (code != PLUS_EXPR)
3978         return false;
3979       if (TREE_CODE (rhs1) != SSA_NAME
3980           || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1)))
3981         return false;
3982       rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1));
3983       tree type1 = TREE_TYPE (rhs1);
3984       if (!INTEGRAL_TYPE_P (type1)
3985           || !TYPE_UNSIGNED (type1)
3986           || TYPE_PRECISION (type1) >= TYPE_PRECISION (type)
3987           || (TYPE_PRECISION (type1)
3988               != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1))))
3989         return false;
3990       if (TREE_CODE (rhs2) == INTEGER_CST)
3991         {
3992           if (wi::ne_p (wi::rshift (wi::to_wide (rhs2),
3993                                     TYPE_PRECISION (type1),
3994                                     UNSIGNED), 0))
3995             return false;
3996           rhs2 = fold_convert (type1, rhs2);
3997         }
3998       else
3999         {
4000           if (TREE_CODE (rhs2) != SSA_NAME
4001               || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2)))
4002             return false;
4003           rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2));
4004           tree type2 = TREE_TYPE (rhs2);
4005           if (!INTEGRAL_TYPE_P (type2)
4006               || !TYPE_UNSIGNED (type2)
4007               || TYPE_PRECISION (type2) >= TYPE_PRECISION (type)
4008               || (TYPE_PRECISION (type2)
4009                   != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2))))
4010             return false;
4011         }
4012       if (TYPE_PRECISION (type1) >= TYPE_PRECISION (TREE_TYPE (rhs2)))
4013         type = type1;
4014       else
4015         type = TREE_TYPE (rhs2);
4016
4017       if (TREE_CODE (type) != INTEGER_TYPE
4018           || optab_handler (uaddv4_optab,
4019                             TYPE_MODE (type)) == CODE_FOR_nothing)
4020         return false;
4021
4022       maxval = wide_int_to_tree (type, wi::max_value (TYPE_PRECISION (type),
4023                                                       UNSIGNED));
4024       ovf_use_seen = false;
4025       use_seen = false;
4026       basic_block use_bb = NULL;
4027       FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4028         {
4029           use_stmt = USE_STMT (use_p);
4030           if (is_gimple_debug (use_stmt))
4031             continue;
4032
4033           if (arith_overflow_check_p (stmt, NULL, use_stmt, maxval, NULL))
4034             {
4035               ovf_use_seen = true;
4036               use_bb = gimple_bb (use_stmt);
4037             }
4038           else
4039             {
4040               if (!gimple_assign_cast_p (use_stmt)
4041                   || gimple_assign_rhs_code (use_stmt) == VIEW_CONVERT_EXPR)
4042                 return false;
4043               tree use_lhs = gimple_assign_lhs (use_stmt);
4044               if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
4045                   || (TYPE_PRECISION (TREE_TYPE (use_lhs))
4046                       > TYPE_PRECISION (type)))
4047                 return false;
4048               use_seen = true;
4049             }
4050         }
4051       if (!ovf_use_seen)
4052         return false;
4053       if (!useless_type_conversion_p (type, TREE_TYPE (rhs1)))
4054         {
4055           if (!use_seen)
4056             return false;
4057           tree new_rhs1 = make_ssa_name (type);
4058           gimple *g = gimple_build_assign (new_rhs1, NOP_EXPR, rhs1);
4059           gsi_insert_before (gsi, g, GSI_SAME_STMT);
4060           rhs1 = new_rhs1;
4061         }
4062       else if (!useless_type_conversion_p (type, TREE_TYPE (rhs2)))
4063         {
4064           if (!use_seen)
4065             return false;
4066           tree new_rhs2 = make_ssa_name (type);
4067           gimple *g = gimple_build_assign (new_rhs2, NOP_EXPR, rhs2);
4068           gsi_insert_before (gsi, g, GSI_SAME_STMT);
4069           rhs2 = new_rhs2;
4070         }
4071       else if (!use_seen)
4072         {
4073           /* If there are no uses of the wider addition, check if
4074              forwprop has not created a narrower addition.
4075              Require it to be in the same bb as the overflow check.  */
4076           FOR_EACH_IMM_USE_FAST (use_p, iter, rhs1)
4077             {
4078               use_stmt = USE_STMT (use_p);
4079               if (is_gimple_debug (use_stmt))
4080                 continue;
4081
4082               if (use_stmt == stmt)
4083                 continue;
4084
4085               if (!is_gimple_assign (use_stmt)
4086                   || gimple_bb (use_stmt) != use_bb
4087                   || gimple_assign_rhs_code (use_stmt) != PLUS_EXPR)
4088                 continue;
4089
4090               if (gimple_assign_rhs1 (use_stmt) == rhs1)
4091                 {
4092                   if (!operand_equal_p (gimple_assign_rhs2 (use_stmt),
4093                                         rhs2, 0))
4094                     continue;
4095                 }
4096               else if (gimple_assign_rhs2 (use_stmt) == rhs1)
4097                 {
4098                   if (gimple_assign_rhs1 (use_stmt) != rhs2)
4099                     continue;
4100                 }
4101               else
4102                 continue;
4103
4104               add_stmt = use_stmt;
4105               break;
4106             }
4107           if (add_stmt == NULL)
4108             return false;
4109
4110           /* If stmt and add_stmt are in the same bb, we need to find out
4111              which one is earlier.  If they are in different bbs, we've
4112              checked add_stmt is in the same bb as one of the uses of the
4113              stmt lhs, so stmt needs to dominate add_stmt too.  */
4114           if (gimple_bb (stmt) == gimple_bb (add_stmt))
4115             {
4116               gimple_stmt_iterator gsif = *gsi;
4117               gimple_stmt_iterator gsib = *gsi;
4118               int i;
4119               /* Search both forward and backward from stmt and have a small
4120                  upper bound.  */
4121               for (i = 0; i < 128; i++)
4122                 {
4123                   if (!gsi_end_p (gsib))
4124                     {
4125                       gsi_prev_nondebug (&gsib);
4126                       if (gsi_stmt (gsib) == add_stmt)
4127                         {
4128                           add_first = true;
4129                           break;
4130                         }
4131                     }
4132                   else if (gsi_end_p (gsif))
4133                     break;
4134                   if (!gsi_end_p (gsif))
4135                     {
4136                       gsi_next_nondebug (&gsif);
4137                       if (gsi_stmt (gsif) == add_stmt)
4138                         break;
4139                     }
4140                 }
4141               if (i == 128)
4142                 return false;
4143               if (add_first)
4144                 *gsi = gsi_for_stmt (add_stmt);
4145             }
4146         }
4147     }
4148
4149   if (code == BIT_NOT_EXPR)
4150     *gsi = gsi_for_stmt (cond_stmt);
4151
4152   auto_vec<gimple *, 8> mul_stmts;
4153   if (code == MULT_EXPR && cast_stmt)
4154     {
4155       type = TREE_TYPE (cast_lhs);
4156       gimple *g = SSA_NAME_DEF_STMT (rhs1);
4157       if (gimple_assign_cast_p (g)
4158           && useless_type_conversion_p (type,
4159                                         TREE_TYPE (gimple_assign_rhs1 (g)))
4160           && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4161         rhs1 = gimple_assign_rhs1 (g);
4162       else
4163         {
4164           g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs1);
4165           gsi_insert_before (gsi, g, GSI_SAME_STMT);
4166           rhs1 = gimple_assign_lhs (g);
4167           mul_stmts.quick_push (g);
4168         }
4169       if (TREE_CODE (rhs2) == INTEGER_CST)
4170         rhs2 = fold_convert (type, rhs2);
4171       else
4172         {
4173           g = SSA_NAME_DEF_STMT (rhs2);
4174           if (gimple_assign_cast_p (g)
4175               && useless_type_conversion_p (type,
4176                                             TREE_TYPE (gimple_assign_rhs1 (g)))
4177               && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4178             rhs2 = gimple_assign_rhs1 (g);
4179           else
4180             {
4181               g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs2);
4182               gsi_insert_before (gsi, g, GSI_SAME_STMT);
4183               rhs2 = gimple_assign_lhs (g);
4184               mul_stmts.quick_push (g);
4185             }
4186         }
4187     }
4188   tree ctype = build_complex_type (type);
4189   gcall *g = gimple_build_call_internal (code == MULT_EXPR
4190                                          ? IFN_MUL_OVERFLOW
4191                                          : code != MINUS_EXPR
4192                                          ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
4193                                          2, rhs1, rhs2);
4194   tree ctmp = make_ssa_name (ctype);
4195   gimple_call_set_lhs (g, ctmp);
4196   gsi_insert_before (gsi, g, GSI_SAME_STMT);
4197   tree new_lhs = (maxval || cast_stmt) ? make_ssa_name (type) : lhs;
4198   gassign *g2;
4199   if (code != BIT_NOT_EXPR)
4200     {
4201       g2 = gimple_build_assign (new_lhs, REALPART_EXPR,
4202                                 build1 (REALPART_EXPR, type, ctmp));
4203       if (maxval || cast_stmt)
4204         {
4205           gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4206           if (add_first)
4207             *gsi = gsi_for_stmt (stmt);
4208         }
4209       else
4210         gsi_replace (gsi, g2, true);
4211       if (code == MULT_EXPR)
4212         {
4213           mul_stmts.quick_push (g);
4214           mul_stmts.quick_push (g2);
4215           if (cast_stmt)
4216             {
4217               g2 = gimple_build_assign (lhs, NOP_EXPR, new_lhs);
4218               gsi_replace (gsi, g2, true);
4219               mul_stmts.quick_push (g2);
4220             }
4221         }
4222     }
4223   tree ovf = make_ssa_name (type);
4224   g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
4225                             build1 (IMAGPART_EXPR, type, ctmp));
4226   if (code != BIT_NOT_EXPR)
4227     gsi_insert_after (gsi, g2, GSI_NEW_STMT);
4228   else
4229     gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4230   if (code == MULT_EXPR)
4231     mul_stmts.quick_push (g2);
4232
4233   FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs)
4234     {
4235       if (is_gimple_debug (use_stmt))
4236         continue;
4237
4238       gimple *orig_use_stmt = use_stmt;
4239       int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4240                                             maxval, NULL);
4241       if (ovf_use == 0)
4242         {
4243           gcc_assert (code != BIT_NOT_EXPR);
4244           if (maxval)
4245             {
4246               tree use_lhs = gimple_assign_lhs (use_stmt);
4247               gimple_assign_set_rhs1 (use_stmt, new_lhs);
4248               if (useless_type_conversion_p (TREE_TYPE (use_lhs),
4249                                              TREE_TYPE (new_lhs)))
4250                 gimple_assign_set_rhs_code (use_stmt, SSA_NAME);
4251               update_stmt (use_stmt);
4252             }
4253           continue;
4254         }
4255       if (gimple_code (use_stmt) == GIMPLE_COND)
4256         {
4257           gcond *cond_stmt = as_a <gcond *> (use_stmt);
4258           gimple_cond_set_lhs (cond_stmt, ovf);
4259           gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
4260           gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4261         }
4262       else
4263         {
4264           gcc_checking_assert (is_gimple_assign (use_stmt));
4265           if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
4266             {
4267               gimple_assign_set_rhs1 (use_stmt, ovf);
4268               gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
4269               gimple_assign_set_rhs_code (use_stmt,
4270                                           ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4271             }
4272           else
4273             {
4274               gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
4275                                    == COND_EXPR);
4276               tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
4277                                   boolean_type_node, ovf,
4278                                   build_int_cst (type, 0));
4279               gimple_assign_set_rhs1 (use_stmt, cond);
4280             }
4281         }
4282       update_stmt (use_stmt);
4283       if (code == MULT_EXPR && use_stmt != orig_use_stmt)
4284         {
4285           gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt);
4286           maybe_optimize_guarding_check (mul_stmts, use_stmt, orig_use_stmt,
4287                                          cfg_changed);
4288           gsi_remove (&gsi2, true);
4289           release_ssa_name (gimple_assign_lhs (orig_use_stmt));
4290         }
4291     }
4292   if (maxval)
4293     {
4294       gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
4295       gsi_remove (&gsi2, true);
4296       if (add_stmt)
4297         {
4298           gimple *g = gimple_build_assign (gimple_assign_lhs (add_stmt),
4299                                            new_lhs);
4300           gsi2 = gsi_for_stmt (add_stmt);
4301           gsi_replace (&gsi2, g, true);
4302         }
4303     }
4304   else if (code == BIT_NOT_EXPR)
4305     {
4306       *gsi = gsi_for_stmt (stmt);
4307       gsi_remove (gsi, true);
4308       release_ssa_name (lhs);
4309       return true;
4310     }
4311   return false;
4312 }
4313
4314 /* Return true if target has support for divmod.  */
4315
4316 static bool
4317 target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
4318 {
4319   /* If target supports hardware divmod insn, use it for divmod.  */
4320   if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
4321     return true;
4322
4323   /* Check if libfunc for divmod is available.  */
4324   rtx libfunc = optab_libfunc (divmod_optab, mode);
4325   if (libfunc != NULL_RTX)
4326     {
4327       /* If optab_handler exists for div_optab, perhaps in a wider mode,
4328          we don't want to use the libfunc even if it exists for given mode.  */
4329       machine_mode div_mode;
4330       FOR_EACH_MODE_FROM (div_mode, mode)
4331         if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
4332           return false;
4333
4334       return targetm.expand_divmod_libfunc != NULL;
4335     }
4336
4337   return false;
4338 }
4339
4340 /* Check if stmt is candidate for divmod transform.  */
4341
4342 static bool
4343 divmod_candidate_p (gassign *stmt)
4344 {
4345   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
4346   machine_mode mode = TYPE_MODE (type);
4347   optab divmod_optab, div_optab;
4348
4349   if (TYPE_UNSIGNED (type))
4350     {
4351       divmod_optab = udivmod_optab;
4352       div_optab = udiv_optab;
4353     }
4354   else
4355     {
4356       divmod_optab = sdivmod_optab;
4357       div_optab = sdiv_optab;
4358     }
4359
4360   tree op1 = gimple_assign_rhs1 (stmt);
4361   tree op2 = gimple_assign_rhs2 (stmt);
4362
4363   /* Disable the transform if either is a constant, since division-by-constant
4364      may have specialized expansion.  */
4365   if (CONSTANT_CLASS_P (op1))
4366     return false;
4367
4368   if (CONSTANT_CLASS_P (op2))
4369     {
4370       if (integer_pow2p (op2))
4371         return false;
4372
4373       if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT
4374           && TYPE_PRECISION (type) <= BITS_PER_WORD)
4375         return false;
4376
4377       /* If the divisor is not power of 2 and the precision wider than
4378          HWI, expand_divmod punts on that, so in that case it is better
4379          to use divmod optab or libfunc.  Similarly if choose_multiplier
4380          might need pre/post shifts of BITS_PER_WORD or more.  */
4381     }
4382
4383   /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
4384      expand using the [su]divv optabs.  */
4385   if (TYPE_OVERFLOW_TRAPS (type))
4386     return false;
4387
4388   if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
4389     return false;
4390
4391   return true;
4392 }
4393
4394 /* This function looks for:
4395    t1 = a TRUNC_DIV_EXPR b;
4396    t2 = a TRUNC_MOD_EXPR b;
4397    and transforms it to the following sequence:
4398    complex_tmp = DIVMOD (a, b);
4399    t1 = REALPART_EXPR(a);
4400    t2 = IMAGPART_EXPR(b);
4401    For conditions enabling the transform see divmod_candidate_p().
4402
4403    The pass has three parts:
4404    1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
4405       other trunc_div_expr and trunc_mod_expr stmts.
4406    2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
4407       to stmts vector.
4408    3) Insert DIVMOD call just before top_stmt and update entries in
4409       stmts vector to use return value of DIMOVD (REALEXPR_PART for div,
4410       IMAGPART_EXPR for mod).  */
4411
4412 static bool
4413 convert_to_divmod (gassign *stmt)
4414 {
4415   if (stmt_can_throw_internal (cfun, stmt)
4416       || !divmod_candidate_p (stmt))
4417     return false;
4418
4419   tree op1 = gimple_assign_rhs1 (stmt);
4420   tree op2 = gimple_assign_rhs2 (stmt);
4421
4422   imm_use_iterator use_iter;
4423   gimple *use_stmt;
4424   auto_vec<gimple *> stmts;
4425
4426   gimple *top_stmt = stmt;
4427   basic_block top_bb = gimple_bb (stmt);
4428
4429   /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
4430      at-least stmt and possibly other trunc_div/trunc_mod stmts
4431      having same operands as stmt.  */
4432
4433   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
4434     {
4435       if (is_gimple_assign (use_stmt)
4436           && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4437               || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4438           && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
4439           && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
4440         {
4441           if (stmt_can_throw_internal (cfun, use_stmt))
4442             continue;
4443
4444           basic_block bb = gimple_bb (use_stmt);
4445
4446           if (bb == top_bb)
4447             {
4448               if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
4449                 top_stmt = use_stmt;
4450             }
4451           else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
4452             {
4453               top_bb = bb;
4454               top_stmt = use_stmt;
4455             }
4456         }
4457     }
4458
4459   tree top_op1 = gimple_assign_rhs1 (top_stmt);
4460   tree top_op2 = gimple_assign_rhs2 (top_stmt);
4461
4462   stmts.safe_push (top_stmt);
4463   bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
4464
4465   /* Part 2: Add all trunc_div/trunc_mod statements domianted by top_bb
4466      to stmts vector. The 2nd loop will always add stmt to stmts vector, since
4467      gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
4468      2nd loop ends up adding at-least single trunc_mod_expr stmt.  */
4469
4470   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
4471     {
4472       if (is_gimple_assign (use_stmt)
4473           && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4474               || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4475           && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
4476           && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
4477         {
4478           if (use_stmt == top_stmt
4479               || stmt_can_throw_internal (cfun, use_stmt)
4480               || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
4481             continue;
4482
4483           stmts.safe_push (use_stmt);
4484           if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
4485             div_seen = true;
4486         }
4487     }
4488
4489   if (!div_seen)
4490     return false;
4491
4492   /* Part 3: Create libcall to internal fn DIVMOD:
4493      divmod_tmp = DIVMOD (op1, op2).  */
4494
4495   gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
4496   tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
4497                                  call_stmt, "divmod_tmp");
4498   gimple_call_set_lhs (call_stmt, res);
4499   /* We rejected throwing statements above.  */
4500   gimple_call_set_nothrow (call_stmt, true);
4501
4502   /* Insert the call before top_stmt.  */
4503   gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
4504   gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
4505
4506   widen_mul_stats.divmod_calls_inserted++;
4507
4508   /* Update all statements in stmts vector:
4509      lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
4510      lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>.  */
4511
4512   for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
4513     {
4514       tree new_rhs;
4515
4516       switch (gimple_assign_rhs_code (use_stmt))
4517         {
4518           case TRUNC_DIV_EXPR:
4519             new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
4520             break;
4521
4522           case TRUNC_MOD_EXPR:
4523             new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
4524             break;
4525
4526           default:
4527             gcc_unreachable ();
4528         }
4529
4530       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
4531       gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
4532       update_stmt (use_stmt);
4533     }
4534
4535   return true;
4536 }
4537
4538 /* Find integer multiplications where the operands are extended from
4539    smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
4540    where appropriate.  */
4541
4542 namespace {
4543
4544 const pass_data pass_data_optimize_widening_mul =
4545 {
4546   GIMPLE_PASS, /* type */
4547   "widening_mul", /* name */
4548   OPTGROUP_NONE, /* optinfo_flags */
4549   TV_TREE_WIDEN_MUL, /* tv_id */
4550   PROP_ssa, /* properties_required */
4551   0, /* properties_provided */
4552   0, /* properties_destroyed */
4553   0, /* todo_flags_start */
4554   TODO_update_ssa, /* todo_flags_finish */
4555 };
4556
4557 class pass_optimize_widening_mul : public gimple_opt_pass
4558 {
4559 public:
4560   pass_optimize_widening_mul (gcc::context *ctxt)
4561     : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
4562   {}
4563
4564   /* opt_pass methods: */
4565   virtual bool gate (function *)
4566     {
4567       return flag_expensive_optimizations && optimize;
4568     }
4569
4570   virtual unsigned int execute (function *);
4571
4572 }; // class pass_optimize_widening_mul
4573
4574 /* Walker class to perform the transformation in reverse dominance order. */
4575
4576 class math_opts_dom_walker : public dom_walker
4577 {
4578 public:
4579   /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
4580      if walking modidifes the CFG.  */
4581
4582   math_opts_dom_walker (bool *cfg_changed_p)
4583     : dom_walker (CDI_DOMINATORS), m_last_result_set (),
4584       m_cfg_changed_p (cfg_changed_p) {}
4585
4586   /* The actual actions performed in the walk.  */
4587
4588   virtual void after_dom_children (basic_block);
4589
4590   /* Set of results of chains of multiply and add statement combinations that
4591      were not transformed into FMAs because of active deferring.  */
4592   hash_set<tree> m_last_result_set;
4593
4594   /* Pointer to a flag of the user that needs to be set if CFG has been
4595      modified.  */
4596   bool *m_cfg_changed_p;
4597 };
4598
4599 void
4600 math_opts_dom_walker::after_dom_children (basic_block bb)
4601 {
4602   gimple_stmt_iterator gsi;
4603
4604   fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
4605
4606   for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
4607     {
4608       gimple *stmt = gsi_stmt (gsi);
4609       enum tree_code code;
4610
4611       if (is_gimple_assign (stmt))
4612         {
4613           code = gimple_assign_rhs_code (stmt);
4614           switch (code)
4615             {
4616             case MULT_EXPR:
4617               if (!convert_mult_to_widen (stmt, &gsi)
4618                   && !convert_expand_mult_copysign (stmt, &gsi)
4619                   && convert_mult_to_fma (stmt,
4620                                           gimple_assign_rhs1 (stmt),
4621                                           gimple_assign_rhs2 (stmt),
4622                                           &fma_state))
4623                 {
4624                   gsi_remove (&gsi, true);
4625                   release_defs (stmt);
4626                   continue;
4627                 }
4628               match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
4629               break;
4630
4631             case PLUS_EXPR:
4632             case MINUS_EXPR:
4633               if (!convert_plusminus_to_widen (&gsi, stmt, code))
4634                 match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
4635               break;
4636
4637             case BIT_NOT_EXPR:
4638               if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
4639                 continue;
4640               break;
4641
4642             case TRUNC_MOD_EXPR:
4643               convert_to_divmod (as_a<gassign *> (stmt));
4644               break;
4645
4646             default:;
4647             }
4648         }
4649       else if (is_gimple_call (stmt))
4650         {
4651           switch (gimple_call_combined_fn (stmt))
4652             {
4653             CASE_CFN_POW:
4654               if (gimple_call_lhs (stmt)
4655                   && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
4656                   && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
4657                                  &dconst2)
4658                   && convert_mult_to_fma (stmt,
4659                                           gimple_call_arg (stmt, 0),
4660                                           gimple_call_arg (stmt, 0),
4661                                           &fma_state))
4662                 {
4663                   unlink_stmt_vdef (stmt);
4664                   if (gsi_remove (&gsi, true)
4665                       && gimple_purge_dead_eh_edges (bb))
4666                     *m_cfg_changed_p = true;
4667                   release_defs (stmt);
4668                   continue;
4669                 }
4670               break;
4671
4672             case CFN_COND_MUL:
4673               if (convert_mult_to_fma (stmt,
4674                                        gimple_call_arg (stmt, 1),
4675                                        gimple_call_arg (stmt, 2),
4676                                        &fma_state,
4677                                        gimple_call_arg (stmt, 0)))
4678
4679                 {
4680                   gsi_remove (&gsi, true);
4681                   release_defs (stmt);
4682                   continue;
4683                 }
4684               break;
4685
4686             case CFN_LAST:
4687               cancel_fma_deferring (&fma_state);
4688               break;
4689
4690             default:
4691               break;
4692             }
4693         }
4694       gsi_next (&gsi);
4695     }
4696   if (fma_state.m_deferring_p
4697       && fma_state.m_initial_phi)
4698     {
4699       gcc_checking_assert (fma_state.m_last_result);
4700       if (!last_fma_candidate_feeds_initial_phi (&fma_state,
4701                                                  &m_last_result_set))
4702         cancel_fma_deferring (&fma_state);
4703       else
4704         m_last_result_set.add (fma_state.m_last_result);
4705     }
4706 }
4707
4708
4709 unsigned int
4710 pass_optimize_widening_mul::execute (function *fun)
4711 {
4712   bool cfg_changed = false;
4713
4714   memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
4715   calculate_dominance_info (CDI_DOMINATORS);
4716   renumber_gimple_stmt_uids (cfun);
4717
4718   math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4719
4720   statistics_counter_event (fun, "widening multiplications inserted",
4721                             widen_mul_stats.widen_mults_inserted);
4722   statistics_counter_event (fun, "widening maccs inserted",
4723                             widen_mul_stats.maccs_inserted);
4724   statistics_counter_event (fun, "fused multiply-adds inserted",
4725                             widen_mul_stats.fmas_inserted);
4726   statistics_counter_event (fun, "divmod calls inserted",
4727                             widen_mul_stats.divmod_calls_inserted);
4728
4729   return cfg_changed ? TODO_cleanup_cfg : 0;
4730 }
4731
4732 } // anon namespace
4733
4734 gimple_opt_pass *
4735 make_pass_optimize_widening_mul (gcc::context *ctxt)
4736 {
4737   return new pass_optimize_widening_mul (ctxt);
4738 }