gcc/tree-ssa-math-opts.c

   1 /* Global, SSA-based optimizations using mathematical identities.
   2    Copyright (C) 2005-2018 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* Currently, the only mini-pass in this file tries to CSE reciprocal
  21    operations.  These are common in sequences such as this one:
  22
  23         modulus = sqrt(x*x + y*y + z*z);
  24         x = x / modulus;
  25         y = y / modulus;
  26         z = z / modulus;
  27
  28    that can be optimized to
  29
  30         modulus = sqrt(x*x + y*y + z*z);
  31         rmodulus = 1.0 / modulus;
  32         x = x * rmodulus;
  33         y = y * rmodulus;
  34         z = z * rmodulus;
  35
  36    We do this for loop invariant divisors, and with this pass whenever
  37    we notice that a division has the same divisor multiple times.
  38
  39    Of course, like in PRE, we don't insert a division if a dominator
  40    already has one.  However, this cannot be done as an extension of
  41    PRE for several reasons.
  42
  43    First of all, with some experiments it was found out that the
  44    transformation is not always useful if there are only two divisions
  45    by the same divisor.  This is probably because modern processors
  46    can pipeline the divisions; on older, in-order processors it should
  47    still be effective to optimize two divisions by the same number.
  48    We make this a param, and it shall be called N in the remainder of
  49    this comment.
  50
  51    Second, if trapping math is active, we have less freedom on where
  52    to insert divisions: we can only do so in basic blocks that already
  53    contain one.  (If divisions don't trap, instead, we can insert
  54    divisions elsewhere, which will be in blocks that are common dominators
  55    of those that have the division).
  56
  57    We really don't want to compute the reciprocal unless a division will
  58    be found.  To do this, we won't insert the division in a basic block
  59    that has less than N divisions *post-dominating* it.
  60
  61    The algorithm constructs a subset of the dominator tree, holding the
  62    blocks containing the divisions and the common dominators to them,
  63    and walk it twice.  The first walk is in post-order, and it annotates
  64    each block with the number of divisions that post-dominate it: this
  65    gives information on where divisions can be inserted profitably.
  66    The second walk is in pre-order, and it inserts divisions as explained
  67    above, and replaces divisions by multiplications.
  68
  69    In the best case, the cost of the pass is O(n_statements).  In the
  70    worst-case, the cost is due to creating the dominator tree subset,
  71    with a cost of O(n_basic_blocks ^ 2); however this can only happen
  72    for n_statements / n_basic_blocks statements.  So, the amortized cost
  73    of creating the dominator tree subset is O(n_basic_blocks) and the
  74    worst-case cost of the pass is O(n_statements * n_basic_blocks).
  75
  76    More practically, the cost will be small because there are few
  77    divisions, and they tend to be in the same basic block, so insert_bb
  78    is called very few times.
  79
  80    If we did this using domwalk.c, an efficient implementation would have
  81    to work on all the variables in a single pass, because we could not
  82    work on just a subset of the dominator tree, as we do now, and the
  83    cost would also be something like O(n_statements * n_basic_blocks).
  84    The data structures would be more complex in order to work on all the
  85    variables in a single pass.  */
  86
  87 #include "config.h"
  88 #include "system.h"
  89 #include "coretypes.h"
  90 #include "backend.h"
  91 #include "target.h"
  92 #include "rtl.h"
  93 #include "tree.h"
  94 #include "gimple.h"
  95 #include "predict.h"
  96 #include "alloc-pool.h"
  97 #include "tree-pass.h"
  98 #include "ssa.h"
  99 #include "optabs-tree.h"
 100 #include "gimple-pretty-print.h"
 101 #include "alias.h"
 102 #include "fold-const.h"
 103 #include "gimple-fold.h"
 104 #include "gimple-iterator.h"
 105 #include "gimplify.h"
 106 #include "gimplify-me.h"
 107 #include "stor-layout.h"
 108 #include "tree-cfg.h"
 109 #include "tree-dfa.h"
 110 #include "tree-ssa.h"
 111 #include "builtins.h"
 112 #include "params.h"
 113 #include "internal-fn.h"
 114 #include "case-cfn-macros.h"
 115 #include "optabs-libfuncs.h"
 116 #include "tree-eh.h"
 117 #include "targhooks.h"
 118
 119 /* This structure represents one basic block that either computes a
 120    division, or is a common dominator for basic block that compute a
 121    division.  */
 122 struct occurrence {
 123   /* The basic block represented by this structure.  */
 124   basic_block bb;
 125
 126   /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
 127      inserted in BB.  */
 128   tree recip_def;
 129
 130   /* If non-NULL, the SSA_NAME holding the definition for a squared
 131      reciprocal inserted in BB.  */
 132   tree square_recip_def;
 133
 134   /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
 135      was inserted in BB.  */
 136   gimple *recip_def_stmt;
 137
 138   /* Pointer to a list of "struct occurrence"s for blocks dominated
 139      by BB.  */
 140   struct occurrence *children;
 141
 142   /* Pointer to the next "struct occurrence"s in the list of blocks
 143      sharing a common dominator.  */
 144   struct occurrence *next;
 145
 146   /* The number of divisions that are in BB before compute_merit.  The
 147      number of divisions that are in BB or post-dominate it after
 148      compute_merit.  */
 149   int num_divisions;
 150
 151   /* True if the basic block has a division, false if it is a common
 152      dominator for basic blocks that do.  If it is false and trapping
 153      math is active, BB is not a candidate for inserting a reciprocal.  */
 154   bool bb_has_division;
 155 };
 156
 157 static struct
 158 {
 159   /* Number of 1.0/X ops inserted.  */
 160   int rdivs_inserted;
 161
 162   /* Number of 1.0/FUNC ops inserted.  */
 163   int rfuncs_inserted;
 164 } reciprocal_stats;
 165
 166 static struct
 167 {
 168   /* Number of cexpi calls inserted.  */
 169   int inserted;
 170 } sincos_stats;
 171
 172 static struct
 173 {
 174   /* Number of widening multiplication ops inserted.  */
 175   int widen_mults_inserted;
 176
 177   /* Number of integer multiply-and-accumulate ops inserted.  */
 178   int maccs_inserted;
 179
 180   /* Number of fp fused multiply-add ops inserted.  */
 181   int fmas_inserted;
 182
 183   /* Number of divmod calls inserted.  */
 184   int divmod_calls_inserted;
 185 } widen_mul_stats;
 186
 187 /* The instance of "struct occurrence" representing the highest
 188    interesting block in the dominator tree.  */
 189 static struct occurrence *occ_head;
 190
 191 /* Allocation pool for getting instances of "struct occurrence".  */
 192 static object_allocator<occurrence> *occ_pool;
 193
 194
 195
 196 /* Allocate and return a new struct occurrence for basic block BB, and
 197    whose children list is headed by CHILDREN.  */
 198 static struct occurrence *
 199 occ_new (basic_block bb, struct occurrence *children)
 200 {
 201   struct occurrence *occ;
 202
 203   bb->aux = occ = occ_pool->allocate ();
 204   memset (occ, 0, sizeof (struct occurrence));
 205
 206   occ->bb = bb;
 207   occ->children = children;
 208   return occ;
 209 }
 210
 211
 212 /* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
 213    list of "struct occurrence"s, one per basic block, having IDOM as
 214    their common dominator.
 215
 216    We try to insert NEW_OCC as deep as possible in the tree, and we also
 217    insert any other block that is a common dominator for BB and one
 218    block already in the tree.  */
 219
 220 static void
 221 insert_bb (struct occurrence *new_occ, basic_block idom,
 222            struct occurrence **p_head)
 223 {
 224   struct occurrence *occ, **p_occ;
 225
 226   for (p_occ = p_head; (occ = *p_occ) != NULL; )
 227     {
 228       basic_block bb = new_occ->bb, occ_bb = occ->bb;
 229       basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
 230       if (dom == bb)
 231         {
 232           /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
 233              from its list.  */
 234           *p_occ = occ->next;
 235           occ->next = new_occ->children;
 236           new_occ->children = occ;
 237
 238           /* Try the next block (it may as well be dominated by BB).  */
 239         }
 240
 241       else if (dom == occ_bb)
 242         {
 243           /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
 244           insert_bb (new_occ, dom, &occ->children);
 245           return;
 246         }
 247
 248       else if (dom != idom)
 249         {
 250           gcc_assert (!dom->aux);
 251
 252           /* There is a dominator between IDOM and BB, add it and make
 253              two children out of NEW_OCC and OCC.  First, remove OCC from
 254              its list.  */
 255           *p_occ = occ->next;
 256           new_occ->next = occ;
 257           occ->next = NULL;
 258
 259           /* None of the previous blocks has DOM as a dominator: if we tail
 260              recursed, we would reexamine them uselessly. Just switch BB with
 261              DOM, and go on looking for blocks dominated by DOM.  */
 262           new_occ = occ_new (dom, new_occ);
 263         }
 264
 265       else
 266         {
 267           /* Nothing special, go on with the next element.  */
 268           p_occ = &occ->next;
 269         }
 270     }
 271
 272   /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
 273   new_occ->next = *p_head;
 274   *p_head = new_occ;
 275 }
 276
 277 /* Register that we found a division in BB.
 278    IMPORTANCE is a measure of how much weighting to give
 279    that division.  Use IMPORTANCE = 2 to register a single
 280    division.  If the division is going to be found multiple
 281    times use 1 (as it is with squares).  */
 282
 283 static inline void
 284 register_division_in (basic_block bb, int importance)
 285 {
 286   struct occurrence *occ;
 287
 288   occ = (struct occurrence *) bb->aux;
 289   if (!occ)
 290     {
 291       occ = occ_new (bb, NULL);
 292       insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
 293     }
 294
 295   occ->bb_has_division = true;
 296   occ->num_divisions += importance;
 297 }
 298
 299
 300 /* Compute the number of divisions that postdominate each block in OCC and
 301    its children.  */
 302
 303 static void
 304 compute_merit (struct occurrence *occ)
 305 {
 306   struct occurrence *occ_child;
 307   basic_block dom = occ->bb;
 308
 309   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
 310     {
 311       basic_block bb;
 312       if (occ_child->children)
 313         compute_merit (occ_child);
 314
 315       if (flag_exceptions)
 316         bb = single_noncomplex_succ (dom);
 317       else
 318         bb = dom;
 319
 320       if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
 321         occ->num_divisions += occ_child->num_divisions;
 322     }
 323 }
 324
 325
 326 /* Return whether USE_STMT is a floating-point division by DEF.  */
 327 static inline bool
 328 is_division_by (gimple *use_stmt, tree def)
 329 {
 330   return is_gimple_assign (use_stmt)
 331          && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
 332          && gimple_assign_rhs2 (use_stmt) == def
 333          /* Do not recognize x / x as valid division, as we are getting
 334             confused later by replacing all immediate uses x in such
 335             a stmt.  */
 336          && gimple_assign_rhs1 (use_stmt) != def;
 337 }
 338
 339 /* Return whether USE_STMT is DEF * DEF.  */
 340 static inline bool
 341 is_square_of (gimple *use_stmt, tree def)
 342 {
 343   if (gimple_code (use_stmt) == GIMPLE_ASSIGN
 344       && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
 345     {
 346       tree op0 = gimple_assign_rhs1 (use_stmt);
 347       tree op1 = gimple_assign_rhs2 (use_stmt);
 348
 349       return op0 == op1 && op0 == def;
 350     }
 351   return 0;
 352 }
 353
 354 /* Return whether USE_STMT is a floating-point division by
 355    DEF * DEF.  */
 356 static inline bool
 357 is_division_by_square (gimple *use_stmt, tree def)
 358 {
 359   if (gimple_code (use_stmt) == GIMPLE_ASSIGN
 360       && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
 361       && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt))
 362     {
 363       tree denominator = gimple_assign_rhs2 (use_stmt);
 364       if (TREE_CODE (denominator) == SSA_NAME)
 365         {
 366           return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
 367         }
 368     }
 369   return 0;
 370 }
 371
 372 /* Walk the subset of the dominator tree rooted at OCC, setting the
 373    RECIP_DEF field to a definition of 1.0 / DEF that can be used in
 374    the given basic block.  The field may be left NULL, of course,
 375    if it is not possible or profitable to do the optimization.
 376
 377    DEF_BSI is an iterator pointing at the statement defining DEF.
 378    If RECIP_DEF is set, a dominator already has a computation that can
 379    be used.
 380
 381    If should_insert_square_recip is set, then this also inserts
 382    the square of the reciprocal immediately after the definition
 383    of the reciprocal.  */
 384
 385 static void
 386 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
 387                     tree def, tree recip_def, tree square_recip_def,
 388                     int should_insert_square_recip, int threshold)
 389 {
 390   tree type;
 391   gassign *new_stmt, *new_square_stmt;
 392   gimple_stmt_iterator gsi;
 393   struct occurrence *occ_child;
 394
 395   if (!recip_def
 396       && (occ->bb_has_division || !flag_trapping_math)
 397       /* Divide by two as all divisions are counted twice in
 398          the costing loop.  */
 399       && occ->num_divisions / 2 >= threshold)
 400     {
 401       /* Make a variable with the replacement and substitute it.  */
 402       type = TREE_TYPE (def);
 403       recip_def = create_tmp_reg (type, "reciptmp");
 404       new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
 405                                       build_one_cst (type), def);
 406
 407       if (should_insert_square_recip)
 408         {
 409           square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
 410           new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
 411                                                  recip_def, recip_def);
 412         }
 413
 414       if (occ->bb_has_division)
 415         {
 416           /* Case 1: insert before an existing division.  */
 417           gsi = gsi_after_labels (occ->bb);
 418           while (!gsi_end_p (gsi)
 419                  && (!is_division_by (gsi_stmt (gsi), def))
 420                  && (!is_division_by_square (gsi_stmt (gsi), def)))
 421             gsi_next (&gsi);
 422
 423           gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
 424         }
 425       else if (def_gsi && occ->bb == def_gsi->bb)
 426         {
 427           /* Case 2: insert right after the definition.  Note that this will
 428              never happen if the definition statement can throw, because in
 429              that case the sole successor of the statement's basic block will
 430              dominate all the uses as well.  */
 431           gsi = *def_gsi;
 432           gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
 433         }
 434       else
 435         {
 436           /* Case 3: insert in a basic block not containing defs/uses.  */
 437           gsi = gsi_after_labels (occ->bb);
 438           gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
 439         }
 440
 441       /* Regardless of which case the reciprocal as inserted in,
 442          we insert the square immediately after the reciprocal.  */
 443       if (should_insert_square_recip)
 444         gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
 445
 446       reciprocal_stats.rdivs_inserted++;
 447
 448       occ->recip_def_stmt = new_stmt;
 449     }
 450
 451   occ->recip_def = recip_def;
 452   occ->square_recip_def = square_recip_def;
 453   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
 454     insert_reciprocals (def_gsi, occ_child, def, recip_def,
 455                         square_recip_def, should_insert_square_recip,
 456                         threshold);
 457 }
 458
 459 /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
 460    Take as argument the use for (x * x).  */
 461 static inline void
 462 replace_reciprocal_squares (use_operand_p use_p)
 463 {
 464   gimple *use_stmt = USE_STMT (use_p);
 465   basic_block bb = gimple_bb (use_stmt);
 466   struct occurrence *occ = (struct occurrence *) bb->aux;
 467
 468   if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
 469       && occ->recip_def)
 470     {
 471       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
 472       gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
 473       gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
 474       SET_USE (use_p, occ->square_recip_def);
 475       fold_stmt_inplace (&gsi);
 476       update_stmt (use_stmt);
 477     }
 478 }
 479
 480
 481 /* Replace the division at USE_P with a multiplication by the reciprocal, if
 482    possible.  */
 483
 484 static inline void
 485 replace_reciprocal (use_operand_p use_p)
 486 {
 487   gimple *use_stmt = USE_STMT (use_p);
 488   basic_block bb = gimple_bb (use_stmt);
 489   struct occurrence *occ = (struct occurrence *) bb->aux;
 490
 491   if (optimize_bb_for_speed_p (bb)
 492       && occ->recip_def && use_stmt != occ->recip_def_stmt)
 493     {
 494       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
 495       gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
 496       SET_USE (use_p, occ->recip_def);
 497       fold_stmt_inplace (&gsi);
 498       update_stmt (use_stmt);
 499     }
 500 }
 501
 502
 503 /* Free OCC and return one more "struct occurrence" to be freed.  */
 504
 505 static struct occurrence *
 506 free_bb (struct occurrence *occ)
 507 {
 508   struct occurrence *child, *next;
 509
 510   /* First get the two pointers hanging off OCC.  */
 511   next = occ->next;
 512   child = occ->children;
 513   occ->bb->aux = NULL;
 514   occ_pool->remove (occ);
 515
 516   /* Now ensure that we don't recurse unless it is necessary.  */
 517   if (!child)
 518     return next;
 519   else
 520     {
 521       while (next)
 522         next = free_bb (next);
 523
 524       return child;
 525     }
 526 }
 527
 528
 529 /* Look for floating-point divisions among DEF's uses, and try to
 530    replace them by multiplications with the reciprocal.  Add
 531    as many statements computing the reciprocal as needed.
 532
 533    DEF must be a GIMPLE register of a floating-point type.  */
 534
 535 static void
 536 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
 537 {
 538   use_operand_p use_p, square_use_p;
 539   imm_use_iterator use_iter, square_use_iter;
 540   tree square_def;
 541   struct occurrence *occ;
 542   int count = 0;
 543   int threshold;
 544   int square_recip_count = 0;
 545   int sqrt_recip_count = 0;
 546
 547   gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
 548   threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
 549
 550   /* If DEF is a square (x * x), count the number of divisions by x.
 551      If there are more divisions by x than by (DEF * DEF), prefer to optimize
 552      the reciprocal of x instead of DEF.  This improves cases like:
 553        def = x * x
 554        t0 = a / def
 555        t1 = b / def
 556        t2 = c / x
 557      Reciprocal optimization of x results in 1 division rather than 2 or 3.  */
 558   gimple *def_stmt = SSA_NAME_DEF_STMT (def);
 559
 560   if (is_gimple_assign (def_stmt)
 561       && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
 562       && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
 563       && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
 564     {
 565       tree op0 = gimple_assign_rhs1 (def_stmt);
 566
 567       FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
 568         {
 569           gimple *use_stmt = USE_STMT (use_p);
 570           if (is_division_by (use_stmt, op0))
 571             sqrt_recip_count++;
 572         }
 573     }
 574
 575   FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
 576     {
 577       gimple *use_stmt = USE_STMT (use_p);
 578       if (is_division_by (use_stmt, def))
 579         {
 580           register_division_in (gimple_bb (use_stmt), 2);
 581           count++;
 582         }
 583
 584       if (is_square_of (use_stmt, def))
 585         {
 586           square_def = gimple_assign_lhs (use_stmt);
 587           FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
 588             {
 589               gimple *square_use_stmt = USE_STMT (square_use_p);
 590               if (is_division_by (square_use_stmt, square_def))
 591                 {
 592                   /* This is executed twice for each division by a square.  */
 593                   register_division_in (gimple_bb (square_use_stmt), 1);
 594                   square_recip_count++;
 595                 }
 596             }
 597         }
 598     }
 599
 600   /* Square reciprocals were counted twice above.  */
 601   square_recip_count /= 2;
 602
 603   /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x).  */
 604   if (sqrt_recip_count > square_recip_count)
 605     return;
 606
 607   /* Do the expensive part only if we can hope to optimize something.  */
 608   if (count + square_recip_count >= threshold && count >= 1)
 609     {
 610       gimple *use_stmt;
 611       for (occ = occ_head; occ; occ = occ->next)
 612         {
 613           compute_merit (occ);
 614           insert_reciprocals (def_gsi, occ, def, NULL, NULL,
 615                               square_recip_count, threshold);
 616         }
 617
 618       FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
 619         {
 620           if (is_division_by (use_stmt, def))
 621             {
 622               FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
 623                 replace_reciprocal (use_p);
 624             }
 625           else if (square_recip_count > 0 && is_square_of (use_stmt, def))
 626             {
 627               FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
 628                 {
 629                   /* Find all uses of the square that are divisions and
 630                    * replace them by multiplications with the inverse.  */
 631                   imm_use_iterator square_iterator;
 632                   gimple *powmult_use_stmt = USE_STMT (use_p);
 633                   tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
 634
 635                   FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
 636                                          square_iterator, powmult_def_name)
 637                     FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
 638                       {
 639                         gimple *powmult_use_stmt = USE_STMT (square_use_p);
 640                         if (is_division_by (powmult_use_stmt, powmult_def_name))
 641                           replace_reciprocal_squares (square_use_p);
 642                       }
 643                 }
 644             }
 645         }
 646     }
 647
 648   for (occ = occ_head; occ; )
 649     occ = free_bb (occ);
 650
 651   occ_head = NULL;
 652 }
 653
 654 /* Return an internal function that implements the reciprocal of CALL,
 655    or IFN_LAST if there is no such function that the target supports.  */
 656
 657 internal_fn
 658 internal_fn_reciprocal (gcall *call)
 659 {
 660   internal_fn ifn;
 661
 662   switch (gimple_call_combined_fn (call))
 663     {
 664     CASE_CFN_SQRT:
 665     CASE_CFN_SQRT_FN:
 666       ifn = IFN_RSQRT;
 667       break;
 668
 669     default:
 670       return IFN_LAST;
 671     }
 672
 673   tree_pair types = direct_internal_fn_types (ifn, call);
 674   if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
 675     return IFN_LAST;
 676
 677   return ifn;
 678 }
 679
 680 /* Go through all the floating-point SSA_NAMEs, and call
 681    execute_cse_reciprocals_1 on each of them.  */
 682 namespace {
 683
 684 const pass_data pass_data_cse_reciprocals =
 685 {
 686   GIMPLE_PASS, /* type */
 687   "recip", /* name */
 688   OPTGROUP_NONE, /* optinfo_flags */
 689   TV_TREE_RECIP, /* tv_id */
 690   PROP_ssa, /* properties_required */
 691   0, /* properties_provided */
 692   0, /* properties_destroyed */
 693   0, /* todo_flags_start */
 694   TODO_update_ssa, /* todo_flags_finish */
 695 };
 696
 697 class pass_cse_reciprocals : public gimple_opt_pass
 698 {
 699 public:
 700   pass_cse_reciprocals (gcc::context *ctxt)
 701     : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
 702   {}
 703
 704   /* opt_pass methods: */
 705   virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
 706   virtual unsigned int execute (function *);
 707
 708 }; // class pass_cse_reciprocals
 709
 710 unsigned int
 711 pass_cse_reciprocals::execute (function *fun)
 712 {
 713   basic_block bb;
 714   tree arg;
 715
 716   occ_pool = new object_allocator<occurrence> ("dominators for recip");
 717
 718   memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
 719   calculate_dominance_info (CDI_DOMINATORS);
 720   calculate_dominance_info (CDI_POST_DOMINATORS);
 721
 722   if (flag_checking)
 723     FOR_EACH_BB_FN (bb, fun)
 724       gcc_assert (!bb->aux);
 725
 726   for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
 727     if (FLOAT_TYPE_P (TREE_TYPE (arg))
 728         && is_gimple_reg (arg))
 729       {
 730         tree name = ssa_default_def (fun, arg);
 731         if (name)
 732           execute_cse_reciprocals_1 (NULL, name);
 733       }
 734
 735   FOR_EACH_BB_FN (bb, fun)
 736     {
 737       tree def;
 738
 739       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
 740            gsi_next (&gsi))
 741         {
 742           gphi *phi = gsi.phi ();
 743           def = PHI_RESULT (phi);
 744           if (! virtual_operand_p (def)
 745               && FLOAT_TYPE_P (TREE_TYPE (def)))
 746             execute_cse_reciprocals_1 (NULL, def);
 747         }
 748
 749       for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
 750            gsi_next (&gsi))
 751         {
 752           gimple *stmt = gsi_stmt (gsi);
 753
 754           if (gimple_has_lhs (stmt)
 755               && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
 756               && FLOAT_TYPE_P (TREE_TYPE (def))
 757               && TREE_CODE (def) == SSA_NAME)
 758             execute_cse_reciprocals_1 (&gsi, def);
 759         }
 760
 761       if (optimize_bb_for_size_p (bb))
 762         continue;
 763
 764       /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
 765       for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
 766            gsi_next (&gsi))
 767         {
 768           gimple *stmt = gsi_stmt (gsi);
 769
 770           if (is_gimple_assign (stmt)
 771               && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
 772             {
 773               tree arg1 = gimple_assign_rhs2 (stmt);
 774               gimple *stmt1;
 775
 776               if (TREE_CODE (arg1) != SSA_NAME)
 777                 continue;
 778
 779               stmt1 = SSA_NAME_DEF_STMT (arg1);
 780
 781               if (is_gimple_call (stmt1)
 782                   && gimple_call_lhs (stmt1))
 783                 {
 784                   bool fail;
 785                   imm_use_iterator ui;
 786                   use_operand_p use_p;
 787                   tree fndecl = NULL_TREE;
 788
 789                   gcall *call = as_a <gcall *> (stmt1);
 790                   internal_fn ifn = internal_fn_reciprocal (call);
 791                   if (ifn == IFN_LAST)
 792                     {
 793                       fndecl = gimple_call_fndecl (call);
 794                       if (!fndecl
 795                           || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_MD)
 796                         continue;
 797                       fndecl = targetm.builtin_reciprocal (fndecl);
 798                       if (!fndecl)
 799                         continue;
 800                     }
 801
 802                   /* Check that all uses of the SSA name are divisions,
 803                      otherwise replacing the defining statement will do
 804                      the wrong thing.  */
 805                   fail = false;
 806                   FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
 807                     {
 808                       gimple *stmt2 = USE_STMT (use_p);
 809                       if (is_gimple_debug (stmt2))
 810                         continue;
 811                       if (!is_gimple_assign (stmt2)
 812                           || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
 813                           || gimple_assign_rhs1 (stmt2) == arg1
 814                           || gimple_assign_rhs2 (stmt2) != arg1)
 815                         {
 816                           fail = true;
 817                           break;
 818                         }
 819                     }
 820                   if (fail)
 821                     continue;
 822
 823                   gimple_replace_ssa_lhs (call, arg1);
 824                   if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
 825                     {
 826                       auto_vec<tree, 4> args;
 827                       for (unsigned int i = 0;
 828                            i < gimple_call_num_args (call); i++)
 829                         args.safe_push (gimple_call_arg (call, i));
 830                       gcall *stmt2;
 831                       if (ifn == IFN_LAST)
 832                         stmt2 = gimple_build_call_vec (fndecl, args);
 833                       else
 834                         stmt2 = gimple_build_call_internal_vec (ifn, args);
 835                       gimple_call_set_lhs (stmt2, arg1);
 836                       if (gimple_vdef (call))
 837                         {
 838                           gimple_set_vdef (stmt2, gimple_vdef (call));
 839                           SSA_NAME_DEF_STMT (gimple_vdef (stmt2)) = stmt2;
 840                         }
 841                       gimple_call_set_nothrow (stmt2,
 842                                                gimple_call_nothrow_p (call));
 843                       gimple_set_vuse (stmt2, gimple_vuse (call));
 844                       gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
 845                       gsi_replace (&gsi2, stmt2, true);
 846                     }
 847                   else
 848                     {
 849                       if (ifn == IFN_LAST)
 850                         gimple_call_set_fndecl (call, fndecl);
 851                       else
 852                         gimple_call_set_internal_fn (call, ifn);
 853                       update_stmt (call);
 854                     }
 855                   reciprocal_stats.rfuncs_inserted++;
 856
 857                   FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
 858                     {
 859                       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
 860                       gimple_assign_set_rhs_code (stmt, MULT_EXPR);
 861                       fold_stmt_inplace (&gsi);
 862                       update_stmt (stmt);
 863                     }
 864                 }
 865             }
 866         }
 867     }
 868
 869   statistics_counter_event (fun, "reciprocal divs inserted",
 870                             reciprocal_stats.rdivs_inserted);
 871   statistics_counter_event (fun, "reciprocal functions inserted",
 872                             reciprocal_stats.rfuncs_inserted);
 873
 874   free_dominance_info (CDI_DOMINATORS);
 875   free_dominance_info (CDI_POST_DOMINATORS);
 876   delete occ_pool;
 877   return 0;
 878 }
 879
 880 } // anon namespace
 881
 882 gimple_opt_pass *
 883 make_pass_cse_reciprocals (gcc::context *ctxt)
 884 {
 885   return new pass_cse_reciprocals (ctxt);
 886 }
 887
 888 /* Records an occurrence at statement USE_STMT in the vector of trees
 889    STMTS if it is dominated by *TOP_BB or dominates it or this basic block
 890    is not yet initialized.  Returns true if the occurrence was pushed on
 891    the vector.  Adjusts *TOP_BB to be the basic block dominating all
 892    statements in the vector.  */
 893
 894 static bool
 895 maybe_record_sincos (vec<gimple *> *stmts,
 896                      basic_block *top_bb, gimple *use_stmt)
 897 {
 898   basic_block use_bb = gimple_bb (use_stmt);
 899   if (*top_bb
 900       && (*top_bb == use_bb
 901           || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
 902     stmts->safe_push (use_stmt);
 903   else if (!*top_bb
 904            || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
 905     {
 906       stmts->safe_push (use_stmt);
 907       *top_bb = use_bb;
 908     }
 909   else
 910     return false;
 911
 912   return true;
 913 }
 914
 915 /* Look for sin, cos and cexpi calls with the same argument NAME and
 916    create a single call to cexpi CSEing the result in this case.
 917    We first walk over all immediate uses of the argument collecting
 918    statements that we can CSE in a vector and in a second pass replace
 919    the statement rhs with a REALPART or IMAGPART expression on the
 920    result of the cexpi call we insert before the use statement that
 921    dominates all other candidates.  */
 922
 923 static bool
 924 execute_cse_sincos_1 (tree name)
 925 {
 926   gimple_stmt_iterator gsi;
 927   imm_use_iterator use_iter;
 928   tree fndecl, res, type;
 929   gimple *def_stmt, *use_stmt, *stmt;
 930   int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
 931   auto_vec<gimple *> stmts;
 932   basic_block top_bb = NULL;
 933   int i;
 934   bool cfg_changed = false;
 935
 936   type = TREE_TYPE (name);
 937   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
 938     {
 939       if (gimple_code (use_stmt) != GIMPLE_CALL
 940           || !gimple_call_lhs (use_stmt))
 941         continue;
 942
 943       switch (gimple_call_combined_fn (use_stmt))
 944         {
 945         CASE_CFN_COS:
 946           seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
 947           break;
 948
 949         CASE_CFN_SIN:
 950           seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
 951           break;
 952
 953         CASE_CFN_CEXPI:
 954           seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
 955           break;
 956
 957         default:;
 958         }
 959     }
 960
 961   if (seen_cos + seen_sin + seen_cexpi <= 1)
 962     return false;
 963
 964   /* Simply insert cexpi at the beginning of top_bb but not earlier than
 965      the name def statement.  */
 966   fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
 967   if (!fndecl)
 968     return false;
 969   stmt = gimple_build_call (fndecl, 1, name);
 970   res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
 971   gimple_call_set_lhs (stmt, res);
 972
 973   def_stmt = SSA_NAME_DEF_STMT (name);
 974   if (!SSA_NAME_IS_DEFAULT_DEF (name)
 975       && gimple_code (def_stmt) != GIMPLE_PHI
 976       && gimple_bb (def_stmt) == top_bb)
 977     {
 978       gsi = gsi_for_stmt (def_stmt);
 979       gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
 980     }
 981   else
 982     {
 983       gsi = gsi_after_labels (top_bb);
 984       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
 985     }
 986   sincos_stats.inserted++;
 987
 988   /* And adjust the recorded old call sites.  */
 989   for (i = 0; stmts.iterate (i, &use_stmt); ++i)
 990     {
 991       tree rhs = NULL;
 992
 993       switch (gimple_call_combined_fn (use_stmt))
 994         {
 995         CASE_CFN_COS:
 996           rhs = fold_build1 (REALPART_EXPR, type, res);
 997           break;
 998
 999         CASE_CFN_SIN:
1000           rhs = fold_build1 (IMAGPART_EXPR, type, res);
1001           break;
1002
1003         CASE_CFN_CEXPI:
1004           rhs = res;
1005           break;
1006
1007         default:;
1008           gcc_unreachable ();
1009         }
1010
1011         /* Replace call with a copy.  */
1012         stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
1013
1014         gsi = gsi_for_stmt (use_stmt);
1015         gsi_replace (&gsi, stmt, true);
1016         if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
1017           cfg_changed = true;
1018     }
1019
1020   return cfg_changed;
1021 }
1022
1023 /* To evaluate powi(x,n), the floating point value x raised to the
1024    constant integer exponent n, we use a hybrid algorithm that
1025    combines the "window method" with look-up tables.  For an
1026    introduction to exponentiation algorithms and "addition chains",
1027    see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1028    "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1029    3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1030    Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */
1031
1032 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
1033    multiplications to inline before calling the system library's pow
1034    function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1035    so this default never requires calling pow, powf or powl.  */
1036
1037 #ifndef POWI_MAX_MULTS
1038 #define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
1039 #endif
1040
1041 /* The size of the "optimal power tree" lookup table.  All
1042    exponents less than this value are simply looked up in the
1043    powi_table below.  This threshold is also used to size the
1044    cache of pseudo registers that hold intermediate results.  */
1045 #define POWI_TABLE_SIZE 256
1046
1047 /* The size, in bits of the window, used in the "window method"
1048    exponentiation algorithm.  This is equivalent to a radix of
1049    (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
1050 #define POWI_WINDOW_SIZE 3
1051
1052 /* The following table is an efficient representation of an
1053    "optimal power tree".  For each value, i, the corresponding
1054    value, j, in the table states than an optimal evaluation
1055    sequence for calculating pow(x,i) can be found by evaluating
1056    pow(x,j)*pow(x,i-j).  An optimal power tree for the first
1057    100 integers is given in Knuth's "Seminumerical algorithms".  */
1058
1059 static const unsigned char powi_table[POWI_TABLE_SIZE] =
1060   {
1061       0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
1062       4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
1063       8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
1064      12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
1065      16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
1066      20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
1067      24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
1068      28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
1069      32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
1070      36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
1071      40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
1072      44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
1073      48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
1074      52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
1075      56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
1076      60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
1077      64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
1078      68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
1079      72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
1080      76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
1081      80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
1082      84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
1083      88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
1084      92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
1085      96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
1086     100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
1087     104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
1088     108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
1089     112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
1090     116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
1091     120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
1092     124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
1093   };
1094
1095
1096 /* Return the number of multiplications required to calculate
1097    powi(x,n) where n is less than POWI_TABLE_SIZE.  This is a
1098    subroutine of powi_cost.  CACHE is an array indicating
1099    which exponents have already been calculated.  */
1100
1101 static int
1102 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1103 {
1104   /* If we've already calculated this exponent, then this evaluation
1105      doesn't require any additional multiplications.  */
1106   if (cache[n])
1107     return 0;
1108
1109   cache[n] = true;
1110   return powi_lookup_cost (n - powi_table[n], cache)
1111          + powi_lookup_cost (powi_table[n], cache) + 1;
1112 }
1113
1114 /* Return the number of multiplications required to calculate
1115    powi(x,n) for an arbitrary x, given the exponent N.  This
1116    function needs to be kept in sync with powi_as_mults below.  */
1117
1118 static int
1119 powi_cost (HOST_WIDE_INT n)
1120 {
1121   bool cache[POWI_TABLE_SIZE];
1122   unsigned HOST_WIDE_INT digit;
1123   unsigned HOST_WIDE_INT val;
1124   int result;
1125
1126   if (n == 0)
1127     return 0;
1128
1129   /* Ignore the reciprocal when calculating the cost.  */
1130   val = (n < 0) ? -n : n;
1131
1132   /* Initialize the exponent cache.  */
1133   memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
1134   cache[1] = true;
1135
1136   result = 0;
1137
1138   while (val >= POWI_TABLE_SIZE)
1139     {
1140       if (val & 1)
1141         {
1142           digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
1143           result += powi_lookup_cost (digit, cache)
1144                     + POWI_WINDOW_SIZE + 1;
1145           val >>= POWI_WINDOW_SIZE;
1146         }
1147       else
1148         {
1149           val >>= 1;
1150           result++;
1151         }
1152     }
1153
1154   return result + powi_lookup_cost (val, cache);
1155 }
1156
1157 /* Recursive subroutine of powi_as_mults.  This function takes the
1158    array, CACHE, of already calculated exponents and an exponent N and
1159    returns a tree that corresponds to CACHE[1]**N, with type TYPE.  */
1160
1161 static tree
1162 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1163                  HOST_WIDE_INT n, tree *cache)
1164 {
1165   tree op0, op1, ssa_target;
1166   unsigned HOST_WIDE_INT digit;
1167   gassign *mult_stmt;
1168
1169   if (n < POWI_TABLE_SIZE && cache[n])
1170     return cache[n];
1171
1172   ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1173
1174   if (n < POWI_TABLE_SIZE)
1175     {
1176       cache[n] = ssa_target;
1177       op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1178       op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1179     }
1180   else if (n & 1)
1181     {
1182       digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1183       op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1184       op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1185     }
1186   else
1187     {
1188       op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1189       op1 = op0;
1190     }
1191
1192   mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1193   gimple_set_location (mult_stmt, loc);
1194   gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1195
1196   return ssa_target;
1197 }
1198
1199 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1200    This function needs to be kept in sync with powi_cost above.  */
1201
1202 static tree
1203 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1204                tree arg0, HOST_WIDE_INT n)
1205 {
1206   tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1207   gassign *div_stmt;
1208   tree target;
1209
1210   if (n == 0)
1211     return build_real (type, dconst1);
1212
1213   memset (cache, 0,  sizeof (cache));
1214   cache[1] = arg0;
1215
1216   result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
1217   if (n >= 0)
1218     return result;
1219
1220   /* If the original exponent was negative, reciprocate the result.  */
1221   target = make_temp_ssa_name (type, NULL, "powmult");
1222   div_stmt = gimple_build_assign (target, RDIV_EXPR,
1223                                   build_real (type, dconst1), result);
1224   gimple_set_location (div_stmt, loc);
1225   gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1226
1227   return target;
1228 }
1229
1230 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1231    location info LOC.  If the arguments are appropriate, create an
1232    equivalent sequence of statements prior to GSI using an optimal
1233    number of multiplications, and return an expession holding the
1234    result.  */
1235
1236 static tree
1237 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1238                             tree arg0, HOST_WIDE_INT n)
1239 {
1240   /* Avoid largest negative number.  */
1241   if (n != -n
1242       && ((n >= -1 && n <= 2)
1243           || (optimize_function_for_speed_p (cfun)
1244               && powi_cost (n) <= POWI_MAX_MULTS)))
1245     return powi_as_mults (gsi, loc, arg0, n);
1246
1247   return NULL_TREE;
1248 }
1249
1250 /* Build a gimple call statement that calls FN with argument ARG.
1251    Set the lhs of the call statement to a fresh SSA name.  Insert the
1252    statement prior to GSI's current position, and return the fresh
1253    SSA name.  */
1254
1255 static tree
1256 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1257                        tree fn, tree arg)
1258 {
1259   gcall *call_stmt;
1260   tree ssa_target;
1261
1262   call_stmt = gimple_build_call (fn, 1, arg);
1263   ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1264   gimple_set_lhs (call_stmt, ssa_target);
1265   gimple_set_location (call_stmt, loc);
1266   gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1267
1268   return ssa_target;
1269 }
1270
1271 /* Build a gimple binary operation with the given CODE and arguments
1272    ARG0, ARG1, assigning the result to a new SSA name for variable
1273    TARGET.  Insert the statement prior to GSI's current position, and
1274    return the fresh SSA name.*/
1275
1276 static tree
1277 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1278                         const char *name, enum tree_code code,
1279                         tree arg0, tree arg1)
1280 {
1281   tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1282   gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1283   gimple_set_location (stmt, loc);
1284   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1285   return result;
1286 }
1287
1288 /* Build a gimple reference operation with the given CODE and argument
1289    ARG, assigning the result to a new SSA name of TYPE with NAME.
1290    Insert the statement prior to GSI's current position, and return
1291    the fresh SSA name.  */
1292
1293 static inline tree
1294 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1295                       const char *name, enum tree_code code, tree arg0)
1296 {
1297   tree result = make_temp_ssa_name (type, NULL, name);
1298   gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1299   gimple_set_location (stmt, loc);
1300   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1301   return result;
1302 }
1303
1304 /* Build a gimple assignment to cast VAL to TYPE.  Insert the statement
1305    prior to GSI's current position, and return the fresh SSA name.  */
1306
1307 static tree
1308 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1309                        tree type, tree val)
1310 {
1311   tree result = make_ssa_name (type);
1312   gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1313   gimple_set_location (stmt, loc);
1314   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1315   return result;
1316 }
1317
1318 struct pow_synth_sqrt_info
1319 {
1320   bool *factors;
1321   unsigned int deepest;
1322   unsigned int num_mults;
1323 };
1324
1325 /* Return true iff the real value C can be represented as a
1326    sum of powers of 0.5 up to N.  That is:
1327    C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1328    Record in INFO the various parameters of the synthesis algorithm such
1329    as the factors a[i], the maximum 0.5 power and the number of
1330    multiplications that will be required.  */
1331
1332 bool
1333 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1334                                  struct pow_synth_sqrt_info *info)
1335 {
1336   REAL_VALUE_TYPE factor = dconsthalf;
1337   REAL_VALUE_TYPE remainder = c;
1338
1339   info->deepest = 0;
1340   info->num_mults = 0;
1341   memset (info->factors, 0, n * sizeof (bool));
1342
1343   for (unsigned i = 0; i < n; i++)
1344     {
1345       REAL_VALUE_TYPE res;
1346
1347       /* If something inexact happened bail out now.  */
1348       if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1349         return false;
1350
1351       /* We have hit zero.  The number is representable as a sum
1352          of powers of 0.5.  */
1353       if (real_equal (&res, &dconst0))
1354         {
1355           info->factors[i] = true;
1356           info->deepest = i + 1;
1357           return true;
1358         }
1359       else if (!REAL_VALUE_NEGATIVE (res))
1360         {
1361           remainder = res;
1362           info->factors[i] = true;
1363           info->num_mults++;
1364         }
1365       else
1366         info->factors[i] = false;
1367
1368       real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1369     }
1370   return false;
1371 }
1372
1373 /* Return the tree corresponding to FN being applied
1374    to ARG N times at GSI and LOC.
1375    Look up previous results from CACHE if need be.
1376    cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times.  */
1377
1378 static tree
1379 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1380               tree fn, location_t loc, tree *cache)
1381 {
1382   tree res = cache[n];
1383   if (!res)
1384     {
1385       tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1386       res = build_and_insert_call (gsi, loc, fn, prev);
1387       cache[n] = res;
1388     }
1389
1390   return res;
1391 }
1392
1393 /* Print to STREAM the repeated application of function FNAME to ARG
1394    N times.  So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1395    "foo (foo (x))".  */
1396
1397 static void
1398 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1399                  unsigned int n)
1400 {
1401   if (n == 0)
1402     fprintf (stream, "%s", arg);
1403   else
1404     {
1405       fprintf (stream, "%s (", fname);
1406       print_nested_fn (stream, fname, arg, n - 1);
1407       fprintf (stream, ")");
1408     }
1409 }
1410
1411 /* Print to STREAM the fractional sequence of sqrt chains
1412    applied to ARG, described by INFO.  Used for the dump file.  */
1413
1414 static void
1415 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1416                                 struct pow_synth_sqrt_info *info)
1417 {
1418   for (unsigned int i = 0; i < info->deepest; i++)
1419     {
1420       bool is_set = info->factors[i];
1421       if (is_set)
1422         {
1423           print_nested_fn (stream, "sqrt", arg, i + 1);
1424           if (i != info->deepest - 1)
1425             fprintf (stream, " * ");
1426         }
1427     }
1428 }
1429
1430 /* Print to STREAM a representation of raising ARG to an integer
1431    power N.  Used for the dump file.  */
1432
1433 static void
1434 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1435 {
1436   if (n > 1)
1437     fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1438   else if (n == 1)
1439     fprintf (stream, "%s", arg);
1440 }
1441
1442 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1443    square roots.  Place at GSI and LOC.  Limit the maximum depth
1444    of the sqrt chains to MAX_DEPTH.  Return the tree holding the
1445    result of the expanded sequence or NULL_TREE if the expansion failed.
1446
1447    This routine assumes that ARG1 is a real number with a fractional part
1448    (the integer exponent case will have been handled earlier in
1449    gimple_expand_builtin_pow).
1450
1451    For ARG1 > 0.0:
1452    * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1453      FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1454                     FRAC_PART == ARG1 - WHOLE_PART:
1455      Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1456      POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1457      if it can be expressed as such, that is if FRAC_PART satisfies:
1458      FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1459      where integer a[i] is either 0 or 1.
1460
1461      Example:
1462      POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1463        --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1464
1465    For ARG1 < 0.0 there are two approaches:
1466    * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1467          is calculated as above.
1468
1469      Example:
1470      POW (x, -5.625) == 1.0 / POW (x, 5.625)
1471        -->  1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1472
1473    * (B) : WHOLE_PART := - ceil (abs (ARG1))
1474            FRAC_PART  := ARG1 - WHOLE_PART
1475      and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1476      Example:
1477      POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1478        --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1479
1480    For ARG1 < 0.0 we choose between (A) and (B) depending on
1481    how many multiplications we'd have to do.
1482    So, for the example in (B): POW (x, -5.875), if we were to
1483    follow algorithm (A) we would produce:
1484    1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1485    which contains more multiplications than approach (B).
1486
1487    Hopefully, this approach will eliminate potentially expensive POW library
1488    calls when unsafe floating point math is enabled and allow the compiler to
1489    further optimise the multiplies, square roots and divides produced by this
1490    function.  */
1491
1492 static tree
1493 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1494                      tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1495 {
1496   tree type = TREE_TYPE (arg0);
1497   machine_mode mode = TYPE_MODE (type);
1498   tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1499   bool one_over = true;
1500
1501   if (!sqrtfn)
1502     return NULL_TREE;
1503
1504   if (TREE_CODE (arg1) != REAL_CST)
1505     return NULL_TREE;
1506
1507   REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1508
1509   gcc_assert (max_depth > 0);
1510   tree *cache = XALLOCAVEC (tree, max_depth + 1);
1511
1512   struct pow_synth_sqrt_info synth_info;
1513   synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1514   synth_info.deepest = 0;
1515   synth_info.num_mults = 0;
1516
1517   bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1518   REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1519
1520   /* The whole and fractional parts of exp.  */
1521   REAL_VALUE_TYPE whole_part;
1522   REAL_VALUE_TYPE frac_part;
1523
1524   real_floor (&whole_part, mode, &exp);
1525   real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1526
1527
1528   REAL_VALUE_TYPE ceil_whole = dconst0;
1529   REAL_VALUE_TYPE ceil_fract = dconst0;
1530
1531   if (neg_exp)
1532     {
1533       real_ceil (&ceil_whole, mode, &exp);
1534       real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1535     }
1536
1537   if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1538     return NULL_TREE;
1539
1540   /* Check whether it's more profitable to not use 1.0 / ...  */
1541   if (neg_exp)
1542     {
1543       struct pow_synth_sqrt_info alt_synth_info;
1544       alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1545       alt_synth_info.deepest = 0;
1546       alt_synth_info.num_mults = 0;
1547
1548       if (representable_as_half_series_p (ceil_fract, max_depth,
1549                                            &alt_synth_info)
1550           && alt_synth_info.deepest <= synth_info.deepest
1551           && alt_synth_info.num_mults < synth_info.num_mults)
1552         {
1553           whole_part = ceil_whole;
1554           frac_part = ceil_fract;
1555           synth_info.deepest = alt_synth_info.deepest;
1556           synth_info.num_mults = alt_synth_info.num_mults;
1557           memcpy (synth_info.factors, alt_synth_info.factors,
1558                   (max_depth + 1) * sizeof (bool));
1559           one_over = false;
1560         }
1561     }
1562
1563   HOST_WIDE_INT n = real_to_integer (&whole_part);
1564   REAL_VALUE_TYPE cint;
1565   real_from_integer (&cint, VOIDmode, n, SIGNED);
1566
1567   if (!real_identical (&whole_part, &cint))
1568     return NULL_TREE;
1569
1570   if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1571     return NULL_TREE;
1572
1573   memset (cache, 0, (max_depth + 1) * sizeof (tree));
1574
1575   tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1576
1577   /* Calculate the integer part of the exponent.  */
1578   if (n > 1)
1579     {
1580       integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1581       if (!integer_res)
1582         return NULL_TREE;
1583     }
1584
1585   if (dump_file)
1586     {
1587       char string[64];
1588
1589       real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1590       fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1591
1592       if (neg_exp)
1593         {
1594           if (one_over)
1595             {
1596               fprintf (dump_file, "1.0 / (");
1597               dump_integer_part (dump_file, "x", n);
1598               if (n > 0)
1599                 fprintf (dump_file, " * ");
1600               dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1601               fprintf (dump_file, ")");
1602             }
1603           else
1604             {
1605               dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1606               fprintf (dump_file, " / (");
1607               dump_integer_part (dump_file, "x", n);
1608               fprintf (dump_file, ")");
1609             }
1610         }
1611       else
1612         {
1613           dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1614           if (n > 0)
1615             fprintf (dump_file, " * ");
1616           dump_integer_part (dump_file, "x", n);
1617         }
1618
1619       fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1620     }
1621
1622
1623   tree fract_res = NULL_TREE;
1624   cache[0] = arg0;
1625
1626   /* Calculate the fractional part of the exponent.  */
1627   for (unsigned i = 0; i < synth_info.deepest; i++)
1628     {
1629       if (synth_info.factors[i])
1630         {
1631           tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1632
1633           if (!fract_res)
1634               fract_res = sqrt_chain;
1635
1636           else
1637             fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1638                                            fract_res, sqrt_chain);
1639         }
1640     }
1641
1642   tree res = NULL_TREE;
1643
1644   if (neg_exp)
1645     {
1646       if (one_over)
1647         {
1648           if (n > 0)
1649             res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1650                                            fract_res, integer_res);
1651           else
1652             res = fract_res;
1653
1654           res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1655                                           build_real (type, dconst1), res);
1656         }
1657       else
1658         {
1659           res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1660                                          fract_res, integer_res);
1661         }
1662     }
1663   else
1664     res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1665                                    fract_res, integer_res);
1666   return res;
1667 }
1668
1669 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1670    with location info LOC.  If possible, create an equivalent and
1671    less expensive sequence of statements prior to GSI, and return an
1672    expession holding the result.  */
1673
1674 static tree
1675 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
1676                            tree arg0, tree arg1)
1677 {
1678   REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
1679   REAL_VALUE_TYPE c2, dconst3;
1680   HOST_WIDE_INT n;
1681   tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
1682   machine_mode mode;
1683   bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
1684   bool hw_sqrt_exists, c_is_int, c2_is_int;
1685
1686   dconst1_4 = dconst1;
1687   SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
1688
1689   /* If the exponent isn't a constant, there's nothing of interest
1690      to be done.  */
1691   if (TREE_CODE (arg1) != REAL_CST)
1692     return NULL_TREE;
1693
1694   /* Don't perform the operation if flag_signaling_nans is on
1695      and the operand is a signaling NaN.  */
1696   if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
1697       && ((TREE_CODE (arg0) == REAL_CST
1698            && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
1699           || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
1700     return NULL_TREE;
1701
1702   /* If the exponent is equivalent to an integer, expand to an optimal
1703      multiplication sequence when profitable.  */
1704   c = TREE_REAL_CST (arg1);
1705   n = real_to_integer (&c);
1706   real_from_integer (&cint, VOIDmode, n, SIGNED);
1707   c_is_int = real_identical (&c, &cint);
1708
1709   if (c_is_int
1710       && ((n >= -1 && n <= 2)
1711           || (flag_unsafe_math_optimizations
1712               && speed_p
1713               && powi_cost (n) <= POWI_MAX_MULTS)))
1714     return gimple_expand_builtin_powi (gsi, loc, arg0, n);
1715
1716   /* Attempt various optimizations using sqrt and cbrt.  */
1717   type = TREE_TYPE (arg0);
1718   mode = TYPE_MODE (type);
1719   sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1720
1721   /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
1722      unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
1723      sqrt(-0) = -0.  */
1724   if (sqrtfn
1725       && real_equal (&c, &dconsthalf)
1726       && !HONOR_SIGNED_ZEROS (mode))
1727     return build_and_insert_call (gsi, loc, sqrtfn, arg0);
1728
1729   hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
1730
1731   /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
1732      optimizations since 1./3. is not exactly representable.  If x
1733      is negative and finite, the correct value of pow(x,1./3.) is
1734      a NaN with the "invalid" exception raised, because the value
1735      of 1./3. actually has an even denominator.  The correct value
1736      of cbrt(x) is a negative real value.  */
1737   cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
1738   dconst1_3 = real_value_truncate (mode, dconst_third ());
1739
1740   if (flag_unsafe_math_optimizations
1741       && cbrtfn
1742       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1743       && real_equal (&c, &dconst1_3))
1744     return build_and_insert_call (gsi, loc, cbrtfn, arg0);
1745
1746   /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
1747      if we don't have a hardware sqrt insn.  */
1748   dconst1_6 = dconst1_3;
1749   SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
1750
1751   if (flag_unsafe_math_optimizations
1752       && sqrtfn
1753       && cbrtfn
1754       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1755       && speed_p
1756       && hw_sqrt_exists
1757       && real_equal (&c, &dconst1_6))
1758     {
1759       /* sqrt(x)  */
1760       sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
1761
1762       /* cbrt(sqrt(x))  */
1763       return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
1764     }
1765
1766
1767   /* Attempt to expand the POW as a product of square root chains.
1768      Expand the 0.25 case even when otpimising for size.  */
1769   if (flag_unsafe_math_optimizations
1770       && sqrtfn
1771       && hw_sqrt_exists
1772       && (speed_p || real_equal (&c, &dconst1_4))
1773       && !HONOR_SIGNED_ZEROS (mode))
1774     {
1775       unsigned int max_depth = speed_p
1776                                 ? PARAM_VALUE (PARAM_MAX_POW_SQRT_DEPTH)
1777                                 : 2;
1778
1779       tree expand_with_sqrts
1780         = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
1781
1782       if (expand_with_sqrts)
1783         return expand_with_sqrts;
1784     }
1785
1786   real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
1787   n = real_to_integer (&c2);
1788   real_from_integer (&cint, VOIDmode, n, SIGNED);
1789   c2_is_int = real_identical (&c2, &cint);
1790
1791   /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
1792
1793      powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
1794      1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.
1795
1796      Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
1797      different from pow(x, 1./3.) due to rounding and behavior with
1798      negative x, we need to constrain this transformation to unsafe
1799      math and positive x or finite math.  */
1800   real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
1801   real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
1802   real_round (&c2, mode, &c2);
1803   n = real_to_integer (&c2);
1804   real_from_integer (&cint, VOIDmode, n, SIGNED);
1805   real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
1806   real_convert (&c2, mode, &c2);
1807
1808   if (flag_unsafe_math_optimizations
1809       && cbrtfn
1810       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1811       && real_identical (&c2, &c)
1812       && !c2_is_int
1813       && optimize_function_for_speed_p (cfun)
1814       && powi_cost (n / 3) <= POWI_MAX_MULTS)
1815     {
1816       tree powi_x_ndiv3 = NULL_TREE;
1817
1818       /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
1819          possible or profitable, give up.  Skip the degenerate case when
1820          abs(n) < 3, where the result is always 1.  */
1821       if (absu_hwi (n) >= 3)
1822         {
1823           powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
1824                                                      abs_hwi (n / 3));
1825           if (!powi_x_ndiv3)
1826             return NULL_TREE;
1827         }
1828
1829       /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
1830          as that creates an unnecessary variable.  Instead, just produce
1831          either cbrt(x) or cbrt(x) * cbrt(x).  */
1832       cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
1833
1834       if (absu_hwi (n) % 3 == 1)
1835         powi_cbrt_x = cbrt_x;
1836       else
1837         powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1838                                               cbrt_x, cbrt_x);
1839
1840       /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
1841       if (absu_hwi (n) < 3)
1842         result = powi_cbrt_x;
1843       else
1844         result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1845                                          powi_x_ndiv3, powi_cbrt_x);
1846
1847       /* If n is negative, reciprocate the result.  */
1848       if (n < 0)
1849         result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1850                                          build_real (type, dconst1), result);
1851
1852       return result;
1853     }
1854
1855   /* No optimizations succeeded.  */
1856   return NULL_TREE;
1857 }
1858
1859 /* ARG is the argument to a cabs builtin call in GSI with location info
1860    LOC.  Create a sequence of statements prior to GSI that calculates
1861    sqrt(R*R + I*I), where R and I are the real and imaginary components
1862    of ARG, respectively.  Return an expression holding the result.  */
1863
1864 static tree
1865 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
1866 {
1867   tree real_part, imag_part, addend1, addend2, sum, result;
1868   tree type = TREE_TYPE (TREE_TYPE (arg));
1869   tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1870   machine_mode mode = TYPE_MODE (type);
1871
1872   if (!flag_unsafe_math_optimizations
1873       || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
1874       || !sqrtfn
1875       || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
1876     return NULL_TREE;
1877
1878   real_part = build_and_insert_ref (gsi, loc, type, "cabs",
1879                                     REALPART_EXPR, arg);
1880   addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1881                                     real_part, real_part);
1882   imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
1883                                     IMAGPART_EXPR, arg);
1884   addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1885                                     imag_part, imag_part);
1886   sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
1887   result = build_and_insert_call (gsi, loc, sqrtfn, sum);
1888
1889   return result;
1890 }
1891
1892 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
1893    on the SSA_NAME argument of each of them.  Also expand powi(x,n) into
1894    an optimal number of multiplies, when n is a constant.  */
1895
1896 namespace {
1897
1898 const pass_data pass_data_cse_sincos =
1899 {
1900   GIMPLE_PASS, /* type */
1901   "sincos", /* name */
1902   OPTGROUP_NONE, /* optinfo_flags */
1903   TV_TREE_SINCOS, /* tv_id */
1904   PROP_ssa, /* properties_required */
1905   PROP_gimple_opt_math, /* properties_provided */
1906   0, /* properties_destroyed */
1907   0, /* todo_flags_start */
1908   TODO_update_ssa, /* todo_flags_finish */
1909 };
1910
1911 class pass_cse_sincos : public gimple_opt_pass
1912 {
1913 public:
1914   pass_cse_sincos (gcc::context *ctxt)
1915     : gimple_opt_pass (pass_data_cse_sincos, ctxt)
1916   {}
1917
1918   /* opt_pass methods: */
1919   virtual bool gate (function *)
1920     {
1921       /* We no longer require either sincos or cexp, since powi expansion
1922          piggybacks on this pass.  */
1923       return optimize;
1924     }
1925
1926   virtual unsigned int execute (function *);
1927
1928 }; // class pass_cse_sincos
1929
1930 unsigned int
1931 pass_cse_sincos::execute (function *fun)
1932 {
1933   basic_block bb;
1934   bool cfg_changed = false;
1935
1936   calculate_dominance_info (CDI_DOMINATORS);
1937   memset (&sincos_stats, 0, sizeof (sincos_stats));
1938
1939   FOR_EACH_BB_FN (bb, fun)
1940     {
1941       gimple_stmt_iterator gsi;
1942       bool cleanup_eh = false;
1943
1944       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1945         {
1946           gimple *stmt = gsi_stmt (gsi);
1947
1948           /* Only the last stmt in a bb could throw, no need to call
1949              gimple_purge_dead_eh_edges if we change something in the middle
1950              of a basic block.  */
1951           cleanup_eh = false;
1952
1953           if (is_gimple_call (stmt)
1954               && gimple_call_lhs (stmt))
1955             {
1956               tree arg, arg0, arg1, result;
1957               HOST_WIDE_INT n;
1958               location_t loc;
1959
1960               switch (gimple_call_combined_fn (stmt))
1961                 {
1962                 CASE_CFN_COS:
1963                 CASE_CFN_SIN:
1964                 CASE_CFN_CEXPI:
1965                   /* Make sure we have either sincos or cexp.  */
1966                   if (!targetm.libc_has_function (function_c99_math_complex)
1967                       && !targetm.libc_has_function (function_sincos))
1968                     break;
1969
1970                   arg = gimple_call_arg (stmt, 0);
1971                   if (TREE_CODE (arg) == SSA_NAME)
1972                     cfg_changed |= execute_cse_sincos_1 (arg);
1973                   break;
1974
1975                 CASE_CFN_POW:
1976                   arg0 = gimple_call_arg (stmt, 0);
1977                   arg1 = gimple_call_arg (stmt, 1);
1978
1979                   loc = gimple_location (stmt);
1980                   result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
1981
1982                   if (result)
1983                     {
1984                       tree lhs = gimple_get_lhs (stmt);
1985                       gassign *new_stmt = gimple_build_assign (lhs, result);
1986                       gimple_set_location (new_stmt, loc);
1987                       unlink_stmt_vdef (stmt);
1988                       gsi_replace (&gsi, new_stmt, true);
1989                       cleanup_eh = true;
1990                       if (gimple_vdef (stmt))
1991                         release_ssa_name (gimple_vdef (stmt));
1992                     }
1993                   break;
1994
1995                 CASE_CFN_POWI:
1996                   arg0 = gimple_call_arg (stmt, 0);
1997                   arg1 = gimple_call_arg (stmt, 1);
1998                   loc = gimple_location (stmt);
1999
2000                   if (real_minus_onep (arg0))
2001                     {
2002                       tree t0, t1, cond, one, minus_one;
2003                       gassign *stmt;
2004
2005                       t0 = TREE_TYPE (arg0);
2006                       t1 = TREE_TYPE (arg1);
2007                       one = build_real (t0, dconst1);
2008                       minus_one = build_real (t0, dconstm1);
2009
2010                       cond = make_temp_ssa_name (t1, NULL, "powi_cond");
2011                       stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2012                                                   arg1, build_int_cst (t1, 1));
2013                       gimple_set_location (stmt, loc);
2014                       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2015
2016                       result = make_temp_ssa_name (t0, NULL, "powi");
2017                       stmt = gimple_build_assign (result, COND_EXPR, cond,
2018                                                   minus_one, one);
2019                       gimple_set_location (stmt, loc);
2020                       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2021                     }
2022                   else
2023                     {
2024                       if (!tree_fits_shwi_p (arg1))
2025                         break;
2026
2027                       n = tree_to_shwi (arg1);
2028                       result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
2029                     }
2030
2031                   if (result)
2032                     {
2033                       tree lhs = gimple_get_lhs (stmt);
2034                       gassign *new_stmt = gimple_build_assign (lhs, result);
2035                       gimple_set_location (new_stmt, loc);
2036                       unlink_stmt_vdef (stmt);
2037                       gsi_replace (&gsi, new_stmt, true);
2038                       cleanup_eh = true;
2039                       if (gimple_vdef (stmt))
2040                         release_ssa_name (gimple_vdef (stmt));
2041                     }
2042                   break;
2043
2044                 CASE_CFN_CABS:
2045                   arg0 = gimple_call_arg (stmt, 0);
2046                   loc = gimple_location (stmt);
2047                   result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
2048
2049                   if (result)
2050                     {
2051                       tree lhs = gimple_get_lhs (stmt);
2052                       gassign *new_stmt = gimple_build_assign (lhs, result);
2053                       gimple_set_location (new_stmt, loc);
2054                       unlink_stmt_vdef (stmt);
2055                       gsi_replace (&gsi, new_stmt, true);
2056                       cleanup_eh = true;
2057                       if (gimple_vdef (stmt))
2058                         release_ssa_name (gimple_vdef (stmt));
2059                     }
2060                   break;
2061
2062                 default:;
2063                 }
2064             }
2065         }
2066       if (cleanup_eh)
2067         cfg_changed |= gimple_purge_dead_eh_edges (bb);
2068     }
2069
2070   statistics_counter_event (fun, "sincos statements inserted",
2071                             sincos_stats.inserted);
2072
2073   return cfg_changed ? TODO_cleanup_cfg : 0;
2074 }
2075
2076 } // anon namespace
2077
2078 gimple_opt_pass *
2079 make_pass_cse_sincos (gcc::context *ctxt)
2080 {
2081   return new pass_cse_sincos (ctxt);
2082 }
2083
2084 /* Return true if stmt is a type conversion operation that can be stripped
2085    when used in a widening multiply operation.  */
2086 static bool
2087 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2088 {
2089   enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2090
2091   if (TREE_CODE (result_type) == INTEGER_TYPE)
2092     {
2093       tree op_type;
2094       tree inner_op_type;
2095
2096       if (!CONVERT_EXPR_CODE_P (rhs_code))
2097         return false;
2098
2099       op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2100
2101       /* If the type of OP has the same precision as the result, then
2102          we can strip this conversion.  The multiply operation will be
2103          selected to create the correct extension as a by-product.  */
2104       if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2105         return true;
2106
2107       /* We can also strip a conversion if it preserves the signed-ness of
2108          the operation and doesn't narrow the range.  */
2109       inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2110
2111       /* If the inner-most type is unsigned, then we can strip any
2112          intermediate widening operation.  If it's signed, then the
2113          intermediate widening operation must also be signed.  */
2114       if ((TYPE_UNSIGNED (inner_op_type)
2115            || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2116           && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2117         return true;
2118
2119       return false;
2120     }
2121
2122   return rhs_code == FIXED_CONVERT_EXPR;
2123 }
2124
2125 /* Return true if RHS is a suitable operand for a widening multiplication,
2126    assuming a target type of TYPE.
2127    There are two cases:
2128
2129      - RHS makes some value at least twice as wide.  Store that value
2130        in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2131
2132      - RHS is an integer constant.  Store that value in *NEW_RHS_OUT if so,
2133        but leave *TYPE_OUT untouched.  */
2134
2135 static bool
2136 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2137                         tree *new_rhs_out)
2138 {
2139   gimple *stmt;
2140   tree type1, rhs1;
2141
2142   if (TREE_CODE (rhs) == SSA_NAME)
2143     {
2144       stmt = SSA_NAME_DEF_STMT (rhs);
2145       if (is_gimple_assign (stmt))
2146         {
2147           if (! widening_mult_conversion_strippable_p (type, stmt))
2148             rhs1 = rhs;
2149           else
2150             {
2151               rhs1 = gimple_assign_rhs1 (stmt);
2152
2153               if (TREE_CODE (rhs1) == INTEGER_CST)
2154                 {
2155                   *new_rhs_out = rhs1;
2156                   *type_out = NULL;
2157                   return true;
2158                 }
2159             }
2160         }
2161       else
2162         rhs1 = rhs;
2163
2164       type1 = TREE_TYPE (rhs1);
2165
2166       if (TREE_CODE (type1) != TREE_CODE (type)
2167           || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2168         return false;
2169
2170       *new_rhs_out = rhs1;
2171       *type_out = type1;
2172       return true;
2173     }
2174
2175   if (TREE_CODE (rhs) == INTEGER_CST)
2176     {
2177       *new_rhs_out = rhs;
2178       *type_out = NULL;
2179       return true;
2180     }
2181
2182   return false;
2183 }
2184
2185 /* Return true if STMT performs a widening multiplication, assuming the
2186    output type is TYPE.  If so, store the unwidened types of the operands
2187    in *TYPE1_OUT and *TYPE2_OUT respectively.  Also fill *RHS1_OUT and
2188    *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2189    and *TYPE2_OUT would give the operands of the multiplication.  */
2190
2191 static bool
2192 is_widening_mult_p (gimple *stmt,
2193                     tree *type1_out, tree *rhs1_out,
2194                     tree *type2_out, tree *rhs2_out)
2195 {
2196   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2197
2198   if (TREE_CODE (type) == INTEGER_TYPE)
2199     {
2200       if (TYPE_OVERFLOW_TRAPS (type))
2201         return false;
2202     }
2203   else if (TREE_CODE (type) != FIXED_POINT_TYPE)
2204     return false;
2205
2206   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2207                                rhs1_out))
2208     return false;
2209
2210   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2211                                rhs2_out))
2212     return false;
2213
2214   if (*type1_out == NULL)
2215     {
2216       if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2217         return false;
2218       *type1_out = *type2_out;
2219     }
2220
2221   if (*type2_out == NULL)
2222     {
2223       if (!int_fits_type_p (*rhs2_out, *type1_out))
2224         return false;
2225       *type2_out = *type1_out;
2226     }
2227
2228   /* Ensure that the larger of the two operands comes first. */
2229   if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2230     {
2231       std::swap (*type1_out, *type2_out);
2232       std::swap (*rhs1_out, *rhs2_out);
2233     }
2234
2235   return true;
2236 }
2237
2238 /* Check to see if the CALL statement is an invocation of copysign
2239    with 1. being the first argument.  */
2240 static bool
2241 is_copysign_call_with_1 (gimple *call)
2242 {
2243   gcall *c = dyn_cast <gcall *> (call);
2244   if (! c)
2245     return false;
2246
2247   enum combined_fn code = gimple_call_combined_fn (c);
2248
2249   if (code == CFN_LAST)
2250     return false;
2251
2252   if (builtin_fn_p (code))
2253     {
2254       switch (as_builtin_fn (code))
2255         {
2256         CASE_FLT_FN (BUILT_IN_COPYSIGN):
2257         CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2258           return real_onep (gimple_call_arg (c, 0));
2259         default:
2260           return false;
2261         }
2262     }
2263
2264   if (internal_fn_p (code))
2265     {
2266       switch (as_internal_fn (code))
2267         {
2268         case IFN_COPYSIGN:
2269           return real_onep (gimple_call_arg (c, 0));
2270         default:
2271           return false;
2272         }
2273     }
2274
2275    return false;
2276 }
2277
2278 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2279    This only happens when the the xorsign optab is defined, if the
2280    pattern is not a xorsign pattern or if expansion fails FALSE is
2281    returned, otherwise TRUE is returned.  */
2282 static bool
2283 convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2284 {
2285   tree treeop0, treeop1, lhs, type;
2286   location_t loc = gimple_location (stmt);
2287   lhs = gimple_assign_lhs (stmt);
2288   treeop0 = gimple_assign_rhs1 (stmt);
2289   treeop1 = gimple_assign_rhs2 (stmt);
2290   type = TREE_TYPE (lhs);
2291   machine_mode mode = TYPE_MODE (type);
2292
2293   if (HONOR_SNANS (type))
2294     return false;
2295
2296   if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2297     {
2298       gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2299       if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
2300         {
2301           call0 = SSA_NAME_DEF_STMT (treeop1);
2302           if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2303             return false;
2304
2305           treeop1 = treeop0;
2306         }
2307         if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2308           return false;
2309
2310         gcall *c = as_a<gcall*> (call0);
2311         treeop0 = gimple_call_arg (c, 1);
2312
2313         gcall *call_stmt
2314           = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2315         gimple_set_lhs (call_stmt, lhs);
2316         gimple_set_location (call_stmt, loc);
2317         gsi_replace (gsi, call_stmt, true);
2318         return true;
2319     }
2320
2321   return false;
2322 }
2323
2324 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2325    its rhs, and try to convert it into a WIDEN_MULT_EXPR.  The return
2326    value is true iff we converted the statement.  */
2327
2328 static bool
2329 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2330 {
2331   tree lhs, rhs1, rhs2, type, type1, type2;
2332   enum insn_code handler;
2333   scalar_int_mode to_mode, from_mode, actual_mode;
2334   optab op;
2335   int actual_precision;
2336   location_t loc = gimple_location (stmt);
2337   bool from_unsigned1, from_unsigned2;
2338
2339   lhs = gimple_assign_lhs (stmt);
2340   type = TREE_TYPE (lhs);
2341   if (TREE_CODE (type) != INTEGER_TYPE)
2342     return false;
2343
2344   if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2345     return false;
2346
2347   to_mode = SCALAR_INT_TYPE_MODE (type);
2348   from_mode = SCALAR_INT_TYPE_MODE (type1);
2349   if (to_mode == from_mode)
2350     return false;
2351
2352   from_unsigned1 = TYPE_UNSIGNED (type1);
2353   from_unsigned2 = TYPE_UNSIGNED (type2);
2354
2355   if (from_unsigned1 && from_unsigned2)
2356     op = umul_widen_optab;
2357   else if (!from_unsigned1 && !from_unsigned2)
2358     op = smul_widen_optab;
2359   else
2360     op = usmul_widen_optab;
2361
2362   handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2363                                                   &actual_mode);
2364
2365   if (handler == CODE_FOR_nothing)
2366     {
2367       if (op != smul_widen_optab)
2368         {
2369           /* We can use a signed multiply with unsigned types as long as
2370              there is a wider mode to use, or it is the smaller of the two
2371              types that is unsigned.  Note that type1 >= type2, always.  */
2372           if ((TYPE_UNSIGNED (type1)
2373                && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2374               || (TYPE_UNSIGNED (type2)
2375                   && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2376             {
2377               if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2378                   || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2379                 return false;
2380             }
2381
2382           op = smul_widen_optab;
2383           handler = find_widening_optab_handler_and_mode (op, to_mode,
2384                                                           from_mode,
2385                                                           &actual_mode);
2386
2387           if (handler == CODE_FOR_nothing)
2388             return false;
2389
2390           from_unsigned1 = from_unsigned2 = false;
2391         }
2392       else
2393         return false;
2394     }
2395
2396   /* Ensure that the inputs to the handler are in the correct precison
2397      for the opcode.  This will be the full mode size.  */
2398   actual_precision = GET_MODE_PRECISION (actual_mode);
2399   if (2 * actual_precision > TYPE_PRECISION (type))
2400     return false;
2401   if (actual_precision != TYPE_PRECISION (type1)
2402       || from_unsigned1 != TYPE_UNSIGNED (type1))
2403     rhs1 = build_and_insert_cast (gsi, loc,
2404                                   build_nonstandard_integer_type
2405                                     (actual_precision, from_unsigned1), rhs1);
2406   if (actual_precision != TYPE_PRECISION (type2)
2407       || from_unsigned2 != TYPE_UNSIGNED (type2))
2408     rhs2 = build_and_insert_cast (gsi, loc,
2409                                   build_nonstandard_integer_type
2410                                     (actual_precision, from_unsigned2), rhs2);
2411
2412   /* Handle constants.  */
2413   if (TREE_CODE (rhs1) == INTEGER_CST)
2414     rhs1 = fold_convert (type1, rhs1);
2415   if (TREE_CODE (rhs2) == INTEGER_CST)
2416     rhs2 = fold_convert (type2, rhs2);
2417
2418   gimple_assign_set_rhs1 (stmt, rhs1);
2419   gimple_assign_set_rhs2 (stmt, rhs2);
2420   gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2421   update_stmt (stmt);
2422   widen_mul_stats.widen_mults_inserted++;
2423   return true;
2424 }
2425
2426 /* Process a single gimple statement STMT, which is found at the
2427    iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
2428    rhs (given by CODE), and try to convert it into a
2429    WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
2430    is true iff we converted the statement.  */
2431
2432 static bool
2433 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2434                             enum tree_code code)
2435 {
2436   gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2437   gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2438   tree type, type1, type2, optype;
2439   tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2440   enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2441   optab this_optab;
2442   enum tree_code wmult_code;
2443   enum insn_code handler;
2444   scalar_mode to_mode, from_mode, actual_mode;
2445   location_t loc = gimple_location (stmt);
2446   int actual_precision;
2447   bool from_unsigned1, from_unsigned2;
2448
2449   lhs = gimple_assign_lhs (stmt);
2450   type = TREE_TYPE (lhs);
2451   if (TREE_CODE (type) != INTEGER_TYPE
2452       && TREE_CODE (type) != FIXED_POINT_TYPE)
2453     return false;
2454
2455   if (code == MINUS_EXPR)
2456     wmult_code = WIDEN_MULT_MINUS_EXPR;
2457   else
2458     wmult_code = WIDEN_MULT_PLUS_EXPR;
2459
2460   rhs1 = gimple_assign_rhs1 (stmt);
2461   rhs2 = gimple_assign_rhs2 (stmt);
2462
2463   if (TREE_CODE (rhs1) == SSA_NAME)
2464     {
2465       rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2466       if (is_gimple_assign (rhs1_stmt))
2467         rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2468     }
2469
2470   if (TREE_CODE (rhs2) == SSA_NAME)
2471     {
2472       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2473       if (is_gimple_assign (rhs2_stmt))
2474         rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2475     }
2476
2477   /* Allow for one conversion statement between the multiply
2478      and addition/subtraction statement.  If there are more than
2479      one conversions then we assume they would invalidate this
2480      transformation.  If that's not the case then they should have
2481      been folded before now.  */
2482   if (CONVERT_EXPR_CODE_P (rhs1_code))
2483     {
2484       conv1_stmt = rhs1_stmt;
2485       rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2486       if (TREE_CODE (rhs1) == SSA_NAME)
2487         {
2488           rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2489           if (is_gimple_assign (rhs1_stmt))
2490             rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2491         }
2492       else
2493         return false;
2494     }
2495   if (CONVERT_EXPR_CODE_P (rhs2_code))
2496     {
2497       conv2_stmt = rhs2_stmt;
2498       rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2499       if (TREE_CODE (rhs2) == SSA_NAME)
2500         {
2501           rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2502           if (is_gimple_assign (rhs2_stmt))
2503             rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2504         }
2505       else
2506         return false;
2507     }
2508
2509   /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2510      is_widening_mult_p, but we still need the rhs returns.
2511
2512      It might also appear that it would be sufficient to use the existing
2513      operands of the widening multiply, but that would limit the choice of
2514      multiply-and-accumulate instructions.
2515
2516      If the widened-multiplication result has more than one uses, it is
2517      probably wiser not to do the conversion.  */
2518   if (code == PLUS_EXPR
2519       && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2520     {
2521       if (!has_single_use (rhs1)
2522           || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2523                                   &type2, &mult_rhs2))
2524         return false;
2525       add_rhs = rhs2;
2526       conv_stmt = conv1_stmt;
2527     }
2528   else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2529     {
2530       if (!has_single_use (rhs2)
2531           || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2532                                   &type2, &mult_rhs2))
2533         return false;
2534       add_rhs = rhs1;
2535       conv_stmt = conv2_stmt;
2536     }
2537   else
2538     return false;
2539
2540   to_mode = SCALAR_TYPE_MODE (type);
2541   from_mode = SCALAR_TYPE_MODE (type1);
2542   if (to_mode == from_mode)
2543     return false;
2544
2545   from_unsigned1 = TYPE_UNSIGNED (type1);
2546   from_unsigned2 = TYPE_UNSIGNED (type2);
2547   optype = type1;
2548
2549   /* There's no such thing as a mixed sign madd yet, so use a wider mode.  */
2550   if (from_unsigned1 != from_unsigned2)
2551     {
2552       if (!INTEGRAL_TYPE_P (type))
2553         return false;
2554       /* We can use a signed multiply with unsigned types as long as
2555          there is a wider mode to use, or it is the smaller of the two
2556          types that is unsigned.  Note that type1 >= type2, always.  */
2557       if ((from_unsigned1
2558            && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2559           || (from_unsigned2
2560               && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2561         {
2562           if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2563               || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2564             return false;
2565         }
2566
2567       from_unsigned1 = from_unsigned2 = false;
2568       optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2569                                                false);
2570     }
2571
2572   /* If there was a conversion between the multiply and addition
2573      then we need to make sure it fits a multiply-and-accumulate.
2574      The should be a single mode change which does not change the
2575      value.  */
2576   if (conv_stmt)
2577     {
2578       /* We use the original, unmodified data types for this.  */
2579       tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
2580       tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
2581       int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
2582       bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
2583
2584       if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
2585         {
2586           /* Conversion is a truncate.  */
2587           if (TYPE_PRECISION (to_type) < data_size)
2588             return false;
2589         }
2590       else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
2591         {
2592           /* Conversion is an extend.  Check it's the right sort.  */
2593           if (TYPE_UNSIGNED (from_type) != is_unsigned
2594               && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
2595             return false;
2596         }
2597       /* else convert is a no-op for our purposes.  */
2598     }
2599
2600   /* Verify that the machine can perform a widening multiply
2601      accumulate in this mode/signedness combination, otherwise
2602      this transformation is likely to pessimize code.  */
2603   this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
2604   handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
2605                                                   from_mode, &actual_mode);
2606
2607   if (handler == CODE_FOR_nothing)
2608     return false;
2609
2610   /* Ensure that the inputs to the handler are in the correct precison
2611      for the opcode.  This will be the full mode size.  */
2612   actual_precision = GET_MODE_PRECISION (actual_mode);
2613   if (actual_precision != TYPE_PRECISION (type1)
2614       || from_unsigned1 != TYPE_UNSIGNED (type1))
2615     mult_rhs1 = build_and_insert_cast (gsi, loc,
2616                                        build_nonstandard_integer_type
2617                                          (actual_precision, from_unsigned1),
2618                                        mult_rhs1);
2619   if (actual_precision != TYPE_PRECISION (type2)
2620       || from_unsigned2 != TYPE_UNSIGNED (type2))
2621     mult_rhs2 = build_and_insert_cast (gsi, loc,
2622                                        build_nonstandard_integer_type
2623                                          (actual_precision, from_unsigned2),
2624                                        mult_rhs2);
2625
2626   if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
2627     add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
2628
2629   /* Handle constants.  */
2630   if (TREE_CODE (mult_rhs1) == INTEGER_CST)
2631     mult_rhs1 = fold_convert (type1, mult_rhs1);
2632   if (TREE_CODE (mult_rhs2) == INTEGER_CST)
2633     mult_rhs2 = fold_convert (type2, mult_rhs2);
2634
2635   gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
2636                                   add_rhs);
2637   update_stmt (gsi_stmt (*gsi));
2638   widen_mul_stats.maccs_inserted++;
2639   return true;
2640 }
2641
2642 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
2643    with uses in additions and subtractions to form fused multiply-add
2644    operations.  Returns true if successful and MUL_STMT should be removed.  */
2645
2646 static bool
2647 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2)
2648 {
2649   tree mul_result = gimple_get_lhs (mul_stmt);
2650   tree type = TREE_TYPE (mul_result);
2651   gimple *use_stmt, *neguse_stmt;
2652   gassign *fma_stmt;
2653   use_operand_p use_p;
2654   imm_use_iterator imm_iter;
2655
2656   if (FLOAT_TYPE_P (type)
2657       && flag_fp_contract_mode == FP_CONTRACT_OFF)
2658     return false;
2659
2660   /* We don't want to do bitfield reduction ops.  */
2661   if (INTEGRAL_TYPE_P (type)
2662       && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
2663     return false;
2664
2665   /* If the target doesn't support it, don't generate it.  We assume that
2666      if fma isn't available then fms, fnma or fnms are not either.  */
2667   if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
2668     return false;
2669
2670   /* If the multiplication has zero uses, it is kept around probably because
2671      of -fnon-call-exceptions.  Don't optimize it away in that case,
2672      it is DCE job.  */
2673   if (has_zero_uses (mul_result))
2674     return false;
2675
2676   /* Make sure that the multiplication statement becomes dead after
2677      the transformation, thus that all uses are transformed to FMAs.
2678      This means we assume that an FMA operation has the same cost
2679      as an addition.  */
2680   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
2681     {
2682       enum tree_code use_code;
2683       tree result = mul_result;
2684       bool negate_p = false;
2685
2686       use_stmt = USE_STMT (use_p);
2687
2688       if (is_gimple_debug (use_stmt))
2689         continue;
2690
2691       /* For now restrict this operations to single basic blocks.  In theory
2692          we would want to support sinking the multiplication in
2693          m = a*b;
2694          if ()
2695            ma = m + c;
2696          else
2697            d = m;
2698          to form a fma in the then block and sink the multiplication to the
2699          else block.  */
2700       if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
2701         return false;
2702
2703       if (!is_gimple_assign (use_stmt))
2704         return false;
2705
2706       use_code = gimple_assign_rhs_code (use_stmt);
2707
2708       /* A negate on the multiplication leads to FNMA.  */
2709       if (use_code == NEGATE_EXPR)
2710         {
2711           ssa_op_iter iter;
2712           use_operand_p usep;
2713
2714           result = gimple_assign_lhs (use_stmt);
2715
2716           /* Make sure the negate statement becomes dead with this
2717              single transformation.  */
2718           if (!single_imm_use (gimple_assign_lhs (use_stmt),
2719                                &use_p, &neguse_stmt))
2720             return false;
2721
2722           /* Make sure the multiplication isn't also used on that stmt.  */
2723           FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
2724             if (USE_FROM_PTR (usep) == mul_result)
2725               return false;
2726
2727           /* Re-validate.  */
2728           use_stmt = neguse_stmt;
2729           if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
2730             return false;
2731           if (!is_gimple_assign (use_stmt))
2732             return false;
2733
2734           use_code = gimple_assign_rhs_code (use_stmt);
2735           negate_p = true;
2736         }
2737
2738       switch (use_code)
2739         {
2740         case MINUS_EXPR:
2741           if (gimple_assign_rhs2 (use_stmt) == result)
2742             negate_p = !negate_p;
2743           break;
2744         case PLUS_EXPR:
2745           break;
2746         default:
2747           /* FMA can only be formed from PLUS and MINUS.  */
2748           return false;
2749         }
2750
2751       /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
2752          by a MULT_EXPR that we'll visit later, we might be able to
2753          get a more profitable match with fnma.
2754          OTOH, if we don't, a negate / fma pair has likely lower latency
2755          that a mult / subtract pair.  */
2756       if (use_code == MINUS_EXPR && !negate_p
2757           && gimple_assign_rhs1 (use_stmt) == result
2758           && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing
2759           && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
2760         {
2761           tree rhs2 = gimple_assign_rhs2 (use_stmt);
2762
2763           if (TREE_CODE (rhs2) == SSA_NAME)
2764             {
2765               gimple *stmt2 = SSA_NAME_DEF_STMT (rhs2);
2766               if (has_single_use (rhs2)
2767                   && is_gimple_assign (stmt2)
2768                   && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
2769               return false;
2770             }
2771         }
2772
2773       /* We can't handle a * b + a * b.  */
2774       if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
2775         return false;
2776
2777       /* While it is possible to validate whether or not the exact form
2778          that we've recognized is available in the backend, the assumption
2779          is that the transformation is never a loss.  For instance, suppose
2780          the target only has the plain FMA pattern available.  Consider
2781          a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which
2782          is still two operations.  Consider -(a*b)-c -> fma(-a,b,-c): we
2783          still have 3 operations, but in the FMA form the two NEGs are
2784          independent and could be run in parallel.  */
2785     }
2786
2787   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
2788     {
2789       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
2790       enum tree_code use_code;
2791       tree addop, mulop1 = op1, result = mul_result;
2792       bool negate_p = false;
2793
2794       if (is_gimple_debug (use_stmt))
2795         continue;
2796
2797       use_code = gimple_assign_rhs_code (use_stmt);
2798       if (use_code == NEGATE_EXPR)
2799         {
2800           result = gimple_assign_lhs (use_stmt);
2801           single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
2802           gsi_remove (&gsi, true);
2803           release_defs (use_stmt);
2804
2805           use_stmt = neguse_stmt;
2806           gsi = gsi_for_stmt (use_stmt);
2807           use_code = gimple_assign_rhs_code (use_stmt);
2808           negate_p = true;
2809         }
2810
2811       if (gimple_assign_rhs1 (use_stmt) == result)
2812         {
2813           addop = gimple_assign_rhs2 (use_stmt);
2814           /* a * b - c -> a * b + (-c)  */
2815           if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
2816             addop = force_gimple_operand_gsi (&gsi,
2817                                               build1 (NEGATE_EXPR,
2818                                                       type, addop),
2819                                               true, NULL_TREE, true,
2820                                               GSI_SAME_STMT);
2821         }
2822       else
2823         {
2824           addop = gimple_assign_rhs1 (use_stmt);
2825           /* a - b * c -> (-b) * c + a */
2826           if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
2827             negate_p = !negate_p;
2828         }
2829
2830       if (negate_p)
2831         mulop1 = force_gimple_operand_gsi (&gsi,
2832                                            build1 (NEGATE_EXPR,
2833                                                    type, mulop1),
2834                                            true, NULL_TREE, true,
2835                                            GSI_SAME_STMT);
2836
2837       fma_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt),
2838                                       FMA_EXPR, mulop1, op2, addop);
2839       gsi_replace (&gsi, fma_stmt, true);
2840       widen_mul_stats.fmas_inserted++;
2841     }
2842
2843   return true;
2844 }
2845
2846
2847 /* Helper function of match_uaddsub_overflow.  Return 1
2848    if USE_STMT is unsigned overflow check ovf != 0 for
2849    STMT, -1 if USE_STMT is unsigned overflow check ovf == 0
2850    and 0 otherwise.  */
2851
2852 static int
2853 uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt)
2854 {
2855   enum tree_code ccode = ERROR_MARK;
2856   tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
2857   if (gimple_code (use_stmt) == GIMPLE_COND)
2858     {
2859       ccode = gimple_cond_code (use_stmt);
2860       crhs1 = gimple_cond_lhs (use_stmt);
2861       crhs2 = gimple_cond_rhs (use_stmt);
2862     }
2863   else if (is_gimple_assign (use_stmt))
2864     {
2865       if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
2866         {
2867           ccode = gimple_assign_rhs_code (use_stmt);
2868           crhs1 = gimple_assign_rhs1 (use_stmt);
2869           crhs2 = gimple_assign_rhs2 (use_stmt);
2870         }
2871       else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR)
2872         {
2873           tree cond = gimple_assign_rhs1 (use_stmt);
2874           if (COMPARISON_CLASS_P (cond))
2875             {
2876               ccode = TREE_CODE (cond);
2877               crhs1 = TREE_OPERAND (cond, 0);
2878               crhs2 = TREE_OPERAND (cond, 1);
2879             }
2880           else
2881             return 0;
2882         }
2883       else
2884         return 0;
2885     }
2886   else
2887     return 0;
2888
2889   if (TREE_CODE_CLASS (ccode) != tcc_comparison)
2890     return 0;
2891
2892   enum tree_code code = gimple_assign_rhs_code (stmt);
2893   tree lhs = gimple_assign_lhs (stmt);
2894   tree rhs1 = gimple_assign_rhs1 (stmt);
2895   tree rhs2 = gimple_assign_rhs2 (stmt);
2896
2897   switch (ccode)
2898     {
2899     case GT_EXPR:
2900     case LE_EXPR:
2901       /* r = a - b; r > a or r <= a
2902          r = a + b; a > r or a <= r or b > r or b <= r.  */
2903       if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
2904           || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
2905               && crhs2 == lhs))
2906         return ccode == GT_EXPR ? 1 : -1;
2907       break;
2908     case LT_EXPR:
2909     case GE_EXPR:
2910       /* r = a - b; a < r or a >= r
2911          r = a + b; r < a or r >= a or r < b or r >= b.  */
2912       if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
2913           || (code == PLUS_EXPR && crhs1 == lhs
2914               && (crhs2 == rhs1 || crhs2 == rhs2)))
2915         return ccode == LT_EXPR ? 1 : -1;
2916       break;
2917     default:
2918       break;
2919     }
2920   return 0;
2921 }
2922
2923 /* Recognize for unsigned x
2924    x = y - z;
2925    if (x > y)
2926    where there are other uses of x and replace it with
2927    _7 = SUB_OVERFLOW (y, z);
2928    x = REALPART_EXPR <_7>;
2929    _8 = IMAGPART_EXPR <_7>;
2930    if (_8)
2931    and similarly for addition.  */
2932
2933 static bool
2934 match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
2935                         enum tree_code code)
2936 {
2937   tree lhs = gimple_assign_lhs (stmt);
2938   tree type = TREE_TYPE (lhs);
2939   use_operand_p use_p;
2940   imm_use_iterator iter;
2941   bool use_seen = false;
2942   bool ovf_use_seen = false;
2943   gimple *use_stmt;
2944
2945   gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR);
2946   if (!INTEGRAL_TYPE_P (type)
2947       || !TYPE_UNSIGNED (type)
2948       || has_zero_uses (lhs)
2949       || has_single_use (lhs)
2950       || optab_handler (code == PLUS_EXPR ? uaddv4_optab : usubv4_optab,
2951                         TYPE_MODE (type)) == CODE_FOR_nothing)
2952     return false;
2953
2954   FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
2955     {
2956       use_stmt = USE_STMT (use_p);
2957       if (is_gimple_debug (use_stmt))
2958         continue;
2959
2960       if (uaddsub_overflow_check_p (stmt, use_stmt))
2961         ovf_use_seen = true;
2962       else
2963         use_seen = true;
2964       if (ovf_use_seen && use_seen)
2965         break;
2966     }
2967
2968   if (!ovf_use_seen || !use_seen)
2969     return false;
2970
2971   tree ctype = build_complex_type (type);
2972   tree rhs1 = gimple_assign_rhs1 (stmt);
2973   tree rhs2 = gimple_assign_rhs2 (stmt);
2974   gcall *g = gimple_build_call_internal (code == PLUS_EXPR
2975                                          ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
2976                                          2, rhs1, rhs2);
2977   tree ctmp = make_ssa_name (ctype);
2978   gimple_call_set_lhs (g, ctmp);
2979   gsi_insert_before (gsi, g, GSI_SAME_STMT);
2980   gassign *g2 = gimple_build_assign (lhs, REALPART_EXPR,
2981                                      build1 (REALPART_EXPR, type, ctmp));
2982   gsi_replace (gsi, g2, true);
2983   tree ovf = make_ssa_name (type);
2984   g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
2985                             build1 (IMAGPART_EXPR, type, ctmp));
2986   gsi_insert_after (gsi, g2, GSI_NEW_STMT);
2987
2988   FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2989     {
2990       if (is_gimple_debug (use_stmt))
2991         continue;
2992
2993       int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt);
2994       if (ovf_use == 0)
2995         continue;
2996       if (gimple_code (use_stmt) == GIMPLE_COND)
2997         {
2998           gcond *cond_stmt = as_a <gcond *> (use_stmt);
2999           gimple_cond_set_lhs (cond_stmt, ovf);
3000           gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
3001           gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
3002         }
3003       else
3004         {
3005           gcc_checking_assert (is_gimple_assign (use_stmt));
3006           if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
3007             {
3008               gimple_assign_set_rhs1 (use_stmt, ovf);
3009               gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
3010               gimple_assign_set_rhs_code (use_stmt,
3011                                           ovf_use == 1 ? NE_EXPR : EQ_EXPR);
3012             }
3013           else
3014             {
3015               gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
3016                                    == COND_EXPR);
3017               tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
3018                                   boolean_type_node, ovf,
3019                                   build_int_cst (type, 0));
3020               gimple_assign_set_rhs1 (use_stmt, cond);
3021             }
3022         }
3023       update_stmt (use_stmt);
3024     }
3025   return true;
3026 }
3027
3028 /* Return true if target has support for divmod.  */
3029
3030 static bool
3031 target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
3032 {
3033   /* If target supports hardware divmod insn, use it for divmod.  */
3034   if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
3035     return true;
3036
3037   /* Check if libfunc for divmod is available.  */
3038   rtx libfunc = optab_libfunc (divmod_optab, mode);
3039   if (libfunc != NULL_RTX)
3040     {
3041       /* If optab_handler exists for div_optab, perhaps in a wider mode,
3042          we don't want to use the libfunc even if it exists for given mode.  */
3043       machine_mode div_mode;
3044       FOR_EACH_MODE_FROM (div_mode, mode)
3045         if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
3046           return false;
3047
3048       return targetm.expand_divmod_libfunc != NULL;
3049     }
3050
3051   return false;
3052 }
3053
3054 /* Check if stmt is candidate for divmod transform.  */
3055
3056 static bool
3057 divmod_candidate_p (gassign *stmt)
3058 {
3059   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
3060   machine_mode mode = TYPE_MODE (type);
3061   optab divmod_optab, div_optab;
3062
3063   if (TYPE_UNSIGNED (type))
3064     {
3065       divmod_optab = udivmod_optab;
3066       div_optab = udiv_optab;
3067     }
3068   else
3069     {
3070       divmod_optab = sdivmod_optab;
3071       div_optab = sdiv_optab;
3072     }
3073
3074   tree op1 = gimple_assign_rhs1 (stmt);
3075   tree op2 = gimple_assign_rhs2 (stmt);
3076
3077   /* Disable the transform if either is a constant, since division-by-constant
3078      may have specialized expansion.  */
3079   if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2))
3080     return false;
3081
3082   /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
3083      expand using the [su]divv optabs.  */
3084   if (TYPE_OVERFLOW_TRAPS (type))
3085     return false;
3086
3087   if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
3088     return false;
3089
3090   return true;
3091 }
3092
3093 /* This function looks for:
3094    t1 = a TRUNC_DIV_EXPR b;
3095    t2 = a TRUNC_MOD_EXPR b;
3096    and transforms it to the following sequence:
3097    complex_tmp = DIVMOD (a, b);
3098    t1 = REALPART_EXPR(a);
3099    t2 = IMAGPART_EXPR(b);
3100    For conditions enabling the transform see divmod_candidate_p().
3101
3102    The pass has three parts:
3103    1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
3104       other trunc_div_expr and trunc_mod_expr stmts.
3105    2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
3106       to stmts vector.
3107    3) Insert DIVMOD call just before top_stmt and update entries in
3108       stmts vector to use return value of DIMOVD (REALEXPR_PART for div,
3109       IMAGPART_EXPR for mod).  */
3110
3111 static bool
3112 convert_to_divmod (gassign *stmt)
3113 {
3114   if (stmt_can_throw_internal (stmt)
3115       || !divmod_candidate_p (stmt))
3116     return false;
3117
3118   tree op1 = gimple_assign_rhs1 (stmt);
3119   tree op2 = gimple_assign_rhs2 (stmt);
3120
3121   imm_use_iterator use_iter;
3122   gimple *use_stmt;
3123   auto_vec<gimple *> stmts;
3124
3125   gimple *top_stmt = stmt;
3126   basic_block top_bb = gimple_bb (stmt);
3127
3128   /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
3129      at-least stmt and possibly other trunc_div/trunc_mod stmts
3130      having same operands as stmt.  */
3131
3132   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
3133     {
3134       if (is_gimple_assign (use_stmt)
3135           && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
3136               || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
3137           && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
3138           && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
3139         {
3140           if (stmt_can_throw_internal (use_stmt))
3141             continue;
3142
3143           basic_block bb = gimple_bb (use_stmt);
3144
3145           if (bb == top_bb)
3146             {
3147               if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
3148                 top_stmt = use_stmt;
3149             }
3150           else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
3151             {
3152               top_bb = bb;
3153               top_stmt = use_stmt;
3154             }
3155         }
3156     }
3157
3158   tree top_op1 = gimple_assign_rhs1 (top_stmt);
3159   tree top_op2 = gimple_assign_rhs2 (top_stmt);
3160
3161   stmts.safe_push (top_stmt);
3162   bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
3163
3164   /* Part 2: Add all trunc_div/trunc_mod statements domianted by top_bb
3165      to stmts vector. The 2nd loop will always add stmt to stmts vector, since
3166      gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
3167      2nd loop ends up adding at-least single trunc_mod_expr stmt.  */
3168
3169   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
3170     {
3171       if (is_gimple_assign (use_stmt)
3172           && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
3173               || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
3174           && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
3175           && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
3176         {
3177           if (use_stmt == top_stmt
3178               || stmt_can_throw_internal (use_stmt)
3179               || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
3180             continue;
3181
3182           stmts.safe_push (use_stmt);
3183           if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
3184             div_seen = true;
3185         }
3186     }
3187
3188   if (!div_seen)
3189     return false;
3190
3191   /* Part 3: Create libcall to internal fn DIVMOD:
3192      divmod_tmp = DIVMOD (op1, op2).  */
3193
3194   gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
3195   tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
3196                                  call_stmt, "divmod_tmp");
3197   gimple_call_set_lhs (call_stmt, res);
3198   /* We rejected throwing statements above.  */
3199   gimple_call_set_nothrow (call_stmt, true);
3200
3201   /* Insert the call before top_stmt.  */
3202   gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
3203   gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
3204
3205   widen_mul_stats.divmod_calls_inserted++;
3206
3207   /* Update all statements in stmts vector:
3208      lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
3209      lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>.  */
3210
3211   for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
3212     {
3213       tree new_rhs;
3214
3215       switch (gimple_assign_rhs_code (use_stmt))
3216         {
3217           case TRUNC_DIV_EXPR:
3218             new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
3219             break;
3220
3221           case TRUNC_MOD_EXPR:
3222             new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
3223             break;
3224
3225           default:
3226             gcc_unreachable ();
3227         }
3228
3229       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3230       gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
3231       update_stmt (use_stmt);
3232     }
3233
3234   return true;
3235 }
3236
3237 /* Find integer multiplications where the operands are extended from
3238    smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
3239    where appropriate.  */
3240
3241 namespace {
3242
3243 const pass_data pass_data_optimize_widening_mul =
3244 {
3245   GIMPLE_PASS, /* type */
3246   "widening_mul", /* name */
3247   OPTGROUP_NONE, /* optinfo_flags */
3248   TV_TREE_WIDEN_MUL, /* tv_id */
3249   PROP_ssa, /* properties_required */
3250   0, /* properties_provided */
3251   0, /* properties_destroyed */
3252   0, /* todo_flags_start */
3253   TODO_update_ssa, /* todo_flags_finish */
3254 };
3255
3256 class pass_optimize_widening_mul : public gimple_opt_pass
3257 {
3258 public:
3259   pass_optimize_widening_mul (gcc::context *ctxt)
3260     : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
3261   {}
3262
3263   /* opt_pass methods: */
3264   virtual bool gate (function *)
3265     {
3266       return flag_expensive_optimizations && optimize;
3267     }
3268
3269   virtual unsigned int execute (function *);
3270
3271 }; // class pass_optimize_widening_mul
3272
3273 unsigned int
3274 pass_optimize_widening_mul::execute (function *fun)
3275 {
3276   basic_block bb;
3277   bool cfg_changed = false;
3278
3279   memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
3280   calculate_dominance_info (CDI_DOMINATORS);
3281   renumber_gimple_stmt_uids ();
3282
3283   FOR_EACH_BB_FN (bb, fun)
3284     {
3285       gimple_stmt_iterator gsi;
3286
3287       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
3288         {
3289           gimple *stmt = gsi_stmt (gsi);
3290           enum tree_code code;
3291
3292           if (is_gimple_assign (stmt))
3293             {
3294               code = gimple_assign_rhs_code (stmt);
3295               switch (code)
3296                 {
3297                 case MULT_EXPR:
3298                   if (!convert_mult_to_widen (stmt, &gsi)
3299                       && !convert_expand_mult_copysign (stmt, &gsi)
3300                       && convert_mult_to_fma (stmt,
3301                                               gimple_assign_rhs1 (stmt),
3302                                               gimple_assign_rhs2 (stmt)))
3303                     {
3304                       gsi_remove (&gsi, true);
3305                       release_defs (stmt);
3306                       continue;
3307                     }
3308                   break;
3309
3310                 case PLUS_EXPR:
3311                 case MINUS_EXPR:
3312                   if (!convert_plusminus_to_widen (&gsi, stmt, code))
3313                     match_uaddsub_overflow (&gsi, stmt, code);
3314                   break;
3315
3316                 case TRUNC_MOD_EXPR:
3317                   convert_to_divmod (as_a<gassign *> (stmt));
3318                   break;
3319
3320                 default:;
3321                 }
3322             }
3323           else if (is_gimple_call (stmt)
3324                    && gimple_call_lhs (stmt))
3325             {
3326               tree fndecl = gimple_call_fndecl (stmt);
3327               if (fndecl
3328                   && gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
3329                 {
3330                   switch (DECL_FUNCTION_CODE (fndecl))
3331                     {
3332                       case BUILT_IN_POWF:
3333                       case BUILT_IN_POW:
3334                       case BUILT_IN_POWL:
3335                         if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
3336                             && real_equal
3337                                  (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
3338                                   &dconst2)
3339                             && convert_mult_to_fma (stmt,
3340                                                     gimple_call_arg (stmt, 0),
3341                                                     gimple_call_arg (stmt, 0)))
3342                           {
3343                             unlink_stmt_vdef (stmt);
3344                             if (gsi_remove (&gsi, true)
3345                                 && gimple_purge_dead_eh_edges (bb))
3346                               cfg_changed = true;
3347                             release_defs (stmt);
3348                             continue;
3349                           }
3350                           break;
3351
3352                       default:;
3353                     }
3354                 }
3355             }
3356           gsi_next (&gsi);
3357         }
3358     }
3359
3360   statistics_counter_event (fun, "widening multiplications inserted",
3361                             widen_mul_stats.widen_mults_inserted);
3362   statistics_counter_event (fun, "widening maccs inserted",
3363                             widen_mul_stats.maccs_inserted);
3364   statistics_counter_event (fun, "fused multiply-adds inserted",
3365                             widen_mul_stats.fmas_inserted);
3366   statistics_counter_event (fun, "divmod calls inserted",
3367                             widen_mul_stats.divmod_calls_inserted);
3368
3369   return cfg_changed ? TODO_cleanup_cfg : 0;
3370 }
3371
3372 } // anon namespace
3373
3374 gimple_opt_pass *
3375 make_pass_optimize_widening_mul (gcc::context *ctxt)
3376 {
3377   return new pass_optimize_widening_mul (ctxt);
3378 }