gcc/tree-ssa-math-opts.c

   1 /* Global, SSA-based optimizations using mathematical identities.
   2    Copyright (C) 2005-2017 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* Currently, the only mini-pass in this file tries to CSE reciprocal
  21    operations.  These are common in sequences such as this one:
  22
  23         modulus = sqrt(x*x + y*y + z*z);
  24         x = x / modulus;
  25         y = y / modulus;
  26         z = z / modulus;
  27
  28    that can be optimized to
  29
  30         modulus = sqrt(x*x + y*y + z*z);
  31         rmodulus = 1.0 / modulus;
  32         x = x * rmodulus;
  33         y = y * rmodulus;
  34         z = z * rmodulus;
  35
  36    We do this for loop invariant divisors, and with this pass whenever
  37    we notice that a division has the same divisor multiple times.
  38
  39    Of course, like in PRE, we don't insert a division if a dominator
  40    already has one.  However, this cannot be done as an extension of
  41    PRE for several reasons.
  42
  43    First of all, with some experiments it was found out that the
  44    transformation is not always useful if there are only two divisions
  45    by the same divisor.  This is probably because modern processors
  46    can pipeline the divisions; on older, in-order processors it should
  47    still be effective to optimize two divisions by the same number.
  48    We make this a param, and it shall be called N in the remainder of
  49    this comment.
  50
  51    Second, if trapping math is active, we have less freedom on where
  52    to insert divisions: we can only do so in basic blocks that already
  53    contain one.  (If divisions don't trap, instead, we can insert
  54    divisions elsewhere, which will be in blocks that are common dominators
  55    of those that have the division).
  56
  57    We really don't want to compute the reciprocal unless a division will
  58    be found.  To do this, we won't insert the division in a basic block
  59    that has less than N divisions *post-dominating* it.
  60
  61    The algorithm constructs a subset of the dominator tree, holding the
  62    blocks containing the divisions and the common dominators to them,
  63    and walk it twice.  The first walk is in post-order, and it annotates
  64    each block with the number of divisions that post-dominate it: this
  65    gives information on where divisions can be inserted profitably.
  66    The second walk is in pre-order, and it inserts divisions as explained
  67    above, and replaces divisions by multiplications.
  68
  69    In the best case, the cost of the pass is O(n_statements).  In the
  70    worst-case, the cost is due to creating the dominator tree subset,
  71    with a cost of O(n_basic_blocks ^ 2); however this can only happen
  72    for n_statements / n_basic_blocks statements.  So, the amortized cost
  73    of creating the dominator tree subset is O(n_basic_blocks) and the
  74    worst-case cost of the pass is O(n_statements * n_basic_blocks).
  75
  76    More practically, the cost will be small because there are few
  77    divisions, and they tend to be in the same basic block, so insert_bb
  78    is called very few times.
  79
  80    If we did this using domwalk.c, an efficient implementation would have
  81    to work on all the variables in a single pass, because we could not
  82    work on just a subset of the dominator tree, as we do now, and the
  83    cost would also be something like O(n_statements * n_basic_blocks).
  84    The data structures would be more complex in order to work on all the
  85    variables in a single pass.  */
  86
  87 #include "config.h"
  88 #include "system.h"
  89 #include "coretypes.h"
  90 #include "backend.h"
  91 #include "target.h"
  92 #include "rtl.h"
  93 #include "tree.h"
  94 #include "gimple.h"
  95 #include "predict.h"
  96 #include "alloc-pool.h"
  97 #include "tree-pass.h"
  98 #include "ssa.h"
  99 #include "optabs-tree.h"
 100 #include "gimple-pretty-print.h"
 101 #include "alias.h"
 102 #include "fold-const.h"
 103 #include "gimple-fold.h"
 104 #include "gimple-iterator.h"
 105 #include "gimplify.h"
 106 #include "gimplify-me.h"
 107 #include "stor-layout.h"
 108 #include "tree-cfg.h"
 109 #include "tree-dfa.h"
 110 #include "tree-ssa.h"
 111 #include "builtins.h"
 112 #include "params.h"
 113 #include "internal-fn.h"
 114 #include "case-cfn-macros.h"
 115 #include "optabs-libfuncs.h"
 116 #include "tree-eh.h"
 117 #include "targhooks.h"
 118
 119 /* This structure represents one basic block that either computes a
 120    division, or is a common dominator for basic block that compute a
 121    division.  */
 122 struct occurrence {
 123   /* The basic block represented by this structure.  */
 124   basic_block bb;
 125
 126   /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
 127      inserted in BB.  */
 128   tree recip_def;
 129
 130   /* If non-NULL, the SSA_NAME holding the definition for a squared
 131      reciprocal inserted in BB.  */
 132   tree square_recip_def;
 133
 134   /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
 135      was inserted in BB.  */
 136   gimple *recip_def_stmt;
 137
 138   /* Pointer to a list of "struct occurrence"s for blocks dominated
 139      by BB.  */
 140   struct occurrence *children;
 141
 142   /* Pointer to the next "struct occurrence"s in the list of blocks
 143      sharing a common dominator.  */
 144   struct occurrence *next;
 145
 146   /* The number of divisions that are in BB before compute_merit.  The
 147      number of divisions that are in BB or post-dominate it after
 148      compute_merit.  */
 149   int num_divisions;
 150
 151   /* True if the basic block has a division, false if it is a common
 152      dominator for basic blocks that do.  If it is false and trapping
 153      math is active, BB is not a candidate for inserting a reciprocal.  */
 154   bool bb_has_division;
 155 };
 156
 157 static struct
 158 {
 159   /* Number of 1.0/X ops inserted.  */
 160   int rdivs_inserted;
 161
 162   /* Number of 1.0/FUNC ops inserted.  */
 163   int rfuncs_inserted;
 164 } reciprocal_stats;
 165
 166 static struct
 167 {
 168   /* Number of cexpi calls inserted.  */
 169   int inserted;
 170 } sincos_stats;
 171
 172 static struct
 173 {
 174   /* Number of widening multiplication ops inserted.  */
 175   int widen_mults_inserted;
 176
 177   /* Number of integer multiply-and-accumulate ops inserted.  */
 178   int maccs_inserted;
 179
 180   /* Number of fp fused multiply-add ops inserted.  */
 181   int fmas_inserted;
 182
 183   /* Number of divmod calls inserted.  */
 184   int divmod_calls_inserted;
 185 } widen_mul_stats;
 186
 187 /* The instance of "struct occurrence" representing the highest
 188    interesting block in the dominator tree.  */
 189 static struct occurrence *occ_head;
 190
 191 /* Allocation pool for getting instances of "struct occurrence".  */
 192 static object_allocator<occurrence> *occ_pool;
 193
 194
 195
 196 /* Allocate and return a new struct occurrence for basic block BB, and
 197    whose children list is headed by CHILDREN.  */
 198 static struct occurrence *
 199 occ_new (basic_block bb, struct occurrence *children)
 200 {
 201   struct occurrence *occ;
 202
 203   bb->aux = occ = occ_pool->allocate ();
 204   memset (occ, 0, sizeof (struct occurrence));
 205
 206   occ->bb = bb;
 207   occ->children = children;
 208   return occ;
 209 }
 210
 211
 212 /* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
 213    list of "struct occurrence"s, one per basic block, having IDOM as
 214    their common dominator.
 215
 216    We try to insert NEW_OCC as deep as possible in the tree, and we also
 217    insert any other block that is a common dominator for BB and one
 218    block already in the tree.  */
 219
 220 static void
 221 insert_bb (struct occurrence *new_occ, basic_block idom,
 222            struct occurrence **p_head)
 223 {
 224   struct occurrence *occ, **p_occ;
 225
 226   for (p_occ = p_head; (occ = *p_occ) != NULL; )
 227     {
 228       basic_block bb = new_occ->bb, occ_bb = occ->bb;
 229       basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
 230       if (dom == bb)
 231         {
 232           /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
 233              from its list.  */
 234           *p_occ = occ->next;
 235           occ->next = new_occ->children;
 236           new_occ->children = occ;
 237
 238           /* Try the next block (it may as well be dominated by BB).  */
 239         }
 240
 241       else if (dom == occ_bb)
 242         {
 243           /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
 244           insert_bb (new_occ, dom, &occ->children);
 245           return;
 246         }
 247
 248       else if (dom != idom)
 249         {
 250           gcc_assert (!dom->aux);
 251
 252           /* There is a dominator between IDOM and BB, add it and make
 253              two children out of NEW_OCC and OCC.  First, remove OCC from
 254              its list.  */
 255           *p_occ = occ->next;
 256           new_occ->next = occ;
 257           occ->next = NULL;
 258
 259           /* None of the previous blocks has DOM as a dominator: if we tail
 260              recursed, we would reexamine them uselessly. Just switch BB with
 261              DOM, and go on looking for blocks dominated by DOM.  */
 262           new_occ = occ_new (dom, new_occ);
 263         }
 264
 265       else
 266         {
 267           /* Nothing special, go on with the next element.  */
 268           p_occ = &occ->next;
 269         }
 270     }
 271
 272   /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
 273   new_occ->next = *p_head;
 274   *p_head = new_occ;
 275 }
 276
 277 /* Register that we found a division in BB.
 278    IMPORTANCE is a measure of how much weighting to give
 279    that division.  Use IMPORTANCE = 2 to register a single
 280    division.  If the division is going to be found multiple
 281    times use 1 (as it is with squares).  */
 282
 283 static inline void
 284 register_division_in (basic_block bb, int importance)
 285 {
 286   struct occurrence *occ;
 287
 288   occ = (struct occurrence *) bb->aux;
 289   if (!occ)
 290     {
 291       occ = occ_new (bb, NULL);
 292       insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
 293     }
 294
 295   occ->bb_has_division = true;
 296   occ->num_divisions += importance;
 297 }
 298
 299
 300 /* Compute the number of divisions that postdominate each block in OCC and
 301    its children.  */
 302
 303 static void
 304 compute_merit (struct occurrence *occ)
 305 {
 306   struct occurrence *occ_child;
 307   basic_block dom = occ->bb;
 308
 309   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
 310     {
 311       basic_block bb;
 312       if (occ_child->children)
 313         compute_merit (occ_child);
 314
 315       if (flag_exceptions)
 316         bb = single_noncomplex_succ (dom);
 317       else
 318         bb = dom;
 319
 320       if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
 321         occ->num_divisions += occ_child->num_divisions;
 322     }
 323 }
 324
 325
 326 /* Return whether USE_STMT is a floating-point division by DEF.  */
 327 static inline bool
 328 is_division_by (gimple *use_stmt, tree def)
 329 {
 330   return is_gimple_assign (use_stmt)
 331          && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
 332          && gimple_assign_rhs2 (use_stmt) == def
 333          /* Do not recognize x / x as valid division, as we are getting
 334             confused later by replacing all immediate uses x in such
 335             a stmt.  */
 336          && gimple_assign_rhs1 (use_stmt) != def;
 337 }
 338
 339 /* Return whether USE_STMT is DEF * DEF.  */
 340 static inline bool
 341 is_square_of (gimple *use_stmt, tree def)
 342 {
 343   if (gimple_code (use_stmt) == GIMPLE_ASSIGN
 344       && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
 345     {
 346       tree op0 = gimple_assign_rhs1 (use_stmt);
 347       tree op1 = gimple_assign_rhs2 (use_stmt);
 348
 349       return op0 == op1 && op0 == def;
 350     }
 351   return 0;
 352 }
 353
 354 /* Return whether USE_STMT is a floating-point division by
 355    DEF * DEF.  */
 356 static inline bool
 357 is_division_by_square (gimple *use_stmt, tree def)
 358 {
 359   if (gimple_code (use_stmt) == GIMPLE_ASSIGN
 360       && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
 361       && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt))
 362     {
 363       tree denominator = gimple_assign_rhs2 (use_stmt);
 364       if (TREE_CODE (denominator) == SSA_NAME)
 365         {
 366           return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
 367         }
 368     }
 369   return 0;
 370 }
 371
 372 /* Walk the subset of the dominator tree rooted at OCC, setting the
 373    RECIP_DEF field to a definition of 1.0 / DEF that can be used in
 374    the given basic block.  The field may be left NULL, of course,
 375    if it is not possible or profitable to do the optimization.
 376
 377    DEF_BSI is an iterator pointing at the statement defining DEF.
 378    If RECIP_DEF is set, a dominator already has a computation that can
 379    be used.
 380
 381    If should_insert_square_recip is set, then this also inserts
 382    the square of the reciprocal immediately after the definition
 383    of the reciprocal.  */
 384
 385 static void
 386 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
 387                     tree def, tree recip_def, tree square_recip_def,
 388                     int should_insert_square_recip, int threshold)
 389 {
 390   tree type;
 391   gassign *new_stmt, *new_square_stmt;
 392   gimple_stmt_iterator gsi;
 393   struct occurrence *occ_child;
 394
 395   if (!recip_def
 396       && (occ->bb_has_division || !flag_trapping_math)
 397       /* Divide by two as all divisions are counted twice in
 398          the costing loop.  */
 399       && occ->num_divisions / 2 >= threshold)
 400     {
 401       /* Make a variable with the replacement and substitute it.  */
 402       type = TREE_TYPE (def);
 403       recip_def = create_tmp_reg (type, "reciptmp");
 404       new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
 405                                       build_one_cst (type), def);
 406
 407       if (should_insert_square_recip)
 408         {
 409           square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
 410           new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
 411                                                  recip_def, recip_def);
 412         }
 413
 414       if (occ->bb_has_division)
 415         {
 416           /* Case 1: insert before an existing division.  */
 417           gsi = gsi_after_labels (occ->bb);
 418           while (!gsi_end_p (gsi)
 419                  && (!is_division_by (gsi_stmt (gsi), def))
 420                  && (!is_division_by_square (gsi_stmt (gsi), def)))
 421             gsi_next (&gsi);
 422
 423           gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
 424         }
 425       else if (def_gsi && occ->bb == def_gsi->bb)
 426         {
 427           /* Case 2: insert right after the definition.  Note that this will
 428              never happen if the definition statement can throw, because in
 429              that case the sole successor of the statement's basic block will
 430              dominate all the uses as well.  */
 431           gsi = *def_gsi;
 432           gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
 433         }
 434       else
 435         {
 436           /* Case 3: insert in a basic block not containing defs/uses.  */
 437           gsi = gsi_after_labels (occ->bb);
 438           gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
 439         }
 440
 441       /* Regardless of which case the reciprocal as inserted in,
 442          we insert the square immediately after the reciprocal.  */
 443       if (should_insert_square_recip)
 444         gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
 445
 446       reciprocal_stats.rdivs_inserted++;
 447
 448       occ->recip_def_stmt = new_stmt;
 449     }
 450
 451   occ->recip_def = recip_def;
 452   occ->square_recip_def = square_recip_def;
 453   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
 454     insert_reciprocals (def_gsi, occ_child, def, recip_def,
 455                         square_recip_def, should_insert_square_recip,
 456                         threshold);
 457 }
 458
 459 /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
 460    Take as argument the use for (x * x).  */
 461 static inline void
 462 replace_reciprocal_squares (use_operand_p use_p)
 463 {
 464   gimple *use_stmt = USE_STMT (use_p);
 465   basic_block bb = gimple_bb (use_stmt);
 466   struct occurrence *occ = (struct occurrence *) bb->aux;
 467
 468   if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
 469       && occ->recip_def)
 470     {
 471       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
 472       gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
 473       gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
 474       SET_USE (use_p, occ->square_recip_def);
 475       fold_stmt_inplace (&gsi);
 476       update_stmt (use_stmt);
 477     }
 478 }
 479
 480
 481 /* Replace the division at USE_P with a multiplication by the reciprocal, if
 482    possible.  */
 483
 484 static inline void
 485 replace_reciprocal (use_operand_p use_p)
 486 {
 487   gimple *use_stmt = USE_STMT (use_p);
 488   basic_block bb = gimple_bb (use_stmt);
 489   struct occurrence *occ = (struct occurrence *) bb->aux;
 490
 491   if (optimize_bb_for_speed_p (bb)
 492       && occ->recip_def && use_stmt != occ->recip_def_stmt)
 493     {
 494       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
 495       gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
 496       SET_USE (use_p, occ->recip_def);
 497       fold_stmt_inplace (&gsi);
 498       update_stmt (use_stmt);
 499     }
 500 }
 501
 502
 503 /* Free OCC and return one more "struct occurrence" to be freed.  */
 504
 505 static struct occurrence *
 506 free_bb (struct occurrence *occ)
 507 {
 508   struct occurrence *child, *next;
 509
 510   /* First get the two pointers hanging off OCC.  */
 511   next = occ->next;
 512   child = occ->children;
 513   occ->bb->aux = NULL;
 514   occ_pool->remove (occ);
 515
 516   /* Now ensure that we don't recurse unless it is necessary.  */
 517   if (!child)
 518     return next;
 519   else
 520     {
 521       while (next)
 522         next = free_bb (next);
 523
 524       return child;
 525     }
 526 }
 527
 528
 529 /* Look for floating-point divisions among DEF's uses, and try to
 530    replace them by multiplications with the reciprocal.  Add
 531    as many statements computing the reciprocal as needed.
 532
 533    DEF must be a GIMPLE register of a floating-point type.  */
 534
 535 static void
 536 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
 537 {
 538   use_operand_p use_p, square_use_p;
 539   imm_use_iterator use_iter, square_use_iter;
 540   tree square_def;
 541   struct occurrence *occ;
 542   int count = 0;
 543   int threshold;
 544   int square_recip_count = 0;
 545   int sqrt_recip_count = 0;
 546
 547   gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));
 548   threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
 549
 550   /* If this is a square (x * x), we should check whether there are any
 551      enough divisions by x on it's own to warrant waiting for that pass.  */
 552   if (TREE_CODE (def) == SSA_NAME)
 553     {
 554       gimple *def_stmt = SSA_NAME_DEF_STMT (def);
 555
 556       if (is_gimple_assign (def_stmt)
 557           && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
 558           && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
 559         {
 560           /* This statement is a square of something.  We should take this
 561              in to account, as it may be more profitable to not extract
 562              the reciprocal here.  */
 563           tree op0 = gimple_assign_rhs1 (def_stmt);
 564           FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
 565             {
 566               gimple *use_stmt = USE_STMT (use_p);
 567               if (is_division_by (use_stmt, op0))
 568                 sqrt_recip_count ++;
 569             }
 570         }
 571     }
 572
 573   FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
 574     {
 575       gimple *use_stmt = USE_STMT (use_p);
 576       if (is_division_by (use_stmt, def))
 577         {
 578           register_division_in (gimple_bb (use_stmt), 2);
 579           count++;
 580         }
 581
 582       if (is_square_of (use_stmt, def))
 583         {
 584           square_def = gimple_assign_lhs (use_stmt);
 585           FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
 586             {
 587               gimple *square_use_stmt = USE_STMT (square_use_p);
 588               if (is_division_by (square_use_stmt, square_def))
 589                 {
 590                   /* Halve the relative importance as this is called twice
 591                      for each division by a square.  */
 592                   register_division_in (gimple_bb (square_use_stmt), 1);
 593                   square_recip_count ++;
 594                 }
 595             }
 596         }
 597     }
 598
 599   /* Square reciprocals will have been counted twice.  */
 600   square_recip_count /= 2;
 601
 602   if (sqrt_recip_count > square_recip_count)
 603     /* It will be more profitable to extract a 1 / x expression later,
 604        so it is not worth attempting to extract 1 / (x * x) now.  */
 605     return;
 606
 607   /* Do the expensive part only if we can hope to optimize something.  */
 608   if (count + square_recip_count >= threshold
 609       && count >= 1)
 610     {
 611       gimple *use_stmt;
 612       for (occ = occ_head; occ; occ = occ->next)
 613         {
 614           compute_merit (occ);
 615           insert_reciprocals (def_gsi, occ, def, NULL, NULL,
 616                               square_recip_count, threshold);
 617         }
 618
 619       FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
 620         {
 621           if (is_division_by (use_stmt, def))
 622             {
 623               FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
 624                 replace_reciprocal (use_p);
 625             }
 626           else if (square_recip_count > 0
 627                    && is_square_of (use_stmt, def))
 628             {
 629               FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
 630                 {
 631                   /* Find all uses of the square that are divisions and
 632                    * replace them by multiplications with the inverse.  */
 633                   imm_use_iterator square_iterator;
 634                   gimple *powmult_use_stmt = USE_STMT (use_p);
 635                   tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
 636
 637                   FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
 638                                          square_iterator, powmult_def_name)
 639                     FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
 640                       {
 641                         gimple *powmult_use_stmt = USE_STMT (square_use_p);
 642                         if (is_division_by (powmult_use_stmt, powmult_def_name))
 643                           replace_reciprocal_squares (square_use_p);
 644                       }
 645                 }
 646             }
 647         }
 648     }
 649
 650   for (occ = occ_head; occ; )
 651     occ = free_bb (occ);
 652
 653   occ_head = NULL;
 654 }
 655
 656 /* Return an internal function that implements the reciprocal of CALL,
 657    or IFN_LAST if there is no such function that the target supports.  */
 658
 659 internal_fn
 660 internal_fn_reciprocal (gcall *call)
 661 {
 662   internal_fn ifn;
 663
 664   switch (gimple_call_combined_fn (call))
 665     {
 666     CASE_CFN_SQRT:
 667     CASE_CFN_SQRT_FN:
 668       ifn = IFN_RSQRT;
 669       break;
 670
 671     default:
 672       return IFN_LAST;
 673     }
 674
 675   tree_pair types = direct_internal_fn_types (ifn, call);
 676   if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
 677     return IFN_LAST;
 678
 679   return ifn;
 680 }
 681
 682 /* Go through all the floating-point SSA_NAMEs, and call
 683    execute_cse_reciprocals_1 on each of them.  */
 684 namespace {
 685
 686 const pass_data pass_data_cse_reciprocals =
 687 {
 688   GIMPLE_PASS, /* type */
 689   "recip", /* name */
 690   OPTGROUP_NONE, /* optinfo_flags */
 691   TV_TREE_RECIP, /* tv_id */
 692   PROP_ssa, /* properties_required */
 693   0, /* properties_provided */
 694   0, /* properties_destroyed */
 695   0, /* todo_flags_start */
 696   TODO_update_ssa, /* todo_flags_finish */
 697 };
 698
 699 class pass_cse_reciprocals : public gimple_opt_pass
 700 {
 701 public:
 702   pass_cse_reciprocals (gcc::context *ctxt)
 703     : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
 704   {}
 705
 706   /* opt_pass methods: */
 707   virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
 708   virtual unsigned int execute (function *);
 709
 710 }; // class pass_cse_reciprocals
 711
 712 unsigned int
 713 pass_cse_reciprocals::execute (function *fun)
 714 {
 715   basic_block bb;
 716   tree arg;
 717
 718   occ_pool = new object_allocator<occurrence> ("dominators for recip");
 719
 720   memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
 721   calculate_dominance_info (CDI_DOMINATORS);
 722   calculate_dominance_info (CDI_POST_DOMINATORS);
 723
 724   if (flag_checking)
 725     FOR_EACH_BB_FN (bb, fun)
 726       gcc_assert (!bb->aux);
 727
 728   for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
 729     if (FLOAT_TYPE_P (TREE_TYPE (arg))
 730         && is_gimple_reg (arg))
 731       {
 732         tree name = ssa_default_def (fun, arg);
 733         if (name)
 734           execute_cse_reciprocals_1 (NULL, name);
 735       }
 736
 737   FOR_EACH_BB_FN (bb, fun)
 738     {
 739       tree def;
 740
 741       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
 742            gsi_next (&gsi))
 743         {
 744           gphi *phi = gsi.phi ();
 745           def = PHI_RESULT (phi);
 746           if (! virtual_operand_p (def)
 747               && FLOAT_TYPE_P (TREE_TYPE (def)))
 748             execute_cse_reciprocals_1 (NULL, def);
 749         }
 750
 751       for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
 752            gsi_next (&gsi))
 753         {
 754           gimple *stmt = gsi_stmt (gsi);
 755
 756           if (gimple_has_lhs (stmt)
 757               && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
 758               && FLOAT_TYPE_P (TREE_TYPE (def))
 759               && TREE_CODE (def) == SSA_NAME)
 760             execute_cse_reciprocals_1 (&gsi, def);
 761         }
 762
 763       if (optimize_bb_for_size_p (bb))
 764         continue;
 765
 766       /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
 767       for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
 768            gsi_next (&gsi))
 769         {
 770           gimple *stmt = gsi_stmt (gsi);
 771
 772           if (is_gimple_assign (stmt)
 773               && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
 774             {
 775               tree arg1 = gimple_assign_rhs2 (stmt);
 776               gimple *stmt1;
 777
 778               if (TREE_CODE (arg1) != SSA_NAME)
 779                 continue;
 780
 781               stmt1 = SSA_NAME_DEF_STMT (arg1);
 782
 783               if (is_gimple_call (stmt1)
 784                   && gimple_call_lhs (stmt1))
 785                 {
 786                   bool fail;
 787                   imm_use_iterator ui;
 788                   use_operand_p use_p;
 789                   tree fndecl = NULL_TREE;
 790
 791                   gcall *call = as_a <gcall *> (stmt1);
 792                   internal_fn ifn = internal_fn_reciprocal (call);
 793                   if (ifn == IFN_LAST)
 794                     {
 795                       fndecl = gimple_call_fndecl (call);
 796                       if (!fndecl
 797                           || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_MD)
 798                         continue;
 799                       fndecl = targetm.builtin_reciprocal (fndecl);
 800                       if (!fndecl)
 801                         continue;
 802                     }
 803
 804                   /* Check that all uses of the SSA name are divisions,
 805                      otherwise replacing the defining statement will do
 806                      the wrong thing.  */
 807                   fail = false;
 808                   FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
 809                     {
 810                       gimple *stmt2 = USE_STMT (use_p);
 811                       if (is_gimple_debug (stmt2))
 812                         continue;
 813                       if (!is_gimple_assign (stmt2)
 814                           || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
 815                           || gimple_assign_rhs1 (stmt2) == arg1
 816                           || gimple_assign_rhs2 (stmt2) != arg1)
 817                         {
 818                           fail = true;
 819                           break;
 820                         }
 821                     }
 822                   if (fail)
 823                     continue;
 824
 825                   gimple_replace_ssa_lhs (call, arg1);
 826                   if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
 827                     {
 828                       auto_vec<tree, 4> args;
 829                       for (unsigned int i = 0;
 830                            i < gimple_call_num_args (call); i++)
 831                         args.safe_push (gimple_call_arg (call, i));
 832                       gcall *stmt2;
 833                       if (ifn == IFN_LAST)
 834                         stmt2 = gimple_build_call_vec (fndecl, args);
 835                       else
 836                         stmt2 = gimple_build_call_internal_vec (ifn, args);
 837                       gimple_call_set_lhs (stmt2, arg1);
 838                       if (gimple_vdef (call))
 839                         {
 840                           gimple_set_vdef (stmt2, gimple_vdef (call));
 841                           SSA_NAME_DEF_STMT (gimple_vdef (stmt2)) = stmt2;
 842                         }
 843                       gimple_call_set_nothrow (stmt2,
 844                                                gimple_call_nothrow_p (call));
 845                       gimple_set_vuse (stmt2, gimple_vuse (call));
 846                       gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
 847                       gsi_replace (&gsi2, stmt2, true);
 848                     }
 849                   else
 850                     {
 851                       if (ifn == IFN_LAST)
 852                         gimple_call_set_fndecl (call, fndecl);
 853                       else
 854                         gimple_call_set_internal_fn (call, ifn);
 855                       update_stmt (call);
 856                     }
 857                   reciprocal_stats.rfuncs_inserted++;
 858
 859                   FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
 860                     {
 861                       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
 862                       gimple_assign_set_rhs_code (stmt, MULT_EXPR);
 863                       fold_stmt_inplace (&gsi);
 864                       update_stmt (stmt);
 865                     }
 866                 }
 867             }
 868         }
 869     }
 870
 871   statistics_counter_event (fun, "reciprocal divs inserted",
 872                             reciprocal_stats.rdivs_inserted);
 873   statistics_counter_event (fun, "reciprocal functions inserted",
 874                             reciprocal_stats.rfuncs_inserted);
 875
 876   free_dominance_info (CDI_DOMINATORS);
 877   free_dominance_info (CDI_POST_DOMINATORS);
 878   delete occ_pool;
 879   return 0;
 880 }
 881
 882 } // anon namespace
 883
 884 gimple_opt_pass *
 885 make_pass_cse_reciprocals (gcc::context *ctxt)
 886 {
 887   return new pass_cse_reciprocals (ctxt);
 888 }
 889
 890 /* Records an occurrence at statement USE_STMT in the vector of trees
 891    STMTS if it is dominated by *TOP_BB or dominates it or this basic block
 892    is not yet initialized.  Returns true if the occurrence was pushed on
 893    the vector.  Adjusts *TOP_BB to be the basic block dominating all
 894    statements in the vector.  */
 895
 896 static bool
 897 maybe_record_sincos (vec<gimple *> *stmts,
 898                      basic_block *top_bb, gimple *use_stmt)
 899 {
 900   basic_block use_bb = gimple_bb (use_stmt);
 901   if (*top_bb
 902       && (*top_bb == use_bb
 903           || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
 904     stmts->safe_push (use_stmt);
 905   else if (!*top_bb
 906            || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
 907     {
 908       stmts->safe_push (use_stmt);
 909       *top_bb = use_bb;
 910     }
 911   else
 912     return false;
 913
 914   return true;
 915 }
 916
 917 /* Look for sin, cos and cexpi calls with the same argument NAME and
 918    create a single call to cexpi CSEing the result in this case.
 919    We first walk over all immediate uses of the argument collecting
 920    statements that we can CSE in a vector and in a second pass replace
 921    the statement rhs with a REALPART or IMAGPART expression on the
 922    result of the cexpi call we insert before the use statement that
 923    dominates all other candidates.  */
 924
 925 static bool
 926 execute_cse_sincos_1 (tree name)
 927 {
 928   gimple_stmt_iterator gsi;
 929   imm_use_iterator use_iter;
 930   tree fndecl, res, type;
 931   gimple *def_stmt, *use_stmt, *stmt;
 932   int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
 933   auto_vec<gimple *> stmts;
 934   basic_block top_bb = NULL;
 935   int i;
 936   bool cfg_changed = false;
 937
 938   type = TREE_TYPE (name);
 939   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
 940     {
 941       if (gimple_code (use_stmt) != GIMPLE_CALL
 942           || !gimple_call_lhs (use_stmt))
 943         continue;
 944
 945       switch (gimple_call_combined_fn (use_stmt))
 946         {
 947         CASE_CFN_COS:
 948           seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
 949           break;
 950
 951         CASE_CFN_SIN:
 952           seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
 953           break;
 954
 955         CASE_CFN_CEXPI:
 956           seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
 957           break;
 958
 959         default:;
 960         }
 961     }
 962
 963   if (seen_cos + seen_sin + seen_cexpi <= 1)
 964     return false;
 965
 966   /* Simply insert cexpi at the beginning of top_bb but not earlier than
 967      the name def statement.  */
 968   fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
 969   if (!fndecl)
 970     return false;
 971   stmt = gimple_build_call (fndecl, 1, name);
 972   res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
 973   gimple_call_set_lhs (stmt, res);
 974
 975   def_stmt = SSA_NAME_DEF_STMT (name);
 976   if (!SSA_NAME_IS_DEFAULT_DEF (name)
 977       && gimple_code (def_stmt) != GIMPLE_PHI
 978       && gimple_bb (def_stmt) == top_bb)
 979     {
 980       gsi = gsi_for_stmt (def_stmt);
 981       gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
 982     }
 983   else
 984     {
 985       gsi = gsi_after_labels (top_bb);
 986       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
 987     }
 988   sincos_stats.inserted++;
 989
 990   /* And adjust the recorded old call sites.  */
 991   for (i = 0; stmts.iterate (i, &use_stmt); ++i)
 992     {
 993       tree rhs = NULL;
 994
 995       switch (gimple_call_combined_fn (use_stmt))
 996         {
 997         CASE_CFN_COS:
 998           rhs = fold_build1 (REALPART_EXPR, type, res);
 999           break;
1000
1001         CASE_CFN_SIN:
1002           rhs = fold_build1 (IMAGPART_EXPR, type, res);
1003           break;
1004
1005         CASE_CFN_CEXPI:
1006           rhs = res;
1007           break;
1008
1009         default:;
1010           gcc_unreachable ();
1011         }
1012
1013         /* Replace call with a copy.  */
1014         stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
1015
1016         gsi = gsi_for_stmt (use_stmt);
1017         gsi_replace (&gsi, stmt, true);
1018         if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
1019           cfg_changed = true;
1020     }
1021
1022   return cfg_changed;
1023 }
1024
1025 /* To evaluate powi(x,n), the floating point value x raised to the
1026    constant integer exponent n, we use a hybrid algorithm that
1027    combines the "window method" with look-up tables.  For an
1028    introduction to exponentiation algorithms and "addition chains",
1029    see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1030    "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1031    3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1032    Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */
1033
1034 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
1035    multiplications to inline before calling the system library's pow
1036    function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1037    so this default never requires calling pow, powf or powl.  */
1038
1039 #ifndef POWI_MAX_MULTS
1040 #define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
1041 #endif
1042
1043 /* The size of the "optimal power tree" lookup table.  All
1044    exponents less than this value are simply looked up in the
1045    powi_table below.  This threshold is also used to size the
1046    cache of pseudo registers that hold intermediate results.  */
1047 #define POWI_TABLE_SIZE 256
1048
1049 /* The size, in bits of the window, used in the "window method"
1050    exponentiation algorithm.  This is equivalent to a radix of
1051    (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
1052 #define POWI_WINDOW_SIZE 3
1053
1054 /* The following table is an efficient representation of an
1055    "optimal power tree".  For each value, i, the corresponding
1056    value, j, in the table states than an optimal evaluation
1057    sequence for calculating pow(x,i) can be found by evaluating
1058    pow(x,j)*pow(x,i-j).  An optimal power tree for the first
1059    100 integers is given in Knuth's "Seminumerical algorithms".  */
1060
1061 static const unsigned char powi_table[POWI_TABLE_SIZE] =
1062   {
1063       0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
1064       4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
1065       8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
1066      12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
1067      16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
1068      20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
1069      24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
1070      28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
1071      32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
1072      36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
1073      40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
1074      44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
1075      48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
1076      52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
1077      56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
1078      60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
1079      64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
1080      68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
1081      72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
1082      76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
1083      80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
1084      84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
1085      88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
1086      92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
1087      96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
1088     100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
1089     104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
1090     108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
1091     112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
1092     116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
1093     120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
1094     124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
1095   };
1096
1097
1098 /* Return the number of multiplications required to calculate
1099    powi(x,n) where n is less than POWI_TABLE_SIZE.  This is a
1100    subroutine of powi_cost.  CACHE is an array indicating
1101    which exponents have already been calculated.  */
1102
1103 static int
1104 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1105 {
1106   /* If we've already calculated this exponent, then this evaluation
1107      doesn't require any additional multiplications.  */
1108   if (cache[n])
1109     return 0;
1110
1111   cache[n] = true;
1112   return powi_lookup_cost (n - powi_table[n], cache)
1113          + powi_lookup_cost (powi_table[n], cache) + 1;
1114 }
1115
1116 /* Return the number of multiplications required to calculate
1117    powi(x,n) for an arbitrary x, given the exponent N.  This
1118    function needs to be kept in sync with powi_as_mults below.  */
1119
1120 static int
1121 powi_cost (HOST_WIDE_INT n)
1122 {
1123   bool cache[POWI_TABLE_SIZE];
1124   unsigned HOST_WIDE_INT digit;
1125   unsigned HOST_WIDE_INT val;
1126   int result;
1127
1128   if (n == 0)
1129     return 0;
1130
1131   /* Ignore the reciprocal when calculating the cost.  */
1132   val = (n < 0) ? -n : n;
1133
1134   /* Initialize the exponent cache.  */
1135   memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
1136   cache[1] = true;
1137
1138   result = 0;
1139
1140   while (val >= POWI_TABLE_SIZE)
1141     {
1142       if (val & 1)
1143         {
1144           digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
1145           result += powi_lookup_cost (digit, cache)
1146                     + POWI_WINDOW_SIZE + 1;
1147           val >>= POWI_WINDOW_SIZE;
1148         }
1149       else
1150         {
1151           val >>= 1;
1152           result++;
1153         }
1154     }
1155
1156   return result + powi_lookup_cost (val, cache);
1157 }
1158
1159 /* Recursive subroutine of powi_as_mults.  This function takes the
1160    array, CACHE, of already calculated exponents and an exponent N and
1161    returns a tree that corresponds to CACHE[1]**N, with type TYPE.  */
1162
1163 static tree
1164 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1165                  HOST_WIDE_INT n, tree *cache)
1166 {
1167   tree op0, op1, ssa_target;
1168   unsigned HOST_WIDE_INT digit;
1169   gassign *mult_stmt;
1170
1171   if (n < POWI_TABLE_SIZE && cache[n])
1172     return cache[n];
1173
1174   ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1175
1176   if (n < POWI_TABLE_SIZE)
1177     {
1178       cache[n] = ssa_target;
1179       op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1180       op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1181     }
1182   else if (n & 1)
1183     {
1184       digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1185       op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1186       op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1187     }
1188   else
1189     {
1190       op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1191       op1 = op0;
1192     }
1193
1194   mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1195   gimple_set_location (mult_stmt, loc);
1196   gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1197
1198   return ssa_target;
1199 }
1200
1201 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1202    This function needs to be kept in sync with powi_cost above.  */
1203
1204 static tree
1205 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1206                tree arg0, HOST_WIDE_INT n)
1207 {
1208   tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1209   gassign *div_stmt;
1210   tree target;
1211
1212   if (n == 0)
1213     return build_real (type, dconst1);
1214
1215   memset (cache, 0,  sizeof (cache));
1216   cache[1] = arg0;
1217
1218   result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
1219   if (n >= 0)
1220     return result;
1221
1222   /* If the original exponent was negative, reciprocate the result.  */
1223   target = make_temp_ssa_name (type, NULL, "powmult");
1224   div_stmt = gimple_build_assign (target, RDIV_EXPR,
1225                                   build_real (type, dconst1), result);
1226   gimple_set_location (div_stmt, loc);
1227   gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1228
1229   return target;
1230 }
1231
1232 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1233    location info LOC.  If the arguments are appropriate, create an
1234    equivalent sequence of statements prior to GSI using an optimal
1235    number of multiplications, and return an expession holding the
1236    result.  */
1237
1238 static tree
1239 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1240                             tree arg0, HOST_WIDE_INT n)
1241 {
1242   /* Avoid largest negative number.  */
1243   if (n != -n
1244       && ((n >= -1 && n <= 2)
1245           || (optimize_function_for_speed_p (cfun)
1246               && powi_cost (n) <= POWI_MAX_MULTS)))
1247     return powi_as_mults (gsi, loc, arg0, n);
1248
1249   return NULL_TREE;
1250 }
1251
1252 /* Build a gimple call statement that calls FN with argument ARG.
1253    Set the lhs of the call statement to a fresh SSA name.  Insert the
1254    statement prior to GSI's current position, and return the fresh
1255    SSA name.  */
1256
1257 static tree
1258 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1259                        tree fn, tree arg)
1260 {
1261   gcall *call_stmt;
1262   tree ssa_target;
1263
1264   call_stmt = gimple_build_call (fn, 1, arg);
1265   ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1266   gimple_set_lhs (call_stmt, ssa_target);
1267   gimple_set_location (call_stmt, loc);
1268   gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1269
1270   return ssa_target;
1271 }
1272
1273 /* Build a gimple binary operation with the given CODE and arguments
1274    ARG0, ARG1, assigning the result to a new SSA name for variable
1275    TARGET.  Insert the statement prior to GSI's current position, and
1276    return the fresh SSA name.*/
1277
1278 static tree
1279 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1280                         const char *name, enum tree_code code,
1281                         tree arg0, tree arg1)
1282 {
1283   tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1284   gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1285   gimple_set_location (stmt, loc);
1286   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1287   return result;
1288 }
1289
1290 /* Build a gimple reference operation with the given CODE and argument
1291    ARG, assigning the result to a new SSA name of TYPE with NAME.
1292    Insert the statement prior to GSI's current position, and return
1293    the fresh SSA name.  */
1294
1295 static inline tree
1296 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1297                       const char *name, enum tree_code code, tree arg0)
1298 {
1299   tree result = make_temp_ssa_name (type, NULL, name);
1300   gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1301   gimple_set_location (stmt, loc);
1302   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1303   return result;
1304 }
1305
1306 /* Build a gimple assignment to cast VAL to TYPE.  Insert the statement
1307    prior to GSI's current position, and return the fresh SSA name.  */
1308
1309 static tree
1310 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1311                        tree type, tree val)
1312 {
1313   tree result = make_ssa_name (type);
1314   gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1315   gimple_set_location (stmt, loc);
1316   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1317   return result;
1318 }
1319
1320 struct pow_synth_sqrt_info
1321 {
1322   bool *factors;
1323   unsigned int deepest;
1324   unsigned int num_mults;
1325 };
1326
1327 /* Return true iff the real value C can be represented as a
1328    sum of powers of 0.5 up to N.  That is:
1329    C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1330    Record in INFO the various parameters of the synthesis algorithm such
1331    as the factors a[i], the maximum 0.5 power and the number of
1332    multiplications that will be required.  */
1333
1334 bool
1335 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1336                                  struct pow_synth_sqrt_info *info)
1337 {
1338   REAL_VALUE_TYPE factor = dconsthalf;
1339   REAL_VALUE_TYPE remainder = c;
1340
1341   info->deepest = 0;
1342   info->num_mults = 0;
1343   memset (info->factors, 0, n * sizeof (bool));
1344
1345   for (unsigned i = 0; i < n; i++)
1346     {
1347       REAL_VALUE_TYPE res;
1348
1349       /* If something inexact happened bail out now.  */
1350       if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1351         return false;
1352
1353       /* We have hit zero.  The number is representable as a sum
1354          of powers of 0.5.  */
1355       if (real_equal (&res, &dconst0))
1356         {
1357           info->factors[i] = true;
1358           info->deepest = i + 1;
1359           return true;
1360         }
1361       else if (!REAL_VALUE_NEGATIVE (res))
1362         {
1363           remainder = res;
1364           info->factors[i] = true;
1365           info->num_mults++;
1366         }
1367       else
1368         info->factors[i] = false;
1369
1370       real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1371     }
1372   return false;
1373 }
1374
1375 /* Return the tree corresponding to FN being applied
1376    to ARG N times at GSI and LOC.
1377    Look up previous results from CACHE if need be.
1378    cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times.  */
1379
1380 static tree
1381 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1382               tree fn, location_t loc, tree *cache)
1383 {
1384   tree res = cache[n];
1385   if (!res)
1386     {
1387       tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1388       res = build_and_insert_call (gsi, loc, fn, prev);
1389       cache[n] = res;
1390     }
1391
1392   return res;
1393 }
1394
1395 /* Print to STREAM the repeated application of function FNAME to ARG
1396    N times.  So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1397    "foo (foo (x))".  */
1398
1399 static void
1400 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1401                  unsigned int n)
1402 {
1403   if (n == 0)
1404     fprintf (stream, "%s", arg);
1405   else
1406     {
1407       fprintf (stream, "%s (", fname);
1408       print_nested_fn (stream, fname, arg, n - 1);
1409       fprintf (stream, ")");
1410     }
1411 }
1412
1413 /* Print to STREAM the fractional sequence of sqrt chains
1414    applied to ARG, described by INFO.  Used for the dump file.  */
1415
1416 static void
1417 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1418                                 struct pow_synth_sqrt_info *info)
1419 {
1420   for (unsigned int i = 0; i < info->deepest; i++)
1421     {
1422       bool is_set = info->factors[i];
1423       if (is_set)
1424         {
1425           print_nested_fn (stream, "sqrt", arg, i + 1);
1426           if (i != info->deepest - 1)
1427             fprintf (stream, " * ");
1428         }
1429     }
1430 }
1431
1432 /* Print to STREAM a representation of raising ARG to an integer
1433    power N.  Used for the dump file.  */
1434
1435 static void
1436 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1437 {
1438   if (n > 1)
1439     fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1440   else if (n == 1)
1441     fprintf (stream, "%s", arg);
1442 }
1443
1444 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1445    square roots.  Place at GSI and LOC.  Limit the maximum depth
1446    of the sqrt chains to MAX_DEPTH.  Return the tree holding the
1447    result of the expanded sequence or NULL_TREE if the expansion failed.
1448
1449    This routine assumes that ARG1 is a real number with a fractional part
1450    (the integer exponent case will have been handled earlier in
1451    gimple_expand_builtin_pow).
1452
1453    For ARG1 > 0.0:
1454    * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1455      FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1456                     FRAC_PART == ARG1 - WHOLE_PART:
1457      Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1458      POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1459      if it can be expressed as such, that is if FRAC_PART satisfies:
1460      FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1461      where integer a[i] is either 0 or 1.
1462
1463      Example:
1464      POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1465        --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1466
1467    For ARG1 < 0.0 there are two approaches:
1468    * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1469          is calculated as above.
1470
1471      Example:
1472      POW (x, -5.625) == 1.0 / POW (x, 5.625)
1473        -->  1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1474
1475    * (B) : WHOLE_PART := - ceil (abs (ARG1))
1476            FRAC_PART  := ARG1 - WHOLE_PART
1477      and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1478      Example:
1479      POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1480        --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1481
1482    For ARG1 < 0.0 we choose between (A) and (B) depending on
1483    how many multiplications we'd have to do.
1484    So, for the example in (B): POW (x, -5.875), if we were to
1485    follow algorithm (A) we would produce:
1486    1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1487    which contains more multiplications than approach (B).
1488
1489    Hopefully, this approach will eliminate potentially expensive POW library
1490    calls when unsafe floating point math is enabled and allow the compiler to
1491    further optimise the multiplies, square roots and divides produced by this
1492    function.  */
1493
1494 static tree
1495 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1496                      tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1497 {
1498   tree type = TREE_TYPE (arg0);
1499   machine_mode mode = TYPE_MODE (type);
1500   tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1501   bool one_over = true;
1502
1503   if (!sqrtfn)
1504     return NULL_TREE;
1505
1506   if (TREE_CODE (arg1) != REAL_CST)
1507     return NULL_TREE;
1508
1509   REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1510
1511   gcc_assert (max_depth > 0);
1512   tree *cache = XALLOCAVEC (tree, max_depth + 1);
1513
1514   struct pow_synth_sqrt_info synth_info;
1515   synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1516   synth_info.deepest = 0;
1517   synth_info.num_mults = 0;
1518
1519   bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1520   REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1521
1522   /* The whole and fractional parts of exp.  */
1523   REAL_VALUE_TYPE whole_part;
1524   REAL_VALUE_TYPE frac_part;
1525
1526   real_floor (&whole_part, mode, &exp);
1527   real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1528
1529
1530   REAL_VALUE_TYPE ceil_whole = dconst0;
1531   REAL_VALUE_TYPE ceil_fract = dconst0;
1532
1533   if (neg_exp)
1534     {
1535       real_ceil (&ceil_whole, mode, &exp);
1536       real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1537     }
1538
1539   if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1540     return NULL_TREE;
1541
1542   /* Check whether it's more profitable to not use 1.0 / ...  */
1543   if (neg_exp)
1544     {
1545       struct pow_synth_sqrt_info alt_synth_info;
1546       alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1547       alt_synth_info.deepest = 0;
1548       alt_synth_info.num_mults = 0;
1549
1550       if (representable_as_half_series_p (ceil_fract, max_depth,
1551                                            &alt_synth_info)
1552           && alt_synth_info.deepest <= synth_info.deepest
1553           && alt_synth_info.num_mults < synth_info.num_mults)
1554         {
1555           whole_part = ceil_whole;
1556           frac_part = ceil_fract;
1557           synth_info.deepest = alt_synth_info.deepest;
1558           synth_info.num_mults = alt_synth_info.num_mults;
1559           memcpy (synth_info.factors, alt_synth_info.factors,
1560                   (max_depth + 1) * sizeof (bool));
1561           one_over = false;
1562         }
1563     }
1564
1565   HOST_WIDE_INT n = real_to_integer (&whole_part);
1566   REAL_VALUE_TYPE cint;
1567   real_from_integer (&cint, VOIDmode, n, SIGNED);
1568
1569   if (!real_identical (&whole_part, &cint))
1570     return NULL_TREE;
1571
1572   if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1573     return NULL_TREE;
1574
1575   memset (cache, 0, (max_depth + 1) * sizeof (tree));
1576
1577   tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1578
1579   /* Calculate the integer part of the exponent.  */
1580   if (n > 1)
1581     {
1582       integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1583       if (!integer_res)
1584         return NULL_TREE;
1585     }
1586
1587   if (dump_file)
1588     {
1589       char string[64];
1590
1591       real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1592       fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1593
1594       if (neg_exp)
1595         {
1596           if (one_over)
1597             {
1598               fprintf (dump_file, "1.0 / (");
1599               dump_integer_part (dump_file, "x", n);
1600               if (n > 0)
1601                 fprintf (dump_file, " * ");
1602               dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1603               fprintf (dump_file, ")");
1604             }
1605           else
1606             {
1607               dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1608               fprintf (dump_file, " / (");
1609               dump_integer_part (dump_file, "x", n);
1610               fprintf (dump_file, ")");
1611             }
1612         }
1613       else
1614         {
1615           dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1616           if (n > 0)
1617             fprintf (dump_file, " * ");
1618           dump_integer_part (dump_file, "x", n);
1619         }
1620
1621       fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1622     }
1623
1624
1625   tree fract_res = NULL_TREE;
1626   cache[0] = arg0;
1627
1628   /* Calculate the fractional part of the exponent.  */
1629   for (unsigned i = 0; i < synth_info.deepest; i++)
1630     {
1631       if (synth_info.factors[i])
1632         {
1633           tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1634
1635           if (!fract_res)
1636               fract_res = sqrt_chain;
1637
1638           else
1639             fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1640                                            fract_res, sqrt_chain);
1641         }
1642     }
1643
1644   tree res = NULL_TREE;
1645
1646   if (neg_exp)
1647     {
1648       if (one_over)
1649         {
1650           if (n > 0)
1651             res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1652                                            fract_res, integer_res);
1653           else
1654             res = fract_res;
1655
1656           res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1657                                           build_real (type, dconst1), res);
1658         }
1659       else
1660         {
1661           res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1662                                          fract_res, integer_res);
1663         }
1664     }
1665   else
1666     res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1667                                    fract_res, integer_res);
1668   return res;
1669 }
1670
1671 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1672    with location info LOC.  If possible, create an equivalent and
1673    less expensive sequence of statements prior to GSI, and return an
1674    expession holding the result.  */
1675
1676 static tree
1677 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
1678                            tree arg0, tree arg1)
1679 {
1680   REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
1681   REAL_VALUE_TYPE c2, dconst3;
1682   HOST_WIDE_INT n;
1683   tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
1684   machine_mode mode;
1685   bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
1686   bool hw_sqrt_exists, c_is_int, c2_is_int;
1687
1688   dconst1_4 = dconst1;
1689   SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
1690
1691   /* If the exponent isn't a constant, there's nothing of interest
1692      to be done.  */
1693   if (TREE_CODE (arg1) != REAL_CST)
1694     return NULL_TREE;
1695
1696   /* Don't perform the operation if flag_signaling_nans is on
1697      and the operand is a signaling NaN.  */
1698   if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
1699       && ((TREE_CODE (arg0) == REAL_CST
1700            && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
1701           || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
1702     return NULL_TREE;
1703
1704   /* If the exponent is equivalent to an integer, expand to an optimal
1705      multiplication sequence when profitable.  */
1706   c = TREE_REAL_CST (arg1);
1707   n = real_to_integer (&c);
1708   real_from_integer (&cint, VOIDmode, n, SIGNED);
1709   c_is_int = real_identical (&c, &cint);
1710
1711   if (c_is_int
1712       && ((n >= -1 && n <= 2)
1713           || (flag_unsafe_math_optimizations
1714               && speed_p
1715               && powi_cost (n) <= POWI_MAX_MULTS)))
1716     return gimple_expand_builtin_powi (gsi, loc, arg0, n);
1717
1718   /* Attempt various optimizations using sqrt and cbrt.  */
1719   type = TREE_TYPE (arg0);
1720   mode = TYPE_MODE (type);
1721   sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1722
1723   /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
1724      unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
1725      sqrt(-0) = -0.  */
1726   if (sqrtfn
1727       && real_equal (&c, &dconsthalf)
1728       && !HONOR_SIGNED_ZEROS (mode))
1729     return build_and_insert_call (gsi, loc, sqrtfn, arg0);
1730
1731   hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
1732
1733   /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
1734      optimizations since 1./3. is not exactly representable.  If x
1735      is negative and finite, the correct value of pow(x,1./3.) is
1736      a NaN with the "invalid" exception raised, because the value
1737      of 1./3. actually has an even denominator.  The correct value
1738      of cbrt(x) is a negative real value.  */
1739   cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
1740   dconst1_3 = real_value_truncate (mode, dconst_third ());
1741
1742   if (flag_unsafe_math_optimizations
1743       && cbrtfn
1744       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1745       && real_equal (&c, &dconst1_3))
1746     return build_and_insert_call (gsi, loc, cbrtfn, arg0);
1747
1748   /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
1749      if we don't have a hardware sqrt insn.  */
1750   dconst1_6 = dconst1_3;
1751   SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
1752
1753   if (flag_unsafe_math_optimizations
1754       && sqrtfn
1755       && cbrtfn
1756       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1757       && speed_p
1758       && hw_sqrt_exists
1759       && real_equal (&c, &dconst1_6))
1760     {
1761       /* sqrt(x)  */
1762       sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
1763
1764       /* cbrt(sqrt(x))  */
1765       return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
1766     }
1767
1768
1769   /* Attempt to expand the POW as a product of square root chains.
1770      Expand the 0.25 case even when otpimising for size.  */
1771   if (flag_unsafe_math_optimizations
1772       && sqrtfn
1773       && hw_sqrt_exists
1774       && (speed_p || real_equal (&c, &dconst1_4))
1775       && !HONOR_SIGNED_ZEROS (mode))
1776     {
1777       unsigned int max_depth = speed_p
1778                                 ? PARAM_VALUE (PARAM_MAX_POW_SQRT_DEPTH)
1779                                 : 2;
1780
1781       tree expand_with_sqrts
1782         = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
1783
1784       if (expand_with_sqrts)
1785         return expand_with_sqrts;
1786     }
1787
1788   real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
1789   n = real_to_integer (&c2);
1790   real_from_integer (&cint, VOIDmode, n, SIGNED);
1791   c2_is_int = real_identical (&c2, &cint);
1792
1793   /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
1794
1795      powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
1796      1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.
1797
1798      Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
1799      different from pow(x, 1./3.) due to rounding and behavior with
1800      negative x, we need to constrain this transformation to unsafe
1801      math and positive x or finite math.  */
1802   real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
1803   real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
1804   real_round (&c2, mode, &c2);
1805   n = real_to_integer (&c2);
1806   real_from_integer (&cint, VOIDmode, n, SIGNED);
1807   real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
1808   real_convert (&c2, mode, &c2);
1809
1810   if (flag_unsafe_math_optimizations
1811       && cbrtfn
1812       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1813       && real_identical (&c2, &c)
1814       && !c2_is_int
1815       && optimize_function_for_speed_p (cfun)
1816       && powi_cost (n / 3) <= POWI_MAX_MULTS)
1817     {
1818       tree powi_x_ndiv3 = NULL_TREE;
1819
1820       /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
1821          possible or profitable, give up.  Skip the degenerate case when
1822          abs(n) < 3, where the result is always 1.  */
1823       if (absu_hwi (n) >= 3)
1824         {
1825           powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
1826                                                      abs_hwi (n / 3));
1827           if (!powi_x_ndiv3)
1828             return NULL_TREE;
1829         }
1830
1831       /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
1832          as that creates an unnecessary variable.  Instead, just produce
1833          either cbrt(x) or cbrt(x) * cbrt(x).  */
1834       cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
1835
1836       if (absu_hwi (n) % 3 == 1)
1837         powi_cbrt_x = cbrt_x;
1838       else
1839         powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1840                                               cbrt_x, cbrt_x);
1841
1842       /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
1843       if (absu_hwi (n) < 3)
1844         result = powi_cbrt_x;
1845       else
1846         result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1847                                          powi_x_ndiv3, powi_cbrt_x);
1848
1849       /* If n is negative, reciprocate the result.  */
1850       if (n < 0)
1851         result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1852                                          build_real (type, dconst1), result);
1853
1854       return result;
1855     }
1856
1857   /* No optimizations succeeded.  */
1858   return NULL_TREE;
1859 }
1860
1861 /* ARG is the argument to a cabs builtin call in GSI with location info
1862    LOC.  Create a sequence of statements prior to GSI that calculates
1863    sqrt(R*R + I*I), where R and I are the real and imaginary components
1864    of ARG, respectively.  Return an expression holding the result.  */
1865
1866 static tree
1867 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
1868 {
1869   tree real_part, imag_part, addend1, addend2, sum, result;
1870   tree type = TREE_TYPE (TREE_TYPE (arg));
1871   tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1872   machine_mode mode = TYPE_MODE (type);
1873
1874   if (!flag_unsafe_math_optimizations
1875       || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
1876       || !sqrtfn
1877       || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
1878     return NULL_TREE;
1879
1880   real_part = build_and_insert_ref (gsi, loc, type, "cabs",
1881                                     REALPART_EXPR, arg);
1882   addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1883                                     real_part, real_part);
1884   imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
1885                                     IMAGPART_EXPR, arg);
1886   addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1887                                     imag_part, imag_part);
1888   sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
1889   result = build_and_insert_call (gsi, loc, sqrtfn, sum);
1890
1891   return result;
1892 }
1893
1894 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
1895    on the SSA_NAME argument of each of them.  Also expand powi(x,n) into
1896    an optimal number of multiplies, when n is a constant.  */
1897
1898 namespace {
1899
1900 const pass_data pass_data_cse_sincos =
1901 {
1902   GIMPLE_PASS, /* type */
1903   "sincos", /* name */
1904   OPTGROUP_NONE, /* optinfo_flags */
1905   TV_TREE_SINCOS, /* tv_id */
1906   PROP_ssa, /* properties_required */
1907   PROP_gimple_opt_math, /* properties_provided */
1908   0, /* properties_destroyed */
1909   0, /* todo_flags_start */
1910   TODO_update_ssa, /* todo_flags_finish */
1911 };
1912
1913 class pass_cse_sincos : public gimple_opt_pass
1914 {
1915 public:
1916   pass_cse_sincos (gcc::context *ctxt)
1917     : gimple_opt_pass (pass_data_cse_sincos, ctxt)
1918   {}
1919
1920   /* opt_pass methods: */
1921   virtual bool gate (function *)
1922     {
1923       /* We no longer require either sincos or cexp, since powi expansion
1924          piggybacks on this pass.  */
1925       return optimize;
1926     }
1927
1928   virtual unsigned int execute (function *);
1929
1930 }; // class pass_cse_sincos
1931
1932 unsigned int
1933 pass_cse_sincos::execute (function *fun)
1934 {
1935   basic_block bb;
1936   bool cfg_changed = false;
1937
1938   calculate_dominance_info (CDI_DOMINATORS);
1939   memset (&sincos_stats, 0, sizeof (sincos_stats));
1940
1941   FOR_EACH_BB_FN (bb, fun)
1942     {
1943       gimple_stmt_iterator gsi;
1944       bool cleanup_eh = false;
1945
1946       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1947         {
1948           gimple *stmt = gsi_stmt (gsi);
1949
1950           /* Only the last stmt in a bb could throw, no need to call
1951              gimple_purge_dead_eh_edges if we change something in the middle
1952              of a basic block.  */
1953           cleanup_eh = false;
1954
1955           if (is_gimple_call (stmt)
1956               && gimple_call_lhs (stmt))
1957             {
1958               tree arg, arg0, arg1, result;
1959               HOST_WIDE_INT n;
1960               location_t loc;
1961
1962               switch (gimple_call_combined_fn (stmt))
1963                 {
1964                 CASE_CFN_COS:
1965                 CASE_CFN_SIN:
1966                 CASE_CFN_CEXPI:
1967                   /* Make sure we have either sincos or cexp.  */
1968                   if (!targetm.libc_has_function (function_c99_math_complex)
1969                       && !targetm.libc_has_function (function_sincos))
1970                     break;
1971
1972                   arg = gimple_call_arg (stmt, 0);
1973                   if (TREE_CODE (arg) == SSA_NAME)
1974                     cfg_changed |= execute_cse_sincos_1 (arg);
1975                   break;
1976
1977                 CASE_CFN_POW:
1978                   arg0 = gimple_call_arg (stmt, 0);
1979                   arg1 = gimple_call_arg (stmt, 1);
1980
1981                   loc = gimple_location (stmt);
1982                   result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
1983
1984                   if (result)
1985                     {
1986                       tree lhs = gimple_get_lhs (stmt);
1987                       gassign *new_stmt = gimple_build_assign (lhs, result);
1988                       gimple_set_location (new_stmt, loc);
1989                       unlink_stmt_vdef (stmt);
1990                       gsi_replace (&gsi, new_stmt, true);
1991                       cleanup_eh = true;
1992                       if (gimple_vdef (stmt))
1993                         release_ssa_name (gimple_vdef (stmt));
1994                     }
1995                   break;
1996
1997                 CASE_CFN_POWI:
1998                   arg0 = gimple_call_arg (stmt, 0);
1999                   arg1 = gimple_call_arg (stmt, 1);
2000                   loc = gimple_location (stmt);
2001
2002                   if (real_minus_onep (arg0))
2003                     {
2004                       tree t0, t1, cond, one, minus_one;
2005                       gassign *stmt;
2006
2007                       t0 = TREE_TYPE (arg0);
2008                       t1 = TREE_TYPE (arg1);
2009                       one = build_real (t0, dconst1);
2010                       minus_one = build_real (t0, dconstm1);
2011
2012                       cond = make_temp_ssa_name (t1, NULL, "powi_cond");
2013                       stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2014                                                   arg1, build_int_cst (t1, 1));
2015                       gimple_set_location (stmt, loc);
2016                       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2017
2018                       result = make_temp_ssa_name (t0, NULL, "powi");
2019                       stmt = gimple_build_assign (result, COND_EXPR, cond,
2020                                                   minus_one, one);
2021                       gimple_set_location (stmt, loc);
2022                       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2023                     }
2024                   else
2025                     {
2026                       if (!tree_fits_shwi_p (arg1))
2027                         break;
2028
2029                       n = tree_to_shwi (arg1);
2030                       result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
2031                     }
2032
2033                   if (result)
2034                     {
2035                       tree lhs = gimple_get_lhs (stmt);
2036                       gassign *new_stmt = gimple_build_assign (lhs, result);
2037                       gimple_set_location (new_stmt, loc);
2038                       unlink_stmt_vdef (stmt);
2039                       gsi_replace (&gsi, new_stmt, true);
2040                       cleanup_eh = true;
2041                       if (gimple_vdef (stmt))
2042                         release_ssa_name (gimple_vdef (stmt));
2043                     }
2044                   break;
2045
2046                 CASE_CFN_CABS:
2047                   arg0 = gimple_call_arg (stmt, 0);
2048                   loc = gimple_location (stmt);
2049                   result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
2050
2051                   if (result)
2052                     {
2053                       tree lhs = gimple_get_lhs (stmt);
2054                       gassign *new_stmt = gimple_build_assign (lhs, result);
2055                       gimple_set_location (new_stmt, loc);
2056                       unlink_stmt_vdef (stmt);
2057                       gsi_replace (&gsi, new_stmt, true);
2058                       cleanup_eh = true;
2059                       if (gimple_vdef (stmt))
2060                         release_ssa_name (gimple_vdef (stmt));
2061                     }
2062                   break;
2063
2064                 default:;
2065                 }
2066             }
2067         }
2068       if (cleanup_eh)
2069         cfg_changed |= gimple_purge_dead_eh_edges (bb);
2070     }
2071
2072   statistics_counter_event (fun, "sincos statements inserted",
2073                             sincos_stats.inserted);
2074
2075   return cfg_changed ? TODO_cleanup_cfg : 0;
2076 }
2077
2078 } // anon namespace
2079
2080 gimple_opt_pass *
2081 make_pass_cse_sincos (gcc::context *ctxt)
2082 {
2083   return new pass_cse_sincos (ctxt);
2084 }
2085
2086 /* Return true if stmt is a type conversion operation that can be stripped
2087    when used in a widening multiply operation.  */
2088 static bool
2089 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2090 {
2091   enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2092
2093   if (TREE_CODE (result_type) == INTEGER_TYPE)
2094     {
2095       tree op_type;
2096       tree inner_op_type;
2097
2098       if (!CONVERT_EXPR_CODE_P (rhs_code))
2099         return false;
2100
2101       op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2102
2103       /* If the type of OP has the same precision as the result, then
2104          we can strip this conversion.  The multiply operation will be
2105          selected to create the correct extension as a by-product.  */
2106       if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2107         return true;
2108
2109       /* We can also strip a conversion if it preserves the signed-ness of
2110          the operation and doesn't narrow the range.  */
2111       inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2112
2113       /* If the inner-most type is unsigned, then we can strip any
2114          intermediate widening operation.  If it's signed, then the
2115          intermediate widening operation must also be signed.  */
2116       if ((TYPE_UNSIGNED (inner_op_type)
2117            || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2118           && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2119         return true;
2120
2121       return false;
2122     }
2123
2124   return rhs_code == FIXED_CONVERT_EXPR;
2125 }
2126
2127 /* Return true if RHS is a suitable operand for a widening multiplication,
2128    assuming a target type of TYPE.
2129    There are two cases:
2130
2131      - RHS makes some value at least twice as wide.  Store that value
2132        in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2133
2134      - RHS is an integer constant.  Store that value in *NEW_RHS_OUT if so,
2135        but leave *TYPE_OUT untouched.  */
2136
2137 static bool
2138 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2139                         tree *new_rhs_out)
2140 {
2141   gimple *stmt;
2142   tree type1, rhs1;
2143
2144   if (TREE_CODE (rhs) == SSA_NAME)
2145     {
2146       stmt = SSA_NAME_DEF_STMT (rhs);
2147       if (is_gimple_assign (stmt))
2148         {
2149           if (! widening_mult_conversion_strippable_p (type, stmt))
2150             rhs1 = rhs;
2151           else
2152             {
2153               rhs1 = gimple_assign_rhs1 (stmt);
2154
2155               if (TREE_CODE (rhs1) == INTEGER_CST)
2156                 {
2157                   *new_rhs_out = rhs1;
2158                   *type_out = NULL;
2159                   return true;
2160                 }
2161             }
2162         }
2163       else
2164         rhs1 = rhs;
2165
2166       type1 = TREE_TYPE (rhs1);
2167
2168       if (TREE_CODE (type1) != TREE_CODE (type)
2169           || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2170         return false;
2171
2172       *new_rhs_out = rhs1;
2173       *type_out = type1;
2174       return true;
2175     }
2176
2177   if (TREE_CODE (rhs) == INTEGER_CST)
2178     {
2179       *new_rhs_out = rhs;
2180       *type_out = NULL;
2181       return true;
2182     }
2183
2184   return false;
2185 }
2186
2187 /* Return true if STMT performs a widening multiplication, assuming the
2188    output type is TYPE.  If so, store the unwidened types of the operands
2189    in *TYPE1_OUT and *TYPE2_OUT respectively.  Also fill *RHS1_OUT and
2190    *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2191    and *TYPE2_OUT would give the operands of the multiplication.  */
2192
2193 static bool
2194 is_widening_mult_p (gimple *stmt,
2195                     tree *type1_out, tree *rhs1_out,
2196                     tree *type2_out, tree *rhs2_out)
2197 {
2198   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2199
2200   if (TREE_CODE (type) != INTEGER_TYPE
2201       && TREE_CODE (type) != FIXED_POINT_TYPE)
2202     return false;
2203
2204   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2205                                rhs1_out))
2206     return false;
2207
2208   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2209                                rhs2_out))
2210     return false;
2211
2212   if (*type1_out == NULL)
2213     {
2214       if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2215         return false;
2216       *type1_out = *type2_out;
2217     }
2218
2219   if (*type2_out == NULL)
2220     {
2221       if (!int_fits_type_p (*rhs2_out, *type1_out))
2222         return false;
2223       *type2_out = *type1_out;
2224     }
2225
2226   /* Ensure that the larger of the two operands comes first. */
2227   if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2228     {
2229       std::swap (*type1_out, *type2_out);
2230       std::swap (*rhs1_out, *rhs2_out);
2231     }
2232
2233   return true;
2234 }
2235
2236 /* Check to see if the CALL statement is an invocation of copysign
2237    with 1. being the first argument.  */
2238 static bool
2239 is_copysign_call_with_1 (gimple *call)
2240 {
2241   gcall *c = dyn_cast <gcall *> (call);
2242   if (! c)
2243     return false;
2244
2245   enum combined_fn code = gimple_call_combined_fn (c);
2246
2247   if (code == CFN_LAST)
2248     return false;
2249
2250   if (builtin_fn_p (code))
2251     {
2252       switch (as_builtin_fn (code))
2253         {
2254         CASE_FLT_FN (BUILT_IN_COPYSIGN):
2255         CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2256           return real_onep (gimple_call_arg (c, 0));
2257         default:
2258           return false;
2259         }
2260     }
2261
2262   if (internal_fn_p (code))
2263     {
2264       switch (as_internal_fn (code))
2265         {
2266         case IFN_COPYSIGN:
2267           return real_onep (gimple_call_arg (c, 0));
2268         default:
2269           return false;
2270         }
2271     }
2272
2273    return false;
2274 }
2275
2276 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2277    This only happens when the the xorsign optab is defined, if the
2278    pattern is not a xorsign pattern or if expansion fails FALSE is
2279    returned, otherwise TRUE is returned.  */
2280 static bool
2281 convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2282 {
2283   tree treeop0, treeop1, lhs, type;
2284   location_t loc = gimple_location (stmt);
2285   lhs = gimple_assign_lhs (stmt);
2286   treeop0 = gimple_assign_rhs1 (stmt);
2287   treeop1 = gimple_assign_rhs2 (stmt);
2288   type = TREE_TYPE (lhs);
2289   machine_mode mode = TYPE_MODE (type);
2290
2291   if (HONOR_SNANS (type))
2292     return false;
2293
2294   if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2295     {
2296       gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2297       if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
2298         {
2299           call0 = SSA_NAME_DEF_STMT (treeop1);
2300           if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2301             return false;
2302
2303           treeop1 = treeop0;
2304         }
2305         if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2306           return false;
2307
2308         gcall *c = as_a<gcall*> (call0);
2309         treeop0 = gimple_call_arg (c, 1);
2310
2311         gcall *call_stmt
2312           = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2313         gimple_set_lhs (call_stmt, lhs);
2314         gimple_set_location (call_stmt, loc);
2315         gsi_replace (gsi, call_stmt, true);
2316         return true;
2317     }
2318
2319   return false;
2320 }
2321
2322 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2323    its rhs, and try to convert it into a WIDEN_MULT_EXPR.  The return
2324    value is true iff we converted the statement.  */
2325
2326 static bool
2327 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2328 {
2329   tree lhs, rhs1, rhs2, type, type1, type2;
2330   enum insn_code handler;
2331   scalar_int_mode to_mode, from_mode, actual_mode;
2332   optab op;
2333   int actual_precision;
2334   location_t loc = gimple_location (stmt);
2335   bool from_unsigned1, from_unsigned2;
2336
2337   lhs = gimple_assign_lhs (stmt);
2338   type = TREE_TYPE (lhs);
2339   if (TREE_CODE (type) != INTEGER_TYPE)
2340     return false;
2341
2342   if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2343     return false;
2344
2345   to_mode = SCALAR_INT_TYPE_MODE (type);
2346   from_mode = SCALAR_INT_TYPE_MODE (type1);
2347   if (to_mode == from_mode)
2348     return false;
2349
2350   from_unsigned1 = TYPE_UNSIGNED (type1);
2351   from_unsigned2 = TYPE_UNSIGNED (type2);
2352
2353   if (from_unsigned1 && from_unsigned2)
2354     op = umul_widen_optab;
2355   else if (!from_unsigned1 && !from_unsigned2)
2356     op = smul_widen_optab;
2357   else
2358     op = usmul_widen_optab;
2359
2360   handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2361                                                   &actual_mode);
2362
2363   if (handler == CODE_FOR_nothing)
2364     {
2365       if (op != smul_widen_optab)
2366         {
2367           /* We can use a signed multiply with unsigned types as long as
2368              there is a wider mode to use, or it is the smaller of the two
2369              types that is unsigned.  Note that type1 >= type2, always.  */
2370           if ((TYPE_UNSIGNED (type1)
2371                && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2372               || (TYPE_UNSIGNED (type2)
2373                   && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2374             {
2375               if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2376                   || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2377                 return false;
2378             }
2379
2380           op = smul_widen_optab;
2381           handler = find_widening_optab_handler_and_mode (op, to_mode,
2382                                                           from_mode,
2383                                                           &actual_mode);
2384
2385           if (handler == CODE_FOR_nothing)
2386             return false;
2387
2388           from_unsigned1 = from_unsigned2 = false;
2389         }
2390       else
2391         return false;
2392     }
2393
2394   /* Ensure that the inputs to the handler are in the correct precison
2395      for the opcode.  This will be the full mode size.  */
2396   actual_precision = GET_MODE_PRECISION (actual_mode);
2397   if (2 * actual_precision > TYPE_PRECISION (type))
2398     return false;
2399   if (actual_precision != TYPE_PRECISION (type1)
2400       || from_unsigned1 != TYPE_UNSIGNED (type1))
2401     rhs1 = build_and_insert_cast (gsi, loc,
2402                                   build_nonstandard_integer_type
2403                                     (actual_precision, from_unsigned1), rhs1);
2404   if (actual_precision != TYPE_PRECISION (type2)
2405       || from_unsigned2 != TYPE_UNSIGNED (type2))
2406     rhs2 = build_and_insert_cast (gsi, loc,
2407                                   build_nonstandard_integer_type
2408                                     (actual_precision, from_unsigned2), rhs2);
2409
2410   /* Handle constants.  */
2411   if (TREE_CODE (rhs1) == INTEGER_CST)
2412     rhs1 = fold_convert (type1, rhs1);
2413   if (TREE_CODE (rhs2) == INTEGER_CST)
2414     rhs2 = fold_convert (type2, rhs2);
2415
2416   gimple_assign_set_rhs1 (stmt, rhs1);
2417   gimple_assign_set_rhs2 (stmt, rhs2);
2418   gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2419   update_stmt (stmt);
2420   widen_mul_stats.widen_mults_inserted++;
2421   return true;
2422 }
2423
2424 /* Process a single gimple statement STMT, which is found at the
2425    iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
2426    rhs (given by CODE), and try to convert it into a
2427    WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
2428    is true iff we converted the statement.  */
2429
2430 static bool
2431 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2432                             enum tree_code code)
2433 {
2434   gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2435   gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2436   tree type, type1, type2, optype;
2437   tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2438   enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2439   optab this_optab;
2440   enum tree_code wmult_code;
2441   enum insn_code handler;
2442   scalar_mode to_mode, from_mode, actual_mode;
2443   location_t loc = gimple_location (stmt);
2444   int actual_precision;
2445   bool from_unsigned1, from_unsigned2;
2446
2447   lhs = gimple_assign_lhs (stmt);
2448   type = TREE_TYPE (lhs);
2449   if (TREE_CODE (type) != INTEGER_TYPE
2450       && TREE_CODE (type) != FIXED_POINT_TYPE)
2451     return false;
2452
2453   if (code == MINUS_EXPR)
2454     wmult_code = WIDEN_MULT_MINUS_EXPR;
2455   else
2456     wmult_code = WIDEN_MULT_PLUS_EXPR;
2457
2458   rhs1 = gimple_assign_rhs1 (stmt);
2459   rhs2 = gimple_assign_rhs2 (stmt);
2460
2461   if (TREE_CODE (rhs1) == SSA_NAME)
2462     {
2463       rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2464       if (is_gimple_assign (rhs1_stmt))
2465         rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2466     }
2467
2468   if (TREE_CODE (rhs2) == SSA_NAME)
2469     {
2470       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2471       if (is_gimple_assign (rhs2_stmt))
2472         rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2473     }
2474
2475   /* Allow for one conversion statement between the multiply
2476      and addition/subtraction statement.  If there are more than
2477      one conversions then we assume they would invalidate this
2478      transformation.  If that's not the case then they should have
2479      been folded before now.  */
2480   if (CONVERT_EXPR_CODE_P (rhs1_code))
2481     {
2482       conv1_stmt = rhs1_stmt;
2483       rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2484       if (TREE_CODE (rhs1) == SSA_NAME)
2485         {
2486           rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2487           if (is_gimple_assign (rhs1_stmt))
2488             rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2489         }
2490       else
2491         return false;
2492     }
2493   if (CONVERT_EXPR_CODE_P (rhs2_code))
2494     {
2495       conv2_stmt = rhs2_stmt;
2496       rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2497       if (TREE_CODE (rhs2) == SSA_NAME)
2498         {
2499           rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2500           if (is_gimple_assign (rhs2_stmt))
2501             rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2502         }
2503       else
2504         return false;
2505     }
2506
2507   /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2508      is_widening_mult_p, but we still need the rhs returns.
2509
2510      It might also appear that it would be sufficient to use the existing
2511      operands of the widening multiply, but that would limit the choice of
2512      multiply-and-accumulate instructions.
2513
2514      If the widened-multiplication result has more than one uses, it is
2515      probably wiser not to do the conversion.  */
2516   if (code == PLUS_EXPR
2517       && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2518     {
2519       if (!has_single_use (rhs1)
2520           || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2521                                   &type2, &mult_rhs2))
2522         return false;
2523       add_rhs = rhs2;
2524       conv_stmt = conv1_stmt;
2525     }
2526   else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2527     {
2528       if (!has_single_use (rhs2)
2529           || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2530                                   &type2, &mult_rhs2))
2531         return false;
2532       add_rhs = rhs1;
2533       conv_stmt = conv2_stmt;
2534     }
2535   else
2536     return false;
2537
2538   to_mode = SCALAR_TYPE_MODE (type);
2539   from_mode = SCALAR_TYPE_MODE (type1);
2540   if (to_mode == from_mode)
2541     return false;
2542
2543   from_unsigned1 = TYPE_UNSIGNED (type1);
2544   from_unsigned2 = TYPE_UNSIGNED (type2);
2545   optype = type1;
2546
2547   /* There's no such thing as a mixed sign madd yet, so use a wider mode.  */
2548   if (from_unsigned1 != from_unsigned2)
2549     {
2550       if (!INTEGRAL_TYPE_P (type))
2551         return false;
2552       /* We can use a signed multiply with unsigned types as long as
2553          there is a wider mode to use, or it is the smaller of the two
2554          types that is unsigned.  Note that type1 >= type2, always.  */
2555       if ((from_unsigned1
2556            && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2557           || (from_unsigned2
2558               && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2559         {
2560           if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2561               || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2562             return false;
2563         }
2564
2565       from_unsigned1 = from_unsigned2 = false;
2566       optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2567                                                false);
2568     }
2569
2570   /* If there was a conversion between the multiply and addition
2571      then we need to make sure it fits a multiply-and-accumulate.
2572      The should be a single mode change which does not change the
2573      value.  */
2574   if (conv_stmt)
2575     {
2576       /* We use the original, unmodified data types for this.  */
2577       tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
2578       tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
2579       int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
2580       bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
2581
2582       if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
2583         {
2584           /* Conversion is a truncate.  */
2585           if (TYPE_PRECISION (to_type) < data_size)
2586             return false;
2587         }
2588       else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
2589         {
2590           /* Conversion is an extend.  Check it's the right sort.  */
2591           if (TYPE_UNSIGNED (from_type) != is_unsigned
2592               && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
2593             return false;
2594         }
2595       /* else convert is a no-op for our purposes.  */
2596     }
2597
2598   /* Verify that the machine can perform a widening multiply
2599      accumulate in this mode/signedness combination, otherwise
2600      this transformation is likely to pessimize code.  */
2601   this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
2602   handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
2603                                                   from_mode, &actual_mode);
2604
2605   if (handler == CODE_FOR_nothing)
2606     return false;
2607
2608   /* Ensure that the inputs to the handler are in the correct precison
2609      for the opcode.  This will be the full mode size.  */
2610   actual_precision = GET_MODE_PRECISION (actual_mode);
2611   if (actual_precision != TYPE_PRECISION (type1)
2612       || from_unsigned1 != TYPE_UNSIGNED (type1))
2613     mult_rhs1 = build_and_insert_cast (gsi, loc,
2614                                        build_nonstandard_integer_type
2615                                          (actual_precision, from_unsigned1),
2616                                        mult_rhs1);
2617   if (actual_precision != TYPE_PRECISION (type2)
2618       || from_unsigned2 != TYPE_UNSIGNED (type2))
2619     mult_rhs2 = build_and_insert_cast (gsi, loc,
2620                                        build_nonstandard_integer_type
2621                                          (actual_precision, from_unsigned2),
2622                                        mult_rhs2);
2623
2624   if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
2625     add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
2626
2627   /* Handle constants.  */
2628   if (TREE_CODE (mult_rhs1) == INTEGER_CST)
2629     mult_rhs1 = fold_convert (type1, mult_rhs1);
2630   if (TREE_CODE (mult_rhs2) == INTEGER_CST)
2631     mult_rhs2 = fold_convert (type2, mult_rhs2);
2632
2633   gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
2634                                   add_rhs);
2635   update_stmt (gsi_stmt (*gsi));
2636   widen_mul_stats.maccs_inserted++;
2637   return true;
2638 }
2639
2640 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
2641    with uses in additions and subtractions to form fused multiply-add
2642    operations.  Returns true if successful and MUL_STMT should be removed.  */
2643
2644 static bool
2645 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2)
2646 {
2647   tree mul_result = gimple_get_lhs (mul_stmt);
2648   tree type = TREE_TYPE (mul_result);
2649   gimple *use_stmt, *neguse_stmt;
2650   gassign *fma_stmt;
2651   use_operand_p use_p;
2652   imm_use_iterator imm_iter;
2653
2654   if (FLOAT_TYPE_P (type)
2655       && flag_fp_contract_mode == FP_CONTRACT_OFF)
2656     return false;
2657
2658   /* We don't want to do bitfield reduction ops.  */
2659   if (INTEGRAL_TYPE_P (type)
2660       && !type_has_mode_precision_p (type))
2661     return false;
2662
2663   /* If the target doesn't support it, don't generate it.  We assume that
2664      if fma isn't available then fms, fnma or fnms are not either.  */
2665   if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
2666     return false;
2667
2668   /* If the multiplication has zero uses, it is kept around probably because
2669      of -fnon-call-exceptions.  Don't optimize it away in that case,
2670      it is DCE job.  */
2671   if (has_zero_uses (mul_result))
2672     return false;
2673
2674   /* Make sure that the multiplication statement becomes dead after
2675      the transformation, thus that all uses are transformed to FMAs.
2676      This means we assume that an FMA operation has the same cost
2677      as an addition.  */
2678   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
2679     {
2680       enum tree_code use_code;
2681       tree result = mul_result;
2682       bool negate_p = false;
2683
2684       use_stmt = USE_STMT (use_p);
2685
2686       if (is_gimple_debug (use_stmt))
2687         continue;
2688
2689       /* For now restrict this operations to single basic blocks.  In theory
2690          we would want to support sinking the multiplication in
2691          m = a*b;
2692          if ()
2693            ma = m + c;
2694          else
2695            d = m;
2696          to form a fma in the then block and sink the multiplication to the
2697          else block.  */
2698       if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
2699         return false;
2700
2701       if (!is_gimple_assign (use_stmt))
2702         return false;
2703
2704       use_code = gimple_assign_rhs_code (use_stmt);
2705
2706       /* A negate on the multiplication leads to FNMA.  */
2707       if (use_code == NEGATE_EXPR)
2708         {
2709           ssa_op_iter iter;
2710           use_operand_p usep;
2711
2712           result = gimple_assign_lhs (use_stmt);
2713
2714           /* Make sure the negate statement becomes dead with this
2715              single transformation.  */
2716           if (!single_imm_use (gimple_assign_lhs (use_stmt),
2717                                &use_p, &neguse_stmt))
2718             return false;
2719
2720           /* Make sure the multiplication isn't also used on that stmt.  */
2721           FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
2722             if (USE_FROM_PTR (usep) == mul_result)
2723               return false;
2724
2725           /* Re-validate.  */
2726           use_stmt = neguse_stmt;
2727           if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
2728             return false;
2729           if (!is_gimple_assign (use_stmt))
2730             return false;
2731
2732           use_code = gimple_assign_rhs_code (use_stmt);
2733           negate_p = true;
2734         }
2735
2736       switch (use_code)
2737         {
2738         case MINUS_EXPR:
2739           if (gimple_assign_rhs2 (use_stmt) == result)
2740             negate_p = !negate_p;
2741           break;
2742         case PLUS_EXPR:
2743           break;
2744         default:
2745           /* FMA can only be formed from PLUS and MINUS.  */
2746           return false;
2747         }
2748
2749       /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
2750          by a MULT_EXPR that we'll visit later, we might be able to
2751          get a more profitable match with fnma.
2752          OTOH, if we don't, a negate / fma pair has likely lower latency
2753          that a mult / subtract pair.  */
2754       if (use_code == MINUS_EXPR && !negate_p
2755           && gimple_assign_rhs1 (use_stmt) == result
2756           && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing
2757           && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
2758         {
2759           tree rhs2 = gimple_assign_rhs2 (use_stmt);
2760
2761           if (TREE_CODE (rhs2) == SSA_NAME)
2762             {
2763               gimple *stmt2 = SSA_NAME_DEF_STMT (rhs2);
2764               if (has_single_use (rhs2)
2765                   && is_gimple_assign (stmt2)
2766                   && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
2767               return false;
2768             }
2769         }
2770
2771       /* We can't handle a * b + a * b.  */
2772       if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
2773         return false;
2774
2775       /* While it is possible to validate whether or not the exact form
2776          that we've recognized is available in the backend, the assumption
2777          is that the transformation is never a loss.  For instance, suppose
2778          the target only has the plain FMA pattern available.  Consider
2779          a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which
2780          is still two operations.  Consider -(a*b)-c -> fma(-a,b,-c): we
2781          still have 3 operations, but in the FMA form the two NEGs are
2782          independent and could be run in parallel.  */
2783     }
2784
2785   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
2786     {
2787       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
2788       enum tree_code use_code;
2789       tree addop, mulop1 = op1, result = mul_result;
2790       bool negate_p = false;
2791
2792       if (is_gimple_debug (use_stmt))
2793         continue;
2794
2795       use_code = gimple_assign_rhs_code (use_stmt);
2796       if (use_code == NEGATE_EXPR)
2797         {
2798           result = gimple_assign_lhs (use_stmt);
2799           single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
2800           gsi_remove (&gsi, true);
2801           release_defs (use_stmt);
2802
2803           use_stmt = neguse_stmt;
2804           gsi = gsi_for_stmt (use_stmt);
2805           use_code = gimple_assign_rhs_code (use_stmt);
2806           negate_p = true;
2807         }
2808
2809       if (gimple_assign_rhs1 (use_stmt) == result)
2810         {
2811           addop = gimple_assign_rhs2 (use_stmt);
2812           /* a * b - c -> a * b + (-c)  */
2813           if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
2814             addop = force_gimple_operand_gsi (&gsi,
2815                                               build1 (NEGATE_EXPR,
2816                                                       type, addop),
2817                                               true, NULL_TREE, true,
2818                                               GSI_SAME_STMT);
2819         }
2820       else
2821         {
2822           addop = gimple_assign_rhs1 (use_stmt);
2823           /* a - b * c -> (-b) * c + a */
2824           if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
2825             negate_p = !negate_p;
2826         }
2827
2828       if (negate_p)
2829         mulop1 = force_gimple_operand_gsi (&gsi,
2830                                            build1 (NEGATE_EXPR,
2831                                                    type, mulop1),
2832                                            true, NULL_TREE, true,
2833                                            GSI_SAME_STMT);
2834
2835       fma_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt),
2836                                       FMA_EXPR, mulop1, op2, addop);
2837       gsi_replace (&gsi, fma_stmt, true);
2838       widen_mul_stats.fmas_inserted++;
2839     }
2840
2841   return true;
2842 }
2843
2844
2845 /* Helper function of match_uaddsub_overflow.  Return 1
2846    if USE_STMT is unsigned overflow check ovf != 0 for
2847    STMT, -1 if USE_STMT is unsigned overflow check ovf == 0
2848    and 0 otherwise.  */
2849
2850 static int
2851 uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt)
2852 {
2853   enum tree_code ccode = ERROR_MARK;
2854   tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
2855   if (gimple_code (use_stmt) == GIMPLE_COND)
2856     {
2857       ccode = gimple_cond_code (use_stmt);
2858       crhs1 = gimple_cond_lhs (use_stmt);
2859       crhs2 = gimple_cond_rhs (use_stmt);
2860     }
2861   else if (is_gimple_assign (use_stmt))
2862     {
2863       if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
2864         {
2865           ccode = gimple_assign_rhs_code (use_stmt);
2866           crhs1 = gimple_assign_rhs1 (use_stmt);
2867           crhs2 = gimple_assign_rhs2 (use_stmt);
2868         }
2869       else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR)
2870         {
2871           tree cond = gimple_assign_rhs1 (use_stmt);
2872           if (COMPARISON_CLASS_P (cond))
2873             {
2874               ccode = TREE_CODE (cond);
2875               crhs1 = TREE_OPERAND (cond, 0);
2876               crhs2 = TREE_OPERAND (cond, 1);
2877             }
2878           else
2879             return 0;
2880         }
2881       else
2882         return 0;
2883     }
2884   else
2885     return 0;
2886
2887   if (TREE_CODE_CLASS (ccode) != tcc_comparison)
2888     return 0;
2889
2890   enum tree_code code = gimple_assign_rhs_code (stmt);
2891   tree lhs = gimple_assign_lhs (stmt);
2892   tree rhs1 = gimple_assign_rhs1 (stmt);
2893   tree rhs2 = gimple_assign_rhs2 (stmt);
2894
2895   switch (ccode)
2896     {
2897     case GT_EXPR:
2898     case LE_EXPR:
2899       /* r = a - b; r > a or r <= a
2900          r = a + b; a > r or a <= r or b > r or b <= r.  */
2901       if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
2902           || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
2903               && crhs2 == lhs))
2904         return ccode == GT_EXPR ? 1 : -1;
2905       break;
2906     case LT_EXPR:
2907     case GE_EXPR:
2908       /* r = a - b; a < r or a >= r
2909          r = a + b; r < a or r >= a or r < b or r >= b.  */
2910       if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
2911           || (code == PLUS_EXPR && crhs1 == lhs
2912               && (crhs2 == rhs1 || crhs2 == rhs2)))
2913         return ccode == LT_EXPR ? 1 : -1;
2914       break;
2915     default:
2916       break;
2917     }
2918   return 0;
2919 }
2920
2921 /* Recognize for unsigned x
2922    x = y - z;
2923    if (x > y)
2924    where there are other uses of x and replace it with
2925    _7 = SUB_OVERFLOW (y, z);
2926    x = REALPART_EXPR <_7>;
2927    _8 = IMAGPART_EXPR <_7>;
2928    if (_8)
2929    and similarly for addition.  */
2930
2931 static bool
2932 match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
2933                         enum tree_code code)
2934 {
2935   tree lhs = gimple_assign_lhs (stmt);
2936   tree type = TREE_TYPE (lhs);
2937   use_operand_p use_p;
2938   imm_use_iterator iter;
2939   bool use_seen = false;
2940   bool ovf_use_seen = false;
2941   gimple *use_stmt;
2942
2943   gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR);
2944   if (!INTEGRAL_TYPE_P (type)
2945       || !TYPE_UNSIGNED (type)
2946       || has_zero_uses (lhs)
2947       || has_single_use (lhs)
2948       || optab_handler (code == PLUS_EXPR ? uaddv4_optab : usubv4_optab,
2949                         TYPE_MODE (type)) == CODE_FOR_nothing)
2950     return false;
2951
2952   FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
2953     {
2954       use_stmt = USE_STMT (use_p);
2955       if (is_gimple_debug (use_stmt))
2956         continue;
2957
2958       if (uaddsub_overflow_check_p (stmt, use_stmt))
2959         ovf_use_seen = true;
2960       else
2961         use_seen = true;
2962       if (ovf_use_seen && use_seen)
2963         break;
2964     }
2965
2966   if (!ovf_use_seen || !use_seen)
2967     return false;
2968
2969   tree ctype = build_complex_type (type);
2970   tree rhs1 = gimple_assign_rhs1 (stmt);
2971   tree rhs2 = gimple_assign_rhs2 (stmt);
2972   gcall *g = gimple_build_call_internal (code == PLUS_EXPR
2973                                          ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
2974                                          2, rhs1, rhs2);
2975   tree ctmp = make_ssa_name (ctype);
2976   gimple_call_set_lhs (g, ctmp);
2977   gsi_insert_before (gsi, g, GSI_SAME_STMT);
2978   gassign *g2 = gimple_build_assign (lhs, REALPART_EXPR,
2979                                      build1 (REALPART_EXPR, type, ctmp));
2980   gsi_replace (gsi, g2, true);
2981   tree ovf = make_ssa_name (type);
2982   g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
2983                             build1 (IMAGPART_EXPR, type, ctmp));
2984   gsi_insert_after (gsi, g2, GSI_NEW_STMT);
2985
2986   FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2987     {
2988       if (is_gimple_debug (use_stmt))
2989         continue;
2990
2991       int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt);
2992       if (ovf_use == 0)
2993         continue;
2994       if (gimple_code (use_stmt) == GIMPLE_COND)
2995         {
2996           gcond *cond_stmt = as_a <gcond *> (use_stmt);
2997           gimple_cond_set_lhs (cond_stmt, ovf);
2998           gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
2999           gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
3000         }
3001       else
3002         {
3003           gcc_checking_assert (is_gimple_assign (use_stmt));
3004           if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
3005             {
3006               gimple_assign_set_rhs1 (use_stmt, ovf);
3007               gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
3008               gimple_assign_set_rhs_code (use_stmt,
3009                                           ovf_use == 1 ? NE_EXPR : EQ_EXPR);
3010             }
3011           else
3012             {
3013               gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
3014                                    == COND_EXPR);
3015               tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
3016                                   boolean_type_node, ovf,
3017                                   build_int_cst (type, 0));
3018               gimple_assign_set_rhs1 (use_stmt, cond);
3019             }
3020         }
3021       update_stmt (use_stmt);
3022     }
3023   return true;
3024 }
3025
3026 /* Return true if target has support for divmod.  */
3027
3028 static bool
3029 target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
3030 {
3031   /* If target supports hardware divmod insn, use it for divmod.  */
3032   if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
3033     return true;
3034
3035   /* Check if libfunc for divmod is available.  */
3036   rtx libfunc = optab_libfunc (divmod_optab, mode);
3037   if (libfunc != NULL_RTX)
3038     {
3039       /* If optab_handler exists for div_optab, perhaps in a wider mode,
3040          we don't want to use the libfunc even if it exists for given mode.  */
3041       machine_mode div_mode;
3042       FOR_EACH_MODE_FROM (div_mode, mode)
3043         if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
3044           return false;
3045
3046       return targetm.expand_divmod_libfunc != NULL;
3047     }
3048
3049   return false;
3050 }
3051
3052 /* Check if stmt is candidate for divmod transform.  */
3053
3054 static bool
3055 divmod_candidate_p (gassign *stmt)
3056 {
3057   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
3058   machine_mode mode = TYPE_MODE (type);
3059   optab divmod_optab, div_optab;
3060
3061   if (TYPE_UNSIGNED (type))
3062     {
3063       divmod_optab = udivmod_optab;
3064       div_optab = udiv_optab;
3065     }
3066   else
3067     {
3068       divmod_optab = sdivmod_optab;
3069       div_optab = sdiv_optab;
3070     }
3071
3072   tree op1 = gimple_assign_rhs1 (stmt);
3073   tree op2 = gimple_assign_rhs2 (stmt);
3074
3075   /* Disable the transform if either is a constant, since division-by-constant
3076      may have specialized expansion.  */
3077   if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2))
3078     return false;
3079
3080   /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
3081      expand using the [su]divv optabs.  */
3082   if (TYPE_OVERFLOW_TRAPS (type))
3083     return false;
3084
3085   if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
3086     return false;
3087
3088   return true;
3089 }
3090
3091 /* This function looks for:
3092    t1 = a TRUNC_DIV_EXPR b;
3093    t2 = a TRUNC_MOD_EXPR b;
3094    and transforms it to the following sequence:
3095    complex_tmp = DIVMOD (a, b);
3096    t1 = REALPART_EXPR(a);
3097    t2 = IMAGPART_EXPR(b);
3098    For conditions enabling the transform see divmod_candidate_p().
3099
3100    The pass has three parts:
3101    1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
3102       other trunc_div_expr and trunc_mod_expr stmts.
3103    2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
3104       to stmts vector.
3105    3) Insert DIVMOD call just before top_stmt and update entries in
3106       stmts vector to use return value of DIMOVD (REALEXPR_PART for div,
3107       IMAGPART_EXPR for mod).  */
3108
3109 static bool
3110 convert_to_divmod (gassign *stmt)
3111 {
3112   if (stmt_can_throw_internal (stmt)
3113       || !divmod_candidate_p (stmt))
3114     return false;
3115
3116   tree op1 = gimple_assign_rhs1 (stmt);
3117   tree op2 = gimple_assign_rhs2 (stmt);
3118
3119   imm_use_iterator use_iter;
3120   gimple *use_stmt;
3121   auto_vec<gimple *> stmts;
3122
3123   gimple *top_stmt = stmt;
3124   basic_block top_bb = gimple_bb (stmt);
3125
3126   /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
3127      at-least stmt and possibly other trunc_div/trunc_mod stmts
3128      having same operands as stmt.  */
3129
3130   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
3131     {
3132       if (is_gimple_assign (use_stmt)
3133           && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
3134               || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
3135           && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
3136           && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
3137         {
3138           if (stmt_can_throw_internal (use_stmt))
3139             continue;
3140
3141           basic_block bb = gimple_bb (use_stmt);
3142
3143           if (bb == top_bb)
3144             {
3145               if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
3146                 top_stmt = use_stmt;
3147             }
3148           else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
3149             {
3150               top_bb = bb;
3151               top_stmt = use_stmt;
3152             }
3153         }
3154     }
3155
3156   tree top_op1 = gimple_assign_rhs1 (top_stmt);
3157   tree top_op2 = gimple_assign_rhs2 (top_stmt);
3158
3159   stmts.safe_push (top_stmt);
3160   bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
3161
3162   /* Part 2: Add all trunc_div/trunc_mod statements domianted by top_bb
3163      to stmts vector. The 2nd loop will always add stmt to stmts vector, since
3164      gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
3165      2nd loop ends up adding at-least single trunc_mod_expr stmt.  */
3166
3167   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
3168     {
3169       if (is_gimple_assign (use_stmt)
3170           && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
3171               || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
3172           && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
3173           && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
3174         {
3175           if (use_stmt == top_stmt
3176               || stmt_can_throw_internal (use_stmt)
3177               || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
3178             continue;
3179
3180           stmts.safe_push (use_stmt);
3181           if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
3182             div_seen = true;
3183         }
3184     }
3185
3186   if (!div_seen)
3187     return false;
3188
3189   /* Part 3: Create libcall to internal fn DIVMOD:
3190      divmod_tmp = DIVMOD (op1, op2).  */
3191
3192   gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
3193   tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
3194                                  call_stmt, "divmod_tmp");
3195   gimple_call_set_lhs (call_stmt, res);
3196   /* We rejected throwing statements above.  */
3197   gimple_call_set_nothrow (call_stmt, true);
3198
3199   /* Insert the call before top_stmt.  */
3200   gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
3201   gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
3202
3203   widen_mul_stats.divmod_calls_inserted++;
3204
3205   /* Update all statements in stmts vector:
3206      lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
3207      lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>.  */
3208
3209   for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
3210     {
3211       tree new_rhs;
3212
3213       switch (gimple_assign_rhs_code (use_stmt))
3214         {
3215           case TRUNC_DIV_EXPR:
3216             new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
3217             break;
3218
3219           case TRUNC_MOD_EXPR:
3220             new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
3221             break;
3222
3223           default:
3224             gcc_unreachable ();
3225         }
3226
3227       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3228       gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
3229       update_stmt (use_stmt);
3230     }
3231
3232   return true;
3233 }
3234
3235 /* Find integer multiplications where the operands are extended from
3236    smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
3237    where appropriate.  */
3238
3239 namespace {
3240
3241 const pass_data pass_data_optimize_widening_mul =
3242 {
3243   GIMPLE_PASS, /* type */
3244   "widening_mul", /* name */
3245   OPTGROUP_NONE, /* optinfo_flags */
3246   TV_TREE_WIDEN_MUL, /* tv_id */
3247   PROP_ssa, /* properties_required */
3248   0, /* properties_provided */
3249   0, /* properties_destroyed */
3250   0, /* todo_flags_start */
3251   TODO_update_ssa, /* todo_flags_finish */
3252 };
3253
3254 class pass_optimize_widening_mul : public gimple_opt_pass
3255 {
3256 public:
3257   pass_optimize_widening_mul (gcc::context *ctxt)
3258     : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
3259   {}
3260
3261   /* opt_pass methods: */
3262   virtual bool gate (function *)
3263     {
3264       return flag_expensive_optimizations && optimize;
3265     }
3266
3267   virtual unsigned int execute (function *);
3268
3269 }; // class pass_optimize_widening_mul
3270
3271 unsigned int
3272 pass_optimize_widening_mul::execute (function *fun)
3273 {
3274   basic_block bb;
3275   bool cfg_changed = false;
3276
3277   memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
3278   calculate_dominance_info (CDI_DOMINATORS);
3279   renumber_gimple_stmt_uids ();
3280
3281   FOR_EACH_BB_FN (bb, fun)
3282     {
3283       gimple_stmt_iterator gsi;
3284
3285       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
3286         {
3287           gimple *stmt = gsi_stmt (gsi);
3288           enum tree_code code;
3289
3290           if (is_gimple_assign (stmt))
3291             {
3292               code = gimple_assign_rhs_code (stmt);
3293               switch (code)
3294                 {
3295                 case MULT_EXPR:
3296                   if (!convert_mult_to_widen (stmt, &gsi)
3297                       && !convert_expand_mult_copysign (stmt, &gsi)
3298                       && convert_mult_to_fma (stmt,
3299                                               gimple_assign_rhs1 (stmt),
3300                                               gimple_assign_rhs2 (stmt)))
3301                     {
3302                       gsi_remove (&gsi, true);
3303                       release_defs (stmt);
3304                       continue;
3305                     }
3306                   break;
3307
3308                 case PLUS_EXPR:
3309                 case MINUS_EXPR:
3310                   if (!convert_plusminus_to_widen (&gsi, stmt, code))
3311                     match_uaddsub_overflow (&gsi, stmt, code);
3312                   break;
3313
3314                 case TRUNC_MOD_EXPR:
3315                   convert_to_divmod (as_a<gassign *> (stmt));
3316                   break;
3317
3318                 default:;
3319                 }
3320             }
3321           else if (is_gimple_call (stmt)
3322                    && gimple_call_lhs (stmt))
3323             {
3324               tree fndecl = gimple_call_fndecl (stmt);
3325               if (fndecl
3326                   && gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
3327                 {
3328                   switch (DECL_FUNCTION_CODE (fndecl))
3329                     {
3330                       case BUILT_IN_POWF:
3331                       case BUILT_IN_POW:
3332                       case BUILT_IN_POWL:
3333                         if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
3334                             && real_equal
3335                                  (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
3336                                   &dconst2)
3337                             && convert_mult_to_fma (stmt,
3338                                                     gimple_call_arg (stmt, 0),
3339                                                     gimple_call_arg (stmt, 0)))
3340                           {
3341                             unlink_stmt_vdef (stmt);
3342                             if (gsi_remove (&gsi, true)
3343                                 && gimple_purge_dead_eh_edges (bb))
3344                               cfg_changed = true;
3345                             release_defs (stmt);
3346                             continue;
3347                           }
3348                           break;
3349
3350                       default:;
3351                     }
3352                 }
3353             }
3354           gsi_next (&gsi);
3355         }
3356     }
3357
3358   statistics_counter_event (fun, "widening multiplications inserted",
3359                             widen_mul_stats.widen_mults_inserted);
3360   statistics_counter_event (fun, "widening maccs inserted",
3361                             widen_mul_stats.maccs_inserted);
3362   statistics_counter_event (fun, "fused multiply-adds inserted",
3363                             widen_mul_stats.fmas_inserted);
3364   statistics_counter_event (fun, "divmod calls inserted",
3365                             widen_mul_stats.divmod_calls_inserted);
3366
3367   return cfg_changed ? TODO_cleanup_cfg : 0;
3368 }
3369
3370 } // anon namespace
3371
3372 gimple_opt_pass *
3373 make_pass_optimize_widening_mul (gcc::context *ctxt)
3374 {
3375   return new pass_optimize_widening_mul (ctxt);
3376 }