gcc/tree-ssa-math-opts.c

   1 /* Global, SSA-based optimizations using mathematical identities.
   2    Copyright (C) 2005-2020 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* Currently, the only mini-pass in this file tries to CSE reciprocal
  21    operations.  These are common in sequences such as this one:
  22
  23         modulus = sqrt(x*x + y*y + z*z);
  24         x = x / modulus;
  25         y = y / modulus;
  26         z = z / modulus;
  27
  28    that can be optimized to
  29
  30         modulus = sqrt(x*x + y*y + z*z);
  31         rmodulus = 1.0 / modulus;
  32         x = x * rmodulus;
  33         y = y * rmodulus;
  34         z = z * rmodulus;
  35
  36    We do this for loop invariant divisors, and with this pass whenever
  37    we notice that a division has the same divisor multiple times.
  38
  39    Of course, like in PRE, we don't insert a division if a dominator
  40    already has one.  However, this cannot be done as an extension of
  41    PRE for several reasons.
  42
  43    First of all, with some experiments it was found out that the
  44    transformation is not always useful if there are only two divisions
  45    by the same divisor.  This is probably because modern processors
  46    can pipeline the divisions; on older, in-order processors it should
  47    still be effective to optimize two divisions by the same number.
  48    We make this a param, and it shall be called N in the remainder of
  49    this comment.
  50
  51    Second, if trapping math is active, we have less freedom on where
  52    to insert divisions: we can only do so in basic blocks that already
  53    contain one.  (If divisions don't trap, instead, we can insert
  54    divisions elsewhere, which will be in blocks that are common dominators
  55    of those that have the division).
  56
  57    We really don't want to compute the reciprocal unless a division will
  58    be found.  To do this, we won't insert the division in a basic block
  59    that has less than N divisions *post-dominating* it.
  60
  61    The algorithm constructs a subset of the dominator tree, holding the
  62    blocks containing the divisions and the common dominators to them,
  63    and walk it twice.  The first walk is in post-order, and it annotates
  64    each block with the number of divisions that post-dominate it: this
  65    gives information on where divisions can be inserted profitably.
  66    The second walk is in pre-order, and it inserts divisions as explained
  67    above, and replaces divisions by multiplications.
  68
  69    In the best case, the cost of the pass is O(n_statements).  In the
  70    worst-case, the cost is due to creating the dominator tree subset,
  71    with a cost of O(n_basic_blocks ^ 2); however this can only happen
  72    for n_statements / n_basic_blocks statements.  So, the amortized cost
  73    of creating the dominator tree subset is O(n_basic_blocks) and the
  74    worst-case cost of the pass is O(n_statements * n_basic_blocks).
  75
  76    More practically, the cost will be small because there are few
  77    divisions, and they tend to be in the same basic block, so insert_bb
  78    is called very few times.
  79
  80    If we did this using domwalk.c, an efficient implementation would have
  81    to work on all the variables in a single pass, because we could not
  82    work on just a subset of the dominator tree, as we do now, and the
  83    cost would also be something like O(n_statements * n_basic_blocks).
  84    The data structures would be more complex in order to work on all the
  85    variables in a single pass.  */
  86
  87 #include "config.h"
  88 #include "system.h"
  89 #include "coretypes.h"
  90 #include "backend.h"
  91 #include "target.h"
  92 #include "rtl.h"
  93 #include "tree.h"
  94 #include "gimple.h"
  95 #include "predict.h"
  96 #include "alloc-pool.h"
  97 #include "tree-pass.h"
  98 #include "ssa.h"
  99 #include "optabs-tree.h"
 100 #include "gimple-pretty-print.h"
 101 #include "alias.h"
 102 #include "fold-const.h"
 103 #include "gimple-fold.h"
 104 #include "gimple-iterator.h"
 105 #include "gimplify.h"
 106 #include "gimplify-me.h"
 107 #include "stor-layout.h"
 108 #include "tree-cfg.h"
 109 #include "tree-dfa.h"
 110 #include "tree-ssa.h"
 111 #include "builtins.h"
 112 #include "internal-fn.h"
 113 #include "case-cfn-macros.h"
 114 #include "optabs-libfuncs.h"
 115 #include "tree-eh.h"
 116 #include "targhooks.h"
 117 #include "domwalk.h"
 118
 119 /* This structure represents one basic block that either computes a
 120    division, or is a common dominator for basic block that compute a
 121    division.  */
 122 struct occurrence {
 123   /* The basic block represented by this structure.  */
 124   basic_block bb;
 125
 126   /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
 127      inserted in BB.  */
 128   tree recip_def;
 129
 130   /* If non-NULL, the SSA_NAME holding the definition for a squared
 131      reciprocal inserted in BB.  */
 132   tree square_recip_def;
 133
 134   /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
 135      was inserted in BB.  */
 136   gimple *recip_def_stmt;
 137
 138   /* Pointer to a list of "struct occurrence"s for blocks dominated
 139      by BB.  */
 140   struct occurrence *children;
 141
 142   /* Pointer to the next "struct occurrence"s in the list of blocks
 143      sharing a common dominator.  */
 144   struct occurrence *next;
 145
 146   /* The number of divisions that are in BB before compute_merit.  The
 147      number of divisions that are in BB or post-dominate it after
 148      compute_merit.  */
 149   int num_divisions;
 150
 151   /* True if the basic block has a division, false if it is a common
 152      dominator for basic blocks that do.  If it is false and trapping
 153      math is active, BB is not a candidate for inserting a reciprocal.  */
 154   bool bb_has_division;
 155 };
 156
 157 static struct
 158 {
 159   /* Number of 1.0/X ops inserted.  */
 160   int rdivs_inserted;
 161
 162   /* Number of 1.0/FUNC ops inserted.  */
 163   int rfuncs_inserted;
 164 } reciprocal_stats;
 165
 166 static struct
 167 {
 168   /* Number of cexpi calls inserted.  */
 169   int inserted;
 170 } sincos_stats;
 171
 172 static struct
 173 {
 174   /* Number of widening multiplication ops inserted.  */
 175   int widen_mults_inserted;
 176
 177   /* Number of integer multiply-and-accumulate ops inserted.  */
 178   int maccs_inserted;
 179
 180   /* Number of fp fused multiply-add ops inserted.  */
 181   int fmas_inserted;
 182
 183   /* Number of divmod calls inserted.  */
 184   int divmod_calls_inserted;
 185 } widen_mul_stats;
 186
 187 /* The instance of "struct occurrence" representing the highest
 188    interesting block in the dominator tree.  */
 189 static struct occurrence *occ_head;
 190
 191 /* Allocation pool for getting instances of "struct occurrence".  */
 192 static object_allocator<occurrence> *occ_pool;
 193
 194
 195
 196 /* Allocate and return a new struct occurrence for basic block BB, and
 197    whose children list is headed by CHILDREN.  */
 198 static struct occurrence *
 199 occ_new (basic_block bb, struct occurrence *children)
 200 {
 201   struct occurrence *occ;
 202
 203   bb->aux = occ = occ_pool->allocate ();
 204   memset (occ, 0, sizeof (struct occurrence));
 205
 206   occ->bb = bb;
 207   occ->children = children;
 208   return occ;
 209 }
 210
 211
 212 /* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
 213    list of "struct occurrence"s, one per basic block, having IDOM as
 214    their common dominator.
 215
 216    We try to insert NEW_OCC as deep as possible in the tree, and we also
 217    insert any other block that is a common dominator for BB and one
 218    block already in the tree.  */
 219
 220 static void
 221 insert_bb (struct occurrence *new_occ, basic_block idom,
 222            struct occurrence **p_head)
 223 {
 224   struct occurrence *occ, **p_occ;
 225
 226   for (p_occ = p_head; (occ = *p_occ) != NULL; )
 227     {
 228       basic_block bb = new_occ->bb, occ_bb = occ->bb;
 229       basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
 230       if (dom == bb)
 231         {
 232           /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
 233              from its list.  */
 234           *p_occ = occ->next;
 235           occ->next = new_occ->children;
 236           new_occ->children = occ;
 237
 238           /* Try the next block (it may as well be dominated by BB).  */
 239         }
 240
 241       else if (dom == occ_bb)
 242         {
 243           /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
 244           insert_bb (new_occ, dom, &occ->children);
 245           return;
 246         }
 247
 248       else if (dom != idom)
 249         {
 250           gcc_assert (!dom->aux);
 251
 252           /* There is a dominator between IDOM and BB, add it and make
 253              two children out of NEW_OCC and OCC.  First, remove OCC from
 254              its list.  */
 255           *p_occ = occ->next;
 256           new_occ->next = occ;
 257           occ->next = NULL;
 258
 259           /* None of the previous blocks has DOM as a dominator: if we tail
 260              recursed, we would reexamine them uselessly. Just switch BB with
 261              DOM, and go on looking for blocks dominated by DOM.  */
 262           new_occ = occ_new (dom, new_occ);
 263         }
 264
 265       else
 266         {
 267           /* Nothing special, go on with the next element.  */
 268           p_occ = &occ->next;
 269         }
 270     }
 271
 272   /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
 273   new_occ->next = *p_head;
 274   *p_head = new_occ;
 275 }
 276
 277 /* Register that we found a division in BB.
 278    IMPORTANCE is a measure of how much weighting to give
 279    that division.  Use IMPORTANCE = 2 to register a single
 280    division.  If the division is going to be found multiple
 281    times use 1 (as it is with squares).  */
 282
 283 static inline void
 284 register_division_in (basic_block bb, int importance)
 285 {
 286   struct occurrence *occ;
 287
 288   occ = (struct occurrence *) bb->aux;
 289   if (!occ)
 290     {
 291       occ = occ_new (bb, NULL);
 292       insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
 293     }
 294
 295   occ->bb_has_division = true;
 296   occ->num_divisions += importance;
 297 }
 298
 299
 300 /* Compute the number of divisions that postdominate each block in OCC and
 301    its children.  */
 302
 303 static void
 304 compute_merit (struct occurrence *occ)
 305 {
 306   struct occurrence *occ_child;
 307   basic_block dom = occ->bb;
 308
 309   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
 310     {
 311       basic_block bb;
 312       if (occ_child->children)
 313         compute_merit (occ_child);
 314
 315       if (flag_exceptions)
 316         bb = single_noncomplex_succ (dom);
 317       else
 318         bb = dom;
 319
 320       if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
 321         occ->num_divisions += occ_child->num_divisions;
 322     }
 323 }
 324
 325
 326 /* Return whether USE_STMT is a floating-point division by DEF.  */
 327 static inline bool
 328 is_division_by (gimple *use_stmt, tree def)
 329 {
 330   return is_gimple_assign (use_stmt)
 331          && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
 332          && gimple_assign_rhs2 (use_stmt) == def
 333          /* Do not recognize x / x as valid division, as we are getting
 334             confused later by replacing all immediate uses x in such
 335             a stmt.  */
 336          && gimple_assign_rhs1 (use_stmt) != def
 337          && !stmt_can_throw_internal (cfun, use_stmt);
 338 }
 339
 340 /* Return TRUE if USE_STMT is a multiplication of DEF by A.  */
 341 static inline bool
 342 is_mult_by (gimple *use_stmt, tree def, tree a)
 343 {
 344   if (gimple_code (use_stmt) == GIMPLE_ASSIGN
 345       && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
 346     {
 347       tree op0 = gimple_assign_rhs1 (use_stmt);
 348       tree op1 = gimple_assign_rhs2 (use_stmt);
 349
 350       return (op0 == def && op1 == a)
 351               || (op0 == a && op1 == def);
 352     }
 353   return 0;
 354 }
 355
 356 /* Return whether USE_STMT is DEF * DEF.  */
 357 static inline bool
 358 is_square_of (gimple *use_stmt, tree def)
 359 {
 360   return is_mult_by (use_stmt, def, def);
 361 }
 362
 363 /* Return whether USE_STMT is a floating-point division by
 364    DEF * DEF.  */
 365 static inline bool
 366 is_division_by_square (gimple *use_stmt, tree def)
 367 {
 368   if (gimple_code (use_stmt) == GIMPLE_ASSIGN
 369       && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
 370       && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
 371       && !stmt_can_throw_internal (cfun, use_stmt))
 372     {
 373       tree denominator = gimple_assign_rhs2 (use_stmt);
 374       if (TREE_CODE (denominator) == SSA_NAME)
 375         return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
 376     }
 377   return 0;
 378 }
 379
 380 /* Walk the subset of the dominator tree rooted at OCC, setting the
 381    RECIP_DEF field to a definition of 1.0 / DEF that can be used in
 382    the given basic block.  The field may be left NULL, of course,
 383    if it is not possible or profitable to do the optimization.
 384
 385    DEF_BSI is an iterator pointing at the statement defining DEF.
 386    If RECIP_DEF is set, a dominator already has a computation that can
 387    be used.
 388
 389    If should_insert_square_recip is set, then this also inserts
 390    the square of the reciprocal immediately after the definition
 391    of the reciprocal.  */
 392
 393 static void
 394 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
 395                     tree def, tree recip_def, tree square_recip_def,
 396                     int should_insert_square_recip, int threshold)
 397 {
 398   tree type;
 399   gassign *new_stmt, *new_square_stmt;
 400   gimple_stmt_iterator gsi;
 401   struct occurrence *occ_child;
 402
 403   if (!recip_def
 404       && (occ->bb_has_division || !flag_trapping_math)
 405       /* Divide by two as all divisions are counted twice in
 406          the costing loop.  */
 407       && occ->num_divisions / 2 >= threshold)
 408     {
 409       /* Make a variable with the replacement and substitute it.  */
 410       type = TREE_TYPE (def);
 411       recip_def = create_tmp_reg (type, "reciptmp");
 412       new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
 413                                       build_one_cst (type), def);
 414
 415       if (should_insert_square_recip)
 416         {
 417           square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
 418           new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
 419                                                  recip_def, recip_def);
 420         }
 421
 422       if (occ->bb_has_division)
 423         {
 424           /* Case 1: insert before an existing division.  */
 425           gsi = gsi_after_labels (occ->bb);
 426           while (!gsi_end_p (gsi)
 427                  && (!is_division_by (gsi_stmt (gsi), def))
 428                  && (!is_division_by_square (gsi_stmt (gsi), def)))
 429             gsi_next (&gsi);
 430
 431           gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
 432           if (should_insert_square_recip)
 433             gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
 434         }
 435       else if (def_gsi && occ->bb == def_gsi->bb)
 436         {
 437           /* Case 2: insert right after the definition.  Note that this will
 438              never happen if the definition statement can throw, because in
 439              that case the sole successor of the statement's basic block will
 440              dominate all the uses as well.  */
 441           gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
 442           if (should_insert_square_recip)
 443             gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
 444         }
 445       else
 446         {
 447           /* Case 3: insert in a basic block not containing defs/uses.  */
 448           gsi = gsi_after_labels (occ->bb);
 449           gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
 450           if (should_insert_square_recip)
 451             gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
 452         }
 453
 454       reciprocal_stats.rdivs_inserted++;
 455
 456       occ->recip_def_stmt = new_stmt;
 457     }
 458
 459   occ->recip_def = recip_def;
 460   occ->square_recip_def = square_recip_def;
 461   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
 462     insert_reciprocals (def_gsi, occ_child, def, recip_def,
 463                         square_recip_def, should_insert_square_recip,
 464                         threshold);
 465 }
 466
 467 /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
 468    Take as argument the use for (x * x).  */
 469 static inline void
 470 replace_reciprocal_squares (use_operand_p use_p)
 471 {
 472   gimple *use_stmt = USE_STMT (use_p);
 473   basic_block bb = gimple_bb (use_stmt);
 474   struct occurrence *occ = (struct occurrence *) bb->aux;
 475
 476   if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
 477       && occ->recip_def)
 478     {
 479       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
 480       gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
 481       gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
 482       SET_USE (use_p, occ->square_recip_def);
 483       fold_stmt_inplace (&gsi);
 484       update_stmt (use_stmt);
 485     }
 486 }
 487
 488
 489 /* Replace the division at USE_P with a multiplication by the reciprocal, if
 490    possible.  */
 491
 492 static inline void
 493 replace_reciprocal (use_operand_p use_p)
 494 {
 495   gimple *use_stmt = USE_STMT (use_p);
 496   basic_block bb = gimple_bb (use_stmt);
 497   struct occurrence *occ = (struct occurrence *) bb->aux;
 498
 499   if (optimize_bb_for_speed_p (bb)
 500       && occ->recip_def && use_stmt != occ->recip_def_stmt)
 501     {
 502       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
 503       gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
 504       SET_USE (use_p, occ->recip_def);
 505       fold_stmt_inplace (&gsi);
 506       update_stmt (use_stmt);
 507     }
 508 }
 509
 510
 511 /* Free OCC and return one more "struct occurrence" to be freed.  */
 512
 513 static struct occurrence *
 514 free_bb (struct occurrence *occ)
 515 {
 516   struct occurrence *child, *next;
 517
 518   /* First get the two pointers hanging off OCC.  */
 519   next = occ->next;
 520   child = occ->children;
 521   occ->bb->aux = NULL;
 522   occ_pool->remove (occ);
 523
 524   /* Now ensure that we don't recurse unless it is necessary.  */
 525   if (!child)
 526     return next;
 527   else
 528     {
 529       while (next)
 530         next = free_bb (next);
 531
 532       return child;
 533     }
 534 }
 535
 536 /* Transform sequences like
 537    t = sqrt (a)
 538    x = 1.0 / t;
 539    r1 = x * x;
 540    r2 = a * x;
 541    into:
 542    t = sqrt (a)
 543    r1 = 1.0 / a;
 544    r2 = t;
 545    x = r1 * r2;
 546    depending on the uses of x, r1, r2.  This removes one multiplication and
 547    allows the sqrt and division operations to execute in parallel.
 548    DEF_GSI is the gsi of the initial division by sqrt that defines
 549    DEF (x in the example above).  */
 550
 551 static void
 552 optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
 553 {
 554   gimple *use_stmt;
 555   imm_use_iterator use_iter;
 556   gimple *stmt = gsi_stmt (*def_gsi);
 557   tree x = def;
 558   tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
 559   tree div_rhs1 = gimple_assign_rhs1 (stmt);
 560
 561   if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
 562       || TREE_CODE (div_rhs1) != REAL_CST
 563       || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
 564     return;
 565
 566   gcall *sqrt_stmt
 567     = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
 568
 569   if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
 570     return;
 571
 572   switch (gimple_call_combined_fn (sqrt_stmt))
 573     {
 574     CASE_CFN_SQRT:
 575     CASE_CFN_SQRT_FN:
 576       break;
 577
 578     default:
 579       return;
 580     }
 581   tree a = gimple_call_arg (sqrt_stmt, 0);
 582
 583   /* We have 'a' and 'x'.  Now analyze the uses of 'x'.  */
 584
 585   /* Statements that use x in x * x.  */
 586   auto_vec<gimple *> sqr_stmts;
 587   /* Statements that use x in a * x.  */
 588   auto_vec<gimple *> mult_stmts;
 589   bool has_other_use = false;
 590   bool mult_on_main_path = false;
 591
 592   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
 593     {
 594       if (is_gimple_debug (use_stmt))
 595         continue;
 596       if (is_square_of (use_stmt, x))
 597         {
 598           sqr_stmts.safe_push (use_stmt);
 599           if (gimple_bb (use_stmt) == gimple_bb (stmt))
 600             mult_on_main_path = true;
 601         }
 602       else if (is_mult_by (use_stmt, x, a))
 603         {
 604           mult_stmts.safe_push (use_stmt);
 605           if (gimple_bb (use_stmt) == gimple_bb (stmt))
 606             mult_on_main_path = true;
 607         }
 608       else
 609         has_other_use = true;
 610     }
 611
 612   /* In the x * x and a * x cases we just rewire stmt operands or
 613      remove multiplications.  In the has_other_use case we introduce
 614      a multiplication so make sure we don't introduce a multiplication
 615      on a path where there was none.  */
 616   if (has_other_use && !mult_on_main_path)
 617     return;
 618
 619   if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
 620     return;
 621
 622   /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
 623      to be able to compose it from the sqr and mult cases.  */
 624   if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
 625     return;
 626
 627   if (dump_file)
 628     {
 629       fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
 630       print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
 631       print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
 632       fprintf (dump_file, "\n");
 633     }
 634
 635   bool delete_div = !has_other_use;
 636   tree sqr_ssa_name = NULL_TREE;
 637   if (!sqr_stmts.is_empty ())
 638     {
 639       /* r1 = x * x.  Transform the original
 640          x = 1.0 / t
 641          into
 642          tmp1 = 1.0 / a
 643          r1 = tmp1.  */
 644
 645       sqr_ssa_name
 646         = make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");
 647
 648       if (dump_file)
 649         {
 650           fprintf (dump_file, "Replacing original division\n");
 651           print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
 652           fprintf (dump_file, "with new division\n");
 653         }
 654       stmt
 655         = gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
 656                                gimple_assign_rhs1 (stmt), a);
 657       gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
 658       gsi_remove (def_gsi, true);
 659       *def_gsi = gsi_for_stmt (stmt);
 660       fold_stmt_inplace (def_gsi);
 661       update_stmt (stmt);
 662
 663       if (dump_file)
 664         print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
 665
 666       delete_div = false;
 667       gimple *sqr_stmt;
 668       unsigned int i;
 669       FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
 670         {
 671           gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
 672           gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
 673           update_stmt (sqr_stmt);
 674         }
 675     }
 676   if (!mult_stmts.is_empty ())
 677     {
 678       /* r2 = a * x.  Transform this into:
 679          r2 = t (The original sqrt (a)).  */
 680       unsigned int i;
 681       gimple *mult_stmt = NULL;
 682       FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
 683         {
 684           gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);
 685
 686           if (dump_file)
 687             {
 688               fprintf (dump_file, "Replacing squaring multiplication\n");
 689               print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
 690               fprintf (dump_file, "with assignment\n");
 691             }
 692           gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
 693           fold_stmt_inplace (&gsi2);
 694           update_stmt (mult_stmt);
 695           if (dump_file)
 696             print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
 697       }
 698     }
 699
 700   if (has_other_use)
 701     {
 702       /* Using the two temporaries tmp1, tmp2 from above
 703          the original x is now:
 704          x = tmp1 * tmp2.  */
 705       gcc_assert (orig_sqrt_ssa_name);
 706       gcc_assert (sqr_ssa_name);
 707
 708       gimple *new_stmt
 709         = gimple_build_assign (x, MULT_EXPR,
 710                                orig_sqrt_ssa_name, sqr_ssa_name);
 711       gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
 712       update_stmt (stmt);
 713     }
 714   else if (delete_div)
 715     {
 716       /* Remove the original division.  */
 717       gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
 718       gsi_remove (&gsi2, true);
 719       release_defs (stmt);
 720     }
 721   else
 722     release_ssa_name (x);
 723 }
 724
 725 /* Look for floating-point divisions among DEF's uses, and try to
 726    replace them by multiplications with the reciprocal.  Add
 727    as many statements computing the reciprocal as needed.
 728
 729    DEF must be a GIMPLE register of a floating-point type.  */
 730
 731 static void
 732 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
 733 {
 734   use_operand_p use_p, square_use_p;
 735   imm_use_iterator use_iter, square_use_iter;
 736   tree square_def;
 737   struct occurrence *occ;
 738   int count = 0;
 739   int threshold;
 740   int square_recip_count = 0;
 741   int sqrt_recip_count = 0;
 742
 743   gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
 744   threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
 745
 746   /* If DEF is a square (x * x), count the number of divisions by x.
 747      If there are more divisions by x than by (DEF * DEF), prefer to optimize
 748      the reciprocal of x instead of DEF.  This improves cases like:
 749        def = x * x
 750        t0 = a / def
 751        t1 = b / def
 752        t2 = c / x
 753      Reciprocal optimization of x results in 1 division rather than 2 or 3.  */
 754   gimple *def_stmt = SSA_NAME_DEF_STMT (def);
 755
 756   if (is_gimple_assign (def_stmt)
 757       && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
 758       && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
 759       && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
 760     {
 761       tree op0 = gimple_assign_rhs1 (def_stmt);
 762
 763       FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
 764         {
 765           gimple *use_stmt = USE_STMT (use_p);
 766           if (is_division_by (use_stmt, op0))
 767             sqrt_recip_count++;
 768         }
 769     }
 770
 771   FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
 772     {
 773       gimple *use_stmt = USE_STMT (use_p);
 774       if (is_division_by (use_stmt, def))
 775         {
 776           register_division_in (gimple_bb (use_stmt), 2);
 777           count++;
 778         }
 779
 780       if (is_square_of (use_stmt, def))
 781         {
 782           square_def = gimple_assign_lhs (use_stmt);
 783           FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
 784             {
 785               gimple *square_use_stmt = USE_STMT (square_use_p);
 786               if (is_division_by (square_use_stmt, square_def))
 787                 {
 788                   /* This is executed twice for each division by a square.  */
 789                   register_division_in (gimple_bb (square_use_stmt), 1);
 790                   square_recip_count++;
 791                 }
 792             }
 793         }
 794     }
 795
 796   /* Square reciprocals were counted twice above.  */
 797   square_recip_count /= 2;
 798
 799   /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x).  */
 800   if (sqrt_recip_count > square_recip_count)
 801     goto out;
 802
 803   /* Do the expensive part only if we can hope to optimize something.  */
 804   if (count + square_recip_count >= threshold && count >= 1)
 805     {
 806       gimple *use_stmt;
 807       for (occ = occ_head; occ; occ = occ->next)
 808         {
 809           compute_merit (occ);
 810           insert_reciprocals (def_gsi, occ, def, NULL, NULL,
 811                               square_recip_count, threshold);
 812         }
 813
 814       FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
 815         {
 816           if (is_division_by (use_stmt, def))
 817             {
 818               FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
 819                 replace_reciprocal (use_p);
 820             }
 821           else if (square_recip_count > 0 && is_square_of (use_stmt, def))
 822             {
 823               FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
 824                 {
 825                   /* Find all uses of the square that are divisions and
 826                    * replace them by multiplications with the inverse.  */
 827                   imm_use_iterator square_iterator;
 828                   gimple *powmult_use_stmt = USE_STMT (use_p);
 829                   tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
 830
 831                   FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
 832                                          square_iterator, powmult_def_name)
 833                     FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
 834                       {
 835                         gimple *powmult_use_stmt = USE_STMT (square_use_p);
 836                         if (is_division_by (powmult_use_stmt, powmult_def_name))
 837                           replace_reciprocal_squares (square_use_p);
 838                       }
 839                 }
 840             }
 841         }
 842     }
 843
 844 out:
 845   for (occ = occ_head; occ; )
 846     occ = free_bb (occ);
 847
 848   occ_head = NULL;
 849 }
 850
 851 /* Return an internal function that implements the reciprocal of CALL,
 852    or IFN_LAST if there is no such function that the target supports.  */
 853
 854 internal_fn
 855 internal_fn_reciprocal (gcall *call)
 856 {
 857   internal_fn ifn;
 858
 859   switch (gimple_call_combined_fn (call))
 860     {
 861     CASE_CFN_SQRT:
 862     CASE_CFN_SQRT_FN:
 863       ifn = IFN_RSQRT;
 864       break;
 865
 866     default:
 867       return IFN_LAST;
 868     }
 869
 870   tree_pair types = direct_internal_fn_types (ifn, call);
 871   if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
 872     return IFN_LAST;
 873
 874   return ifn;
 875 }
 876
 877 /* Go through all the floating-point SSA_NAMEs, and call
 878    execute_cse_reciprocals_1 on each of them.  */
 879 namespace {
 880
 881 const pass_data pass_data_cse_reciprocals =
 882 {
 883   GIMPLE_PASS, /* type */
 884   "recip", /* name */
 885   OPTGROUP_NONE, /* optinfo_flags */
 886   TV_TREE_RECIP, /* tv_id */
 887   PROP_ssa, /* properties_required */
 888   0, /* properties_provided */
 889   0, /* properties_destroyed */
 890   0, /* todo_flags_start */
 891   TODO_update_ssa, /* todo_flags_finish */
 892 };
 893
 894 class pass_cse_reciprocals : public gimple_opt_pass
 895 {
 896 public:
 897   pass_cse_reciprocals (gcc::context *ctxt)
 898     : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
 899   {}
 900
 901   /* opt_pass methods: */
 902   virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
 903   virtual unsigned int execute (function *);
 904
 905 }; // class pass_cse_reciprocals
 906
 907 unsigned int
 908 pass_cse_reciprocals::execute (function *fun)
 909 {
 910   basic_block bb;
 911   tree arg;
 912
 913   occ_pool = new object_allocator<occurrence> ("dominators for recip");
 914
 915   memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
 916   calculate_dominance_info (CDI_DOMINATORS);
 917   calculate_dominance_info (CDI_POST_DOMINATORS);
 918
 919   if (flag_checking)
 920     FOR_EACH_BB_FN (bb, fun)
 921       gcc_assert (!bb->aux);
 922
 923   for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
 924     if (FLOAT_TYPE_P (TREE_TYPE (arg))
 925         && is_gimple_reg (arg))
 926       {
 927         tree name = ssa_default_def (fun, arg);
 928         if (name)
 929           execute_cse_reciprocals_1 (NULL, name);
 930       }
 931
 932   FOR_EACH_BB_FN (bb, fun)
 933     {
 934       tree def;
 935
 936       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
 937            gsi_next (&gsi))
 938         {
 939           gphi *phi = gsi.phi ();
 940           def = PHI_RESULT (phi);
 941           if (! virtual_operand_p (def)
 942               && FLOAT_TYPE_P (TREE_TYPE (def)))
 943             execute_cse_reciprocals_1 (NULL, def);
 944         }
 945
 946       for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
 947            gsi_next (&gsi))
 948         {
 949           gimple *stmt = gsi_stmt (gsi);
 950
 951           if (gimple_has_lhs (stmt)
 952               && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
 953               && FLOAT_TYPE_P (TREE_TYPE (def))
 954               && TREE_CODE (def) == SSA_NAME)
 955             {
 956               execute_cse_reciprocals_1 (&gsi, def);
 957               stmt = gsi_stmt (gsi);
 958               if (flag_unsafe_math_optimizations
 959                   && is_gimple_assign (stmt)
 960                   && gimple_assign_lhs (stmt) == def
 961                   && !stmt_can_throw_internal (cfun, stmt)
 962                   && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
 963                 optimize_recip_sqrt (&gsi, def);
 964             }
 965         }
 966
 967       if (optimize_bb_for_size_p (bb))
 968         continue;
 969
 970       /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
 971       for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
 972            gsi_next (&gsi))
 973         {
 974           gimple *stmt = gsi_stmt (gsi);
 975
 976           if (is_gimple_assign (stmt)
 977               && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
 978             {
 979               tree arg1 = gimple_assign_rhs2 (stmt);
 980               gimple *stmt1;
 981
 982               if (TREE_CODE (arg1) != SSA_NAME)
 983                 continue;
 984
 985               stmt1 = SSA_NAME_DEF_STMT (arg1);
 986
 987               if (is_gimple_call (stmt1)
 988                   && gimple_call_lhs (stmt1))
 989                 {
 990                   bool fail;
 991                   imm_use_iterator ui;
 992                   use_operand_p use_p;
 993                   tree fndecl = NULL_TREE;
 994
 995                   gcall *call = as_a <gcall *> (stmt1);
 996                   internal_fn ifn = internal_fn_reciprocal (call);
 997                   if (ifn == IFN_LAST)
 998                     {
 999                       fndecl = gimple_call_fndecl (call);
1000                       if (!fndecl
1001                           || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
1002                         continue;
1003                       fndecl = targetm.builtin_reciprocal (fndecl);
1004                       if (!fndecl)
1005                         continue;
1006                     }
1007
1008                   /* Check that all uses of the SSA name are divisions,
1009                      otherwise replacing the defining statement will do
1010                      the wrong thing.  */
1011                   fail = false;
1012                   FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
1013                     {
1014                       gimple *stmt2 = USE_STMT (use_p);
1015                       if (is_gimple_debug (stmt2))
1016                         continue;
1017                       if (!is_gimple_assign (stmt2)
1018                           || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
1019                           || gimple_assign_rhs1 (stmt2) == arg1
1020                           || gimple_assign_rhs2 (stmt2) != arg1)
1021                         {
1022                           fail = true;
1023                           break;
1024                         }
1025                     }
1026                   if (fail)
1027                     continue;
1028
1029                   gimple_replace_ssa_lhs (call, arg1);
1030                   if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
1031                     {
1032                       auto_vec<tree, 4> args;
1033                       for (unsigned int i = 0;
1034                            i < gimple_call_num_args (call); i++)
1035                         args.safe_push (gimple_call_arg (call, i));
1036                       gcall *stmt2;
1037                       if (ifn == IFN_LAST)
1038                         stmt2 = gimple_build_call_vec (fndecl, args);
1039                       else
1040                         stmt2 = gimple_build_call_internal_vec (ifn, args);
1041                       gimple_call_set_lhs (stmt2, arg1);
1042                       gimple_move_vops (stmt2, call);
1043                       gimple_call_set_nothrow (stmt2,
1044                                                gimple_call_nothrow_p (call));
1045                       gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
1046                       gsi_replace (&gsi2, stmt2, true);
1047                     }
1048                   else
1049                     {
1050                       if (ifn == IFN_LAST)
1051                         gimple_call_set_fndecl (call, fndecl);
1052                       else
1053                         gimple_call_set_internal_fn (call, ifn);
1054                       update_stmt (call);
1055                     }
1056                   reciprocal_stats.rfuncs_inserted++;
1057
1058                   FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
1059                     {
1060                       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1061                       gimple_assign_set_rhs_code (stmt, MULT_EXPR);
1062                       fold_stmt_inplace (&gsi);
1063                       update_stmt (stmt);
1064                     }
1065                 }
1066             }
1067         }
1068     }
1069
1070   statistics_counter_event (fun, "reciprocal divs inserted",
1071                             reciprocal_stats.rdivs_inserted);
1072   statistics_counter_event (fun, "reciprocal functions inserted",
1073                             reciprocal_stats.rfuncs_inserted);
1074
1075   free_dominance_info (CDI_DOMINATORS);
1076   free_dominance_info (CDI_POST_DOMINATORS);
1077   delete occ_pool;
1078   return 0;
1079 }
1080
1081 } // anon namespace
1082
1083 gimple_opt_pass *
1084 make_pass_cse_reciprocals (gcc::context *ctxt)
1085 {
1086   return new pass_cse_reciprocals (ctxt);
1087 }
1088
1089 /* Records an occurrence at statement USE_STMT in the vector of trees
1090    STMTS if it is dominated by *TOP_BB or dominates it or this basic block
1091    is not yet initialized.  Returns true if the occurrence was pushed on
1092    the vector.  Adjusts *TOP_BB to be the basic block dominating all
1093    statements in the vector.  */
1094
1095 static bool
1096 maybe_record_sincos (vec<gimple *> *stmts,
1097                      basic_block *top_bb, gimple *use_stmt)
1098 {
1099   basic_block use_bb = gimple_bb (use_stmt);
1100   if (*top_bb
1101       && (*top_bb == use_bb
1102           || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
1103     stmts->safe_push (use_stmt);
1104   else if (!*top_bb
1105            || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
1106     {
1107       stmts->safe_push (use_stmt);
1108       *top_bb = use_bb;
1109     }
1110   else
1111     return false;
1112
1113   return true;
1114 }
1115
1116 /* Look for sin, cos and cexpi calls with the same argument NAME and
1117    create a single call to cexpi CSEing the result in this case.
1118    We first walk over all immediate uses of the argument collecting
1119    statements that we can CSE in a vector and in a second pass replace
1120    the statement rhs with a REALPART or IMAGPART expression on the
1121    result of the cexpi call we insert before the use statement that
1122    dominates all other candidates.  */
1123
1124 static bool
1125 execute_cse_sincos_1 (tree name)
1126 {
1127   gimple_stmt_iterator gsi;
1128   imm_use_iterator use_iter;
1129   tree fndecl, res, type;
1130   gimple *def_stmt, *use_stmt, *stmt;
1131   int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
1132   auto_vec<gimple *> stmts;
1133   basic_block top_bb = NULL;
1134   int i;
1135   bool cfg_changed = false;
1136
1137   type = TREE_TYPE (name);
1138   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
1139     {
1140       if (gimple_code (use_stmt) != GIMPLE_CALL
1141           || !gimple_call_lhs (use_stmt))
1142         continue;
1143
1144       switch (gimple_call_combined_fn (use_stmt))
1145         {
1146         CASE_CFN_COS:
1147           seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1148           break;
1149
1150         CASE_CFN_SIN:
1151           seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1152           break;
1153
1154         CASE_CFN_CEXPI:
1155           seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1156           break;
1157
1158         default:;
1159         }
1160     }
1161
1162   if (seen_cos + seen_sin + seen_cexpi <= 1)
1163     return false;
1164
1165   /* Simply insert cexpi at the beginning of top_bb but not earlier than
1166      the name def statement.  */
1167   fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
1168   if (!fndecl)
1169     return false;
1170   stmt = gimple_build_call (fndecl, 1, name);
1171   res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
1172   gimple_call_set_lhs (stmt, res);
1173
1174   def_stmt = SSA_NAME_DEF_STMT (name);
1175   if (!SSA_NAME_IS_DEFAULT_DEF (name)
1176       && gimple_code (def_stmt) != GIMPLE_PHI
1177       && gimple_bb (def_stmt) == top_bb)
1178     {
1179       gsi = gsi_for_stmt (def_stmt);
1180       gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1181     }
1182   else
1183     {
1184       gsi = gsi_after_labels (top_bb);
1185       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1186     }
1187   sincos_stats.inserted++;
1188
1189   /* And adjust the recorded old call sites.  */
1190   for (i = 0; stmts.iterate (i, &use_stmt); ++i)
1191     {
1192       tree rhs = NULL;
1193
1194       switch (gimple_call_combined_fn (use_stmt))
1195         {
1196         CASE_CFN_COS:
1197           rhs = fold_build1 (REALPART_EXPR, type, res);
1198           break;
1199
1200         CASE_CFN_SIN:
1201           rhs = fold_build1 (IMAGPART_EXPR, type, res);
1202           break;
1203
1204         CASE_CFN_CEXPI:
1205           rhs = res;
1206           break;
1207
1208         default:;
1209           gcc_unreachable ();
1210         }
1211
1212         /* Replace call with a copy.  */
1213         stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
1214
1215         gsi = gsi_for_stmt (use_stmt);
1216         gsi_replace (&gsi, stmt, true);
1217         if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
1218           cfg_changed = true;
1219     }
1220
1221   return cfg_changed;
1222 }
1223
1224 /* To evaluate powi(x,n), the floating point value x raised to the
1225    constant integer exponent n, we use a hybrid algorithm that
1226    combines the "window method" with look-up tables.  For an
1227    introduction to exponentiation algorithms and "addition chains",
1228    see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1229    "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1230    3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1231    Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */
1232
1233 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
1234    multiplications to inline before calling the system library's pow
1235    function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1236    so this default never requires calling pow, powf or powl.  */
1237
1238 #ifndef POWI_MAX_MULTS
1239 #define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
1240 #endif
1241
1242 /* The size of the "optimal power tree" lookup table.  All
1243    exponents less than this value are simply looked up in the
1244    powi_table below.  This threshold is also used to size the
1245    cache of pseudo registers that hold intermediate results.  */
1246 #define POWI_TABLE_SIZE 256
1247
1248 /* The size, in bits of the window, used in the "window method"
1249    exponentiation algorithm.  This is equivalent to a radix of
1250    (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
1251 #define POWI_WINDOW_SIZE 3
1252
1253 /* The following table is an efficient representation of an
1254    "optimal power tree".  For each value, i, the corresponding
1255    value, j, in the table states than an optimal evaluation
1256    sequence for calculating pow(x,i) can be found by evaluating
1257    pow(x,j)*pow(x,i-j).  An optimal power tree for the first
1258    100 integers is given in Knuth's "Seminumerical algorithms".  */
1259
1260 static const unsigned char powi_table[POWI_TABLE_SIZE] =
1261   {
1262       0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
1263       4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
1264       8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
1265      12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
1266      16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
1267      20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
1268      24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
1269      28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
1270      32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
1271      36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
1272      40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
1273      44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
1274      48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
1275      52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
1276      56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
1277      60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
1278      64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
1279      68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
1280      72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
1281      76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
1282      80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
1283      84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
1284      88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
1285      92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
1286      96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
1287     100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
1288     104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
1289     108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
1290     112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
1291     116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
1292     120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
1293     124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
1294   };
1295
1296
1297 /* Return the number of multiplications required to calculate
1298    powi(x,n) where n is less than POWI_TABLE_SIZE.  This is a
1299    subroutine of powi_cost.  CACHE is an array indicating
1300    which exponents have already been calculated.  */
1301
1302 static int
1303 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1304 {
1305   /* If we've already calculated this exponent, then this evaluation
1306      doesn't require any additional multiplications.  */
1307   if (cache[n])
1308     return 0;
1309
1310   cache[n] = true;
1311   return powi_lookup_cost (n - powi_table[n], cache)
1312          + powi_lookup_cost (powi_table[n], cache) + 1;
1313 }
1314
1315 /* Return the number of multiplications required to calculate
1316    powi(x,n) for an arbitrary x, given the exponent N.  This
1317    function needs to be kept in sync with powi_as_mults below.  */
1318
1319 static int
1320 powi_cost (HOST_WIDE_INT n)
1321 {
1322   bool cache[POWI_TABLE_SIZE];
1323   unsigned HOST_WIDE_INT digit;
1324   unsigned HOST_WIDE_INT val;
1325   int result;
1326
1327   if (n == 0)
1328     return 0;
1329
1330   /* Ignore the reciprocal when calculating the cost.  */
1331   val = (n < 0) ? -n : n;
1332
1333   /* Initialize the exponent cache.  */
1334   memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
1335   cache[1] = true;
1336
1337   result = 0;
1338
1339   while (val >= POWI_TABLE_SIZE)
1340     {
1341       if (val & 1)
1342         {
1343           digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
1344           result += powi_lookup_cost (digit, cache)
1345                     + POWI_WINDOW_SIZE + 1;
1346           val >>= POWI_WINDOW_SIZE;
1347         }
1348       else
1349         {
1350           val >>= 1;
1351           result++;
1352         }
1353     }
1354
1355   return result + powi_lookup_cost (val, cache);
1356 }
1357
1358 /* Recursive subroutine of powi_as_mults.  This function takes the
1359    array, CACHE, of already calculated exponents and an exponent N and
1360    returns a tree that corresponds to CACHE[1]**N, with type TYPE.  */
1361
1362 static tree
1363 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1364                  HOST_WIDE_INT n, tree *cache)
1365 {
1366   tree op0, op1, ssa_target;
1367   unsigned HOST_WIDE_INT digit;
1368   gassign *mult_stmt;
1369
1370   if (n < POWI_TABLE_SIZE && cache[n])
1371     return cache[n];
1372
1373   ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1374
1375   if (n < POWI_TABLE_SIZE)
1376     {
1377       cache[n] = ssa_target;
1378       op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1379       op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1380     }
1381   else if (n & 1)
1382     {
1383       digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1384       op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1385       op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1386     }
1387   else
1388     {
1389       op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1390       op1 = op0;
1391     }
1392
1393   mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1394   gimple_set_location (mult_stmt, loc);
1395   gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1396
1397   return ssa_target;
1398 }
1399
1400 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1401    This function needs to be kept in sync with powi_cost above.  */
1402
1403 static tree
1404 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1405                tree arg0, HOST_WIDE_INT n)
1406 {
1407   tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1408   gassign *div_stmt;
1409   tree target;
1410
1411   if (n == 0)
1412     return build_real (type, dconst1);
1413
1414   memset (cache, 0,  sizeof (cache));
1415   cache[1] = arg0;
1416
1417   result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
1418   if (n >= 0)
1419     return result;
1420
1421   /* If the original exponent was negative, reciprocate the result.  */
1422   target = make_temp_ssa_name (type, NULL, "powmult");
1423   div_stmt = gimple_build_assign (target, RDIV_EXPR,
1424                                   build_real (type, dconst1), result);
1425   gimple_set_location (div_stmt, loc);
1426   gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1427
1428   return target;
1429 }
1430
1431 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1432    location info LOC.  If the arguments are appropriate, create an
1433    equivalent sequence of statements prior to GSI using an optimal
1434    number of multiplications, and return an expession holding the
1435    result.  */
1436
1437 static tree
1438 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1439                             tree arg0, HOST_WIDE_INT n)
1440 {
1441   /* Avoid largest negative number.  */
1442   if (n != -n
1443       && ((n >= -1 && n <= 2)
1444           || (optimize_function_for_speed_p (cfun)
1445               && powi_cost (n) <= POWI_MAX_MULTS)))
1446     return powi_as_mults (gsi, loc, arg0, n);
1447
1448   return NULL_TREE;
1449 }
1450
1451 /* Build a gimple call statement that calls FN with argument ARG.
1452    Set the lhs of the call statement to a fresh SSA name.  Insert the
1453    statement prior to GSI's current position, and return the fresh
1454    SSA name.  */
1455
1456 static tree
1457 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1458                        tree fn, tree arg)
1459 {
1460   gcall *call_stmt;
1461   tree ssa_target;
1462
1463   call_stmt = gimple_build_call (fn, 1, arg);
1464   ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1465   gimple_set_lhs (call_stmt, ssa_target);
1466   gimple_set_location (call_stmt, loc);
1467   gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1468
1469   return ssa_target;
1470 }
1471
1472 /* Build a gimple binary operation with the given CODE and arguments
1473    ARG0, ARG1, assigning the result to a new SSA name for variable
1474    TARGET.  Insert the statement prior to GSI's current position, and
1475    return the fresh SSA name.*/
1476
1477 static tree
1478 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1479                         const char *name, enum tree_code code,
1480                         tree arg0, tree arg1)
1481 {
1482   tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1483   gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1484   gimple_set_location (stmt, loc);
1485   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1486   return result;
1487 }
1488
1489 /* Build a gimple reference operation with the given CODE and argument
1490    ARG, assigning the result to a new SSA name of TYPE with NAME.
1491    Insert the statement prior to GSI's current position, and return
1492    the fresh SSA name.  */
1493
1494 static inline tree
1495 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1496                       const char *name, enum tree_code code, tree arg0)
1497 {
1498   tree result = make_temp_ssa_name (type, NULL, name);
1499   gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1500   gimple_set_location (stmt, loc);
1501   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1502   return result;
1503 }
1504
1505 /* Build a gimple assignment to cast VAL to TYPE.  Insert the statement
1506    prior to GSI's current position, and return the fresh SSA name.  */
1507
1508 static tree
1509 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1510                        tree type, tree val)
1511 {
1512   tree result = make_ssa_name (type);
1513   gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1514   gimple_set_location (stmt, loc);
1515   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1516   return result;
1517 }
1518
1519 struct pow_synth_sqrt_info
1520 {
1521   bool *factors;
1522   unsigned int deepest;
1523   unsigned int num_mults;
1524 };
1525
1526 /* Return true iff the real value C can be represented as a
1527    sum of powers of 0.5 up to N.  That is:
1528    C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1529    Record in INFO the various parameters of the synthesis algorithm such
1530    as the factors a[i], the maximum 0.5 power and the number of
1531    multiplications that will be required.  */
1532
1533 bool
1534 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1535                                  struct pow_synth_sqrt_info *info)
1536 {
1537   REAL_VALUE_TYPE factor = dconsthalf;
1538   REAL_VALUE_TYPE remainder = c;
1539
1540   info->deepest = 0;
1541   info->num_mults = 0;
1542   memset (info->factors, 0, n * sizeof (bool));
1543
1544   for (unsigned i = 0; i < n; i++)
1545     {
1546       REAL_VALUE_TYPE res;
1547
1548       /* If something inexact happened bail out now.  */
1549       if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1550         return false;
1551
1552       /* We have hit zero.  The number is representable as a sum
1553          of powers of 0.5.  */
1554       if (real_equal (&res, &dconst0))
1555         {
1556           info->factors[i] = true;
1557           info->deepest = i + 1;
1558           return true;
1559         }
1560       else if (!REAL_VALUE_NEGATIVE (res))
1561         {
1562           remainder = res;
1563           info->factors[i] = true;
1564           info->num_mults++;
1565         }
1566       else
1567         info->factors[i] = false;
1568
1569       real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1570     }
1571   return false;
1572 }
1573
1574 /* Return the tree corresponding to FN being applied
1575    to ARG N times at GSI and LOC.
1576    Look up previous results from CACHE if need be.
1577    cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times.  */
1578
1579 static tree
1580 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1581               tree fn, location_t loc, tree *cache)
1582 {
1583   tree res = cache[n];
1584   if (!res)
1585     {
1586       tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1587       res = build_and_insert_call (gsi, loc, fn, prev);
1588       cache[n] = res;
1589     }
1590
1591   return res;
1592 }
1593
1594 /* Print to STREAM the repeated application of function FNAME to ARG
1595    N times.  So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1596    "foo (foo (x))".  */
1597
1598 static void
1599 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1600                  unsigned int n)
1601 {
1602   if (n == 0)
1603     fprintf (stream, "%s", arg);
1604   else
1605     {
1606       fprintf (stream, "%s (", fname);
1607       print_nested_fn (stream, fname, arg, n - 1);
1608       fprintf (stream, ")");
1609     }
1610 }
1611
1612 /* Print to STREAM the fractional sequence of sqrt chains
1613    applied to ARG, described by INFO.  Used for the dump file.  */
1614
1615 static void
1616 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1617                                 struct pow_synth_sqrt_info *info)
1618 {
1619   for (unsigned int i = 0; i < info->deepest; i++)
1620     {
1621       bool is_set = info->factors[i];
1622       if (is_set)
1623         {
1624           print_nested_fn (stream, "sqrt", arg, i + 1);
1625           if (i != info->deepest - 1)
1626             fprintf (stream, " * ");
1627         }
1628     }
1629 }
1630
1631 /* Print to STREAM a representation of raising ARG to an integer
1632    power N.  Used for the dump file.  */
1633
1634 static void
1635 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1636 {
1637   if (n > 1)
1638     fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1639   else if (n == 1)
1640     fprintf (stream, "%s", arg);
1641 }
1642
1643 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1644    square roots.  Place at GSI and LOC.  Limit the maximum depth
1645    of the sqrt chains to MAX_DEPTH.  Return the tree holding the
1646    result of the expanded sequence or NULL_TREE if the expansion failed.
1647
1648    This routine assumes that ARG1 is a real number with a fractional part
1649    (the integer exponent case will have been handled earlier in
1650    gimple_expand_builtin_pow).
1651
1652    For ARG1 > 0.0:
1653    * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1654      FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1655                     FRAC_PART == ARG1 - WHOLE_PART:
1656      Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1657      POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1658      if it can be expressed as such, that is if FRAC_PART satisfies:
1659      FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1660      where integer a[i] is either 0 or 1.
1661
1662      Example:
1663      POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1664        --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1665
1666    For ARG1 < 0.0 there are two approaches:
1667    * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1668          is calculated as above.
1669
1670      Example:
1671      POW (x, -5.625) == 1.0 / POW (x, 5.625)
1672        -->  1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1673
1674    * (B) : WHOLE_PART := - ceil (abs (ARG1))
1675            FRAC_PART  := ARG1 - WHOLE_PART
1676      and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1677      Example:
1678      POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1679        --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1680
1681    For ARG1 < 0.0 we choose between (A) and (B) depending on
1682    how many multiplications we'd have to do.
1683    So, for the example in (B): POW (x, -5.875), if we were to
1684    follow algorithm (A) we would produce:
1685    1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1686    which contains more multiplications than approach (B).
1687
1688    Hopefully, this approach will eliminate potentially expensive POW library
1689    calls when unsafe floating point math is enabled and allow the compiler to
1690    further optimise the multiplies, square roots and divides produced by this
1691    function.  */
1692
1693 static tree
1694 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1695                      tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1696 {
1697   tree type = TREE_TYPE (arg0);
1698   machine_mode mode = TYPE_MODE (type);
1699   tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1700   bool one_over = true;
1701
1702   if (!sqrtfn)
1703     return NULL_TREE;
1704
1705   if (TREE_CODE (arg1) != REAL_CST)
1706     return NULL_TREE;
1707
1708   REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1709
1710   gcc_assert (max_depth > 0);
1711   tree *cache = XALLOCAVEC (tree, max_depth + 1);
1712
1713   struct pow_synth_sqrt_info synth_info;
1714   synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1715   synth_info.deepest = 0;
1716   synth_info.num_mults = 0;
1717
1718   bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1719   REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1720
1721   /* The whole and fractional parts of exp.  */
1722   REAL_VALUE_TYPE whole_part;
1723   REAL_VALUE_TYPE frac_part;
1724
1725   real_floor (&whole_part, mode, &exp);
1726   real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1727
1728
1729   REAL_VALUE_TYPE ceil_whole = dconst0;
1730   REAL_VALUE_TYPE ceil_fract = dconst0;
1731
1732   if (neg_exp)
1733     {
1734       real_ceil (&ceil_whole, mode, &exp);
1735       real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1736     }
1737
1738   if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1739     return NULL_TREE;
1740
1741   /* Check whether it's more profitable to not use 1.0 / ...  */
1742   if (neg_exp)
1743     {
1744       struct pow_synth_sqrt_info alt_synth_info;
1745       alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1746       alt_synth_info.deepest = 0;
1747       alt_synth_info.num_mults = 0;
1748
1749       if (representable_as_half_series_p (ceil_fract, max_depth,
1750                                            &alt_synth_info)
1751           && alt_synth_info.deepest <= synth_info.deepest
1752           && alt_synth_info.num_mults < synth_info.num_mults)
1753         {
1754           whole_part = ceil_whole;
1755           frac_part = ceil_fract;
1756           synth_info.deepest = alt_synth_info.deepest;
1757           synth_info.num_mults = alt_synth_info.num_mults;
1758           memcpy (synth_info.factors, alt_synth_info.factors,
1759                   (max_depth + 1) * sizeof (bool));
1760           one_over = false;
1761         }
1762     }
1763
1764   HOST_WIDE_INT n = real_to_integer (&whole_part);
1765   REAL_VALUE_TYPE cint;
1766   real_from_integer (&cint, VOIDmode, n, SIGNED);
1767
1768   if (!real_identical (&whole_part, &cint))
1769     return NULL_TREE;
1770
1771   if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1772     return NULL_TREE;
1773
1774   memset (cache, 0, (max_depth + 1) * sizeof (tree));
1775
1776   tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1777
1778   /* Calculate the integer part of the exponent.  */
1779   if (n > 1)
1780     {
1781       integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1782       if (!integer_res)
1783         return NULL_TREE;
1784     }
1785
1786   if (dump_file)
1787     {
1788       char string[64];
1789
1790       real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1791       fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1792
1793       if (neg_exp)
1794         {
1795           if (one_over)
1796             {
1797               fprintf (dump_file, "1.0 / (");
1798               dump_integer_part (dump_file, "x", n);
1799               if (n > 0)
1800                 fprintf (dump_file, " * ");
1801               dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1802               fprintf (dump_file, ")");
1803             }
1804           else
1805             {
1806               dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1807               fprintf (dump_file, " / (");
1808               dump_integer_part (dump_file, "x", n);
1809               fprintf (dump_file, ")");
1810             }
1811         }
1812       else
1813         {
1814           dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1815           if (n > 0)
1816             fprintf (dump_file, " * ");
1817           dump_integer_part (dump_file, "x", n);
1818         }
1819
1820       fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1821     }
1822
1823
1824   tree fract_res = NULL_TREE;
1825   cache[0] = arg0;
1826
1827   /* Calculate the fractional part of the exponent.  */
1828   for (unsigned i = 0; i < synth_info.deepest; i++)
1829     {
1830       if (synth_info.factors[i])
1831         {
1832           tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1833
1834           if (!fract_res)
1835               fract_res = sqrt_chain;
1836
1837           else
1838             fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1839                                            fract_res, sqrt_chain);
1840         }
1841     }
1842
1843   tree res = NULL_TREE;
1844
1845   if (neg_exp)
1846     {
1847       if (one_over)
1848         {
1849           if (n > 0)
1850             res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1851                                            fract_res, integer_res);
1852           else
1853             res = fract_res;
1854
1855           res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1856                                           build_real (type, dconst1), res);
1857         }
1858       else
1859         {
1860           res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1861                                          fract_res, integer_res);
1862         }
1863     }
1864   else
1865     res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1866                                    fract_res, integer_res);
1867   return res;
1868 }
1869
1870 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1871    with location info LOC.  If possible, create an equivalent and
1872    less expensive sequence of statements prior to GSI, and return an
1873    expession holding the result.  */
1874
1875 static tree
1876 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
1877                            tree arg0, tree arg1)
1878 {
1879   REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
1880   REAL_VALUE_TYPE c2, dconst3;
1881   HOST_WIDE_INT n;
1882   tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
1883   machine_mode mode;
1884   bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
1885   bool hw_sqrt_exists, c_is_int, c2_is_int;
1886
1887   dconst1_4 = dconst1;
1888   SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
1889
1890   /* If the exponent isn't a constant, there's nothing of interest
1891      to be done.  */
1892   if (TREE_CODE (arg1) != REAL_CST)
1893     return NULL_TREE;
1894
1895   /* Don't perform the operation if flag_signaling_nans is on
1896      and the operand is a signaling NaN.  */
1897   if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
1898       && ((TREE_CODE (arg0) == REAL_CST
1899            && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
1900           || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
1901     return NULL_TREE;
1902
1903   /* If the exponent is equivalent to an integer, expand to an optimal
1904      multiplication sequence when profitable.  */
1905   c = TREE_REAL_CST (arg1);
1906   n = real_to_integer (&c);
1907   real_from_integer (&cint, VOIDmode, n, SIGNED);
1908   c_is_int = real_identical (&c, &cint);
1909
1910   if (c_is_int
1911       && ((n >= -1 && n <= 2)
1912           || (flag_unsafe_math_optimizations
1913               && speed_p
1914               && powi_cost (n) <= POWI_MAX_MULTS)))
1915     return gimple_expand_builtin_powi (gsi, loc, arg0, n);
1916
1917   /* Attempt various optimizations using sqrt and cbrt.  */
1918   type = TREE_TYPE (arg0);
1919   mode = TYPE_MODE (type);
1920   sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1921
1922   /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
1923      unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
1924      sqrt(-0) = -0.  */
1925   if (sqrtfn
1926       && real_equal (&c, &dconsthalf)
1927       && !HONOR_SIGNED_ZEROS (mode))
1928     return build_and_insert_call (gsi, loc, sqrtfn, arg0);
1929
1930   hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
1931
1932   /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
1933      optimizations since 1./3. is not exactly representable.  If x
1934      is negative and finite, the correct value of pow(x,1./3.) is
1935      a NaN with the "invalid" exception raised, because the value
1936      of 1./3. actually has an even denominator.  The correct value
1937      of cbrt(x) is a negative real value.  */
1938   cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
1939   dconst1_3 = real_value_truncate (mode, dconst_third ());
1940
1941   if (flag_unsafe_math_optimizations
1942       && cbrtfn
1943       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1944       && real_equal (&c, &dconst1_3))
1945     return build_and_insert_call (gsi, loc, cbrtfn, arg0);
1946
1947   /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
1948      if we don't have a hardware sqrt insn.  */
1949   dconst1_6 = dconst1_3;
1950   SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
1951
1952   if (flag_unsafe_math_optimizations
1953       && sqrtfn
1954       && cbrtfn
1955       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1956       && speed_p
1957       && hw_sqrt_exists
1958       && real_equal (&c, &dconst1_6))
1959     {
1960       /* sqrt(x)  */
1961       sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
1962
1963       /* cbrt(sqrt(x))  */
1964       return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
1965     }
1966
1967
1968   /* Attempt to expand the POW as a product of square root chains.
1969      Expand the 0.25 case even when otpimising for size.  */
1970   if (flag_unsafe_math_optimizations
1971       && sqrtfn
1972       && hw_sqrt_exists
1973       && (speed_p || real_equal (&c, &dconst1_4))
1974       && !HONOR_SIGNED_ZEROS (mode))
1975     {
1976       unsigned int max_depth = speed_p
1977                                 ? param_max_pow_sqrt_depth
1978                                 : 2;
1979
1980       tree expand_with_sqrts
1981         = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
1982
1983       if (expand_with_sqrts)
1984         return expand_with_sqrts;
1985     }
1986
1987   real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
1988   n = real_to_integer (&c2);
1989   real_from_integer (&cint, VOIDmode, n, SIGNED);
1990   c2_is_int = real_identical (&c2, &cint);
1991
1992   /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
1993
1994      powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
1995      1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.
1996
1997      Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
1998      different from pow(x, 1./3.) due to rounding and behavior with
1999      negative x, we need to constrain this transformation to unsafe
2000      math and positive x or finite math.  */
2001   real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
2002   real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
2003   real_round (&c2, mode, &c2);
2004   n = real_to_integer (&c2);
2005   real_from_integer (&cint, VOIDmode, n, SIGNED);
2006   real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
2007   real_convert (&c2, mode, &c2);
2008
2009   if (flag_unsafe_math_optimizations
2010       && cbrtfn
2011       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2012       && real_identical (&c2, &c)
2013       && !c2_is_int
2014       && optimize_function_for_speed_p (cfun)
2015       && powi_cost (n / 3) <= POWI_MAX_MULTS)
2016     {
2017       tree powi_x_ndiv3 = NULL_TREE;
2018
2019       /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
2020          possible or profitable, give up.  Skip the degenerate case when
2021          abs(n) < 3, where the result is always 1.  */
2022       if (absu_hwi (n) >= 3)
2023         {
2024           powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
2025                                                      abs_hwi (n / 3));
2026           if (!powi_x_ndiv3)
2027             return NULL_TREE;
2028         }
2029
2030       /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
2031          as that creates an unnecessary variable.  Instead, just produce
2032          either cbrt(x) or cbrt(x) * cbrt(x).  */
2033       cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
2034
2035       if (absu_hwi (n) % 3 == 1)
2036         powi_cbrt_x = cbrt_x;
2037       else
2038         powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2039                                               cbrt_x, cbrt_x);
2040
2041       /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
2042       if (absu_hwi (n) < 3)
2043         result = powi_cbrt_x;
2044       else
2045         result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2046                                          powi_x_ndiv3, powi_cbrt_x);
2047
2048       /* If n is negative, reciprocate the result.  */
2049       if (n < 0)
2050         result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
2051                                          build_real (type, dconst1), result);
2052
2053       return result;
2054     }
2055
2056   /* No optimizations succeeded.  */
2057   return NULL_TREE;
2058 }
2059
2060 /* ARG is the argument to a cabs builtin call in GSI with location info
2061    LOC.  Create a sequence of statements prior to GSI that calculates
2062    sqrt(R*R + I*I), where R and I are the real and imaginary components
2063    of ARG, respectively.  Return an expression holding the result.  */
2064
2065 static tree
2066 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
2067 {
2068   tree real_part, imag_part, addend1, addend2, sum, result;
2069   tree type = TREE_TYPE (TREE_TYPE (arg));
2070   tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2071   machine_mode mode = TYPE_MODE (type);
2072
2073   if (!flag_unsafe_math_optimizations
2074       || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
2075       || !sqrtfn
2076       || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
2077     return NULL_TREE;
2078
2079   real_part = build_and_insert_ref (gsi, loc, type, "cabs",
2080                                     REALPART_EXPR, arg);
2081   addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2082                                     real_part, real_part);
2083   imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
2084                                     IMAGPART_EXPR, arg);
2085   addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2086                                     imag_part, imag_part);
2087   sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
2088   result = build_and_insert_call (gsi, loc, sqrtfn, sum);
2089
2090   return result;
2091 }
2092
2093 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2094    on the SSA_NAME argument of each of them.  Also expand powi(x,n) into
2095    an optimal number of multiplies, when n is a constant.  */
2096
2097 namespace {
2098
2099 const pass_data pass_data_cse_sincos =
2100 {
2101   GIMPLE_PASS, /* type */
2102   "sincos", /* name */
2103   OPTGROUP_NONE, /* optinfo_flags */
2104   TV_TREE_SINCOS, /* tv_id */
2105   PROP_ssa, /* properties_required */
2106   PROP_gimple_opt_math, /* properties_provided */
2107   0, /* properties_destroyed */
2108   0, /* todo_flags_start */
2109   TODO_update_ssa, /* todo_flags_finish */
2110 };
2111
2112 class pass_cse_sincos : public gimple_opt_pass
2113 {
2114 public:
2115   pass_cse_sincos (gcc::context *ctxt)
2116     : gimple_opt_pass (pass_data_cse_sincos, ctxt)
2117   {}
2118
2119   /* opt_pass methods: */
2120   virtual bool gate (function *)
2121     {
2122       /* We no longer require either sincos or cexp, since powi expansion
2123          piggybacks on this pass.  */
2124       return optimize;
2125     }
2126
2127   virtual unsigned int execute (function *);
2128
2129 }; // class pass_cse_sincos
2130
2131 unsigned int
2132 pass_cse_sincos::execute (function *fun)
2133 {
2134   basic_block bb;
2135   bool cfg_changed = false;
2136
2137   calculate_dominance_info (CDI_DOMINATORS);
2138   memset (&sincos_stats, 0, sizeof (sincos_stats));
2139
2140   FOR_EACH_BB_FN (bb, fun)
2141     {
2142       gimple_stmt_iterator gsi;
2143       bool cleanup_eh = false;
2144
2145       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2146         {
2147           gimple *stmt = gsi_stmt (gsi);
2148
2149           /* Only the last stmt in a bb could throw, no need to call
2150              gimple_purge_dead_eh_edges if we change something in the middle
2151              of a basic block.  */
2152           cleanup_eh = false;
2153
2154           if (is_gimple_call (stmt)
2155               && gimple_call_lhs (stmt))
2156             {
2157               tree arg, arg0, arg1, result;
2158               HOST_WIDE_INT n;
2159               location_t loc;
2160
2161               switch (gimple_call_combined_fn (stmt))
2162                 {
2163                 CASE_CFN_COS:
2164                 CASE_CFN_SIN:
2165                 CASE_CFN_CEXPI:
2166                   /* Make sure we have either sincos or cexp.  */
2167                   if (!targetm.libc_has_function (function_c99_math_complex)
2168                       && !targetm.libc_has_function (function_sincos))
2169                     break;
2170
2171                   arg = gimple_call_arg (stmt, 0);
2172                   if (TREE_CODE (arg) == SSA_NAME)
2173                     cfg_changed |= execute_cse_sincos_1 (arg);
2174                   break;
2175
2176                 CASE_CFN_POW:
2177                   arg0 = gimple_call_arg (stmt, 0);
2178                   arg1 = gimple_call_arg (stmt, 1);
2179
2180                   loc = gimple_location (stmt);
2181                   result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
2182
2183                   if (result)
2184                     {
2185                       tree lhs = gimple_get_lhs (stmt);
2186                       gassign *new_stmt = gimple_build_assign (lhs, result);
2187                       gimple_set_location (new_stmt, loc);
2188                       unlink_stmt_vdef (stmt);
2189                       gsi_replace (&gsi, new_stmt, true);
2190                       cleanup_eh = true;
2191                       if (gimple_vdef (stmt))
2192                         release_ssa_name (gimple_vdef (stmt));
2193                     }
2194                   break;
2195
2196                 CASE_CFN_POWI:
2197                   arg0 = gimple_call_arg (stmt, 0);
2198                   arg1 = gimple_call_arg (stmt, 1);
2199                   loc = gimple_location (stmt);
2200
2201                   if (real_minus_onep (arg0))
2202                     {
2203                       tree t0, t1, cond, one, minus_one;
2204                       gassign *stmt;
2205
2206                       t0 = TREE_TYPE (arg0);
2207                       t1 = TREE_TYPE (arg1);
2208                       one = build_real (t0, dconst1);
2209                       minus_one = build_real (t0, dconstm1);
2210
2211                       cond = make_temp_ssa_name (t1, NULL, "powi_cond");
2212                       stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2213                                                   arg1, build_int_cst (t1, 1));
2214                       gimple_set_location (stmt, loc);
2215                       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2216
2217                       result = make_temp_ssa_name (t0, NULL, "powi");
2218                       stmt = gimple_build_assign (result, COND_EXPR, cond,
2219                                                   minus_one, one);
2220                       gimple_set_location (stmt, loc);
2221                       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2222                     }
2223                   else
2224                     {
2225                       if (!tree_fits_shwi_p (arg1))
2226                         break;
2227
2228                       n = tree_to_shwi (arg1);
2229                       result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
2230                     }
2231
2232                   if (result)
2233                     {
2234                       tree lhs = gimple_get_lhs (stmt);
2235                       gassign *new_stmt = gimple_build_assign (lhs, result);
2236                       gimple_set_location (new_stmt, loc);
2237                       unlink_stmt_vdef (stmt);
2238                       gsi_replace (&gsi, new_stmt, true);
2239                       cleanup_eh = true;
2240                       if (gimple_vdef (stmt))
2241                         release_ssa_name (gimple_vdef (stmt));
2242                     }
2243                   break;
2244
2245                 CASE_CFN_CABS:
2246                   arg0 = gimple_call_arg (stmt, 0);
2247                   loc = gimple_location (stmt);
2248                   result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
2249
2250                   if (result)
2251                     {
2252                       tree lhs = gimple_get_lhs (stmt);
2253                       gassign *new_stmt = gimple_build_assign (lhs, result);
2254                       gimple_set_location (new_stmt, loc);
2255                       unlink_stmt_vdef (stmt);
2256                       gsi_replace (&gsi, new_stmt, true);
2257                       cleanup_eh = true;
2258                       if (gimple_vdef (stmt))
2259                         release_ssa_name (gimple_vdef (stmt));
2260                     }
2261                   break;
2262
2263                 default:;
2264                 }
2265             }
2266         }
2267       if (cleanup_eh)
2268         cfg_changed |= gimple_purge_dead_eh_edges (bb);
2269     }
2270
2271   statistics_counter_event (fun, "sincos statements inserted",
2272                             sincos_stats.inserted);
2273
2274   return cfg_changed ? TODO_cleanup_cfg : 0;
2275 }
2276
2277 } // anon namespace
2278
2279 gimple_opt_pass *
2280 make_pass_cse_sincos (gcc::context *ctxt)
2281 {
2282   return new pass_cse_sincos (ctxt);
2283 }
2284
2285 /* Return true if stmt is a type conversion operation that can be stripped
2286    when used in a widening multiply operation.  */
2287 static bool
2288 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2289 {
2290   enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2291
2292   if (TREE_CODE (result_type) == INTEGER_TYPE)
2293     {
2294       tree op_type;
2295       tree inner_op_type;
2296
2297       if (!CONVERT_EXPR_CODE_P (rhs_code))
2298         return false;
2299
2300       op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2301
2302       /* If the type of OP has the same precision as the result, then
2303          we can strip this conversion.  The multiply operation will be
2304          selected to create the correct extension as a by-product.  */
2305       if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2306         return true;
2307
2308       /* We can also strip a conversion if it preserves the signed-ness of
2309          the operation and doesn't narrow the range.  */
2310       inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2311
2312       /* If the inner-most type is unsigned, then we can strip any
2313          intermediate widening operation.  If it's signed, then the
2314          intermediate widening operation must also be signed.  */
2315       if ((TYPE_UNSIGNED (inner_op_type)
2316            || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2317           && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2318         return true;
2319
2320       return false;
2321     }
2322
2323   return rhs_code == FIXED_CONVERT_EXPR;
2324 }
2325
2326 /* Return true if RHS is a suitable operand for a widening multiplication,
2327    assuming a target type of TYPE.
2328    There are two cases:
2329
2330      - RHS makes some value at least twice as wide.  Store that value
2331        in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2332
2333      - RHS is an integer constant.  Store that value in *NEW_RHS_OUT if so,
2334        but leave *TYPE_OUT untouched.  */
2335
2336 static bool
2337 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2338                         tree *new_rhs_out)
2339 {
2340   gimple *stmt;
2341   tree type1, rhs1;
2342
2343   if (TREE_CODE (rhs) == SSA_NAME)
2344     {
2345       stmt = SSA_NAME_DEF_STMT (rhs);
2346       if (is_gimple_assign (stmt))
2347         {
2348           if (! widening_mult_conversion_strippable_p (type, stmt))
2349             rhs1 = rhs;
2350           else
2351             {
2352               rhs1 = gimple_assign_rhs1 (stmt);
2353
2354               if (TREE_CODE (rhs1) == INTEGER_CST)
2355                 {
2356                   *new_rhs_out = rhs1;
2357                   *type_out = NULL;
2358                   return true;
2359                 }
2360             }
2361         }
2362       else
2363         rhs1 = rhs;
2364
2365       type1 = TREE_TYPE (rhs1);
2366
2367       if (TREE_CODE (type1) != TREE_CODE (type)
2368           || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2369         return false;
2370
2371       *new_rhs_out = rhs1;
2372       *type_out = type1;
2373       return true;
2374     }
2375
2376   if (TREE_CODE (rhs) == INTEGER_CST)
2377     {
2378       *new_rhs_out = rhs;
2379       *type_out = NULL;
2380       return true;
2381     }
2382
2383   return false;
2384 }
2385
2386 /* Return true if STMT performs a widening multiplication, assuming the
2387    output type is TYPE.  If so, store the unwidened types of the operands
2388    in *TYPE1_OUT and *TYPE2_OUT respectively.  Also fill *RHS1_OUT and
2389    *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2390    and *TYPE2_OUT would give the operands of the multiplication.  */
2391
2392 static bool
2393 is_widening_mult_p (gimple *stmt,
2394                     tree *type1_out, tree *rhs1_out,
2395                     tree *type2_out, tree *rhs2_out)
2396 {
2397   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2398
2399   if (TREE_CODE (type) == INTEGER_TYPE)
2400     {
2401       if (TYPE_OVERFLOW_TRAPS (type))
2402         return false;
2403     }
2404   else if (TREE_CODE (type) != FIXED_POINT_TYPE)
2405     return false;
2406
2407   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2408                                rhs1_out))
2409     return false;
2410
2411   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2412                                rhs2_out))
2413     return false;
2414
2415   if (*type1_out == NULL)
2416     {
2417       if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2418         return false;
2419       *type1_out = *type2_out;
2420     }
2421
2422   if (*type2_out == NULL)
2423     {
2424       if (!int_fits_type_p (*rhs2_out, *type1_out))
2425         return false;
2426       *type2_out = *type1_out;
2427     }
2428
2429   /* Ensure that the larger of the two operands comes first. */
2430   if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2431     {
2432       std::swap (*type1_out, *type2_out);
2433       std::swap (*rhs1_out, *rhs2_out);
2434     }
2435
2436   return true;
2437 }
2438
2439 /* Check to see if the CALL statement is an invocation of copysign
2440    with 1. being the first argument.  */
2441 static bool
2442 is_copysign_call_with_1 (gimple *call)
2443 {
2444   gcall *c = dyn_cast <gcall *> (call);
2445   if (! c)
2446     return false;
2447
2448   enum combined_fn code = gimple_call_combined_fn (c);
2449
2450   if (code == CFN_LAST)
2451     return false;
2452
2453   if (builtin_fn_p (code))
2454     {
2455       switch (as_builtin_fn (code))
2456         {
2457         CASE_FLT_FN (BUILT_IN_COPYSIGN):
2458         CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2459           return real_onep (gimple_call_arg (c, 0));
2460         default:
2461           return false;
2462         }
2463     }
2464
2465   if (internal_fn_p (code))
2466     {
2467       switch (as_internal_fn (code))
2468         {
2469         case IFN_COPYSIGN:
2470           return real_onep (gimple_call_arg (c, 0));
2471         default:
2472           return false;
2473         }
2474     }
2475
2476    return false;
2477 }
2478
2479 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2480    This only happens when the the xorsign optab is defined, if the
2481    pattern is not a xorsign pattern or if expansion fails FALSE is
2482    returned, otherwise TRUE is returned.  */
2483 static bool
2484 convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2485 {
2486   tree treeop0, treeop1, lhs, type;
2487   location_t loc = gimple_location (stmt);
2488   lhs = gimple_assign_lhs (stmt);
2489   treeop0 = gimple_assign_rhs1 (stmt);
2490   treeop1 = gimple_assign_rhs2 (stmt);
2491   type = TREE_TYPE (lhs);
2492   machine_mode mode = TYPE_MODE (type);
2493
2494   if (HONOR_SNANS (type))
2495     return false;
2496
2497   if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2498     {
2499       gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2500       if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
2501         {
2502           call0 = SSA_NAME_DEF_STMT (treeop1);
2503           if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2504             return false;
2505
2506           treeop1 = treeop0;
2507         }
2508         if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2509           return false;
2510
2511         gcall *c = as_a<gcall*> (call0);
2512         treeop0 = gimple_call_arg (c, 1);
2513
2514         gcall *call_stmt
2515           = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2516         gimple_set_lhs (call_stmt, lhs);
2517         gimple_set_location (call_stmt, loc);
2518         gsi_replace (gsi, call_stmt, true);
2519         return true;
2520     }
2521
2522   return false;
2523 }
2524
2525 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2526    its rhs, and try to convert it into a WIDEN_MULT_EXPR.  The return
2527    value is true iff we converted the statement.  */
2528
2529 static bool
2530 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2531 {
2532   tree lhs, rhs1, rhs2, type, type1, type2;
2533   enum insn_code handler;
2534   scalar_int_mode to_mode, from_mode, actual_mode;
2535   optab op;
2536   int actual_precision;
2537   location_t loc = gimple_location (stmt);
2538   bool from_unsigned1, from_unsigned2;
2539
2540   lhs = gimple_assign_lhs (stmt);
2541   type = TREE_TYPE (lhs);
2542   if (TREE_CODE (type) != INTEGER_TYPE)
2543     return false;
2544
2545   if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2546     return false;
2547
2548   to_mode = SCALAR_INT_TYPE_MODE (type);
2549   from_mode = SCALAR_INT_TYPE_MODE (type1);
2550   if (to_mode == from_mode)
2551     return false;
2552
2553   from_unsigned1 = TYPE_UNSIGNED (type1);
2554   from_unsigned2 = TYPE_UNSIGNED (type2);
2555
2556   if (from_unsigned1 && from_unsigned2)
2557     op = umul_widen_optab;
2558   else if (!from_unsigned1 && !from_unsigned2)
2559     op = smul_widen_optab;
2560   else
2561     op = usmul_widen_optab;
2562
2563   handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2564                                                   &actual_mode);
2565
2566   if (handler == CODE_FOR_nothing)
2567     {
2568       if (op != smul_widen_optab)
2569         {
2570           /* We can use a signed multiply with unsigned types as long as
2571              there is a wider mode to use, or it is the smaller of the two
2572              types that is unsigned.  Note that type1 >= type2, always.  */
2573           if ((TYPE_UNSIGNED (type1)
2574                && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2575               || (TYPE_UNSIGNED (type2)
2576                   && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2577             {
2578               if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2579                   || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2580                 return false;
2581             }
2582
2583           op = smul_widen_optab;
2584           handler = find_widening_optab_handler_and_mode (op, to_mode,
2585                                                           from_mode,
2586                                                           &actual_mode);
2587
2588           if (handler == CODE_FOR_nothing)
2589             return false;
2590
2591           from_unsigned1 = from_unsigned2 = false;
2592         }
2593       else
2594         return false;
2595     }
2596
2597   /* Ensure that the inputs to the handler are in the correct precison
2598      for the opcode.  This will be the full mode size.  */
2599   actual_precision = GET_MODE_PRECISION (actual_mode);
2600   if (2 * actual_precision > TYPE_PRECISION (type))
2601     return false;
2602   if (actual_precision != TYPE_PRECISION (type1)
2603       || from_unsigned1 != TYPE_UNSIGNED (type1))
2604     rhs1 = build_and_insert_cast (gsi, loc,
2605                                   build_nonstandard_integer_type
2606                                     (actual_precision, from_unsigned1), rhs1);
2607   if (actual_precision != TYPE_PRECISION (type2)
2608       || from_unsigned2 != TYPE_UNSIGNED (type2))
2609     rhs2 = build_and_insert_cast (gsi, loc,
2610                                   build_nonstandard_integer_type
2611                                     (actual_precision, from_unsigned2), rhs2);
2612
2613   /* Handle constants.  */
2614   if (TREE_CODE (rhs1) == INTEGER_CST)
2615     rhs1 = fold_convert (type1, rhs1);
2616   if (TREE_CODE (rhs2) == INTEGER_CST)
2617     rhs2 = fold_convert (type2, rhs2);
2618
2619   gimple_assign_set_rhs1 (stmt, rhs1);
2620   gimple_assign_set_rhs2 (stmt, rhs2);
2621   gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2622   update_stmt (stmt);
2623   widen_mul_stats.widen_mults_inserted++;
2624   return true;
2625 }
2626
2627 /* Process a single gimple statement STMT, which is found at the
2628    iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
2629    rhs (given by CODE), and try to convert it into a
2630    WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
2631    is true iff we converted the statement.  */
2632
2633 static bool
2634 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2635                             enum tree_code code)
2636 {
2637   gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2638   gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2639   tree type, type1, type2, optype;
2640   tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2641   enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2642   optab this_optab;
2643   enum tree_code wmult_code;
2644   enum insn_code handler;
2645   scalar_mode to_mode, from_mode, actual_mode;
2646   location_t loc = gimple_location (stmt);
2647   int actual_precision;
2648   bool from_unsigned1, from_unsigned2;
2649
2650   lhs = gimple_assign_lhs (stmt);
2651   type = TREE_TYPE (lhs);
2652   if (TREE_CODE (type) != INTEGER_TYPE
2653       && TREE_CODE (type) != FIXED_POINT_TYPE)
2654     return false;
2655
2656   if (code == MINUS_EXPR)
2657     wmult_code = WIDEN_MULT_MINUS_EXPR;
2658   else
2659     wmult_code = WIDEN_MULT_PLUS_EXPR;
2660
2661   rhs1 = gimple_assign_rhs1 (stmt);
2662   rhs2 = gimple_assign_rhs2 (stmt);
2663
2664   if (TREE_CODE (rhs1) == SSA_NAME)
2665     {
2666       rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2667       if (is_gimple_assign (rhs1_stmt))
2668         rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2669     }
2670
2671   if (TREE_CODE (rhs2) == SSA_NAME)
2672     {
2673       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2674       if (is_gimple_assign (rhs2_stmt))
2675         rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2676     }
2677
2678   /* Allow for one conversion statement between the multiply
2679      and addition/subtraction statement.  If there are more than
2680      one conversions then we assume they would invalidate this
2681      transformation.  If that's not the case then they should have
2682      been folded before now.  */
2683   if (CONVERT_EXPR_CODE_P (rhs1_code))
2684     {
2685       conv1_stmt = rhs1_stmt;
2686       rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2687       if (TREE_CODE (rhs1) == SSA_NAME)
2688         {
2689           rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2690           if (is_gimple_assign (rhs1_stmt))
2691             rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2692         }
2693       else
2694         return false;
2695     }
2696   if (CONVERT_EXPR_CODE_P (rhs2_code))
2697     {
2698       conv2_stmt = rhs2_stmt;
2699       rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2700       if (TREE_CODE (rhs2) == SSA_NAME)
2701         {
2702           rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2703           if (is_gimple_assign (rhs2_stmt))
2704             rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2705         }
2706       else
2707         return false;
2708     }
2709
2710   /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2711      is_widening_mult_p, but we still need the rhs returns.
2712
2713      It might also appear that it would be sufficient to use the existing
2714      operands of the widening multiply, but that would limit the choice of
2715      multiply-and-accumulate instructions.
2716
2717      If the widened-multiplication result has more than one uses, it is
2718      probably wiser not to do the conversion.  */
2719   if (code == PLUS_EXPR
2720       && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2721     {
2722       if (!has_single_use (rhs1)
2723           || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2724                                   &type2, &mult_rhs2))
2725         return false;
2726       add_rhs = rhs2;
2727       conv_stmt = conv1_stmt;
2728     }
2729   else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2730     {
2731       if (!has_single_use (rhs2)
2732           || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2733                                   &type2, &mult_rhs2))
2734         return false;
2735       add_rhs = rhs1;
2736       conv_stmt = conv2_stmt;
2737     }
2738   else
2739     return false;
2740
2741   to_mode = SCALAR_TYPE_MODE (type);
2742   from_mode = SCALAR_TYPE_MODE (type1);
2743   if (to_mode == from_mode)
2744     return false;
2745
2746   from_unsigned1 = TYPE_UNSIGNED (type1);
2747   from_unsigned2 = TYPE_UNSIGNED (type2);
2748   optype = type1;
2749
2750   /* There's no such thing as a mixed sign madd yet, so use a wider mode.  */
2751   if (from_unsigned1 != from_unsigned2)
2752     {
2753       if (!INTEGRAL_TYPE_P (type))
2754         return false;
2755       /* We can use a signed multiply with unsigned types as long as
2756          there is a wider mode to use, or it is the smaller of the two
2757          types that is unsigned.  Note that type1 >= type2, always.  */
2758       if ((from_unsigned1
2759            && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2760           || (from_unsigned2
2761               && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2762         {
2763           if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2764               || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2765             return false;
2766         }
2767
2768       from_unsigned1 = from_unsigned2 = false;
2769       optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2770                                                false);
2771     }
2772
2773   /* If there was a conversion between the multiply and addition
2774      then we need to make sure it fits a multiply-and-accumulate.
2775      The should be a single mode change which does not change the
2776      value.  */
2777   if (conv_stmt)
2778     {
2779       /* We use the original, unmodified data types for this.  */
2780       tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
2781       tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
2782       int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
2783       bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
2784
2785       if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
2786         {
2787           /* Conversion is a truncate.  */
2788           if (TYPE_PRECISION (to_type) < data_size)
2789             return false;
2790         }
2791       else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
2792         {
2793           /* Conversion is an extend.  Check it's the right sort.  */
2794           if (TYPE_UNSIGNED (from_type) != is_unsigned
2795               && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
2796             return false;
2797         }
2798       /* else convert is a no-op for our purposes.  */
2799     }
2800
2801   /* Verify that the machine can perform a widening multiply
2802      accumulate in this mode/signedness combination, otherwise
2803      this transformation is likely to pessimize code.  */
2804   this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
2805   handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
2806                                                   from_mode, &actual_mode);
2807
2808   if (handler == CODE_FOR_nothing)
2809     return false;
2810
2811   /* Ensure that the inputs to the handler are in the correct precison
2812      for the opcode.  This will be the full mode size.  */
2813   actual_precision = GET_MODE_PRECISION (actual_mode);
2814   if (actual_precision != TYPE_PRECISION (type1)
2815       || from_unsigned1 != TYPE_UNSIGNED (type1))
2816     mult_rhs1 = build_and_insert_cast (gsi, loc,
2817                                        build_nonstandard_integer_type
2818                                          (actual_precision, from_unsigned1),
2819                                        mult_rhs1);
2820   if (actual_precision != TYPE_PRECISION (type2)
2821       || from_unsigned2 != TYPE_UNSIGNED (type2))
2822     mult_rhs2 = build_and_insert_cast (gsi, loc,
2823                                        build_nonstandard_integer_type
2824                                          (actual_precision, from_unsigned2),
2825                                        mult_rhs2);
2826
2827   if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
2828     add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
2829
2830   /* Handle constants.  */
2831   if (TREE_CODE (mult_rhs1) == INTEGER_CST)
2832     mult_rhs1 = fold_convert (type1, mult_rhs1);
2833   if (TREE_CODE (mult_rhs2) == INTEGER_CST)
2834     mult_rhs2 = fold_convert (type2, mult_rhs2);
2835
2836   gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
2837                                   add_rhs);
2838   update_stmt (gsi_stmt (*gsi));
2839   widen_mul_stats.maccs_inserted++;
2840   return true;
2841 }
2842
2843 /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
2844    OP2 and which we know is used in statements that can be, together with the
2845    multiplication, converted to FMAs, perform the transformation.  */
2846
2847 static void
2848 convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
2849 {
2850   tree type = TREE_TYPE (mul_result);
2851   gimple *use_stmt;
2852   imm_use_iterator imm_iter;
2853   gcall *fma_stmt;
2854
2855   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
2856     {
2857       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
2858       tree addop, mulop1 = op1, result = mul_result;
2859       bool negate_p = false;
2860       gimple_seq seq = NULL;
2861
2862       if (is_gimple_debug (use_stmt))
2863         continue;
2864
2865       if (is_gimple_assign (use_stmt)
2866           && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
2867         {
2868           result = gimple_assign_lhs (use_stmt);
2869           use_operand_p use_p;
2870           gimple *neguse_stmt;
2871           single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
2872           gsi_remove (&gsi, true);
2873           release_defs (use_stmt);
2874
2875           use_stmt = neguse_stmt;
2876           gsi = gsi_for_stmt (use_stmt);
2877           negate_p = true;
2878         }
2879
2880       tree cond, else_value, ops[3];
2881       tree_code code;
2882       if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
2883                                               ops, &else_value))
2884         gcc_unreachable ();
2885       addop = ops[0] == result ? ops[1] : ops[0];
2886
2887       if (code == MINUS_EXPR)
2888         {
2889           if (ops[0] == result)
2890             /* a * b - c -> a * b + (-c)  */
2891             addop = gimple_build (&seq, NEGATE_EXPR, type, addop);
2892           else
2893             /* a - b * c -> (-b) * c + a */
2894             negate_p = !negate_p;
2895         }
2896
2897       if (negate_p)
2898         mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1);
2899
2900       if (seq)
2901         gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2902
2903       if (cond)
2904         fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
2905                                                op2, addop, else_value);
2906       else
2907         fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
2908       gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
2909       gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun,
2910                                                                    use_stmt));
2911       gsi_replace (&gsi, fma_stmt, true);
2912       /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
2913          regardless of where the negation occurs.  */
2914       gimple *orig_stmt = gsi_stmt (gsi);
2915       if (fold_stmt (&gsi, follow_all_ssa_edges))
2916         {
2917           if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
2918             gcc_unreachable ();
2919           update_stmt (gsi_stmt (gsi));
2920         }
2921
2922       if (dump_file && (dump_flags & TDF_DETAILS))
2923         {
2924           fprintf (dump_file, "Generated FMA ");
2925           print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
2926           fprintf (dump_file, "\n");
2927         }
2928
2929       widen_mul_stats.fmas_inserted++;
2930     }
2931 }
2932
2933 /* Data necessary to perform the actual transformation from a multiplication
2934    and an addition to an FMA after decision is taken it should be done and to
2935    then delete the multiplication statement from the function IL.  */
2936
2937 struct fma_transformation_info
2938 {
2939   gimple *mul_stmt;
2940   tree mul_result;
2941   tree op1;
2942   tree op2;
2943 };
2944
2945 /* Structure containing the current state of FMA deferring, i.e. whether we are
2946    deferring, whether to continue deferring, and all data necessary to come
2947    back and perform all deferred transformations.  */
2948
2949 class fma_deferring_state
2950 {
2951 public:
2952   /* Class constructor.  Pass true as PERFORM_DEFERRING in order to actually
2953      do any deferring.  */
2954
2955   fma_deferring_state (bool perform_deferring)
2956     : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
2957       m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
2958
2959   /* List of FMA candidates for which we the transformation has been determined
2960      possible but we at this point in BB analysis we do not consider them
2961      beneficial.  */
2962   auto_vec<fma_transformation_info, 8> m_candidates;
2963
2964   /* Set of results of multiplication that are part of an already deferred FMA
2965      candidates.  */
2966   hash_set<tree> m_mul_result_set;
2967
2968   /* The PHI that supposedly feeds back result of a FMA to another over loop
2969      boundary.  */
2970   gphi *m_initial_phi;
2971
2972   /* Result of the last produced FMA candidate or NULL if there has not been
2973      one.  */
2974   tree m_last_result;
2975
2976   /* If true, deferring might still be profitable.  If false, transform all
2977      candidates and no longer defer.  */
2978   bool m_deferring_p;
2979 };
2980
2981 /* Transform all deferred FMA candidates and mark STATE as no longer
2982    deferring.  */
2983
2984 static void
2985 cancel_fma_deferring (fma_deferring_state *state)
2986 {
2987   if (!state->m_deferring_p)
2988     return;
2989
2990   for (unsigned i = 0; i < state->m_candidates.length (); i++)
2991     {
2992       if (dump_file && (dump_flags & TDF_DETAILS))
2993         fprintf (dump_file, "Generating deferred FMA\n");
2994
2995       const fma_transformation_info &fti = state->m_candidates[i];
2996       convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
2997
2998       gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
2999       gsi_remove (&gsi, true);
3000       release_defs (fti.mul_stmt);
3001     }
3002   state->m_deferring_p = false;
3003 }
3004
3005 /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3006    Otherwise return NULL.  */
3007
3008 static gphi *
3009 result_of_phi (tree op)
3010 {
3011   if (TREE_CODE (op) != SSA_NAME)
3012     return NULL;
3013
3014   return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3015 }
3016
3017 /* After processing statements of a BB and recording STATE, return true if the
3018    initial phi is fed by the last FMA candidate result ore one such result from
3019    previously processed BBs marked in LAST_RESULT_SET.  */
3020
3021 static bool
3022 last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3023                                       hash_set<tree> *last_result_set)
3024 {
3025   ssa_op_iter iter;
3026   use_operand_p use;
3027   FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3028     {
3029       tree t = USE_FROM_PTR (use);
3030       if (t == state->m_last_result
3031           || last_result_set->contains (t))
3032         return true;
3033     }
3034
3035   return false;
3036 }
3037
3038 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3039    with uses in additions and subtractions to form fused multiply-add
3040    operations.  Returns true if successful and MUL_STMT should be removed.
3041    If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3042    on MUL_COND, otherwise it is unconditional.
3043
3044    If STATE indicates that we are deferring FMA transformation, that means
3045    that we do not produce FMAs for basic blocks which look like:
3046
3047     <bb 6>
3048     # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3049     _65 = _14 * _16;
3050     accumulator_66 = _65 + accumulator_111;
3051
3052   or its unrolled version, i.e. with several FMA candidates that feed result
3053   of one into the addend of another.  Instead, we add them to a list in STATE
3054   and if we later discover an FMA candidate that is not part of such a chain,
3055   we go back and perform all deferred past candidates.  */
3056
3057 static bool
3058 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3059                      fma_deferring_state *state, tree mul_cond = NULL_TREE)
3060 {
3061   tree mul_result = gimple_get_lhs (mul_stmt);
3062   tree type = TREE_TYPE (mul_result);
3063   gimple *use_stmt, *neguse_stmt;
3064   use_operand_p use_p;
3065   imm_use_iterator imm_iter;
3066
3067   if (FLOAT_TYPE_P (type)
3068       && flag_fp_contract_mode == FP_CONTRACT_OFF)
3069     return false;
3070
3071   /* We don't want to do bitfield reduction ops.  */
3072   if (INTEGRAL_TYPE_P (type)
3073       && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
3074     return false;
3075
3076   /* If the target doesn't support it, don't generate it.  We assume that
3077      if fma isn't available then fms, fnma or fnms are not either.  */
3078   optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt));
3079   if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
3080     return false;
3081
3082   /* If the multiplication has zero uses, it is kept around probably because
3083      of -fnon-call-exceptions.  Don't optimize it away in that case,
3084      it is DCE job.  */
3085   if (has_zero_uses (mul_result))
3086     return false;
3087
3088   bool check_defer
3089     = (state->m_deferring_p
3090        && (tree_to_shwi (TYPE_SIZE (type))
3091            <= param_avoid_fma_max_bits));
3092   bool defer = check_defer;
3093   bool seen_negate_p = false;
3094   /* Make sure that the multiplication statement becomes dead after
3095      the transformation, thus that all uses are transformed to FMAs.
3096      This means we assume that an FMA operation has the same cost
3097      as an addition.  */
3098   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3099     {
3100       tree result = mul_result;
3101       bool negate_p = false;
3102
3103       use_stmt = USE_STMT (use_p);
3104
3105       if (is_gimple_debug (use_stmt))
3106         continue;
3107
3108       /* For now restrict this operations to single basic blocks.  In theory
3109          we would want to support sinking the multiplication in
3110          m = a*b;
3111          if ()
3112            ma = m + c;
3113          else
3114            d = m;
3115          to form a fma in the then block and sink the multiplication to the
3116          else block.  */
3117       if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3118         return false;
3119
3120       /* A negate on the multiplication leads to FNMA.  */
3121       if (is_gimple_assign (use_stmt)
3122           && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3123         {
3124           ssa_op_iter iter;
3125           use_operand_p usep;
3126
3127           /* If (due to earlier missed optimizations) we have two
3128              negates of the same value, treat them as equivalent
3129              to a single negate with multiple uses.  */
3130           if (seen_negate_p)
3131             return false;
3132
3133           result = gimple_assign_lhs (use_stmt);
3134
3135           /* Make sure the negate statement becomes dead with this
3136              single transformation.  */
3137           if (!single_imm_use (gimple_assign_lhs (use_stmt),
3138                                &use_p, &neguse_stmt))
3139             return false;
3140
3141           /* Make sure the multiplication isn't also used on that stmt.  */
3142           FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3143             if (USE_FROM_PTR (usep) == mul_result)
3144               return false;
3145
3146           /* Re-validate.  */
3147           use_stmt = neguse_stmt;
3148           if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3149             return false;
3150
3151           negate_p = seen_negate_p = true;
3152         }
3153
3154       tree cond, else_value, ops[3];
3155       tree_code code;
3156       if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3157                                               &else_value))
3158         return false;
3159
3160       switch (code)
3161         {
3162         case MINUS_EXPR:
3163           if (ops[1] == result)
3164             negate_p = !negate_p;
3165           break;
3166         case PLUS_EXPR:
3167           break;
3168         default:
3169           /* FMA can only be formed from PLUS and MINUS.  */
3170           return false;
3171         }
3172
3173       if (mul_cond && cond != mul_cond)
3174         return false;
3175
3176       if (cond)
3177         {
3178           if (cond == result || else_value == result)
3179             return false;
3180           if (!direct_internal_fn_supported_p (IFN_COND_FMA, type, opt_type))
3181             return false;
3182         }
3183
3184       /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3185          we'll visit later, we might be able to get a more profitable
3186          match with fnma.
3187          OTOH, if we don't, a negate / fma pair has likely lower latency
3188          that a mult / subtract pair.  */
3189       if (code == MINUS_EXPR
3190           && !negate_p
3191           && ops[0] == result
3192           && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
3193           && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3194           && TREE_CODE (ops[1]) == SSA_NAME
3195           && has_single_use (ops[1]))
3196         {
3197           gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3198           if (is_gimple_assign (stmt2)
3199               && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3200             return false;
3201         }
3202
3203       /* We can't handle a * b + a * b.  */
3204       if (ops[0] == ops[1])
3205         return false;
3206       /* If deferring, make sure we are not looking at an instruction that
3207          wouldn't have existed if we were not.  */
3208       if (state->m_deferring_p
3209           && (state->m_mul_result_set.contains (ops[0])
3210               || state->m_mul_result_set.contains (ops[1])))
3211         return false;
3212
3213       if (check_defer)
3214         {
3215           tree use_lhs = gimple_get_lhs (use_stmt);
3216           if (state->m_last_result)
3217             {
3218               if (ops[1] == state->m_last_result
3219                   || ops[0] == state->m_last_result)
3220                 defer = true;
3221               else
3222                 defer = false;
3223             }
3224           else
3225             {
3226               gcc_checking_assert (!state->m_initial_phi);
3227               gphi *phi;
3228               if (ops[0] == result)
3229                 phi = result_of_phi (ops[1]);
3230               else
3231                 {
3232                   gcc_assert (ops[1] == result);
3233                   phi = result_of_phi (ops[0]);
3234                 }
3235
3236               if (phi)
3237                 {
3238                   state->m_initial_phi = phi;
3239                   defer = true;
3240                 }
3241               else
3242                 defer = false;
3243             }
3244
3245           state->m_last_result = use_lhs;
3246           check_defer = false;
3247         }
3248       else
3249         defer = false;
3250
3251       /* While it is possible to validate whether or not the exact form that
3252          we've recognized is available in the backend, the assumption is that
3253          if the deferring logic above did not trigger, the transformation is
3254          never a loss.  For instance, suppose the target only has the plain FMA
3255          pattern available.  Consider a*b-c -> fma(a,b,-c): we've exchanged
3256          MUL+SUB for FMA+NEG, which is still two operations.  Consider
3257          -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3258          form the two NEGs are independent and could be run in parallel.  */
3259     }
3260
3261   if (defer)
3262     {
3263       fma_transformation_info fti;
3264       fti.mul_stmt = mul_stmt;
3265       fti.mul_result = mul_result;
3266       fti.op1 = op1;
3267       fti.op2 = op2;
3268       state->m_candidates.safe_push (fti);
3269       state->m_mul_result_set.add (mul_result);
3270
3271       if (dump_file && (dump_flags & TDF_DETAILS))
3272         {
3273           fprintf (dump_file, "Deferred generating FMA for multiplication ");
3274           print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
3275           fprintf (dump_file, "\n");
3276         }
3277
3278       return false;
3279     }
3280   else
3281     {
3282       if (state->m_deferring_p)
3283         cancel_fma_deferring (state);
3284       convert_mult_to_fma_1 (mul_result, op1, op2);
3285       return true;
3286     }
3287 }
3288
3289
3290 /* Helper function of match_uaddsub_overflow.  Return 1
3291    if USE_STMT is unsigned overflow check ovf != 0 for
3292    STMT, -1 if USE_STMT is unsigned overflow check ovf == 0
3293    and 0 otherwise.  */
3294
3295 static int
3296 uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt)
3297 {
3298   enum tree_code ccode = ERROR_MARK;
3299   tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3300   if (gimple_code (use_stmt) == GIMPLE_COND)
3301     {
3302       ccode = gimple_cond_code (use_stmt);
3303       crhs1 = gimple_cond_lhs (use_stmt);
3304       crhs2 = gimple_cond_rhs (use_stmt);
3305     }
3306   else if (is_gimple_assign (use_stmt))
3307     {
3308       if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
3309         {
3310           ccode = gimple_assign_rhs_code (use_stmt);
3311           crhs1 = gimple_assign_rhs1 (use_stmt);
3312           crhs2 = gimple_assign_rhs2 (use_stmt);
3313         }
3314       else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR)
3315         {
3316           tree cond = gimple_assign_rhs1 (use_stmt);
3317           if (COMPARISON_CLASS_P (cond))
3318             {
3319               ccode = TREE_CODE (cond);
3320               crhs1 = TREE_OPERAND (cond, 0);
3321               crhs2 = TREE_OPERAND (cond, 1);
3322             }
3323           else
3324             return 0;
3325         }
3326       else
3327         return 0;
3328     }
3329   else
3330     return 0;
3331
3332   if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3333     return 0;
3334
3335   enum tree_code code = gimple_assign_rhs_code (stmt);
3336   tree lhs = gimple_assign_lhs (stmt);
3337   tree rhs1 = gimple_assign_rhs1 (stmt);
3338   tree rhs2 = gimple_assign_rhs2 (stmt);
3339
3340   switch (ccode)
3341     {
3342     case GT_EXPR:
3343     case LE_EXPR:
3344       /* r = a - b; r > a or r <= a
3345          r = a + b; a > r or a <= r or b > r or b <= r.  */
3346       if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3347           || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3348               && crhs2 == lhs))
3349         return ccode == GT_EXPR ? 1 : -1;
3350       break;
3351     case LT_EXPR:
3352     case GE_EXPR:
3353       /* r = a - b; a < r or a >= r
3354          r = a + b; r < a or r >= a or r < b or r >= b.  */
3355       if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3356           || (code == PLUS_EXPR && crhs1 == lhs
3357               && (crhs2 == rhs1 || crhs2 == rhs2)))
3358         return ccode == LT_EXPR ? 1 : -1;
3359       break;
3360     default:
3361       break;
3362     }
3363   return 0;
3364 }
3365
3366 /* Recognize for unsigned x
3367    x = y - z;
3368    if (x > y)
3369    where there are other uses of x and replace it with
3370    _7 = SUB_OVERFLOW (y, z);
3371    x = REALPART_EXPR <_7>;
3372    _8 = IMAGPART_EXPR <_7>;
3373    if (_8)
3374    and similarly for addition.  */
3375
3376 static bool
3377 match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
3378                         enum tree_code code)
3379 {
3380   tree lhs = gimple_assign_lhs (stmt);
3381   tree type = TREE_TYPE (lhs);
3382   use_operand_p use_p;
3383   imm_use_iterator iter;
3384   bool use_seen = false;
3385   bool ovf_use_seen = false;
3386   gimple *use_stmt;
3387
3388   gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR);
3389   if (!INTEGRAL_TYPE_P (type)
3390       || !TYPE_UNSIGNED (type)
3391       || has_zero_uses (lhs)
3392       || has_single_use (lhs)
3393       || optab_handler (code == PLUS_EXPR ? uaddv4_optab : usubv4_optab,
3394                         TYPE_MODE (type)) == CODE_FOR_nothing)
3395     return false;
3396
3397   FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
3398     {
3399       use_stmt = USE_STMT (use_p);
3400       if (is_gimple_debug (use_stmt))
3401         continue;
3402
3403       if (uaddsub_overflow_check_p (stmt, use_stmt))
3404         ovf_use_seen = true;
3405       else
3406         use_seen = true;
3407       if (ovf_use_seen && use_seen)
3408         break;
3409     }
3410
3411   if (!ovf_use_seen || !use_seen)
3412     return false;
3413
3414   tree ctype = build_complex_type (type);
3415   tree rhs1 = gimple_assign_rhs1 (stmt);
3416   tree rhs2 = gimple_assign_rhs2 (stmt);
3417   gcall *g = gimple_build_call_internal (code == PLUS_EXPR
3418                                          ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
3419                                          2, rhs1, rhs2);
3420   tree ctmp = make_ssa_name (ctype);
3421   gimple_call_set_lhs (g, ctmp);
3422   gsi_insert_before (gsi, g, GSI_SAME_STMT);
3423   gassign *g2 = gimple_build_assign (lhs, REALPART_EXPR,
3424                                      build1 (REALPART_EXPR, type, ctmp));
3425   gsi_replace (gsi, g2, true);
3426   tree ovf = make_ssa_name (type);
3427   g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
3428                             build1 (IMAGPART_EXPR, type, ctmp));
3429   gsi_insert_after (gsi, g2, GSI_NEW_STMT);
3430
3431   FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3432     {
3433       if (is_gimple_debug (use_stmt))
3434         continue;
3435
3436       int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt);
3437       if (ovf_use == 0)
3438         continue;
3439       if (gimple_code (use_stmt) == GIMPLE_COND)
3440         {
3441           gcond *cond_stmt = as_a <gcond *> (use_stmt);
3442           gimple_cond_set_lhs (cond_stmt, ovf);
3443           gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
3444           gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
3445         }
3446       else
3447         {
3448           gcc_checking_assert (is_gimple_assign (use_stmt));
3449           if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
3450             {
3451               gimple_assign_set_rhs1 (use_stmt, ovf);
3452               gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
3453               gimple_assign_set_rhs_code (use_stmt,
3454                                           ovf_use == 1 ? NE_EXPR : EQ_EXPR);
3455             }
3456           else
3457             {
3458               gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
3459                                    == COND_EXPR);
3460               tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
3461                                   boolean_type_node, ovf,
3462                                   build_int_cst (type, 0));
3463               gimple_assign_set_rhs1 (use_stmt, cond);
3464             }
3465         }
3466       update_stmt (use_stmt);
3467     }
3468   return true;
3469 }
3470
3471 /* Return true if target has support for divmod.  */
3472
3473 static bool
3474 target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
3475 {
3476   /* If target supports hardware divmod insn, use it for divmod.  */
3477   if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
3478     return true;
3479
3480   /* Check if libfunc for divmod is available.  */
3481   rtx libfunc = optab_libfunc (divmod_optab, mode);
3482   if (libfunc != NULL_RTX)
3483     {
3484       /* If optab_handler exists for div_optab, perhaps in a wider mode,
3485          we don't want to use the libfunc even if it exists for given mode.  */
3486       machine_mode div_mode;
3487       FOR_EACH_MODE_FROM (div_mode, mode)
3488         if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
3489           return false;
3490
3491       return targetm.expand_divmod_libfunc != NULL;
3492     }
3493
3494   return false;
3495 }
3496
3497 /* Check if stmt is candidate for divmod transform.  */
3498
3499 static bool
3500 divmod_candidate_p (gassign *stmt)
3501 {
3502   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
3503   machine_mode mode = TYPE_MODE (type);
3504   optab divmod_optab, div_optab;
3505
3506   if (TYPE_UNSIGNED (type))
3507     {
3508       divmod_optab = udivmod_optab;
3509       div_optab = udiv_optab;
3510     }
3511   else
3512     {
3513       divmod_optab = sdivmod_optab;
3514       div_optab = sdiv_optab;
3515     }
3516
3517   tree op1 = gimple_assign_rhs1 (stmt);
3518   tree op2 = gimple_assign_rhs2 (stmt);
3519
3520   /* Disable the transform if either is a constant, since division-by-constant
3521      may have specialized expansion.  */
3522   if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2))
3523     return false;
3524
3525   /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
3526      expand using the [su]divv optabs.  */
3527   if (TYPE_OVERFLOW_TRAPS (type))
3528     return false;
3529
3530   if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
3531     return false;
3532
3533   return true;
3534 }
3535
3536 /* This function looks for:
3537    t1 = a TRUNC_DIV_EXPR b;
3538    t2 = a TRUNC_MOD_EXPR b;
3539    and transforms it to the following sequence:
3540    complex_tmp = DIVMOD (a, b);
3541    t1 = REALPART_EXPR(a);
3542    t2 = IMAGPART_EXPR(b);
3543    For conditions enabling the transform see divmod_candidate_p().
3544
3545    The pass has three parts:
3546    1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
3547       other trunc_div_expr and trunc_mod_expr stmts.
3548    2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
3549       to stmts vector.
3550    3) Insert DIVMOD call just before top_stmt and update entries in
3551       stmts vector to use return value of DIMOVD (REALEXPR_PART for div,
3552       IMAGPART_EXPR for mod).  */
3553
3554 static bool
3555 convert_to_divmod (gassign *stmt)
3556 {
3557   if (stmt_can_throw_internal (cfun, stmt)
3558       || !divmod_candidate_p (stmt))
3559     return false;
3560
3561   tree op1 = gimple_assign_rhs1 (stmt);
3562   tree op2 = gimple_assign_rhs2 (stmt);
3563
3564   imm_use_iterator use_iter;
3565   gimple *use_stmt;
3566   auto_vec<gimple *> stmts;
3567
3568   gimple *top_stmt = stmt;
3569   basic_block top_bb = gimple_bb (stmt);
3570
3571   /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
3572      at-least stmt and possibly other trunc_div/trunc_mod stmts
3573      having same operands as stmt.  */
3574
3575   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
3576     {
3577       if (is_gimple_assign (use_stmt)
3578           && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
3579               || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
3580           && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
3581           && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
3582         {
3583           if (stmt_can_throw_internal (cfun, use_stmt))
3584             continue;
3585
3586           basic_block bb = gimple_bb (use_stmt);
3587
3588           if (bb == top_bb)
3589             {
3590               if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
3591                 top_stmt = use_stmt;
3592             }
3593           else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
3594             {
3595               top_bb = bb;
3596               top_stmt = use_stmt;
3597             }
3598         }
3599     }
3600
3601   tree top_op1 = gimple_assign_rhs1 (top_stmt);
3602   tree top_op2 = gimple_assign_rhs2 (top_stmt);
3603
3604   stmts.safe_push (top_stmt);
3605   bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
3606
3607   /* Part 2: Add all trunc_div/trunc_mod statements domianted by top_bb
3608      to stmts vector. The 2nd loop will always add stmt to stmts vector, since
3609      gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
3610      2nd loop ends up adding at-least single trunc_mod_expr stmt.  */
3611
3612   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
3613     {
3614       if (is_gimple_assign (use_stmt)
3615           && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
3616               || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
3617           && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
3618           && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
3619         {
3620           if (use_stmt == top_stmt
3621               || stmt_can_throw_internal (cfun, use_stmt)
3622               || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
3623             continue;
3624
3625           stmts.safe_push (use_stmt);
3626           if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
3627             div_seen = true;
3628         }
3629     }
3630
3631   if (!div_seen)
3632     return false;
3633
3634   /* Part 3: Create libcall to internal fn DIVMOD:
3635      divmod_tmp = DIVMOD (op1, op2).  */
3636
3637   gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
3638   tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
3639                                  call_stmt, "divmod_tmp");
3640   gimple_call_set_lhs (call_stmt, res);
3641   /* We rejected throwing statements above.  */
3642   gimple_call_set_nothrow (call_stmt, true);
3643
3644   /* Insert the call before top_stmt.  */
3645   gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
3646   gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
3647
3648   widen_mul_stats.divmod_calls_inserted++;
3649
3650   /* Update all statements in stmts vector:
3651      lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
3652      lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>.  */
3653
3654   for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
3655     {
3656       tree new_rhs;
3657
3658       switch (gimple_assign_rhs_code (use_stmt))
3659         {
3660           case TRUNC_DIV_EXPR:
3661             new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
3662             break;
3663
3664           case TRUNC_MOD_EXPR:
3665             new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
3666             break;
3667
3668           default:
3669             gcc_unreachable ();
3670         }
3671
3672       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3673       gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
3674       update_stmt (use_stmt);
3675     }
3676
3677   return true;
3678 }
3679
3680 /* Find integer multiplications where the operands are extended from
3681    smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
3682    where appropriate.  */
3683
3684 namespace {
3685
3686 const pass_data pass_data_optimize_widening_mul =
3687 {
3688   GIMPLE_PASS, /* type */
3689   "widening_mul", /* name */
3690   OPTGROUP_NONE, /* optinfo_flags */
3691   TV_TREE_WIDEN_MUL, /* tv_id */
3692   PROP_ssa, /* properties_required */
3693   0, /* properties_provided */
3694   0, /* properties_destroyed */
3695   0, /* todo_flags_start */
3696   TODO_update_ssa, /* todo_flags_finish */
3697 };
3698
3699 class pass_optimize_widening_mul : public gimple_opt_pass
3700 {
3701 public:
3702   pass_optimize_widening_mul (gcc::context *ctxt)
3703     : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
3704   {}
3705
3706   /* opt_pass methods: */
3707   virtual bool gate (function *)
3708     {
3709       return flag_expensive_optimizations && optimize;
3710     }
3711
3712   virtual unsigned int execute (function *);
3713
3714 }; // class pass_optimize_widening_mul
3715
3716 /* Walker class to perform the transformation in reverse dominance order. */
3717
3718 class math_opts_dom_walker : public dom_walker
3719 {
3720 public:
3721   /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
3722      if walking modidifes the CFG.  */
3723
3724   math_opts_dom_walker (bool *cfg_changed_p)
3725     : dom_walker (CDI_DOMINATORS), m_last_result_set (),
3726       m_cfg_changed_p (cfg_changed_p) {}
3727
3728   /* The actual actions performed in the walk.  */
3729
3730   virtual void after_dom_children (basic_block);
3731
3732   /* Set of results of chains of multiply and add statement combinations that
3733      were not transformed into FMAs because of active deferring.  */
3734   hash_set<tree> m_last_result_set;
3735
3736   /* Pointer to a flag of the user that needs to be set if CFG has been
3737      modified.  */
3738   bool *m_cfg_changed_p;
3739 };
3740
3741 void
3742 math_opts_dom_walker::after_dom_children (basic_block bb)
3743 {
3744   gimple_stmt_iterator gsi;
3745
3746   fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
3747
3748   for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
3749     {
3750       gimple *stmt = gsi_stmt (gsi);
3751       enum tree_code code;
3752
3753       if (is_gimple_assign (stmt))
3754         {
3755           code = gimple_assign_rhs_code (stmt);
3756           switch (code)
3757             {
3758             case MULT_EXPR:
3759               if (!convert_mult_to_widen (stmt, &gsi)
3760                   && !convert_expand_mult_copysign (stmt, &gsi)
3761                   && convert_mult_to_fma (stmt,
3762                                           gimple_assign_rhs1 (stmt),
3763                                           gimple_assign_rhs2 (stmt),
3764                                           &fma_state))
3765                 {
3766                   gsi_remove (&gsi, true);
3767                   release_defs (stmt);
3768                   continue;
3769                 }
3770               break;
3771
3772             case PLUS_EXPR:
3773             case MINUS_EXPR:
3774               if (!convert_plusminus_to_widen (&gsi, stmt, code))
3775                 match_uaddsub_overflow (&gsi, stmt, code);
3776               break;
3777
3778             case TRUNC_MOD_EXPR:
3779               convert_to_divmod (as_a<gassign *> (stmt));
3780               break;
3781
3782             default:;
3783             }
3784         }
3785       else if (is_gimple_call (stmt))
3786         {
3787           switch (gimple_call_combined_fn (stmt))
3788             {
3789             CASE_CFN_POW:
3790               if (gimple_call_lhs (stmt)
3791                   && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
3792                   && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
3793                                  &dconst2)
3794                   && convert_mult_to_fma (stmt,
3795                                           gimple_call_arg (stmt, 0),
3796                                           gimple_call_arg (stmt, 0),
3797                                           &fma_state))
3798                 {
3799                   unlink_stmt_vdef (stmt);
3800                   if (gsi_remove (&gsi, true)
3801                       && gimple_purge_dead_eh_edges (bb))
3802                     *m_cfg_changed_p = true;
3803                   release_defs (stmt);
3804                   continue;
3805                 }
3806               break;
3807
3808             case CFN_COND_MUL:
3809               if (convert_mult_to_fma (stmt,
3810                                        gimple_call_arg (stmt, 1),
3811                                        gimple_call_arg (stmt, 2),
3812                                        &fma_state,
3813                                        gimple_call_arg (stmt, 0)))
3814
3815                 {
3816                   gsi_remove (&gsi, true);
3817                   release_defs (stmt);
3818                   continue;
3819                 }
3820               break;
3821
3822             case CFN_LAST:
3823               cancel_fma_deferring (&fma_state);
3824               break;
3825
3826             default:
3827               break;
3828             }
3829         }
3830       gsi_next (&gsi);
3831     }
3832   if (fma_state.m_deferring_p
3833       && fma_state.m_initial_phi)
3834     {
3835       gcc_checking_assert (fma_state.m_last_result);
3836       if (!last_fma_candidate_feeds_initial_phi (&fma_state,
3837                                                  &m_last_result_set))
3838         cancel_fma_deferring (&fma_state);
3839       else
3840         m_last_result_set.add (fma_state.m_last_result);
3841     }
3842 }
3843
3844
3845 unsigned int
3846 pass_optimize_widening_mul::execute (function *fun)
3847 {
3848   bool cfg_changed = false;
3849
3850   memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
3851   calculate_dominance_info (CDI_DOMINATORS);
3852   renumber_gimple_stmt_uids (cfun);
3853
3854   math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
3855
3856   statistics_counter_event (fun, "widening multiplications inserted",
3857                             widen_mul_stats.widen_mults_inserted);
3858   statistics_counter_event (fun, "widening maccs inserted",
3859                             widen_mul_stats.maccs_inserted);
3860   statistics_counter_event (fun, "fused multiply-adds inserted",
3861                             widen_mul_stats.fmas_inserted);
3862   statistics_counter_event (fun, "divmod calls inserted",
3863                             widen_mul_stats.divmod_calls_inserted);
3864
3865   return cfg_changed ? TODO_cleanup_cfg : 0;
3866 }
3867
3868 } // anon namespace
3869
3870 gimple_opt_pass *
3871 make_pass_optimize_widening_mul (gcc::context *ctxt)
3872 {
3873   return new pass_optimize_widening_mul (ctxt);
3874 }