gcc/tree-ssa-math-opts.c
1 /* Global, SSA-based optimizations using mathematical identities.
2 Copyright (C) 2005-2017 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* The first mini-pass in this file tries to CSE reciprocal
21 operations. These are common in sequences such as this one:
23 modulus = sqrt(x*x + y*y + z*z);
24 x = x / modulus;
25 y = y / modulus;
26 z = z / modulus;
28 that can be optimized to
30 modulus = sqrt(x*x + y*y + z*z);
31 rmodulus = 1.0 / modulus;
32 x = x * rmodulus;
33 y = y * rmodulus;
34 z = z * rmodulus;
36 We do this for loop invariant divisors, and with this pass whenever
37 we notice that a division has the same divisor multiple times.
39 Of course, like in PRE, we don't insert a division if a dominator
40 already has one. However, this cannot be done as an extension of
41 PRE for several reasons.
43    First of all, experiments have shown that the
44 transformation is not always useful if there are only two divisions
45 by the same divisor. This is probably because modern processors
46 can pipeline the divisions; on older, in-order processors it should
47 still be effective to optimize two divisions by the same number.
48 We make this a param, and it shall be called N in the remainder of
49 this comment.
51 Second, if trapping math is active, we have less freedom on where
52 to insert divisions: we can only do so in basic blocks that already
53    contain one.  (If divisions don't trap, we can instead insert the
54    divisions elsewhere, namely in blocks that are common dominators
55    of those that contain the division.)
57 We really don't want to compute the reciprocal unless a division will
58 be found. To do this, we won't insert the division in a basic block
59    that has fewer than N divisions *post-dominating* it.
61 The algorithm constructs a subset of the dominator tree, holding the
62 blocks containing the divisions and the common dominators to them,
63    and walks it twice.  The first walk is in post-order, and it annotates
64 each block with the number of divisions that post-dominate it: this
65 gives information on where divisions can be inserted profitably.
66 The second walk is in pre-order, and it inserts divisions as explained
67 above, and replaces divisions by multiplications.
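   As a small illustration (a made-up CFG, not taken from real code):
   suppose block B2 falls through unconditionally to block B3, each block
   contains one division by the same SSA name, and N is 2.  The post-order
   walk assigns B3 a count of 1 and B2 a count of 2, because B3
   post-dominates B2; the pre-order walk then inserts the reciprocal just
   before the division in B2 and rewrites both divisions into
   multiplications.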
69 In the best case, the cost of the pass is O(n_statements). In the
70 worst-case, the cost is due to creating the dominator tree subset,
71 with a cost of O(n_basic_blocks ^ 2); however this can only happen
72 for n_statements / n_basic_blocks statements. So, the amortized cost
73 of creating the dominator tree subset is O(n_basic_blocks) and the
74 worst-case cost of the pass is O(n_statements * n_basic_blocks).
76 More practically, the cost will be small because there are few
77 divisions, and they tend to be in the same basic block, so insert_bb
78 is called very few times.
80 If we did this using domwalk.c, an efficient implementation would have
81 to work on all the variables in a single pass, because we could not
82 work on just a subset of the dominator tree, as we do now, and the
83 cost would also be something like O(n_statements * n_basic_blocks).
84 The data structures would be more complex in order to work on all the
85 variables in a single pass. */
87 #include "config.h"
88 #include "system.h"
89 #include "coretypes.h"
90 #include "backend.h"
91 #include "target.h"
92 #include "rtl.h"
93 #include "tree.h"
94 #include "gimple.h"
95 #include "predict.h"
96 #include "alloc-pool.h"
97 #include "tree-pass.h"
98 #include "ssa.h"
99 #include "optabs-tree.h"
100 #include "gimple-pretty-print.h"
101 #include "alias.h"
102 #include "fold-const.h"
103 #include "gimple-fold.h"
104 #include "gimple-iterator.h"
105 #include "gimplify.h"
106 #include "gimplify-me.h"
107 #include "stor-layout.h"
108 #include "tree-cfg.h"
109 #include "tree-dfa.h"
110 #include "tree-ssa.h"
111 #include "builtins.h"
112 #include "params.h"
113 #include "internal-fn.h"
114 #include "case-cfn-macros.h"
115 #include "optabs-libfuncs.h"
116 #include "tree-eh.h"
117 #include "targhooks.h"
119 /* This structure represents one basic block that either computes a
120    division, or is a common dominator for basic blocks that compute a
121 division. */
122 struct occurrence {
123 /* The basic block represented by this structure. */
124 basic_block bb;
126 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
127 inserted in BB. */
128 tree recip_def;
130 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
131 was inserted in BB. */
132 gimple *recip_def_stmt;
134 /* Pointer to a list of "struct occurrence"s for blocks dominated
135 by BB. */
136 struct occurrence *children;
138   /* Pointer to the next "struct occurrence" in the list of blocks
139 sharing a common dominator. */
140 struct occurrence *next;
142 /* The number of divisions that are in BB before compute_merit. The
143 number of divisions that are in BB or post-dominate it after
144 compute_merit. */
145 int num_divisions;
147 /* True if the basic block has a division, false if it is a common
148 dominator for basic blocks that do. If it is false and trapping
149 math is active, BB is not a candidate for inserting a reciprocal. */
150 bool bb_has_division;
153 static struct
155 /* Number of 1.0/X ops inserted. */
156 int rdivs_inserted;
158 /* Number of 1.0/FUNC ops inserted. */
159 int rfuncs_inserted;
160 } reciprocal_stats;
162 static struct
164 /* Number of cexpi calls inserted. */
165 int inserted;
166 } sincos_stats;
168 static struct
170 /* Number of widening multiplication ops inserted. */
171 int widen_mults_inserted;
173 /* Number of integer multiply-and-accumulate ops inserted. */
174 int maccs_inserted;
176 /* Number of fp fused multiply-add ops inserted. */
177 int fmas_inserted;
179 /* Number of divmod calls inserted. */
180 int divmod_calls_inserted;
181 } widen_mul_stats;
183 /* The instance of "struct occurrence" representing the highest
184 interesting block in the dominator tree. */
185 static struct occurrence *occ_head;
187 /* Allocation pool for getting instances of "struct occurrence". */
188 static object_allocator<occurrence> *occ_pool;
192 /* Allocate and return a new struct occurrence for basic block BB,
193 whose children list is headed by CHILDREN. */
194 static struct occurrence *
195 occ_new (basic_block bb, struct occurrence *children)
197 struct occurrence *occ;
199 bb->aux = occ = occ_pool->allocate ();
200 memset (occ, 0, sizeof (struct occurrence));
202 occ->bb = bb;
203 occ->children = children;
204 return occ;
208 /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
209 list of "struct occurrence"s, one per basic block, having IDOM as
210 their common dominator.
212 We try to insert NEW_OCC as deep as possible in the tree, and we also
213 insert any other block that is a common dominator for BB and one
214 block already in the tree. */
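/* For example (hypothetical blocks): if the tree already holds an
   occurrence for block B5, and NEW_OCC is for a block B7 such that
   neither dominates the other but both are dominated by a block B3 that
   is strictly deeper than IDOM, then an occurrence for B3 is created on
   the fly and B5 and B7 become its two children.  */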
216 static void
217 insert_bb (struct occurrence *new_occ, basic_block idom,
218 struct occurrence **p_head)
220 struct occurrence *occ, **p_occ;
222 for (p_occ = p_head; (occ = *p_occ) != NULL; )
224 basic_block bb = new_occ->bb, occ_bb = occ->bb;
225 basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
226 if (dom == bb)
228 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
229 from its list. */
230 *p_occ = occ->next;
231 occ->next = new_occ->children;
232 new_occ->children = occ;
234 	  /* Try the next block (it might also be dominated by BB).  */
237 else if (dom == occ_bb)
239 /* OCC_BB dominates BB. Tail recurse to look deeper. */
240 insert_bb (new_occ, dom, &occ->children);
241 return;
244 else if (dom != idom)
246 gcc_assert (!dom->aux);
248 /* There is a dominator between IDOM and BB, add it and make
249 two children out of NEW_OCC and OCC. First, remove OCC from
250 its list. */
251 *p_occ = occ->next;
252 new_occ->next = occ;
253 occ->next = NULL;
255 /* None of the previous blocks has DOM as a dominator: if we tail
256 recursed, we would reexamine them uselessly. Just switch BB with
257 DOM, and go on looking for blocks dominated by DOM. */
258 new_occ = occ_new (dom, new_occ);
261 else
263 /* Nothing special, go on with the next element. */
264 p_occ = &occ->next;
268 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
269 new_occ->next = *p_head;
270 *p_head = new_occ;
273 /* Register that we found a division in BB. */
275 static inline void
276 register_division_in (basic_block bb)
278 struct occurrence *occ;
280 occ = (struct occurrence *) bb->aux;
281 if (!occ)
283 occ = occ_new (bb, NULL);
284 insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
287 occ->bb_has_division = true;
288 occ->num_divisions++;
292 /* Compute the number of divisions that postdominate each block in OCC and
293 its children. */
295 static void
296 compute_merit (struct occurrence *occ)
298 struct occurrence *occ_child;
299 basic_block dom = occ->bb;
301 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
303 basic_block bb;
304 if (occ_child->children)
305 compute_merit (occ_child);
307 if (flag_exceptions)
308 bb = single_noncomplex_succ (dom);
309 else
310 bb = dom;
312 if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
313 occ->num_divisions += occ_child->num_divisions;
318 /* Return whether USE_STMT is a floating-point division by DEF. */
319 static inline bool
320 is_division_by (gimple *use_stmt, tree def)
322 return is_gimple_assign (use_stmt)
323 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
324 && gimple_assign_rhs2 (use_stmt) == def
325 	 /* Do not recognize x / x as a valid division, as we would get
326 	    confused later by replacing all immediate uses of x in such
327 a stmt. */
328 && gimple_assign_rhs1 (use_stmt) != def;
331 /* Walk the subset of the dominator tree rooted at OCC, setting the
332 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
333 the given basic block. The field may be left NULL, of course,
334 if it is not possible or profitable to do the optimization.
336    DEF_GSI is an iterator pointing at the statement defining DEF.
337 If RECIP_DEF is set, a dominator already has a computation that can
338 be used. */
340 static void
341 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
342 tree def, tree recip_def, int threshold)
344 tree type;
345 gassign *new_stmt;
346 gimple_stmt_iterator gsi;
347 struct occurrence *occ_child;
349 if (!recip_def
350 && (occ->bb_has_division || !flag_trapping_math)
351 && occ->num_divisions >= threshold)
353 /* Make a variable with the replacement and substitute it. */
354 type = TREE_TYPE (def);
355 recip_def = create_tmp_reg (type, "reciptmp");
356 new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
357 build_one_cst (type), def);
359 if (occ->bb_has_division)
361 /* Case 1: insert before an existing division. */
362 gsi = gsi_after_labels (occ->bb);
363 while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
364 gsi_next (&gsi);
366 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
368 else if (def_gsi && occ->bb == def_gsi->bb)
370 /* Case 2: insert right after the definition. Note that this will
371 never happen if the definition statement can throw, because in
372 that case the sole successor of the statement's basic block will
373 dominate all the uses as well. */
374 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
376 else
378 /* Case 3: insert in a basic block not containing defs/uses. */
379 gsi = gsi_after_labels (occ->bb);
380 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
383 reciprocal_stats.rdivs_inserted++;
385 occ->recip_def_stmt = new_stmt;
388 occ->recip_def = recip_def;
389 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
390 insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold);
394 /* Replace the division at USE_P with a multiplication by the reciprocal, if
395 possible. */
397 static inline void
398 replace_reciprocal (use_operand_p use_p)
400 gimple *use_stmt = USE_STMT (use_p);
401 basic_block bb = gimple_bb (use_stmt);
402 struct occurrence *occ = (struct occurrence *) bb->aux;
404 if (optimize_bb_for_speed_p (bb)
405 && occ->recip_def && use_stmt != occ->recip_def_stmt)
407 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
408 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
409 SET_USE (use_p, occ->recip_def);
410 fold_stmt_inplace (&gsi);
411 update_stmt (use_stmt);
416 /* Free OCC and return one more "struct occurrence" to be freed. */
418 static struct occurrence *
419 free_bb (struct occurrence *occ)
421 struct occurrence *child, *next;
423 /* First get the two pointers hanging off OCC. */
424 next = occ->next;
425 child = occ->children;
426 occ->bb->aux = NULL;
427 occ_pool->remove (occ);
429 /* Now ensure that we don't recurse unless it is necessary. */
430 if (!child)
431 return next;
432 else
434 while (next)
435 next = free_bb (next);
437 return child;
442 /* Look for floating-point divisions among DEF's uses, and try to
443 replace them by multiplications with the reciprocal. Add
444 as many statements computing the reciprocal as needed.
446 DEF must be a GIMPLE register of a floating-point type. */
448 static void
449 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
451 use_operand_p use_p;
452 imm_use_iterator use_iter;
453 struct occurrence *occ;
454 int count = 0, threshold;
456 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));
458 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
460 gimple *use_stmt = USE_STMT (use_p);
461 if (is_division_by (use_stmt, def))
463 register_division_in (gimple_bb (use_stmt));
464 count++;
468 /* Do the expensive part only if we can hope to optimize something. */
469 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
470 if (count >= threshold)
472 gimple *use_stmt;
473 for (occ = occ_head; occ; occ = occ->next)
475 compute_merit (occ);
476 insert_reciprocals (def_gsi, occ, def, NULL, threshold);
479 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
481 if (is_division_by (use_stmt, def))
483 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
484 replace_reciprocal (use_p);
489 for (occ = occ_head; occ; )
490 occ = free_bb (occ);
492 occ_head = NULL;
495 /* Return an internal function that implements the reciprocal of CALL,
496 or IFN_LAST if there is no such function that the target supports. */
498 internal_fn
499 internal_fn_reciprocal (gcall *call)
501 internal_fn ifn;
503 switch (gimple_call_combined_fn (call))
505 CASE_CFN_SQRT:
506 CASE_CFN_SQRT_FN:
507 ifn = IFN_RSQRT;
508 break;
510 default:
511 return IFN_LAST;
514 tree_pair types = direct_internal_fn_types (ifn, call);
515 if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
516 return IFN_LAST;
518 return ifn;
521 /* Go through all the floating-point SSA_NAMEs, and call
522 execute_cse_reciprocals_1 on each of them. */
523 namespace {
525 const pass_data pass_data_cse_reciprocals =
527 GIMPLE_PASS, /* type */
528 "recip", /* name */
529 OPTGROUP_NONE, /* optinfo_flags */
530 TV_NONE, /* tv_id */
531 PROP_ssa, /* properties_required */
532 0, /* properties_provided */
533 0, /* properties_destroyed */
534 0, /* todo_flags_start */
535 TODO_update_ssa, /* todo_flags_finish */
538 class pass_cse_reciprocals : public gimple_opt_pass
540 public:
541 pass_cse_reciprocals (gcc::context *ctxt)
542 : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
545 /* opt_pass methods: */
546 virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
547 virtual unsigned int execute (function *);
549 }; // class pass_cse_reciprocals
551 unsigned int
552 pass_cse_reciprocals::execute (function *fun)
554 basic_block bb;
555 tree arg;
557 occ_pool = new object_allocator<occurrence> ("dominators for recip");
559 memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
560 calculate_dominance_info (CDI_DOMINATORS);
561 calculate_dominance_info (CDI_POST_DOMINATORS);
563 if (flag_checking)
564 FOR_EACH_BB_FN (bb, fun)
565 gcc_assert (!bb->aux);
567 for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
568 if (FLOAT_TYPE_P (TREE_TYPE (arg))
569 && is_gimple_reg (arg))
571 tree name = ssa_default_def (fun, arg);
572 if (name)
573 execute_cse_reciprocals_1 (NULL, name);
576 FOR_EACH_BB_FN (bb, fun)
578 tree def;
580 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
581 gsi_next (&gsi))
583 gphi *phi = gsi.phi ();
584 def = PHI_RESULT (phi);
585 if (! virtual_operand_p (def)
586 && FLOAT_TYPE_P (TREE_TYPE (def)))
587 execute_cse_reciprocals_1 (NULL, def);
590 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
591 gsi_next (&gsi))
593 gimple *stmt = gsi_stmt (gsi);
595 if (gimple_has_lhs (stmt)
596 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
597 && FLOAT_TYPE_P (TREE_TYPE (def))
598 && TREE_CODE (def) == SSA_NAME)
599 execute_cse_reciprocals_1 (&gsi, def);
602 if (optimize_bb_for_size_p (bb))
603 continue;
605 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
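/* For instance, with -freciprocal-math and a target that provides an
   rsqrt instruction, t = a / sqrt (b) is rewritten here as
   t = a * rsqrt (b), provided every use of the sqrt result is such a
   division.  */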
606 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
607 gsi_next (&gsi))
609 gimple *stmt = gsi_stmt (gsi);
611 if (is_gimple_assign (stmt)
612 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
614 tree arg1 = gimple_assign_rhs2 (stmt);
615 gimple *stmt1;
617 if (TREE_CODE (arg1) != SSA_NAME)
618 continue;
620 stmt1 = SSA_NAME_DEF_STMT (arg1);
622 if (is_gimple_call (stmt1)
623 && gimple_call_lhs (stmt1))
625 bool fail;
626 imm_use_iterator ui;
627 use_operand_p use_p;
628 tree fndecl = NULL_TREE;
630 gcall *call = as_a <gcall *> (stmt1);
631 internal_fn ifn = internal_fn_reciprocal (call);
632 if (ifn == IFN_LAST)
634 fndecl = gimple_call_fndecl (call);
635 if (!fndecl
636 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_MD)
637 continue;
638 fndecl = targetm.builtin_reciprocal (fndecl);
639 if (!fndecl)
640 continue;
643 /* Check that all uses of the SSA name are divisions,
644 otherwise replacing the defining statement will do
645 the wrong thing. */
646 fail = false;
647 FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
649 gimple *stmt2 = USE_STMT (use_p);
650 if (is_gimple_debug (stmt2))
651 continue;
652 if (!is_gimple_assign (stmt2)
653 || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
654 || gimple_assign_rhs1 (stmt2) == arg1
655 || gimple_assign_rhs2 (stmt2) != arg1)
657 fail = true;
658 break;
661 if (fail)
662 continue;
664 gimple_replace_ssa_lhs (call, arg1);
665 if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
667 auto_vec<tree, 4> args;
668 for (unsigned int i = 0;
669 i < gimple_call_num_args (call); i++)
670 args.safe_push (gimple_call_arg (call, i));
671 gcall *stmt2;
672 if (ifn == IFN_LAST)
673 stmt2 = gimple_build_call_vec (fndecl, args);
674 else
675 stmt2 = gimple_build_call_internal_vec (ifn, args);
676 gimple_call_set_lhs (stmt2, arg1);
677 if (gimple_vdef (call))
679 gimple_set_vdef (stmt2, gimple_vdef (call));
680 SSA_NAME_DEF_STMT (gimple_vdef (stmt2)) = stmt2;
682 gimple_call_set_nothrow (stmt2,
683 gimple_call_nothrow_p (call));
684 gimple_set_vuse (stmt2, gimple_vuse (call));
685 gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
686 gsi_replace (&gsi2, stmt2, true);
688 else
690 if (ifn == IFN_LAST)
691 gimple_call_set_fndecl (call, fndecl);
692 else
693 gimple_call_set_internal_fn (call, ifn);
694 update_stmt (call);
696 reciprocal_stats.rfuncs_inserted++;
698 FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
700 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
701 gimple_assign_set_rhs_code (stmt, MULT_EXPR);
702 fold_stmt_inplace (&gsi);
703 update_stmt (stmt);
710 statistics_counter_event (fun, "reciprocal divs inserted",
711 reciprocal_stats.rdivs_inserted);
712 statistics_counter_event (fun, "reciprocal functions inserted",
713 reciprocal_stats.rfuncs_inserted);
715 free_dominance_info (CDI_DOMINATORS);
716 free_dominance_info (CDI_POST_DOMINATORS);
717 delete occ_pool;
718 return 0;
721 } // anon namespace
723 gimple_opt_pass *
724 make_pass_cse_reciprocals (gcc::context *ctxt)
726 return new pass_cse_reciprocals (ctxt);
729 /* Records an occurrence at statement USE_STMT in the vector of statements
730    STMTS if it is dominated by *TOP_BB, dominates it, or *TOP_BB has not
731    yet been initialized.  Returns true if the occurrence was pushed on
732 the vector. Adjusts *TOP_BB to be the basic block dominating all
733 statements in the vector. */
735 static bool
736 maybe_record_sincos (vec<gimple *> *stmts,
737 basic_block *top_bb, gimple *use_stmt)
739 basic_block use_bb = gimple_bb (use_stmt);
740 if (*top_bb
741 && (*top_bb == use_bb
742 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
743 stmts->safe_push (use_stmt);
744 else if (!*top_bb
745 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
747 stmts->safe_push (use_stmt);
748 *top_bb = use_bb;
750 else
751 return false;
753 return true;
756 /* Look for sin, cos and cexpi calls with the same argument NAME and
757 create a single call to cexpi CSEing the result in this case.
758 We first walk over all immediate uses of the argument collecting
759 statements that we can CSE in a vector and in a second pass replace
760 the statement rhs with a REALPART or IMAGPART expression on the
761 result of the cexpi call we insert before the use statement that
762 dominates all other candidates. */
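/* For example (illustrative GIMPLE, SSA names made up):

     s_1 = sin (x_5);
     c_2 = cos (x_5);

   becomes

     sincostmp_9 = cexpi (x_5);
     s_1 = IMAGPART_EXPR <sincostmp_9>;
     c_2 = REALPART_EXPR <sincostmp_9>;  */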
764 static bool
765 execute_cse_sincos_1 (tree name)
767 gimple_stmt_iterator gsi;
768 imm_use_iterator use_iter;
769 tree fndecl, res, type;
770 gimple *def_stmt, *use_stmt, *stmt;
771 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
772 auto_vec<gimple *> stmts;
773 basic_block top_bb = NULL;
774 int i;
775 bool cfg_changed = false;
777 type = TREE_TYPE (name);
778 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
780 if (gimple_code (use_stmt) != GIMPLE_CALL
781 || !gimple_call_lhs (use_stmt))
782 continue;
784 switch (gimple_call_combined_fn (use_stmt))
786 CASE_CFN_COS:
787 seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
788 break;
790 CASE_CFN_SIN:
791 seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
792 break;
794 CASE_CFN_CEXPI:
795 seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
796 break;
798 default:;
802 if (seen_cos + seen_sin + seen_cexpi <= 1)
803 return false;
805 /* Simply insert cexpi at the beginning of top_bb but not earlier than
806 the name def statement. */
807 fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
808 if (!fndecl)
809 return false;
810 stmt = gimple_build_call (fndecl, 1, name);
811 res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
812 gimple_call_set_lhs (stmt, res);
814 def_stmt = SSA_NAME_DEF_STMT (name);
815 if (!SSA_NAME_IS_DEFAULT_DEF (name)
816 && gimple_code (def_stmt) != GIMPLE_PHI
817 && gimple_bb (def_stmt) == top_bb)
819 gsi = gsi_for_stmt (def_stmt);
820 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
822 else
824 gsi = gsi_after_labels (top_bb);
825 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
827 sincos_stats.inserted++;
829 /* And adjust the recorded old call sites. */
830 for (i = 0; stmts.iterate (i, &use_stmt); ++i)
832 tree rhs = NULL;
834 switch (gimple_call_combined_fn (use_stmt))
836 CASE_CFN_COS:
837 rhs = fold_build1 (REALPART_EXPR, type, res);
838 break;
840 CASE_CFN_SIN:
841 rhs = fold_build1 (IMAGPART_EXPR, type, res);
842 break;
844 CASE_CFN_CEXPI:
845 rhs = res;
846 break;
848 default:;
849 gcc_unreachable ();
852 /* Replace call with a copy. */
853 stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
855 gsi = gsi_for_stmt (use_stmt);
856 gsi_replace (&gsi, stmt, true);
857 if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
858 cfg_changed = true;
861 return cfg_changed;
864 /* To evaluate powi(x,n), the floating point value x raised to the
865 constant integer exponent n, we use a hybrid algorithm that
866 combines the "window method" with look-up tables. For an
867 introduction to exponentiation algorithms and "addition chains",
868 see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
869 "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
870 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
871 Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
873 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
874 multiplications to inline before calling the system library's pow
875 function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
876 so this default never requires calling pow, powf or powl. */
878 #ifndef POWI_MAX_MULTS
879 #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
880 #endif
882 /* The size of the "optimal power tree" lookup table. All
883 exponents less than this value are simply looked up in the
884 powi_table below. This threshold is also used to size the
885 cache of pseudo registers that hold intermediate results. */
886 #define POWI_TABLE_SIZE 256
888 /* The size, in bits, of the window used in the "window method"
889 exponentiation algorithm. This is equivalent to a radix of
890 (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
891 #define POWI_WINDOW_SIZE 3
893 /* The following table is an efficient representation of an
894 "optimal power tree". For each value, i, the corresponding
895    value, j, in the table states that an optimal evaluation
896 sequence for calculating pow(x,i) can be found by evaluating
897 pow(x,j)*pow(x,i-j). An optimal power tree for the first
898 100 integers is given in Knuth's "Seminumerical algorithms". */
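/* For example, powi_table[15] is 9, so pow (x, 15) is evaluated as
   pow (x, 9) * pow (x, 6); recursing through the table,
   pow (x, 9) = pow (x, 6) * pow (x, 3),
   pow (x, 6) = pow (x, 3) * pow (x, 3),
   pow (x, 3) = pow (x, 2) * x and pow (x, 2) = x * x,
   i.e. five multiplications in total.  */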
900 static const unsigned char powi_table[POWI_TABLE_SIZE] =
902 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
903 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
904 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
905 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
906 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
907 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
908 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
909 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
910 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
911 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
912 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
913 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
914 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
915 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
916 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
917 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
918 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
919 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
920 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
921 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
922 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
923 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
924 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
925 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
926 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
927 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
928 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
929 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
930 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
931 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
932 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
933 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
937 /* Return the number of multiplications required to calculate
938 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
939 subroutine of powi_cost. CACHE is an array indicating
940 which exponents have already been calculated. */
942 static int
943 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
945 /* If we've already calculated this exponent, then this evaluation
946 doesn't require any additional multiplications. */
947 if (cache[n])
948 return 0;
950 cache[n] = true;
951 return powi_lookup_cost (n - powi_table[n], cache)
952 + powi_lookup_cost (powi_table[n], cache) + 1;
955 /* Return the number of multiplications required to calculate
956 powi(x,n) for an arbitrary x, given the exponent N. This
957 function needs to be kept in sync with powi_as_mults below. */
959 static int
960 powi_cost (HOST_WIDE_INT n)
962 bool cache[POWI_TABLE_SIZE];
963 unsigned HOST_WIDE_INT digit;
964 unsigned HOST_WIDE_INT val;
965 int result;
967 if (n == 0)
968 return 0;
970 /* Ignore the reciprocal when calculating the cost. */
971 val = (n < 0) ? -n : n;
973 /* Initialize the exponent cache. */
974 memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
975 cache[1] = true;
977 result = 0;
979 while (val >= POWI_TABLE_SIZE)
981 if (val & 1)
983 digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
984 result += powi_lookup_cost (digit, cache)
985 + POWI_WINDOW_SIZE + 1;
986 val >>= POWI_WINDOW_SIZE;
988 else
990 val >>= 1;
991 result++;
995 return result + powi_lookup_cost (val, cache);
998 /* Recursive subroutine of powi_as_mults. This function takes the
999 array, CACHE, of already calculated exponents and an exponent N and
1000 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1002 static tree
1003 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1004 HOST_WIDE_INT n, tree *cache)
1006 tree op0, op1, ssa_target;
1007 unsigned HOST_WIDE_INT digit;
1008 gassign *mult_stmt;
1010 if (n < POWI_TABLE_SIZE && cache[n])
1011 return cache[n];
1013 ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1015 if (n < POWI_TABLE_SIZE)
1017 cache[n] = ssa_target;
1018 op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1019 op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1021 else if (n & 1)
1023 digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1024 op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1025 op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1027 else
1029 op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1030 op1 = op0;
1033 mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1034 gimple_set_location (mult_stmt, loc);
1035 gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1037 return ssa_target;
1040 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1041 This function needs to be kept in sync with powi_cost above. */
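/* For instance, for N == 5 this emits (with illustrative SSA names)

     powmult_1 = ARG0 * ARG0;            <-- ARG0**2
     powmult_2 = ARG0 * powmult_1;       <-- ARG0**3
     powmult_3 = powmult_1 * powmult_2;  <-- ARG0**5

   and returns powmult_3, using powi_cost (5) == 3 multiplications.  */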
1043 static tree
1044 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1045 tree arg0, HOST_WIDE_INT n)
1047 tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1048 gassign *div_stmt;
1049 tree target;
1051 if (n == 0)
1052 return build_real (type, dconst1);
1054 memset (cache, 0, sizeof (cache));
1055 cache[1] = arg0;
1057 result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
1058 if (n >= 0)
1059 return result;
1061 /* If the original exponent was negative, reciprocate the result. */
1062 target = make_temp_ssa_name (type, NULL, "powmult");
1063 div_stmt = gimple_build_assign (target, RDIV_EXPR,
1064 build_real (type, dconst1), result);
1065 gimple_set_location (div_stmt, loc);
1066 gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1068 return target;
1071 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1072 location info LOC. If the arguments are appropriate, create an
1073 equivalent sequence of statements prior to GSI using an optimal
1074    number of multiplications, and return an expression holding the
1075 result. */
1077 static tree
1078 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1079 tree arg0, HOST_WIDE_INT n)
1081 /* Avoid largest negative number. */
1082 if (n != -n
1083 && ((n >= -1 && n <= 2)
1084 || (optimize_function_for_speed_p (cfun)
1085 && powi_cost (n) <= POWI_MAX_MULTS)))
1086 return powi_as_mults (gsi, loc, arg0, n);
1088 return NULL_TREE;
1091 /* Build a gimple call statement that calls FN with argument ARG.
1092 Set the lhs of the call statement to a fresh SSA name. Insert the
1093 statement prior to GSI's current position, and return the fresh
1094 SSA name. */
1096 static tree
1097 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1098 tree fn, tree arg)
1100 gcall *call_stmt;
1101 tree ssa_target;
1103 call_stmt = gimple_build_call (fn, 1, arg);
1104 ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1105 gimple_set_lhs (call_stmt, ssa_target);
1106 gimple_set_location (call_stmt, loc);
1107 gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1109 return ssa_target;
1112 /* Build a gimple binary operation with the given CODE and arguments
1113    ARG0, ARG1, assigning the result to a new SSA name created from
1114    NAME.  Insert the statement prior to GSI's current position, and
1115    return the fresh SSA name.  */
1117 static tree
1118 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1119 const char *name, enum tree_code code,
1120 tree arg0, tree arg1)
1122 tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1123 gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1124 gimple_set_location (stmt, loc);
1125 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1126 return result;
1129 /* Build a gimple reference operation with the given CODE and argument
1130 ARG, assigning the result to a new SSA name of TYPE with NAME.
1131 Insert the statement prior to GSI's current position, and return
1132 the fresh SSA name. */
1134 static inline tree
1135 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1136 const char *name, enum tree_code code, tree arg0)
1138 tree result = make_temp_ssa_name (type, NULL, name);
1139 gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1140 gimple_set_location (stmt, loc);
1141 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1142 return result;
1145 /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1146 prior to GSI's current position, and return the fresh SSA name. */
1148 static tree
1149 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1150 tree type, tree val)
1152 tree result = make_ssa_name (type);
1153 gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1154 gimple_set_location (stmt, loc);
1155 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1156 return result;
1159 struct pow_synth_sqrt_info
1161 bool *factors;
1162 unsigned int deepest;
1163 unsigned int num_mults;
1166 /* Return true iff the real value C can be represented as a
1167 sum of powers of 0.5 up to N. That is:
1168 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1169 Record in INFO the various parameters of the synthesis algorithm such
1170 as the factors a[i], the maximum 0.5 power and the number of
1171 multiplications that will be required. */
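/* For example, C == 0.625 is 0.5 + 0.125 == 0.5**1 + 0.5**3, so for
   N >= 3 this returns true with factors {1, 0, 1}, deepest == 3 and
   num_mults == 1 (one multiplication joins the sqrt (x) and
   sqrt (sqrt (sqrt (x))) chains).  */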
1173 bool
1174 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1175 struct pow_synth_sqrt_info *info)
1177 REAL_VALUE_TYPE factor = dconsthalf;
1178 REAL_VALUE_TYPE remainder = c;
1180 info->deepest = 0;
1181 info->num_mults = 0;
1182 memset (info->factors, 0, n * sizeof (bool));
1184 for (unsigned i = 0; i < n; i++)
1186 REAL_VALUE_TYPE res;
1188 /* If something inexact happened bail out now. */
1189 if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1190 return false;
1192 /* We have hit zero. The number is representable as a sum
1193 of powers of 0.5. */
1194 if (real_equal (&res, &dconst0))
1196 info->factors[i] = true;
1197 info->deepest = i + 1;
1198 return true;
1200 else if (!REAL_VALUE_NEGATIVE (res))
1202 remainder = res;
1203 info->factors[i] = true;
1204 info->num_mults++;
1206 else
1207 info->factors[i] = false;
1209 real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1211 return false;
1214 /* Return the tree corresponding to FN being applied
1215 to ARG N times at GSI and LOC.
1216 Look up previous results from CACHE if need be.
1217 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1219 static tree
1220 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1221 tree fn, location_t loc, tree *cache)
1223 tree res = cache[n];
1224 if (!res)
1226 tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1227 res = build_and_insert_call (gsi, loc, fn, prev);
1228 cache[n] = res;
1231 return res;
1234 /* Print to STREAM the repeated application of function FNAME to ARG
1235 N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1236 "foo (foo (x))". */
1238 static void
1239 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1240 unsigned int n)
1242 if (n == 0)
1243 fprintf (stream, "%s", arg);
1244 else
1246 fprintf (stream, "%s (", fname);
1247 print_nested_fn (stream, fname, arg, n - 1);
1248 fprintf (stream, ")");
1252 /* Print to STREAM the fractional sequence of sqrt chains
1253 applied to ARG, described by INFO. Used for the dump file. */
1255 static void
1256 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1257 struct pow_synth_sqrt_info *info)
1259 for (unsigned int i = 0; i < info->deepest; i++)
1261 bool is_set = info->factors[i];
1262 if (is_set)
1264 print_nested_fn (stream, "sqrt", arg, i + 1);
1265 if (i != info->deepest - 1)
1266 fprintf (stream, " * ");
1271 /* Print to STREAM a representation of raising ARG to an integer
1272 power N. Used for the dump file. */
1274 static void
1275 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1277 if (n > 1)
1278 fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1279 else if (n == 1)
1280 fprintf (stream, "%s", arg);
1283 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1284 square roots. Place at GSI and LOC. Limit the maximum depth
1285 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1286 result of the expanded sequence or NULL_TREE if the expansion failed.
1288 This routine assumes that ARG1 is a real number with a fractional part
1289 (the integer exponent case will have been handled earlier in
1290 gimple_expand_builtin_pow).
1292 For ARG1 > 0.0:
1293 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1294 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1295 FRAC_PART == ARG1 - WHOLE_PART:
1296 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1297 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1298 if it can be expressed as such, that is if FRAC_PART satisfies:
1299 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1300 where integer a[i] is either 0 or 1.
1302 Example:
1303 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1304 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1306 For ARG1 < 0.0 there are two approaches:
1307 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1308 is calculated as above.
1310 Example:
1311 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1312 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1314 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1315 FRAC_PART := ARG1 - WHOLE_PART
1316 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1317 Example:
1318 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1319 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1321 For ARG1 < 0.0 we choose between (A) and (B) depending on
1322 how many multiplications we'd have to do.
1323 So, for the example in (B): POW (x, -5.875), if we were to
1324 follow algorithm (A) we would produce:
1325 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1326 which contains more multiplications than approach (B).
1328 Hopefully, this approach will eliminate potentially expensive POW library
1329 calls when unsafe floating point math is enabled and allow the compiler to
1330 further optimise the multiplies, square roots and divides produced by this
1331 function. */
1333 static tree
1334 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1335 tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1337 tree type = TREE_TYPE (arg0);
1338 machine_mode mode = TYPE_MODE (type);
1339 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1340 bool one_over = true;
1342 if (!sqrtfn)
1343 return NULL_TREE;
1345 if (TREE_CODE (arg1) != REAL_CST)
1346 return NULL_TREE;
1348 REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1350 gcc_assert (max_depth > 0);
1351 tree *cache = XALLOCAVEC (tree, max_depth + 1);
1353 struct pow_synth_sqrt_info synth_info;
1354 synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1355 synth_info.deepest = 0;
1356 synth_info.num_mults = 0;
1358 bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1359 REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1361 /* The whole and fractional parts of exp. */
1362 REAL_VALUE_TYPE whole_part;
1363 REAL_VALUE_TYPE frac_part;
1365 real_floor (&whole_part, mode, &exp);
1366 real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1369 REAL_VALUE_TYPE ceil_whole = dconst0;
1370 REAL_VALUE_TYPE ceil_fract = dconst0;
1372 if (neg_exp)
1374 real_ceil (&ceil_whole, mode, &exp);
1375 real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1378 if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1379 return NULL_TREE;
1381 /* Check whether it's more profitable to not use 1.0 / ... */
1382 if (neg_exp)
1384 struct pow_synth_sqrt_info alt_synth_info;
1385 alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1386 alt_synth_info.deepest = 0;
1387 alt_synth_info.num_mults = 0;
1389 if (representable_as_half_series_p (ceil_fract, max_depth,
1390 &alt_synth_info)
1391 && alt_synth_info.deepest <= synth_info.deepest
1392 && alt_synth_info.num_mults < synth_info.num_mults)
1394 whole_part = ceil_whole;
1395 frac_part = ceil_fract;
1396 synth_info.deepest = alt_synth_info.deepest;
1397 synth_info.num_mults = alt_synth_info.num_mults;
1398 memcpy (synth_info.factors, alt_synth_info.factors,
1399 (max_depth + 1) * sizeof (bool));
1400 one_over = false;
1404 HOST_WIDE_INT n = real_to_integer (&whole_part);
1405 REAL_VALUE_TYPE cint;
1406 real_from_integer (&cint, VOIDmode, n, SIGNED);
1408 if (!real_identical (&whole_part, &cint))
1409 return NULL_TREE;
1411 if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1412 return NULL_TREE;
1414 memset (cache, 0, (max_depth + 1) * sizeof (tree));
1416 tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1418 /* Calculate the integer part of the exponent. */
1419 if (n > 1)
1421 integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1422 if (!integer_res)
1423 return NULL_TREE;
1426 if (dump_file)
1428 char string[64];
1430 real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1431 fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1433 if (neg_exp)
1435 if (one_over)
1437 fprintf (dump_file, "1.0 / (");
1438 dump_integer_part (dump_file, "x", n);
1439 if (n > 0)
1440 fprintf (dump_file, " * ");
1441 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1442 fprintf (dump_file, ")");
1444 else
1446 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1447 fprintf (dump_file, " / (");
1448 dump_integer_part (dump_file, "x", n);
1449 fprintf (dump_file, ")");
1452 else
1454 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1455 if (n > 0)
1456 fprintf (dump_file, " * ");
1457 dump_integer_part (dump_file, "x", n);
1460 fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1464 tree fract_res = NULL_TREE;
1465 cache[0] = arg0;
1467 /* Calculate the fractional part of the exponent. */
1468 for (unsigned i = 0; i < synth_info.deepest; i++)
1470 if (synth_info.factors[i])
1472 tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1474 if (!fract_res)
1475 fract_res = sqrt_chain;
1477 else
1478 fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1479 fract_res, sqrt_chain);
1483 tree res = NULL_TREE;
1485 if (neg_exp)
1487 if (one_over)
1489 if (n > 0)
1490 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1491 fract_res, integer_res);
1492 else
1493 res = fract_res;
1495 res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1496 build_real (type, dconst1), res);
1498 else
1500 res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1501 fract_res, integer_res);
1504 else
1505 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1506 fract_res, integer_res);
1507 return res;
1510 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1511 with location info LOC. If possible, create an equivalent and
1512 less expensive sequence of statements prior to GSI, and return an
1513    expression holding the result.  */
1515 static tree
1516 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
1517 tree arg0, tree arg1)
1519 REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
1520 REAL_VALUE_TYPE c2, dconst3;
1521 HOST_WIDE_INT n;
1522 tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
1523 machine_mode mode;
1524 bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
1525 bool hw_sqrt_exists, c_is_int, c2_is_int;
1527 dconst1_4 = dconst1;
1528 SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
1530 /* If the exponent isn't a constant, there's nothing of interest
1531 to be done. */
1532 if (TREE_CODE (arg1) != REAL_CST)
1533 return NULL_TREE;
1535 /* Don't perform the operation if flag_signaling_nans is on
1536 and the operand is a signaling NaN. */
1537 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
1538 && ((TREE_CODE (arg0) == REAL_CST
1539 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
1540 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
1541 return NULL_TREE;
1543 /* If the exponent is equivalent to an integer, expand to an optimal
1544 multiplication sequence when profitable. */
1545 c = TREE_REAL_CST (arg1);
1546 n = real_to_integer (&c);
1547 real_from_integer (&cint, VOIDmode, n, SIGNED);
1548 c_is_int = real_identical (&c, &cint);
1550 if (c_is_int
1551 && ((n >= -1 && n <= 2)
1552 || (flag_unsafe_math_optimizations
1553 && speed_p
1554 && powi_cost (n) <= POWI_MAX_MULTS)))
1555 return gimple_expand_builtin_powi (gsi, loc, arg0, n);
1557 /* Attempt various optimizations using sqrt and cbrt. */
1558 type = TREE_TYPE (arg0);
1559 mode = TYPE_MODE (type);
1560 sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1562 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
1563 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
1564 sqrt(-0) = -0. */
1565 if (sqrtfn
1566 && real_equal (&c, &dconsthalf)
1567 && !HONOR_SIGNED_ZEROS (mode))
1568 return build_and_insert_call (gsi, loc, sqrtfn, arg0);
1570 hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
1572 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
1573 optimizations since 1./3. is not exactly representable. If x
1574 is negative and finite, the correct value of pow(x,1./3.) is
1575 a NaN with the "invalid" exception raised, because the value
1576 of 1./3. actually has an even denominator. The correct value
1577 of cbrt(x) is a negative real value. */
1578 cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
1579 dconst1_3 = real_value_truncate (mode, dconst_third ());
1581 if (flag_unsafe_math_optimizations
1582 && cbrtfn
1583 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1584 && real_equal (&c, &dconst1_3))
1585 return build_and_insert_call (gsi, loc, cbrtfn, arg0);
1587 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
1588 if we don't have a hardware sqrt insn. */
1589 dconst1_6 = dconst1_3;
1590 SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
1592 if (flag_unsafe_math_optimizations
1593 && sqrtfn
1594 && cbrtfn
1595 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1596 && speed_p
1597 && hw_sqrt_exists
1598 && real_equal (&c, &dconst1_6))
1600 /* sqrt(x) */
1601 sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
1603 /* cbrt(sqrt(x)) */
1604 return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
1608 /* Attempt to expand the POW as a product of square root chains.
1609      Expand the 0.25 case even when optimizing for size.  */
1610 if (flag_unsafe_math_optimizations
1611 && sqrtfn
1612 && hw_sqrt_exists
1613 && (speed_p || real_equal (&c, &dconst1_4))
1614 && !HONOR_SIGNED_ZEROS (mode))
1616 unsigned int max_depth = speed_p
1617 ? PARAM_VALUE (PARAM_MAX_POW_SQRT_DEPTH)
1618 : 2;
1620 tree expand_with_sqrts
1621 = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
1623 if (expand_with_sqrts)
1624 return expand_with_sqrts;
1627 real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
1628 n = real_to_integer (&c2);
1629 real_from_integer (&cint, VOIDmode, n, SIGNED);
1630 c2_is_int = real_identical (&c2, &cint);
1632 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
1634 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
1635 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
1637 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
1638 different from pow(x, 1./3.) due to rounding and behavior with
1639 negative x, we need to constrain this transformation to unsafe
1640 math and positive x or finite math. */
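  /* For example, pow (x, 2./3.) becomes cbrt (x) * cbrt (x), and
     pow (x, 5./3.) becomes x * (cbrt (x) * cbrt (x)), i.e.
     powi (x, 1) * powi (cbrt (x), 2).  */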
1641 real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
1642 real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
1643 real_round (&c2, mode, &c2);
1644 n = real_to_integer (&c2);
1645 real_from_integer (&cint, VOIDmode, n, SIGNED);
1646 real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
1647 real_convert (&c2, mode, &c2);
1649 if (flag_unsafe_math_optimizations
1650 && cbrtfn
1651 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1652 && real_identical (&c2, &c)
1653 && !c2_is_int
1654 && optimize_function_for_speed_p (cfun)
1655 && powi_cost (n / 3) <= POWI_MAX_MULTS)
1657 tree powi_x_ndiv3 = NULL_TREE;
1659 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
1660 possible or profitable, give up. Skip the degenerate case when
1661 abs(n) < 3, where the result is always 1. */
1662 if (absu_hwi (n) >= 3)
1664 powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
1665 abs_hwi (n / 3));
1666 if (!powi_x_ndiv3)
1667 return NULL_TREE;
1670 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
1671 as that creates an unnecessary variable. Instead, just produce
1672 either cbrt(x) or cbrt(x) * cbrt(x). */
1673 cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
1675 if (absu_hwi (n) % 3 == 1)
1676 powi_cbrt_x = cbrt_x;
1677 else
1678 powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1679 cbrt_x, cbrt_x);
1681 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
1682 if (absu_hwi (n) < 3)
1683 result = powi_cbrt_x;
1684 else
1685 result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1686 powi_x_ndiv3, powi_cbrt_x);
1688 /* If n is negative, reciprocate the result. */
1689 if (n < 0)
1690 result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1691 build_real (type, dconst1), result);
1693 return result;
1696 /* No optimizations succeeded. */
1697 return NULL_TREE;
1700 /* ARG is the argument to a cabs builtin call in GSI with location info
1701 LOC. Create a sequence of statements prior to GSI that calculates
1702 sqrt(R*R + I*I), where R and I are the real and imaginary components
1703 of ARG, respectively. Return an expression holding the result. */
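/* For instance, cabs (z_1) is lowered to roughly (illustrative names)

     cabs_r = REALPART_EXPR <z_1>;
     cabs_i = IMAGPART_EXPR <z_1>;
     cabs_s = cabs_r * cabs_r + cabs_i * cabs_i;
     result = sqrt (cabs_s);

   This is only done under -funsafe-math-optimizations because the naive
   sum of squares can overflow or lose precision where the library cabs
   would not.  */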
1705 static tree
1706 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
1708 tree real_part, imag_part, addend1, addend2, sum, result;
1709 tree type = TREE_TYPE (TREE_TYPE (arg));
1710 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1711 machine_mode mode = TYPE_MODE (type);
1713 if (!flag_unsafe_math_optimizations
1714 || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
1715 || !sqrtfn
1716 || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
1717 return NULL_TREE;
1719 real_part = build_and_insert_ref (gsi, loc, type, "cabs",
1720 REALPART_EXPR, arg);
1721 addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1722 real_part, real_part);
1723 imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
1724 IMAGPART_EXPR, arg);
1725 addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1726 imag_part, imag_part);
1727 sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
1728 result = build_and_insert_call (gsi, loc, sqrtfn, sum);
1730 return result;
1733 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
1734 on the SSA_NAME argument of each of them. Also expand powi(x,n) into
1735 an optimal number of multiplies, when n is a constant. */
1737 namespace {
1739 const pass_data pass_data_cse_sincos =
1741 GIMPLE_PASS, /* type */
1742 "sincos", /* name */
1743 OPTGROUP_NONE, /* optinfo_flags */
1744 TV_NONE, /* tv_id */
1745 PROP_ssa, /* properties_required */
1746 PROP_gimple_opt_math, /* properties_provided */
1747 0, /* properties_destroyed */
1748 0, /* todo_flags_start */
1749 TODO_update_ssa, /* todo_flags_finish */
1752 class pass_cse_sincos : public gimple_opt_pass
1754 public:
1755 pass_cse_sincos (gcc::context *ctxt)
1756 : gimple_opt_pass (pass_data_cse_sincos, ctxt)
1759 /* opt_pass methods: */
1760 virtual bool gate (function *)
1762 /* We no longer require either sincos or cexp, since powi expansion
1763 piggybacks on this pass. */
1764 return optimize;
1767 virtual unsigned int execute (function *);
1769 }; // class pass_cse_sincos
1771 unsigned int
1772 pass_cse_sincos::execute (function *fun)
1774 basic_block bb;
1775 bool cfg_changed = false;
1777 calculate_dominance_info (CDI_DOMINATORS);
1778 memset (&sincos_stats, 0, sizeof (sincos_stats));
1780 FOR_EACH_BB_FN (bb, fun)
1782 gimple_stmt_iterator gsi;
1783 bool cleanup_eh = false;
1785 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1787 gimple *stmt = gsi_stmt (gsi);
1789 /* Only the last stmt in a bb could throw, no need to call
1790 gimple_purge_dead_eh_edges if we change something in the middle
1791 of a basic block. */
1792 cleanup_eh = false;
1794 if (is_gimple_call (stmt)
1795 && gimple_call_lhs (stmt))
1797 tree arg, arg0, arg1, result;
1798 HOST_WIDE_INT n;
1799 location_t loc;
1801 switch (gimple_call_combined_fn (stmt))
1803 CASE_CFN_COS:
1804 CASE_CFN_SIN:
1805 CASE_CFN_CEXPI:
1806 /* Make sure we have either sincos or cexp. */
1807 if (!targetm.libc_has_function (function_c99_math_complex)
1808 && !targetm.libc_has_function (function_sincos))
1809 break;
1811 arg = gimple_call_arg (stmt, 0);
1812 if (TREE_CODE (arg) == SSA_NAME)
1813 cfg_changed |= execute_cse_sincos_1 (arg);
1814 break;
1816 CASE_CFN_POW:
1817 arg0 = gimple_call_arg (stmt, 0);
1818 arg1 = gimple_call_arg (stmt, 1);
1820 loc = gimple_location (stmt);
1821 result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
1823 if (result)
1825 tree lhs = gimple_get_lhs (stmt);
1826 gassign *new_stmt = gimple_build_assign (lhs, result);
1827 gimple_set_location (new_stmt, loc);
1828 unlink_stmt_vdef (stmt);
1829 gsi_replace (&gsi, new_stmt, true);
1830 cleanup_eh = true;
1831 if (gimple_vdef (stmt))
1832 release_ssa_name (gimple_vdef (stmt));
1834 break;
1836 CASE_CFN_POWI:
1837 arg0 = gimple_call_arg (stmt, 0);
1838 arg1 = gimple_call_arg (stmt, 1);
1839 loc = gimple_location (stmt);
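	      /* powi (-1.0, n) is 1.0 for even n and -1.0 for odd n, so it
		 can be computed with a single AND of the exponent's low bit
		 and a COND_EXPR instead of a chain of multiplications.  */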
1841 if (real_minus_onep (arg0))
1843 tree t0, t1, cond, one, minus_one;
1844 gassign *stmt;
1846 t0 = TREE_TYPE (arg0);
1847 t1 = TREE_TYPE (arg1);
1848 one = build_real (t0, dconst1);
1849 minus_one = build_real (t0, dconstm1);
1851 cond = make_temp_ssa_name (t1, NULL, "powi_cond");
1852 stmt = gimple_build_assign (cond, BIT_AND_EXPR,
1853 arg1, build_int_cst (t1, 1));
1854 gimple_set_location (stmt, loc);
1855 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1857 result = make_temp_ssa_name (t0, NULL, "powi");
1858 stmt = gimple_build_assign (result, COND_EXPR, cond,
1859 minus_one, one);
1860 gimple_set_location (stmt, loc);
1861 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1863 else
1865 if (!tree_fits_shwi_p (arg1))
1866 break;
1868 n = tree_to_shwi (arg1);
1869 result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
1872 if (result)
1874 tree lhs = gimple_get_lhs (stmt);
1875 gassign *new_stmt = gimple_build_assign (lhs, result);
1876 gimple_set_location (new_stmt, loc);
1877 unlink_stmt_vdef (stmt);
1878 gsi_replace (&gsi, new_stmt, true);
1879 cleanup_eh = true;
1880 if (gimple_vdef (stmt))
1881 release_ssa_name (gimple_vdef (stmt));
1883 break;
1885 CASE_CFN_CABS:
1886 arg0 = gimple_call_arg (stmt, 0);
1887 loc = gimple_location (stmt);
1888 result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
1890 if (result)
1892 tree lhs = gimple_get_lhs (stmt);
1893 gassign *new_stmt = gimple_build_assign (lhs, result);
1894 gimple_set_location (new_stmt, loc);
1895 unlink_stmt_vdef (stmt);
1896 gsi_replace (&gsi, new_stmt, true);
1897 cleanup_eh = true;
1898 if (gimple_vdef (stmt))
1899 release_ssa_name (gimple_vdef (stmt));
1901 break;
1903 default:;
1907 if (cleanup_eh)
1908 cfg_changed |= gimple_purge_dead_eh_edges (bb);
1911 statistics_counter_event (fun, "sincos statements inserted",
1912 sincos_stats.inserted);
1914 return cfg_changed ? TODO_cleanup_cfg : 0;
1917 } // anon namespace
1919 gimple_opt_pass *
1920 make_pass_cse_sincos (gcc::context *ctxt)
1922 return new pass_cse_sincos (ctxt);
1925 /* Return true if stmt is a type conversion operation that can be stripped
1926 when used in a widening multiply operation. */
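/* An illustrative sketch (hypothetical types): given
     short a, b;
     int r = (int) a * (int) b;
   the two (int) conversions are strippable, because the widening
   multiply selected later recreates the extension from short to int
   as a by-product.  */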
1927 static bool
1928 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
1930 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
1932 if (TREE_CODE (result_type) == INTEGER_TYPE)
1934 tree op_type;
1935 tree inner_op_type;
1937 if (!CONVERT_EXPR_CODE_P (rhs_code))
1938 return false;
1940 op_type = TREE_TYPE (gimple_assign_lhs (stmt));
1942 /* If the type of OP has the same precision as the result, then
1943 we can strip this conversion. The multiply operation will be
1944 selected to create the correct extension as a by-product. */
1945 if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
1946 return true;
1948 /* We can also strip a conversion if it preserves the signed-ness of
1949 the operation and doesn't narrow the range. */
1950 inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
1952 /* If the inner-most type is unsigned, then we can strip any
1953 intermediate widening operation. If it's signed, then the
1954 intermediate widening operation must also be signed. */
1955 if ((TYPE_UNSIGNED (inner_op_type)
1956 || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
1957 && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
1958 return true;
1960 return false;
1963 return rhs_code == FIXED_CONVERT_EXPR;
1966 /* Return true if RHS is a suitable operand for a widening multiplication,
1967 assuming a target type of TYPE.
1968 There are two cases:
1970 - RHS makes some value at least twice as wide. Store that value
1971 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
1973 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
1974 but leave *TYPE_OUT untouched. */
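/* Illustrative sketch (hypothetical operands): with TYPE a 32-bit int,
   an RHS defined as (int) c where c is a 16-bit short falls under the
   first case (c and its 16-bit type are stored), while an RHS that is
   simply the integer constant 3 falls under the second case and leaves
   *TYPE_OUT untouched.  */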
1976 static bool
1977 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
1978 tree *new_rhs_out)
1980 gimple *stmt;
1981 tree type1, rhs1;
1983 if (TREE_CODE (rhs) == SSA_NAME)
1985 stmt = SSA_NAME_DEF_STMT (rhs);
1986 if (is_gimple_assign (stmt))
1988 if (! widening_mult_conversion_strippable_p (type, stmt))
1989 rhs1 = rhs;
1990 else
1992 rhs1 = gimple_assign_rhs1 (stmt);
1994 if (TREE_CODE (rhs1) == INTEGER_CST)
1996 *new_rhs_out = rhs1;
1997 *type_out = NULL;
1998 return true;
2002 else
2003 rhs1 = rhs;
2005 type1 = TREE_TYPE (rhs1);
2007 if (TREE_CODE (type1) != TREE_CODE (type)
2008 || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2009 return false;
2011 *new_rhs_out = rhs1;
2012 *type_out = type1;
2013 return true;
2016 if (TREE_CODE (rhs) == INTEGER_CST)
2018 *new_rhs_out = rhs;
2019 *type_out = NULL;
2020 return true;
2023 return false;
2026 /* Return true if STMT performs a widening multiplication, assuming the
2027 output type is TYPE. If so, store the unwidened types of the operands
2028 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2029 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2030 and *TYPE2_OUT would give the operands of the multiplication. */
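/* For example (a sketch with hypothetical types): for
     unsigned short a, b;
     unsigned int p = (unsigned int) a * (unsigned int) b;
   this returns true with *TYPE1_OUT == *TYPE2_OUT == unsigned short
   and *RHS1_OUT == a, *RHS2_OUT == b.  */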
2032 static bool
2033 is_widening_mult_p (gimple *stmt,
2034 tree *type1_out, tree *rhs1_out,
2035 tree *type2_out, tree *rhs2_out)
2037 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2039 if (TREE_CODE (type) != INTEGER_TYPE
2040 && TREE_CODE (type) != FIXED_POINT_TYPE)
2041 return false;
2043 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2044 rhs1_out))
2045 return false;
2047 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2048 rhs2_out))
2049 return false;
2051 if (*type1_out == NULL)
2053 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2054 return false;
2055 *type1_out = *type2_out;
2058 if (*type2_out == NULL)
2060 if (!int_fits_type_p (*rhs2_out, *type1_out))
2061 return false;
2062 *type2_out = *type1_out;
2065 /* Ensure that the larger of the two operands comes first. */
2066 if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2068 std::swap (*type1_out, *type2_out);
2069 std::swap (*rhs1_out, *rhs2_out);
2072 return true;
2075 /* Check to see if the CALL statement is an invocation of copysign
2076 with 1.0 as its first argument. */
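/* E.g. a call such as copysign (1.0, y) (or the float/long double and
   internal-function variants) matches; copysign (x, y) with any other
   first argument does not.  */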
2077 static bool
2078 is_copysign_call_with_1 (gimple *call)
2080 gcall *c = dyn_cast <gcall *> (call);
2081 if (! c)
2082 return false;
2084 enum combined_fn code = gimple_call_combined_fn (c);
2086 if (code == CFN_LAST)
2087 return false;
2089 if (builtin_fn_p (code))
2091 switch (as_builtin_fn (code))
2093 CASE_FLT_FN (BUILT_IN_COPYSIGN):
2094 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2095 return real_onep (gimple_call_arg (c, 0));
2096 default:
2097 return false;
2101 if (internal_fn_p (code))
2103 switch (as_internal_fn (code))
2105 case IFN_COPYSIGN:
2106 return real_onep (gimple_call_arg (c, 0));
2107 default:
2108 return false;
2112 return false;
2115 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2116 This is only done when the xorsign optab is defined; if the pattern
2117 does not match or expansion fails, FALSE is returned, otherwise
2118 TRUE is returned. */
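/* Illustrative sketch: with float x and y and a target providing the
   xorsign optab,
     tmp = __builtin_copysignf (1.0f, y);
     res = x * tmp;
   is rewritten into a single internal call res = IFN_XORSIGN (x, y).  */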
2119 static bool
2120 convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2122 tree treeop0, treeop1, lhs, type;
2123 location_t loc = gimple_location (stmt);
2124 lhs = gimple_assign_lhs (stmt);
2125 treeop0 = gimple_assign_rhs1 (stmt);
2126 treeop1 = gimple_assign_rhs2 (stmt);
2127 type = TREE_TYPE (lhs);
2128 machine_mode mode = TYPE_MODE (type);
2130 if (HONOR_SNANS (type))
2131 return false;
2133 if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2135 gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2136 if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
2138 call0 = SSA_NAME_DEF_STMT (treeop1);
2139 if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2140 return false;
2142 treeop1 = treeop0;
2144 if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2145 return false;
2147 gcall *c = as_a<gcall*> (call0);
2148 treeop0 = gimple_call_arg (c, 1);
2150 gcall *call_stmt
2151 = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2152 gimple_set_lhs (call_stmt, lhs);
2153 gimple_set_location (call_stmt, loc);
2154 gsi_replace (gsi, call_stmt, true);
2155 return true;
2158 return false;
2161 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2162 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2163 value is true iff we converted the statement. */
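/* A sketch of the rewrite (hypothetical 16/32-bit types): for
     short a, b;
     int r = (int) a * (int) b;
   the MULT_EXPR is turned in place into r = a WIDEN_MULT_EXPR b when
   the target provides a suitable [us]mul_widen optab.  */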
2165 static bool
2166 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2168 tree lhs, rhs1, rhs2, type, type1, type2;
2169 enum insn_code handler;
2170 scalar_int_mode to_mode, from_mode, actual_mode;
2171 optab op;
2172 int actual_precision;
2173 location_t loc = gimple_location (stmt);
2174 bool from_unsigned1, from_unsigned2;
2176 lhs = gimple_assign_lhs (stmt);
2177 type = TREE_TYPE (lhs);
2178 if (TREE_CODE (type) != INTEGER_TYPE)
2179 return false;
2181 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2182 return false;
2184 to_mode = SCALAR_INT_TYPE_MODE (type);
2185 from_mode = SCALAR_INT_TYPE_MODE (type1);
2186 if (to_mode == from_mode)
2187 return false;
2189 from_unsigned1 = TYPE_UNSIGNED (type1);
2190 from_unsigned2 = TYPE_UNSIGNED (type2);
2192 if (from_unsigned1 && from_unsigned2)
2193 op = umul_widen_optab;
2194 else if (!from_unsigned1 && !from_unsigned2)
2195 op = smul_widen_optab;
2196 else
2197 op = usmul_widen_optab;
2199 handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2200 &actual_mode);
2202 if (handler == CODE_FOR_nothing)
2204 if (op != smul_widen_optab)
2206 /* We can use a signed multiply with unsigned types as long as
2207 there is a wider mode to use, or it is the smaller of the two
2208 types that is unsigned. Note that type1 >= type2, always. */
2209 if ((TYPE_UNSIGNED (type1)
2210 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2211 || (TYPE_UNSIGNED (type2)
2212 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2214 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2215 || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2216 return false;
2219 op = smul_widen_optab;
2220 handler = find_widening_optab_handler_and_mode (op, to_mode,
2221 from_mode,
2222 &actual_mode);
2224 if (handler == CODE_FOR_nothing)
2225 return false;
2227 from_unsigned1 = from_unsigned2 = false;
2229 else
2230 return false;
2233 /* Ensure that the inputs to the handler are in the correct precision
2234 for the opcode. This will be the full mode size. */
2235 actual_precision = GET_MODE_PRECISION (actual_mode);
2236 if (2 * actual_precision > TYPE_PRECISION (type))
2237 return false;
2238 if (actual_precision != TYPE_PRECISION (type1)
2239 || from_unsigned1 != TYPE_UNSIGNED (type1))
2240 rhs1 = build_and_insert_cast (gsi, loc,
2241 build_nonstandard_integer_type
2242 (actual_precision, from_unsigned1), rhs1);
2243 if (actual_precision != TYPE_PRECISION (type2)
2244 || from_unsigned2 != TYPE_UNSIGNED (type2))
2245 rhs2 = build_and_insert_cast (gsi, loc,
2246 build_nonstandard_integer_type
2247 (actual_precision, from_unsigned2), rhs2);
2249 /* Handle constants. */
2250 if (TREE_CODE (rhs1) == INTEGER_CST)
2251 rhs1 = fold_convert (type1, rhs1);
2252 if (TREE_CODE (rhs2) == INTEGER_CST)
2253 rhs2 = fold_convert (type2, rhs2);
2255 gimple_assign_set_rhs1 (stmt, rhs1);
2256 gimple_assign_set_rhs2 (stmt, rhs2);
2257 gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2258 update_stmt (stmt);
2259 widen_mul_stats.widen_mults_inserted++;
2260 return true;
2263 /* Process a single gimple statement STMT, which is found at the
2264 iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
2265 rhs (given by CODE), and try to convert it into a
2266 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
2267 is true iff we converted the statement. */
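/* A sketch of the rewrite (hypothetical types): for
     short a, b;  int acc;
     acc = acc + (int) a * (int) b;
   the PLUS_EXPR becomes acc = WIDEN_MULT_PLUS_EXPR <a, b, acc> when the
   target has a widening multiply-accumulate pattern for that mode.  */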
2269 static bool
2270 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2271 enum tree_code code)
2273 gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2274 gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2275 tree type, type1, type2, optype;
2276 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2277 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2278 optab this_optab;
2279 enum tree_code wmult_code;
2280 enum insn_code handler;
2281 scalar_mode to_mode, from_mode, actual_mode;
2282 location_t loc = gimple_location (stmt);
2283 int actual_precision;
2284 bool from_unsigned1, from_unsigned2;
2286 lhs = gimple_assign_lhs (stmt);
2287 type = TREE_TYPE (lhs);
2288 if (TREE_CODE (type) != INTEGER_TYPE
2289 && TREE_CODE (type) != FIXED_POINT_TYPE)
2290 return false;
2292 if (code == MINUS_EXPR)
2293 wmult_code = WIDEN_MULT_MINUS_EXPR;
2294 else
2295 wmult_code = WIDEN_MULT_PLUS_EXPR;
2297 rhs1 = gimple_assign_rhs1 (stmt);
2298 rhs2 = gimple_assign_rhs2 (stmt);
2300 if (TREE_CODE (rhs1) == SSA_NAME)
2302 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2303 if (is_gimple_assign (rhs1_stmt))
2304 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2307 if (TREE_CODE (rhs2) == SSA_NAME)
2309 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2310 if (is_gimple_assign (rhs2_stmt))
2311 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2314 /* Allow for one conversion statement between the multiply
2315 and addition/subtraction statement. If there is more than
2316 one conversion then we assume they would invalidate this
2317 transformation. If that's not the case then they should have
2318 been folded before now. */
2319 if (CONVERT_EXPR_CODE_P (rhs1_code))
2321 conv1_stmt = rhs1_stmt;
2322 rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2323 if (TREE_CODE (rhs1) == SSA_NAME)
2325 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2326 if (is_gimple_assign (rhs1_stmt))
2327 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2329 else
2330 return false;
2332 if (CONVERT_EXPR_CODE_P (rhs2_code))
2334 conv2_stmt = rhs2_stmt;
2335 rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2336 if (TREE_CODE (rhs2) == SSA_NAME)
2338 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2339 if (is_gimple_assign (rhs2_stmt))
2340 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2342 else
2343 return false;
2346 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2347 is_widening_mult_p, but we still need the operand values it returns.
2349 It might also appear that it would be sufficient to use the existing
2350 operands of the widening multiply, but that would limit the choice of
2351 multiply-and-accumulate instructions.
2353 If the widened-multiplication result has more than one use, it is
2354 probably wiser not to do the conversion. */
2355 if (code == PLUS_EXPR
2356 && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2358 if (!has_single_use (rhs1)
2359 || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2360 &type2, &mult_rhs2))
2361 return false;
2362 add_rhs = rhs2;
2363 conv_stmt = conv1_stmt;
2365 else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2367 if (!has_single_use (rhs2)
2368 || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2369 &type2, &mult_rhs2))
2370 return false;
2371 add_rhs = rhs1;
2372 conv_stmt = conv2_stmt;
2374 else
2375 return false;
2377 to_mode = SCALAR_TYPE_MODE (type);
2378 from_mode = SCALAR_TYPE_MODE (type1);
2379 if (to_mode == from_mode)
2380 return false;
2382 from_unsigned1 = TYPE_UNSIGNED (type1);
2383 from_unsigned2 = TYPE_UNSIGNED (type2);
2384 optype = type1;
2386 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
2387 if (from_unsigned1 != from_unsigned2)
2389 if (!INTEGRAL_TYPE_P (type))
2390 return false;
2391 /* We can use a signed multiply with unsigned types as long as
2392 there is a wider mode to use, or it is the smaller of the two
2393 types that is unsigned. Note that type1 >= type2, always. */
2394 if ((from_unsigned1
2395 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2396 || (from_unsigned2
2397 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2399 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2400 || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2401 return false;
2404 from_unsigned1 = from_unsigned2 = false;
2405 optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2406 false);
2409 /* If there was a conversion between the multiply and addition
2410 then we need to make sure it fits a multiply-and-accumulate.
2411 There should be a single mode change which does not change the
2412 value. */
2413 if (conv_stmt)
2415 /* We use the original, unmodified data types for this. */
2416 tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
2417 tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
2418 int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
2419 bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
2421 if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
2423 /* Conversion is a truncate. */
2424 if (TYPE_PRECISION (to_type) < data_size)
2425 return false;
2427 else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
2429 /* Conversion is an extend. Check it's the right sort. */
2430 if (TYPE_UNSIGNED (from_type) != is_unsigned
2431 && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
2432 return false;
2434 /* else convert is a no-op for our purposes. */
2437 /* Verify that the machine can perform a widening multiply
2438 accumulate in this mode/signedness combination, otherwise
2439 this transformation is likely to pessimize code. */
2440 this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
2441 handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
2442 from_mode, &actual_mode);
2444 if (handler == CODE_FOR_nothing)
2445 return false;
2447 /* Ensure that the inputs to the handler are in the correct precision
2448 for the opcode. This will be the full mode size. */
2449 actual_precision = GET_MODE_PRECISION (actual_mode);
2450 if (actual_precision != TYPE_PRECISION (type1)
2451 || from_unsigned1 != TYPE_UNSIGNED (type1))
2452 mult_rhs1 = build_and_insert_cast (gsi, loc,
2453 build_nonstandard_integer_type
2454 (actual_precision, from_unsigned1),
2455 mult_rhs1);
2456 if (actual_precision != TYPE_PRECISION (type2)
2457 || from_unsigned2 != TYPE_UNSIGNED (type2))
2458 mult_rhs2 = build_and_insert_cast (gsi, loc,
2459 build_nonstandard_integer_type
2460 (actual_precision, from_unsigned2),
2461 mult_rhs2);
2463 if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
2464 add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
2466 /* Handle constants. */
2467 if (TREE_CODE (mult_rhs1) == INTEGER_CST)
2468 mult_rhs1 = fold_convert (type1, mult_rhs1);
2469 if (TREE_CODE (mult_rhs2) == INTEGER_CST)
2470 mult_rhs2 = fold_convert (type2, mult_rhs2);
2472 gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
2473 add_rhs);
2474 update_stmt (gsi_stmt (*gsi));
2475 widen_mul_stats.maccs_inserted++;
2476 return true;
2479 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
2480 with uses in additions and subtractions to form fused multiply-add
2481 operations. Returns true if successful and MUL_STMT should be removed. */
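/* A sketch of the transformation (assuming -ffp-contract=fast and an
   fma optab for the mode): for
     double a, b, c, d;
     tmp = a * b;
     d = tmp + c;
   the addition becomes d = FMA_EXPR <a, b, c> and the multiplication
   becomes dead.  Negates and subtractions are folded into the FMA
   operands in the same way.  */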
2483 static bool
2484 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2)
2486 tree mul_result = gimple_get_lhs (mul_stmt);
2487 tree type = TREE_TYPE (mul_result);
2488 gimple *use_stmt, *neguse_stmt;
2489 gassign *fma_stmt;
2490 use_operand_p use_p;
2491 imm_use_iterator imm_iter;
2493 if (FLOAT_TYPE_P (type)
2494 && flag_fp_contract_mode == FP_CONTRACT_OFF)
2495 return false;
2497 /* We don't want to do bitfield reduction ops. */
2498 if (INTEGRAL_TYPE_P (type)
2499 && !type_has_mode_precision_p (type))
2500 return false;
2502 /* If the target doesn't support it, don't generate it. We assume that
2503 if fma isn't available then fms, fnma or fnms are not either. */
2504 if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
2505 return false;
2507 /* If the multiplication has zero uses, it is kept around probably because
2508 of -fnon-call-exceptions. Don't optimize it away in that case,
2509 it is DCE's job. */
2510 if (has_zero_uses (mul_result))
2511 return false;
2513 /* Make sure that the multiplication statement becomes dead after
2514 the transformation, i.e. that all uses are transformed to FMAs.
2515 This means we assume that an FMA operation has the same cost
2516 as an addition. */
2517 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
2519 enum tree_code use_code;
2520 tree result = mul_result;
2521 bool negate_p = false;
2523 use_stmt = USE_STMT (use_p);
2525 if (is_gimple_debug (use_stmt))
2526 continue;
2528 /* For now restrict these operations to single basic blocks. In theory
2529 we would want to support sinking the multiplication in
2530 m = a*b;
2531 if ()
2532 ma = m + c;
2533 else
2534 d = m;
2535 to form an fma in the then block and sink the multiplication to the
2536 else block. */
2537 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
2538 return false;
2540 if (!is_gimple_assign (use_stmt))
2541 return false;
2543 use_code = gimple_assign_rhs_code (use_stmt);
2545 /* A negate on the multiplication leads to FNMA. */
2546 if (use_code == NEGATE_EXPR)
2548 ssa_op_iter iter;
2549 use_operand_p usep;
2551 result = gimple_assign_lhs (use_stmt);
2553 /* Make sure the negate statement becomes dead with this
2554 single transformation. */
2555 if (!single_imm_use (gimple_assign_lhs (use_stmt),
2556 &use_p, &neguse_stmt))
2557 return false;
2559 /* Make sure the multiplication isn't also used on that stmt. */
2560 FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
2561 if (USE_FROM_PTR (usep) == mul_result)
2562 return false;
2564 /* Re-validate. */
2565 use_stmt = neguse_stmt;
2566 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
2567 return false;
2568 if (!is_gimple_assign (use_stmt))
2569 return false;
2571 use_code = gimple_assign_rhs_code (use_stmt);
2572 negate_p = true;
2575 switch (use_code)
2577 case MINUS_EXPR:
2578 if (gimple_assign_rhs2 (use_stmt) == result)
2579 negate_p = !negate_p;
2580 break;
2581 case PLUS_EXPR:
2582 break;
2583 default:
2584 /* FMA can only be formed from PLUS and MINUS. */
2585 return false;
2588 /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
2589 by a MULT_EXPR that we'll visit later, we might be able to
2590 get a more profitable match with fnma.
2591 OTOH, if we don't, a negate / fma pair has likely lower latency
2592 than a mult / subtract pair. */
2593 if (use_code == MINUS_EXPR && !negate_p
2594 && gimple_assign_rhs1 (use_stmt) == result
2595 && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing
2596 && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
2598 tree rhs2 = gimple_assign_rhs2 (use_stmt);
2600 if (TREE_CODE (rhs2) == SSA_NAME)
2602 gimple *stmt2 = SSA_NAME_DEF_STMT (rhs2);
2603 if (has_single_use (rhs2)
2604 && is_gimple_assign (stmt2)
2605 && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
2606 return false;
2610 /* We can't handle a * b + a * b. */
2611 if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
2612 return false;
2614 /* While it is possible to validate whether or not the exact form
2615 that we've recognized is available in the backend, the assumption
2616 is that the transformation is never a loss. For instance, suppose
2617 the target only has the plain FMA pattern available. Consider
2618 a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which
2619 is still two operations. Consider -(a*b)-c -> fma(-a,b,-c): we
2620 still have 3 operations, but in the FMA form the two NEGs are
2621 independent and could be run in parallel. */
2624 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
2626 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
2627 enum tree_code use_code;
2628 tree addop, mulop1 = op1, result = mul_result;
2629 bool negate_p = false;
2631 if (is_gimple_debug (use_stmt))
2632 continue;
2634 use_code = gimple_assign_rhs_code (use_stmt);
2635 if (use_code == NEGATE_EXPR)
2637 result = gimple_assign_lhs (use_stmt);
2638 single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
2639 gsi_remove (&gsi, true);
2640 release_defs (use_stmt);
2642 use_stmt = neguse_stmt;
2643 gsi = gsi_for_stmt (use_stmt);
2644 use_code = gimple_assign_rhs_code (use_stmt);
2645 negate_p = true;
2648 if (gimple_assign_rhs1 (use_stmt) == result)
2650 addop = gimple_assign_rhs2 (use_stmt);
2651 /* a * b - c -> a * b + (-c) */
2652 if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
2653 addop = force_gimple_operand_gsi (&gsi,
2654 build1 (NEGATE_EXPR,
2655 type, addop),
2656 true, NULL_TREE, true,
2657 GSI_SAME_STMT);
2659 else
2661 addop = gimple_assign_rhs1 (use_stmt);
2662 /* a - b * c -> (-b) * c + a */
2663 if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
2664 negate_p = !negate_p;
2667 if (negate_p)
2668 mulop1 = force_gimple_operand_gsi (&gsi,
2669 build1 (NEGATE_EXPR,
2670 type, mulop1),
2671 true, NULL_TREE, true,
2672 GSI_SAME_STMT);
2674 fma_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt),
2675 FMA_EXPR, mulop1, op2, addop);
2676 gsi_replace (&gsi, fma_stmt, true);
2677 widen_mul_stats.fmas_inserted++;
2680 return true;
2684 /* Helper function of match_uaddsub_overflow. Return 1
2685 if USE_STMT is an unsigned overflow check ovf != 0 for
2686 STMT, -1 if USE_STMT is an unsigned overflow check ovf == 0
2687 and 0 otherwise. */
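/* For instance, with STMT being r = a + b (unsigned), a USE_STMT of
   the form if (r < a) is an overflow check and yields 1, if (r >= a)
   yields -1, and any unrelated use yields 0.  */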
2689 static int
2690 uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt)
2692 enum tree_code ccode = ERROR_MARK;
2693 tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
2694 if (gimple_code (use_stmt) == GIMPLE_COND)
2696 ccode = gimple_cond_code (use_stmt);
2697 crhs1 = gimple_cond_lhs (use_stmt);
2698 crhs2 = gimple_cond_rhs (use_stmt);
2700 else if (is_gimple_assign (use_stmt))
2702 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
2704 ccode = gimple_assign_rhs_code (use_stmt);
2705 crhs1 = gimple_assign_rhs1 (use_stmt);
2706 crhs2 = gimple_assign_rhs2 (use_stmt);
2708 else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR)
2710 tree cond = gimple_assign_rhs1 (use_stmt);
2711 if (COMPARISON_CLASS_P (cond))
2713 ccode = TREE_CODE (cond);
2714 crhs1 = TREE_OPERAND (cond, 0);
2715 crhs2 = TREE_OPERAND (cond, 1);
2717 else
2718 return 0;
2720 else
2721 return 0;
2723 else
2724 return 0;
2726 if (TREE_CODE_CLASS (ccode) != tcc_comparison)
2727 return 0;
2729 enum tree_code code = gimple_assign_rhs_code (stmt);
2730 tree lhs = gimple_assign_lhs (stmt);
2731 tree rhs1 = gimple_assign_rhs1 (stmt);
2732 tree rhs2 = gimple_assign_rhs2 (stmt);
2734 switch (ccode)
2736 case GT_EXPR:
2737 case LE_EXPR:
2738 /* r = a - b; r > a or r <= a
2739 r = a + b; a > r or a <= r or b > r or b <= r. */
2740 if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
2741 || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
2742 && crhs2 == lhs))
2743 return ccode == GT_EXPR ? 1 : -1;
2744 break;
2745 case LT_EXPR:
2746 case GE_EXPR:
2747 /* r = a - b; a < r or a >= r
2748 r = a + b; r < a or r >= a or r < b or r >= b. */
2749 if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
2750 || (code == PLUS_EXPR && crhs1 == lhs
2751 && (crhs2 == rhs1 || crhs2 == rhs2)))
2752 return ccode == LT_EXPR ? 1 : -1;
2753 break;
2754 default:
2755 break;
2757 return 0;
2760 /* Recognize for unsigned x
2761 x = y - z;
2762 if (x > y)
2763 where there are other uses of x and replace it with
2764 _7 = SUB_OVERFLOW (y, z);
2765 x = REALPART_EXPR <_7>;
2766 _8 = IMAGPART_EXPR <_7>;
2767 if (_8)
2768 and similarly for addition. */
2770 static bool
2771 match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
2772 enum tree_code code)
2774 tree lhs = gimple_assign_lhs (stmt);
2775 tree type = TREE_TYPE (lhs);
2776 use_operand_p use_p;
2777 imm_use_iterator iter;
2778 bool use_seen = false;
2779 bool ovf_use_seen = false;
2780 gimple *use_stmt;
2782 gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR);
2783 if (!INTEGRAL_TYPE_P (type)
2784 || !TYPE_UNSIGNED (type)
2785 || has_zero_uses (lhs)
2786 || has_single_use (lhs)
2787 || optab_handler (code == PLUS_EXPR ? uaddv4_optab : usubv4_optab,
2788 TYPE_MODE (type)) == CODE_FOR_nothing)
2789 return false;
2791 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
2793 use_stmt = USE_STMT (use_p);
2794 if (is_gimple_debug (use_stmt))
2795 continue;
2797 if (uaddsub_overflow_check_p (stmt, use_stmt))
2798 ovf_use_seen = true;
2799 else
2800 use_seen = true;
2801 if (ovf_use_seen && use_seen)
2802 break;
2805 if (!ovf_use_seen || !use_seen)
2806 return false;
2808 tree ctype = build_complex_type (type);
2809 tree rhs1 = gimple_assign_rhs1 (stmt);
2810 tree rhs2 = gimple_assign_rhs2 (stmt);
2811 gcall *g = gimple_build_call_internal (code == PLUS_EXPR
2812 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
2813 2, rhs1, rhs2);
2814 tree ctmp = make_ssa_name (ctype);
2815 gimple_call_set_lhs (g, ctmp);
2816 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2817 gassign *g2 = gimple_build_assign (lhs, REALPART_EXPR,
2818 build1 (REALPART_EXPR, type, ctmp));
2819 gsi_replace (gsi, g2, true);
2820 tree ovf = make_ssa_name (type);
2821 g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
2822 build1 (IMAGPART_EXPR, type, ctmp));
2823 gsi_insert_after (gsi, g2, GSI_NEW_STMT);
2825 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2827 if (is_gimple_debug (use_stmt))
2828 continue;
2830 int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt);
2831 if (ovf_use == 0)
2832 continue;
2833 if (gimple_code (use_stmt) == GIMPLE_COND)
2835 gcond *cond_stmt = as_a <gcond *> (use_stmt);
2836 gimple_cond_set_lhs (cond_stmt, ovf);
2837 gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
2838 gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
2840 else
2842 gcc_checking_assert (is_gimple_assign (use_stmt));
2843 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
2845 gimple_assign_set_rhs1 (use_stmt, ovf);
2846 gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
2847 gimple_assign_set_rhs_code (use_stmt,
2848 ovf_use == 1 ? NE_EXPR : EQ_EXPR);
2850 else
2852 gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
2853 == COND_EXPR);
2854 tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
2855 boolean_type_node, ovf,
2856 build_int_cst (type, 0));
2857 gimple_assign_set_rhs1 (use_stmt, cond);
2860 update_stmt (use_stmt);
2862 return true;
2865 /* Return true if target has support for divmod. */
2867 static bool
2868 target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
2870 /* If target supports hardware divmod insn, use it for divmod. */
2871 if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
2872 return true;
2874 /* Check if libfunc for divmod is available. */
2875 rtx libfunc = optab_libfunc (divmod_optab, mode);
2876 if (libfunc != NULL_RTX)
2878 /* If optab_handler exists for div_optab, perhaps in a wider mode,
2879 we don't want to use the libfunc even if it exists for the given mode. */
2880 machine_mode div_mode;
2881 FOR_EACH_MODE_FROM (div_mode, mode)
2882 if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
2883 return false;
2885 return targetm.expand_divmod_libfunc != NULL;
2888 return false;
2891 /* Check if stmt is a candidate for the divmod transform. */
2893 static bool
2894 divmod_candidate_p (gassign *stmt)
2896 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2897 machine_mode mode = TYPE_MODE (type);
2898 optab divmod_optab, div_optab;
2900 if (TYPE_UNSIGNED (type))
2902 divmod_optab = udivmod_optab;
2903 div_optab = udiv_optab;
2905 else
2907 divmod_optab = sdivmod_optab;
2908 div_optab = sdiv_optab;
2911 tree op1 = gimple_assign_rhs1 (stmt);
2912 tree op2 = gimple_assign_rhs2 (stmt);
2914 /* Disable the transform if either is a constant, since division-by-constant
2915 may have specialized expansion. */
2916 if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2))
2917 return false;
2919 /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
2920 expand using the [su]divv optabs. */
2921 if (TYPE_OVERFLOW_TRAPS (type))
2922 return false;
2924 if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
2925 return false;
2927 return true;
2930 /* This function looks for:
2931 t1 = a TRUNC_DIV_EXPR b;
2932 t2 = a TRUNC_MOD_EXPR b;
2933 and transforms it to the following sequence:
2934 complex_tmp = DIVMOD (a, b);
2935 t1 = REALPART_EXPR(complex_tmp);
2936 t2 = IMAGPART_EXPR(complex_tmp);
2937 For conditions enabling the transform see divmod_candidate_p().
2939 The pass has three parts:
2940 1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
2941 other trunc_div_expr and trunc_mod_expr stmts.
2942 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
2943 to stmts vector.
2944 3) Insert DIVMOD call just before top_stmt and update entries in
2945 stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
2946 IMAGPART_EXPR for mod). */
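/* At the source level this corresponds to (an illustrative example)
     q = a / b;
     r = a % b;
   with the same operands, both being rewritten to use one DIVMOD
   internal call whose real part feeds q and whose imaginary part
   feeds r.  */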
2948 static bool
2949 convert_to_divmod (gassign *stmt)
2951 if (stmt_can_throw_internal (stmt)
2952 || !divmod_candidate_p (stmt))
2953 return false;
2955 tree op1 = gimple_assign_rhs1 (stmt);
2956 tree op2 = gimple_assign_rhs2 (stmt);
2958 imm_use_iterator use_iter;
2959 gimple *use_stmt;
2960 auto_vec<gimple *> stmts;
2962 gimple *top_stmt = stmt;
2963 basic_block top_bb = gimple_bb (stmt);
2965 /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
2966 at least stmt and possibly other trunc_div/trunc_mod stmts
2967 having same operands as stmt. */
2969 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
2971 if (is_gimple_assign (use_stmt)
2972 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
2973 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
2974 && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
2975 && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
2977 if (stmt_can_throw_internal (use_stmt))
2978 continue;
2980 basic_block bb = gimple_bb (use_stmt);
2982 if (bb == top_bb)
2984 if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
2985 top_stmt = use_stmt;
2987 else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
2989 top_bb = bb;
2990 top_stmt = use_stmt;
2995 tree top_op1 = gimple_assign_rhs1 (top_stmt);
2996 tree top_op2 = gimple_assign_rhs2 (top_stmt);
2998 stmts.safe_push (top_stmt);
2999 bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
3001 /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
3002 to stmts vector. The 2nd loop will always add stmt to stmts vector, since
3003 gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
3004 2nd loop ends up adding at least a single trunc_mod_expr stmt. */
3006 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
3008 if (is_gimple_assign (use_stmt)
3009 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
3010 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
3011 && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
3012 && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
3014 if (use_stmt == top_stmt
3015 || stmt_can_throw_internal (use_stmt)
3016 || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
3017 continue;
3019 stmts.safe_push (use_stmt);
3020 if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
3021 div_seen = true;
3025 if (!div_seen)
3026 return false;
3028 /* Part 3: Create a call to the internal fn DIVMOD:
3029 divmod_tmp = DIVMOD (op1, op2). */
3031 gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
3032 tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
3033 call_stmt, "divmod_tmp");
3034 gimple_call_set_lhs (call_stmt, res);
3035 /* We rejected throwing statements above. */
3036 gimple_call_set_nothrow (call_stmt, true);
3038 /* Insert the call before top_stmt. */
3039 gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
3040 gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
3042 widen_mul_stats.divmod_calls_inserted++;
3044 /* Update all statements in stmts vector:
3045 lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
3046 lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */
3048 for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
3050 tree new_rhs;
3052 switch (gimple_assign_rhs_code (use_stmt))
3054 case TRUNC_DIV_EXPR:
3055 new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
3056 break;
3058 case TRUNC_MOD_EXPR:
3059 new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
3060 break;
3062 default:
3063 gcc_unreachable ();
3066 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3067 gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
3068 update_stmt (use_stmt);
3071 return true;
3074 /* Find integer multiplications where the operands are extended from
3075 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
3076 where appropriate. */
3078 namespace {
3080 const pass_data pass_data_optimize_widening_mul =
3082 GIMPLE_PASS, /* type */
3083 "widening_mul", /* name */
3084 OPTGROUP_NONE, /* optinfo_flags */
3085 TV_NONE, /* tv_id */
3086 PROP_ssa, /* properties_required */
3087 0, /* properties_provided */
3088 0, /* properties_destroyed */
3089 0, /* todo_flags_start */
3090 TODO_update_ssa, /* todo_flags_finish */
3093 class pass_optimize_widening_mul : public gimple_opt_pass
3095 public:
3096 pass_optimize_widening_mul (gcc::context *ctxt)
3097 : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
3100 /* opt_pass methods: */
3101 virtual bool gate (function *)
3103 return flag_expensive_optimizations && optimize;
3106 virtual unsigned int execute (function *);
3108 }; // class pass_optimize_widening_mul
3110 unsigned int
3111 pass_optimize_widening_mul::execute (function *fun)
3113 basic_block bb;
3114 bool cfg_changed = false;
3116 memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
3117 calculate_dominance_info (CDI_DOMINATORS);
3118 renumber_gimple_stmt_uids ();
3120 FOR_EACH_BB_FN (bb, fun)
3122 gimple_stmt_iterator gsi;
3124 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
3126 gimple *stmt = gsi_stmt (gsi);
3127 enum tree_code code;
3129 if (is_gimple_assign (stmt))
3131 code = gimple_assign_rhs_code (stmt);
3132 switch (code)
3134 case MULT_EXPR:
3135 if (!convert_mult_to_widen (stmt, &gsi)
3136 && !convert_expand_mult_copysign (stmt, &gsi)
3137 && convert_mult_to_fma (stmt,
3138 gimple_assign_rhs1 (stmt),
3139 gimple_assign_rhs2 (stmt)))
3141 gsi_remove (&gsi, true);
3142 release_defs (stmt);
3143 continue;
3145 break;
3147 case PLUS_EXPR:
3148 case MINUS_EXPR:
3149 if (!convert_plusminus_to_widen (&gsi, stmt, code))
3150 match_uaddsub_overflow (&gsi, stmt, code);
3151 break;
3153 case TRUNC_MOD_EXPR:
3154 convert_to_divmod (as_a<gassign *> (stmt));
3155 break;
3157 default:;
3160 else if (is_gimple_call (stmt)
3161 && gimple_call_lhs (stmt))
3163 tree fndecl = gimple_call_fndecl (stmt);
3164 if (fndecl
3165 && gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
3167 switch (DECL_FUNCTION_CODE (fndecl))
3169 case BUILT_IN_POWF:
3170 case BUILT_IN_POW:
3171 case BUILT_IN_POWL:
3172 if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
3173 && real_equal
3174 (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
3175 &dconst2)
3176 && convert_mult_to_fma (stmt,
3177 gimple_call_arg (stmt, 0),
3178 gimple_call_arg (stmt, 0)))
3180 unlink_stmt_vdef (stmt);
3181 if (gsi_remove (&gsi, true)
3182 && gimple_purge_dead_eh_edges (bb))
3183 cfg_changed = true;
3184 release_defs (stmt);
3185 continue;
3187 break;
3189 default:;
3193 gsi_next (&gsi);
3197 statistics_counter_event (fun, "widening multiplications inserted",
3198 widen_mul_stats.widen_mults_inserted);
3199 statistics_counter_event (fun, "widening maccs inserted",
3200 widen_mul_stats.maccs_inserted);
3201 statistics_counter_event (fun, "fused multiply-adds inserted",
3202 widen_mul_stats.fmas_inserted);
3203 statistics_counter_event (fun, "divmod calls inserted",
3204 widen_mul_stats.divmod_calls_inserted);
3206 return cfg_changed ? TODO_cleanup_cfg : 0;
3209 } // anon namespace
3211 gimple_opt_pass *
3212 make_pass_optimize_widening_mul (gcc::context *ctxt)
3214 return new pass_optimize_widening_mul (ctxt);