1 /* Global, SSA-based optimizations using mathematical identities.
2 Copyright (C) 2005-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
  20 /* The first mini-pass in this file tries to CSE reciprocal
21 operations. These are common in sequences such as this one:
23 modulus = sqrt(x*x + y*y + z*z);
24 x = x / modulus;
25 y = y / modulus;
26 z = z / modulus;
28 that can be optimized to
30 modulus = sqrt(x*x + y*y + z*z);
31 rmodulus = 1.0 / modulus;
32 x = x * rmodulus;
33 y = y * rmodulus;
34 z = z * rmodulus;
36 We do this for loop invariant divisors, and with this pass whenever
37 we notice that a division has the same divisor multiple times.
39 Of course, like in PRE, we don't insert a division if a dominator
40 already has one. However, this cannot be done as an extension of
41 PRE for several reasons.
  43    First of all, experiments showed that the
44 transformation is not always useful if there are only two divisions
  45    by the same divisor. This is probably because modern processors
46 can pipeline the divisions; on older, in-order processors it should
47 still be effective to optimize two divisions by the same number.
48 We make this a param, and it shall be called N in the remainder of
49 this comment.
51 Second, if trapping math is active, we have less freedom on where
52 to insert divisions: we can only do so in basic blocks that already
  53    contain one. (If divisions don't trap, we can instead insert them
  54    elsewhere, in blocks that are common dominators
  55    of those that have the division.)
57 We really don't want to compute the reciprocal unless a division will
58 be found. To do this, we won't insert the division in a basic block
59 that has less than N divisions *post-dominating* it.
  61    The algorithm constructs a subset of the dominator tree, holding the
  62    blocks containing the divisions and their common dominators,
  63    and walks it twice. The first walk is in post-order, and it annotates
64 each block with the number of divisions that post-dominate it: this
65 gives information on where divisions can be inserted profitably.
66 The second walk is in pre-order, and it inserts divisions as explained
67 above, and replaces divisions by multiplications.
69 In the best case, the cost of the pass is O(n_statements). In the
70 worst-case, the cost is due to creating the dominator tree subset,
71 with a cost of O(n_basic_blocks ^ 2); however this can only happen
72 for n_statements / n_basic_blocks statements. So, the amortized cost
73 of creating the dominator tree subset is O(n_basic_blocks) and the
74 worst-case cost of the pass is O(n_statements * n_basic_blocks).
76 More practically, the cost will be small because there are few
77 divisions, and they tend to be in the same basic block, so insert_bb
78 is called very few times.
80 If we did this using domwalk.c, an efficient implementation would have
81 to work on all the variables in a single pass, because we could not
82 work on just a subset of the dominator tree, as we do now, and the
83 cost would also be something like O(n_statements * n_basic_blocks).
84 The data structures would be more complex in order to work on all the
85 variables in a single pass. */
87 #include "config.h"
88 #include "system.h"
89 #include "coretypes.h"
90 #include "tm.h"
91 #include "flags.h"
92 #include "input.h"
93 #include "alias.h"
94 #include "symtab.h"
95 #include "tree.h"
96 #include "fold-const.h"
97 #include "predict.h"
98 #include "hard-reg-set.h"
99 #include "function.h"
100 #include "dominance.h"
101 #include "cfg.h"
102 #include "basic-block.h"
103 #include "tree-ssa-alias.h"
104 #include "internal-fn.h"
105 #include "gimple-fold.h"
106 #include "gimple-expr.h"
107 #include "is-a.h"
108 #include "gimple.h"
109 #include "gimple-iterator.h"
110 #include "gimplify.h"
111 #include "gimplify-me.h"
112 #include "stor-layout.h"
113 #include "gimple-ssa.h"
114 #include "tree-cfg.h"
115 #include "tree-phinodes.h"
116 #include "ssa-iterators.h"
117 #include "stringpool.h"
118 #include "tree-ssanames.h"
119 #include "rtl.h"
120 #include "insn-config.h"
121 #include "expmed.h"
122 #include "dojump.h"
123 #include "explow.h"
124 #include "calls.h"
125 #include "emit-rtl.h"
126 #include "varasm.h"
127 #include "stmt.h"
128 #include "expr.h"
129 #include "tree-dfa.h"
130 #include "tree-ssa.h"
131 #include "tree-pass.h"
132 #include "alloc-pool.h"
133 #include "target.h"
134 #include "gimple-pretty-print.h"
135 #include "builtins.h"
136 #include "params.h"
138 /* FIXME: RTL headers have to be included here for optabs. */
139 #include "rtl.h" /* Because optabs.h wants enum rtx_code. */
140 #include "expr.h" /* Because optabs.h wants sepops. */
141 #include "insn-codes.h"
142 #include "optabs.h"
144 /* This structure represents one basic block that either computes a
 145    division, or is a common dominator for basic blocks that compute a
146 division. */
147 struct occurrence {
148 /* The basic block represented by this structure. */
149 basic_block bb;
151 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
152 inserted in BB. */
153 tree recip_def;
155 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
156 was inserted in BB. */
157 gimple recip_def_stmt;
159 /* Pointer to a list of "struct occurrence"s for blocks dominated
160 by BB. */
161 struct occurrence *children;
 163   /* Pointer to the next "struct occurrence" in the list of blocks
164 sharing a common dominator. */
165 struct occurrence *next;
167 /* The number of divisions that are in BB before compute_merit. The
168 number of divisions that are in BB or post-dominate it after
169 compute_merit. */
170 int num_divisions;
172 /* True if the basic block has a division, false if it is a common
173 dominator for basic blocks that do. If it is false and trapping
174 math is active, BB is not a candidate for inserting a reciprocal. */
175 bool bb_has_division;
178 static struct
180 /* Number of 1.0/X ops inserted. */
181 int rdivs_inserted;
183 /* Number of 1.0/FUNC ops inserted. */
184 int rfuncs_inserted;
185 } reciprocal_stats;
187 static struct
189 /* Number of cexpi calls inserted. */
190 int inserted;
191 } sincos_stats;
193 static struct
195 /* Number of hand-written 16-bit nop / bswaps found. */
196 int found_16bit;
198 /* Number of hand-written 32-bit nop / bswaps found. */
199 int found_32bit;
201 /* Number of hand-written 64-bit nop / bswaps found. */
202 int found_64bit;
203 } nop_stats, bswap_stats;
205 static struct
207 /* Number of widening multiplication ops inserted. */
208 int widen_mults_inserted;
210 /* Number of integer multiply-and-accumulate ops inserted. */
211 int maccs_inserted;
213 /* Number of fp fused multiply-add ops inserted. */
214 int fmas_inserted;
215 } widen_mul_stats;
217 /* The instance of "struct occurrence" representing the highest
218 interesting block in the dominator tree. */
219 static struct occurrence *occ_head;
221 /* Allocation pool for getting instances of "struct occurrence". */
222 static pool_allocator<occurrence> *occ_pool;
 226 /* Allocate and return a new struct occurrence for basic block BB,
227 whose children list is headed by CHILDREN. */
228 static struct occurrence *
229 occ_new (basic_block bb, struct occurrence *children)
231 struct occurrence *occ;
233 bb->aux = occ = occ_pool->allocate ();
234 memset (occ, 0, sizeof (struct occurrence));
236 occ->bb = bb;
237 occ->children = children;
238 return occ;
242 /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
243 list of "struct occurrence"s, one per basic block, having IDOM as
244 their common dominator.
246 We try to insert NEW_OCC as deep as possible in the tree, and we also
247 insert any other block that is a common dominator for BB and one
248 block already in the tree. */
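/* For example, if divisions by the same SSA name appear in blocks B1 and
   B2 whose nearest common dominator is D, the subset ends up with an
   occurrence for D whose children list holds the occurrences for B1 and
   B2 (a sketch of the structure built here, not a statement about the
   exact insertion order).  */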
250 static void
251 insert_bb (struct occurrence *new_occ, basic_block idom,
252 struct occurrence **p_head)
254 struct occurrence *occ, **p_occ;
256 for (p_occ = p_head; (occ = *p_occ) != NULL; )
258 basic_block bb = new_occ->bb, occ_bb = occ->bb;
259 basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
260 if (dom == bb)
262 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
263 from its list. */
264 *p_occ = occ->next;
265 occ->next = new_occ->children;
266 new_occ->children = occ;
268 /* Try the next block (it may as well be dominated by BB). */
271 else if (dom == occ_bb)
273 /* OCC_BB dominates BB. Tail recurse to look deeper. */
274 insert_bb (new_occ, dom, &occ->children);
275 return;
278 else if (dom != idom)
280 gcc_assert (!dom->aux);
282 /* There is a dominator between IDOM and BB, add it and make
283 two children out of NEW_OCC and OCC. First, remove OCC from
284 its list. */
285 *p_occ = occ->next;
286 new_occ->next = occ;
287 occ->next = NULL;
289 /* None of the previous blocks has DOM as a dominator: if we tail
290 recursed, we would reexamine them uselessly. Just switch BB with
291 DOM, and go on looking for blocks dominated by DOM. */
292 new_occ = occ_new (dom, new_occ);
295 else
297 /* Nothing special, go on with the next element. */
298 p_occ = &occ->next;
302 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
303 new_occ->next = *p_head;
304 *p_head = new_occ;
307 /* Register that we found a division in BB. */
309 static inline void
310 register_division_in (basic_block bb)
312 struct occurrence *occ;
314 occ = (struct occurrence *) bb->aux;
315 if (!occ)
317 occ = occ_new (bb, NULL);
318 insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
321 occ->bb_has_division = true;
322 occ->num_divisions++;
326 /* Compute the number of divisions that postdominate each block in OCC and
327 its children. */
329 static void
330 compute_merit (struct occurrence *occ)
332 struct occurrence *occ_child;
333 basic_block dom = occ->bb;
335 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
337 basic_block bb;
338 if (occ_child->children)
339 compute_merit (occ_child);
341 if (flag_exceptions)
342 bb = single_noncomplex_succ (dom);
343 else
344 bb = dom;
346 if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
347 occ->num_divisions += occ_child->num_divisions;
352 /* Return whether USE_STMT is a floating-point division by DEF. */
353 static inline bool
354 is_division_by (gimple use_stmt, tree def)
356 return is_gimple_assign (use_stmt)
357 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
358 && gimple_assign_rhs2 (use_stmt) == def
359 /* Do not recognize x / x as valid division, as we are getting
 360             confused later by replacing all immediate uses of x in such
361 a stmt. */
362 && gimple_assign_rhs1 (use_stmt) != def;
365 /* Walk the subset of the dominator tree rooted at OCC, setting the
366 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
367 the given basic block. The field may be left NULL, of course,
368 if it is not possible or profitable to do the optimization.
 370    DEF_GSI is an iterator pointing at the statement defining DEF.
371 If RECIP_DEF is set, a dominator already has a computation that can
372 be used. */
374 static void
375 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
376 tree def, tree recip_def, int threshold)
378 tree type;
379 gassign *new_stmt;
380 gimple_stmt_iterator gsi;
381 struct occurrence *occ_child;
383 if (!recip_def
384 && (occ->bb_has_division || !flag_trapping_math)
385 && occ->num_divisions >= threshold)
387 /* Make a variable with the replacement and substitute it. */
388 type = TREE_TYPE (def);
389 recip_def = create_tmp_reg (type, "reciptmp");
390 new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
391 build_one_cst (type), def);
393 if (occ->bb_has_division)
395 /* Case 1: insert before an existing division. */
396 gsi = gsi_after_labels (occ->bb);
397 while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
398 gsi_next (&gsi);
400 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
402 else if (def_gsi && occ->bb == def_gsi->bb)
404 /* Case 2: insert right after the definition. Note that this will
405 never happen if the definition statement can throw, because in
406 that case the sole successor of the statement's basic block will
407 dominate all the uses as well. */
408 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
410 else
412 /* Case 3: insert in a basic block not containing defs/uses. */
413 gsi = gsi_after_labels (occ->bb);
414 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
417 reciprocal_stats.rdivs_inserted++;
419 occ->recip_def_stmt = new_stmt;
422 occ->recip_def = recip_def;
423 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
424 insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold);
428 /* Replace the division at USE_P with a multiplication by the reciprocal, if
429 possible. */
431 static inline void
432 replace_reciprocal (use_operand_p use_p)
434 gimple use_stmt = USE_STMT (use_p);
435 basic_block bb = gimple_bb (use_stmt);
436 struct occurrence *occ = (struct occurrence *) bb->aux;
438 if (optimize_bb_for_speed_p (bb)
439 && occ->recip_def && use_stmt != occ->recip_def_stmt)
441 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
442 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
443 SET_USE (use_p, occ->recip_def);
444 fold_stmt_inplace (&gsi);
445 update_stmt (use_stmt);
450 /* Free OCC and return one more "struct occurrence" to be freed. */
452 static struct occurrence *
453 free_bb (struct occurrence *occ)
455 struct occurrence *child, *next;
457 /* First get the two pointers hanging off OCC. */
458 next = occ->next;
459 child = occ->children;
460 occ->bb->aux = NULL;
461 occ_pool->remove (occ);
463 /* Now ensure that we don't recurse unless it is necessary. */
464 if (!child)
465 return next;
466 else
468 while (next)
469 next = free_bb (next);
471 return child;
476 /* Look for floating-point divisions among DEF's uses, and try to
477 replace them by multiplications with the reciprocal. Add
478 as many statements computing the reciprocal as needed.
480 DEF must be a GIMPLE register of a floating-point type. */
482 static void
483 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
485 use_operand_p use_p;
486 imm_use_iterator use_iter;
487 struct occurrence *occ;
488 int count = 0, threshold;
490 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));
492 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
494 gimple use_stmt = USE_STMT (use_p);
495 if (is_division_by (use_stmt, def))
497 register_division_in (gimple_bb (use_stmt));
498 count++;
502 /* Do the expensive part only if we can hope to optimize something. */
503 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
504 if (count >= threshold)
506 gimple use_stmt;
507 for (occ = occ_head; occ; occ = occ->next)
509 compute_merit (occ);
510 insert_reciprocals (def_gsi, occ, def, NULL, threshold);
513 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
515 if (is_division_by (use_stmt, def))
517 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
518 replace_reciprocal (use_p);
523 for (occ = occ_head; occ; )
524 occ = free_bb (occ);
526 occ_head = NULL;
529 /* Go through all the floating-point SSA_NAMEs, and call
530 execute_cse_reciprocals_1 on each of them. */
531 namespace {
533 const pass_data pass_data_cse_reciprocals =
535 GIMPLE_PASS, /* type */
536 "recip", /* name */
537 OPTGROUP_NONE, /* optinfo_flags */
538 TV_NONE, /* tv_id */
539 PROP_ssa, /* properties_required */
540 0, /* properties_provided */
541 0, /* properties_destroyed */
542 0, /* todo_flags_start */
543 TODO_update_ssa, /* todo_flags_finish */
546 class pass_cse_reciprocals : public gimple_opt_pass
548 public:
549 pass_cse_reciprocals (gcc::context *ctxt)
550 : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
553 /* opt_pass methods: */
554 virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
555 virtual unsigned int execute (function *);
557 }; // class pass_cse_reciprocals
559 unsigned int
560 pass_cse_reciprocals::execute (function *fun)
562 basic_block bb;
563 tree arg;
565 occ_pool = new pool_allocator<occurrence>
566 ("dominators for recip", n_basic_blocks_for_fn (fun) / 3 + 1);
568 memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
569 calculate_dominance_info (CDI_DOMINATORS);
570 calculate_dominance_info (CDI_POST_DOMINATORS);
572 #ifdef ENABLE_CHECKING
573 FOR_EACH_BB_FN (bb, fun)
574 gcc_assert (!bb->aux);
575 #endif
577 for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
578 if (FLOAT_TYPE_P (TREE_TYPE (arg))
579 && is_gimple_reg (arg))
581 tree name = ssa_default_def (fun, arg);
582 if (name)
583 execute_cse_reciprocals_1 (NULL, name);
586 FOR_EACH_BB_FN (bb, fun)
588 tree def;
590 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
591 gsi_next (&gsi))
593 gphi *phi = gsi.phi ();
594 def = PHI_RESULT (phi);
595 if (! virtual_operand_p (def)
596 && FLOAT_TYPE_P (TREE_TYPE (def)))
597 execute_cse_reciprocals_1 (NULL, def);
600 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
601 gsi_next (&gsi))
603 gimple stmt = gsi_stmt (gsi);
605 if (gimple_has_lhs (stmt)
606 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
607 && FLOAT_TYPE_P (TREE_TYPE (def))
608 && TREE_CODE (def) == SSA_NAME)
609 execute_cse_reciprocals_1 (&gsi, def);
612 if (optimize_bb_for_size_p (bb))
613 continue;
615 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
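      /* For example, on a target whose targetm.builtin_reciprocal hook
	 returns a reciprocal square-root builtin for sqrt, a / sqrt (b)
	 is rewritten here as a * rsqrt (b) (illustrative name; the actual
	 builtin is target-specific).  */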
616 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
617 gsi_next (&gsi))
619 gimple stmt = gsi_stmt (gsi);
620 tree fndecl;
622 if (is_gimple_assign (stmt)
623 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
625 tree arg1 = gimple_assign_rhs2 (stmt);
626 gimple stmt1;
628 if (TREE_CODE (arg1) != SSA_NAME)
629 continue;
631 stmt1 = SSA_NAME_DEF_STMT (arg1);
633 if (is_gimple_call (stmt1)
634 && gimple_call_lhs (stmt1)
635 && (fndecl = gimple_call_fndecl (stmt1))
636 && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
637 || DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD))
639 enum built_in_function code;
640 bool md_code, fail;
641 imm_use_iterator ui;
642 use_operand_p use_p;
644 code = DECL_FUNCTION_CODE (fndecl);
645 md_code = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD;
647 fndecl = targetm.builtin_reciprocal (code, md_code, false);
648 if (!fndecl)
649 continue;
651 /* Check that all uses of the SSA name are divisions,
652 otherwise replacing the defining statement will do
653 the wrong thing. */
654 fail = false;
655 FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
657 gimple stmt2 = USE_STMT (use_p);
658 if (is_gimple_debug (stmt2))
659 continue;
660 if (!is_gimple_assign (stmt2)
661 || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
662 || gimple_assign_rhs1 (stmt2) == arg1
663 || gimple_assign_rhs2 (stmt2) != arg1)
665 fail = true;
666 break;
669 if (fail)
670 continue;
672 gimple_replace_ssa_lhs (stmt1, arg1);
673 gimple_call_set_fndecl (stmt1, fndecl);
674 update_stmt (stmt1);
675 reciprocal_stats.rfuncs_inserted++;
677 FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
679 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
680 gimple_assign_set_rhs_code (stmt, MULT_EXPR);
681 fold_stmt_inplace (&gsi);
682 update_stmt (stmt);
689 statistics_counter_event (fun, "reciprocal divs inserted",
690 reciprocal_stats.rdivs_inserted);
691 statistics_counter_event (fun, "reciprocal functions inserted",
692 reciprocal_stats.rfuncs_inserted);
694 free_dominance_info (CDI_DOMINATORS);
695 free_dominance_info (CDI_POST_DOMINATORS);
696 delete occ_pool;
697 return 0;
700 } // anon namespace
702 gimple_opt_pass *
703 make_pass_cse_reciprocals (gcc::context *ctxt)
705 return new pass_cse_reciprocals (ctxt);
 708 /* Records an occurrence at statement USE_STMT in the vector of statements
 709    STMTS if it is dominated by *TOP_BB, dominates it, or *TOP_BB
 710    is not yet initialized. Returns true if the occurrence was pushed on
711 the vector. Adjusts *TOP_BB to be the basic block dominating all
712 statements in the vector. */
714 static bool
715 maybe_record_sincos (vec<gimple> *stmts,
716 basic_block *top_bb, gimple use_stmt)
718 basic_block use_bb = gimple_bb (use_stmt);
719 if (*top_bb
720 && (*top_bb == use_bb
721 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
722 stmts->safe_push (use_stmt);
723 else if (!*top_bb
724 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
726 stmts->safe_push (use_stmt);
727 *top_bb = use_bb;
729 else
730 return false;
732 return true;
735 /* Look for sin, cos and cexpi calls with the same argument NAME and
736 create a single call to cexpi CSEing the result in this case.
737 We first walk over all immediate uses of the argument collecting
738 statements that we can CSE in a vector and in a second pass replace
739 the statement rhs with a REALPART or IMAGPART expression on the
740 result of the cexpi call we insert before the use statement that
741 dominates all other candidates. */
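/* As a sketch, a block containing

     s = sin (x);
     c = cos (x);

   is rewritten so that a single

     t = cexpi (x);

   is inserted and the original calls become copies:

     s = IMAGPART_EXPR <t>;
     c = REALPART_EXPR <t>;  */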
743 static bool
744 execute_cse_sincos_1 (tree name)
746 gimple_stmt_iterator gsi;
747 imm_use_iterator use_iter;
748 tree fndecl, res, type;
749 gimple def_stmt, use_stmt, stmt;
750 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
751 auto_vec<gimple> stmts;
752 basic_block top_bb = NULL;
753 int i;
754 bool cfg_changed = false;
756 type = TREE_TYPE (name);
757 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
759 if (gimple_code (use_stmt) != GIMPLE_CALL
760 || !gimple_call_lhs (use_stmt)
761 || !(fndecl = gimple_call_fndecl (use_stmt))
762 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
763 continue;
765 switch (DECL_FUNCTION_CODE (fndecl))
767 CASE_FLT_FN (BUILT_IN_COS):
768 seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
769 break;
771 CASE_FLT_FN (BUILT_IN_SIN):
772 seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
773 break;
775 CASE_FLT_FN (BUILT_IN_CEXPI):
776 seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
777 break;
779 default:;
783 if (seen_cos + seen_sin + seen_cexpi <= 1)
784 return false;
786 /* Simply insert cexpi at the beginning of top_bb but not earlier than
787 the name def statement. */
788 fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
789 if (!fndecl)
790 return false;
791 stmt = gimple_build_call (fndecl, 1, name);
792 res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
793 gimple_call_set_lhs (stmt, res);
795 def_stmt = SSA_NAME_DEF_STMT (name);
796 if (!SSA_NAME_IS_DEFAULT_DEF (name)
797 && gimple_code (def_stmt) != GIMPLE_PHI
798 && gimple_bb (def_stmt) == top_bb)
800 gsi = gsi_for_stmt (def_stmt);
801 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
803 else
805 gsi = gsi_after_labels (top_bb);
806 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
808 sincos_stats.inserted++;
810 /* And adjust the recorded old call sites. */
811 for (i = 0; stmts.iterate (i, &use_stmt); ++i)
813 tree rhs = NULL;
814 fndecl = gimple_call_fndecl (use_stmt);
816 switch (DECL_FUNCTION_CODE (fndecl))
818 CASE_FLT_FN (BUILT_IN_COS):
819 rhs = fold_build1 (REALPART_EXPR, type, res);
820 break;
822 CASE_FLT_FN (BUILT_IN_SIN):
823 rhs = fold_build1 (IMAGPART_EXPR, type, res);
824 break;
826 CASE_FLT_FN (BUILT_IN_CEXPI):
827 rhs = res;
828 break;
830 default:;
831 gcc_unreachable ();
834 /* Replace call with a copy. */
835 stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
837 gsi = gsi_for_stmt (use_stmt);
838 gsi_replace (&gsi, stmt, true);
839 if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
840 cfg_changed = true;
843 return cfg_changed;
846 /* To evaluate powi(x,n), the floating point value x raised to the
847 constant integer exponent n, we use a hybrid algorithm that
848 combines the "window method" with look-up tables. For an
849 introduction to exponentiation algorithms and "addition chains",
850 see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
851 "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
852 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
853 Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
855 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
856 multiplications to inline before calling the system library's pow
857 function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
858 so this default never requires calling pow, powf or powl. */
860 #ifndef POWI_MAX_MULTS
861 #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
862 #endif
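/* Assuming a 64-bit HOST_WIDE_INT, the default above evaluates to 126
   multiplications, which by the 2*bits(n)-2 bound quoted above suffices
   for every representable exponent.  */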
864 /* The size of the "optimal power tree" lookup table. All
865 exponents less than this value are simply looked up in the
866 powi_table below. This threshold is also used to size the
867 cache of pseudo registers that hold intermediate results. */
868 #define POWI_TABLE_SIZE 256
 870 /* The size, in bits, of the window used in the "window method"
871 exponentiation algorithm. This is equivalent to a radix of
872 (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
873 #define POWI_WINDOW_SIZE 3
875 /* The following table is an efficient representation of an
876 "optimal power tree". For each value, i, the corresponding
 877    value, j, in the table states that an optimal evaluation
878 sequence for calculating pow(x,i) can be found by evaluating
879 pow(x,j)*pow(x,i-j). An optimal power tree for the first
880 100 integers is given in Knuth's "Seminumerical algorithms". */
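/* For example, powi_table[7] is 4, i.e. an optimal way to compute
   pow (x, 7) is pow (x, 4) * pow (x, 3); the same table then splits
   pow (x, 4) and pow (x, 3) further, down to x itself.  */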
882 static const unsigned char powi_table[POWI_TABLE_SIZE] =
884 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
885 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
886 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
887 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
888 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
889 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
890 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
891 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
892 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
893 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
894 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
895 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
896 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
897 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
898 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
899 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
900 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
901 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
902 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
903 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
904 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
905 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
906 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
907 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
908 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
909 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
910 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
911 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
912 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
913 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
914 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
915 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
919 /* Return the number of multiplications required to calculate
920 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
921 subroutine of powi_cost. CACHE is an array indicating
922 which exponents have already been calculated. */
924 static int
925 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
927 /* If we've already calculated this exponent, then this evaluation
928 doesn't require any additional multiplications. */
929 if (cache[n])
930 return 0;
932 cache[n] = true;
933 return powi_lookup_cost (n - powi_table[n], cache)
934 + powi_lookup_cost (powi_table[n], cache) + 1;
937 /* Return the number of multiplications required to calculate
938 powi(x,n) for an arbitrary x, given the exponent N. This
939 function needs to be kept in sync with powi_as_mults below. */
941 static int
942 powi_cost (HOST_WIDE_INT n)
944 bool cache[POWI_TABLE_SIZE];
945 unsigned HOST_WIDE_INT digit;
946 unsigned HOST_WIDE_INT val;
947 int result;
949 if (n == 0)
950 return 0;
952 /* Ignore the reciprocal when calculating the cost. */
953 val = (n < 0) ? -n : n;
955 /* Initialize the exponent cache. */
956 memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
957 cache[1] = true;
959 result = 0;
961 while (val >= POWI_TABLE_SIZE)
963 if (val & 1)
965 digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
966 result += powi_lookup_cost (digit, cache)
967 + POWI_WINDOW_SIZE + 1;
968 val >>= POWI_WINDOW_SIZE;
970 else
972 val >>= 1;
973 result++;
977 return result + powi_lookup_cost (val, cache);
980 /* Recursive subroutine of powi_as_mults. This function takes the
981 array, CACHE, of already calculated exponents and an exponent N and
982 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
984 static tree
985 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
986 HOST_WIDE_INT n, tree *cache)
988 tree op0, op1, ssa_target;
989 unsigned HOST_WIDE_INT digit;
990 gassign *mult_stmt;
992 if (n < POWI_TABLE_SIZE && cache[n])
993 return cache[n];
995 ssa_target = make_temp_ssa_name (type, NULL, "powmult");
997 if (n < POWI_TABLE_SIZE)
999 cache[n] = ssa_target;
1000 op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1001 op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1003 else if (n & 1)
1005 digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1006 op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1007 op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1009 else
1011 op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1012 op1 = op0;
1015 mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1016 gimple_set_location (mult_stmt, loc);
1017 gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1019 return ssa_target;
1022 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1023 This function needs to be kept in sync with powi_cost above. */
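/* Worked example (a sketch; the SSA temporary names are illustrative):
   powi (x, 5) expands into three multiplications, since
   powi_table[5] == 3:

     powmult_1 = x * x;                   x**2
     powmult_2 = x * powmult_1;           x**3
     powmult_3 = powmult_1 * powmult_2;   x**5

   which matches powi_cost (5) == 3.  */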
1025 static tree
1026 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1027 tree arg0, HOST_WIDE_INT n)
1029 tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1030 gassign *div_stmt;
1031 tree target;
1033 if (n == 0)
1034 return build_real (type, dconst1);
1036 memset (cache, 0, sizeof (cache));
1037 cache[1] = arg0;
1039 result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
1040 if (n >= 0)
1041 return result;
1043 /* If the original exponent was negative, reciprocate the result. */
1044 target = make_temp_ssa_name (type, NULL, "powmult");
1045 div_stmt = gimple_build_assign (target, RDIV_EXPR,
1046 build_real (type, dconst1), result);
1047 gimple_set_location (div_stmt, loc);
1048 gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1050 return target;
1053 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1054 location info LOC. If the arguments are appropriate, create an
1055 equivalent sequence of statements prior to GSI using an optimal
1056    number of multiplications, and return an expression holding the
1057 result. */
1059 static tree
1060 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1061 tree arg0, HOST_WIDE_INT n)
1063 /* Avoid largest negative number. */
1064 if (n != -n
1065 && ((n >= -1 && n <= 2)
1066 || (optimize_function_for_speed_p (cfun)
1067 && powi_cost (n) <= POWI_MAX_MULTS)))
1068 return powi_as_mults (gsi, loc, arg0, n);
1070 return NULL_TREE;
1073 /* Build a gimple call statement that calls FN with argument ARG.
1074 Set the lhs of the call statement to a fresh SSA name. Insert the
1075 statement prior to GSI's current position, and return the fresh
1076 SSA name. */
1078 static tree
1079 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1080 tree fn, tree arg)
1082 gcall *call_stmt;
1083 tree ssa_target;
1085 call_stmt = gimple_build_call (fn, 1, arg);
1086 ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1087 gimple_set_lhs (call_stmt, ssa_target);
1088 gimple_set_location (call_stmt, loc);
1089 gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1091 return ssa_target;
1094 /* Build a gimple binary operation with the given CODE and arguments
1095    ARG0, ARG1, assigning the result to a new SSA name whose identifier is
1096    based on NAME. Insert the statement prior to GSI's current position, and
1097    return the fresh SSA name. */
1099 static tree
1100 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1101 const char *name, enum tree_code code,
1102 tree arg0, tree arg1)
1104 tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1105 gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1106 gimple_set_location (stmt, loc);
1107 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1108 return result;
1111 /* Build a gimple reference operation with the given CODE and argument
1112 ARG, assigning the result to a new SSA name of TYPE with NAME.
1113 Insert the statement prior to GSI's current position, and return
1114 the fresh SSA name. */
1116 static inline tree
1117 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1118 const char *name, enum tree_code code, tree arg0)
1120 tree result = make_temp_ssa_name (type, NULL, name);
1121 gimple stmt = gimple_build_assign (result, build1 (code, type, arg0));
1122 gimple_set_location (stmt, loc);
1123 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1124 return result;
1127 /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1128 prior to GSI's current position, and return the fresh SSA name. */
1130 static tree
1131 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1132 tree type, tree val)
1134 tree result = make_ssa_name (type);
1135 gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1136 gimple_set_location (stmt, loc);
1137 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1138 return result;
1141 struct pow_synth_sqrt_info
1143 bool *factors;
1144 unsigned int deepest;
1145 unsigned int num_mults;
1148 /* Return true iff the real value C can be represented as a
1149 sum of powers of 0.5 up to N. That is:
1150 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1151 Record in INFO the various parameters of the synthesis algorithm such
1152 as the factors a[i], the maximum 0.5 power and the number of
1153 multiplications that will be required. */
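/* For example, 0.625 == 0.5 + 0.125 == 0.5**1 + 0.5**3, so with C == 0.625
   and N >= 3 this returns true with factors == {1, 0, 1}, deepest == 3 and
   num_mults == 1 (one multiplication joining sqrt (x) with
   sqrt (sqrt (sqrt (x)))).  */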
1155 bool
1156 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1157 struct pow_synth_sqrt_info *info)
1159 REAL_VALUE_TYPE factor = dconsthalf;
1160 REAL_VALUE_TYPE remainder = c;
1162 info->deepest = 0;
1163 info->num_mults = 0;
1164 memset (info->factors, 0, n * sizeof (bool));
1166 for (unsigned i = 0; i < n; i++)
1168 REAL_VALUE_TYPE res;
1170 /* If something inexact happened bail out now. */
1171 if (REAL_ARITHMETIC (res, MINUS_EXPR, remainder, factor))
1172 return false;
1174 /* We have hit zero. The number is representable as a sum
1175 of powers of 0.5. */
1176 if (REAL_VALUES_EQUAL (res, dconst0))
1178 info->factors[i] = true;
1179 info->deepest = i + 1;
1180 return true;
1182 else if (!REAL_VALUE_NEGATIVE (res))
1184 remainder = res;
1185 info->factors[i] = true;
1186 info->num_mults++;
1188 else
1189 info->factors[i] = false;
1191 REAL_ARITHMETIC (factor, MULT_EXPR, factor, dconsthalf);
1193 return false;
1196 /* Return the tree corresponding to FN being applied
1197 to ARG N times at GSI and LOC.
1198 Look up previous results from CACHE if need be.
1199 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1201 static tree
1202 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1203 tree fn, location_t loc, tree *cache)
1205 tree res = cache[n];
1206 if (!res)
1208 tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1209 res = build_and_insert_call (gsi, loc, fn, prev);
1210 cache[n] = res;
1213 return res;
1216 /* Print to STREAM the repeated application of function FNAME to ARG
1217 N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1218 "foo (foo (x))". */
1220 static void
1221 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1222 unsigned int n)
1224 if (n == 0)
1225 fprintf (stream, "%s", arg);
1226 else
1228 fprintf (stream, "%s (", fname);
1229 print_nested_fn (stream, fname, arg, n - 1);
1230 fprintf (stream, ")");
1234 /* Print to STREAM the fractional sequence of sqrt chains
1235 applied to ARG, described by INFO. Used for the dump file. */
1237 static void
1238 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1239 struct pow_synth_sqrt_info *info)
1241 for (unsigned int i = 0; i < info->deepest; i++)
1243 bool is_set = info->factors[i];
1244 if (is_set)
1246 print_nested_fn (stream, "sqrt", arg, i + 1);
1247 if (i != info->deepest - 1)
1248 fprintf (stream, " * ");
1253 /* Print to STREAM a representation of raising ARG to an integer
1254 power N. Used for the dump file. */
1256 static void
1257 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1259 if (n > 1)
1260 fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1261 else if (n == 1)
1262 fprintf (stream, "%s", arg);
1265 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1266 square roots. Place at GSI and LOC. Limit the maximum depth
1267 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1268 result of the expanded sequence or NULL_TREE if the expansion failed.
1270 This routine assumes that ARG1 is a real number with a fractional part
1271 (the integer exponent case will have been handled earlier in
1272 gimple_expand_builtin_pow).
1274 For ARG1 > 0.0:
1275 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1276 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1277 FRAC_PART == ARG1 - WHOLE_PART:
1278 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1279 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1280 if it can be expressed as such, that is if FRAC_PART satisfies:
1281 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1282 where integer a[i] is either 0 or 1.
1284 Example:
1285 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1286 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1288 For ARG1 < 0.0 there are two approaches:
1289 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1290 is calculated as above.
1292 Example:
1293 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1294 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1296 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1297 FRAC_PART := ARG1 - WHOLE_PART
1298 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1299 Example:
1300 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1301 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1303 For ARG1 < 0.0 we choose between (A) and (B) depending on
1304 how many multiplications we'd have to do.
1305 So, for the example in (B): POW (x, -5.875), if we were to
1306 follow algorithm (A) we would produce:
1307 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1308 which contains more multiplications than approach (B).
1310 Hopefully, this approach will eliminate potentially expensive POW library
1311 calls when unsafe floating point math is enabled and allow the compiler to
1312 further optimise the multiplies, square roots and divides produced by this
1313 function. */
1315 static tree
1316 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1317 tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1319 tree type = TREE_TYPE (arg0);
1320 machine_mode mode = TYPE_MODE (type);
1321 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1322 bool one_over = true;
1324 if (!sqrtfn)
1325 return NULL_TREE;
1327 if (TREE_CODE (arg1) != REAL_CST)
1328 return NULL_TREE;
1330 REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1332 gcc_assert (max_depth > 0);
1333 tree *cache = XALLOCAVEC (tree, max_depth + 1);
1335 struct pow_synth_sqrt_info synth_info;
1336 synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1337 synth_info.deepest = 0;
1338 synth_info.num_mults = 0;
1340 bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1341 REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1343 /* The whole and fractional parts of exp. */
1344 REAL_VALUE_TYPE whole_part;
1345 REAL_VALUE_TYPE frac_part;
1347 real_floor (&whole_part, mode, &exp);
1348 REAL_ARITHMETIC (frac_part, MINUS_EXPR, exp, whole_part);
1351 REAL_VALUE_TYPE ceil_whole = dconst0;
1352 REAL_VALUE_TYPE ceil_fract = dconst0;
1354 if (neg_exp)
1356 real_ceil (&ceil_whole, mode, &exp);
1357 REAL_ARITHMETIC (ceil_fract, MINUS_EXPR, ceil_whole, exp);
1360 if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1361 return NULL_TREE;
1363 /* Check whether it's more profitable to not use 1.0 / ... */
1364 if (neg_exp)
1366 struct pow_synth_sqrt_info alt_synth_info;
1367 alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1368 alt_synth_info.deepest = 0;
1369 alt_synth_info.num_mults = 0;
1371 if (representable_as_half_series_p (ceil_fract, max_depth,
1372 &alt_synth_info)
1373 && alt_synth_info.deepest <= synth_info.deepest
1374 && alt_synth_info.num_mults < synth_info.num_mults)
1376 whole_part = ceil_whole;
1377 frac_part = ceil_fract;
1378 synth_info.deepest = alt_synth_info.deepest;
1379 synth_info.num_mults = alt_synth_info.num_mults;
1380 memcpy (synth_info.factors, alt_synth_info.factors,
1381 (max_depth + 1) * sizeof (bool));
1382 one_over = false;
1386 HOST_WIDE_INT n = real_to_integer (&whole_part);
1387 REAL_VALUE_TYPE cint;
1388 real_from_integer (&cint, VOIDmode, n, SIGNED);
1390 if (!real_identical (&whole_part, &cint))
1391 return NULL_TREE;
1393 if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1394 return NULL_TREE;
1396 memset (cache, 0, (max_depth + 1) * sizeof (tree));
1398 tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1400 /* Calculate the integer part of the exponent. */
1401 if (n > 1)
1403 integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1404 if (!integer_res)
1405 return NULL_TREE;
1408 if (dump_file)
1410 char string[64];
1412 real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1413 fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1415 if (neg_exp)
1417 if (one_over)
1419 fprintf (dump_file, "1.0 / (");
1420 dump_integer_part (dump_file, "x", n);
1421 if (n > 0)
1422 fprintf (dump_file, " * ");
1423 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1424 fprintf (dump_file, ")");
1426 else
1428 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1429 fprintf (dump_file, " / (");
1430 dump_integer_part (dump_file, "x", n);
1431 fprintf (dump_file, ")");
1434 else
1436 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1437 if (n > 0)
1438 fprintf (dump_file, " * ");
1439 dump_integer_part (dump_file, "x", n);
1442 fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1446 tree fract_res = NULL_TREE;
1447 cache[0] = arg0;
1449 /* Calculate the fractional part of the exponent. */
1450 for (unsigned i = 0; i < synth_info.deepest; i++)
1452 if (synth_info.factors[i])
1454 tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1456 if (!fract_res)
1457 fract_res = sqrt_chain;
1459 else
1460 fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1461 fract_res, sqrt_chain);
1465 tree res = NULL_TREE;
1467 if (neg_exp)
1469 if (one_over)
1471 if (n > 0)
1472 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1473 fract_res, integer_res);
1474 else
1475 res = fract_res;
1477 res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1478 build_real (type, dconst1), res);
1480 else
1482 res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1483 fract_res, integer_res);
1486 else
1487 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1488 fract_res, integer_res);
1489 return res;
1492 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1493 with location info LOC. If possible, create an equivalent and
1494 less expensive sequence of statements prior to GSI, and return an
1495    expression holding the result. */
1497 static tree
1498 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
1499 tree arg0, tree arg1)
1501 REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
1502 REAL_VALUE_TYPE c2, dconst3;
1503 HOST_WIDE_INT n;
1504 tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
1505 machine_mode mode;
1506 bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
1507 bool hw_sqrt_exists, c_is_int, c2_is_int;
1509 dconst1_4 = dconst1;
1510 SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
1512 /* If the exponent isn't a constant, there's nothing of interest
1513 to be done. */
1514 if (TREE_CODE (arg1) != REAL_CST)
1515 return NULL_TREE;
1517 /* If the exponent is equivalent to an integer, expand to an optimal
1518 multiplication sequence when profitable. */
1519 c = TREE_REAL_CST (arg1);
1520 n = real_to_integer (&c);
1521 real_from_integer (&cint, VOIDmode, n, SIGNED);
1522 c_is_int = real_identical (&c, &cint);
1524 if (c_is_int
1525 && ((n >= -1 && n <= 2)
1526 || (flag_unsafe_math_optimizations
1527 && speed_p
1528 && powi_cost (n) <= POWI_MAX_MULTS)))
1529 return gimple_expand_builtin_powi (gsi, loc, arg0, n);
1531 /* Attempt various optimizations using sqrt and cbrt. */
1532 type = TREE_TYPE (arg0);
1533 mode = TYPE_MODE (type);
1534 sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1536 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
1537 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
1538 sqrt(-0) = -0. */
1539 if (sqrtfn
1540 && REAL_VALUES_EQUAL (c, dconsthalf)
1541 && !HONOR_SIGNED_ZEROS (mode))
1542 return build_and_insert_call (gsi, loc, sqrtfn, arg0);
1544 hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
1546 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
1547 optimizations since 1./3. is not exactly representable. If x
1548 is negative and finite, the correct value of pow(x,1./3.) is
1549 a NaN with the "invalid" exception raised, because the value
1550 of 1./3. actually has an even denominator. The correct value
1551 of cbrt(x) is a negative real value. */
1552 cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
1553 dconst1_3 = real_value_truncate (mode, dconst_third ());
1555 if (flag_unsafe_math_optimizations
1556 && cbrtfn
1557 && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
1558 && REAL_VALUES_EQUAL (c, dconst1_3))
1559 return build_and_insert_call (gsi, loc, cbrtfn, arg0);
1561 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
1562 if we don't have a hardware sqrt insn. */
1563 dconst1_6 = dconst1_3;
1564 SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
1566 if (flag_unsafe_math_optimizations
1567 && sqrtfn
1568 && cbrtfn
1569 && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
1570 && speed_p
1571 && hw_sqrt_exists
1572 && REAL_VALUES_EQUAL (c, dconst1_6))
1574 /* sqrt(x) */
1575 sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
1577 /* cbrt(sqrt(x)) */
1578 return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
1582 /* Attempt to expand the POW as a product of square root chains.
1583      Expand the 0.25 case even when optimising for size. */
1584 if (flag_unsafe_math_optimizations
1585 && sqrtfn
1586 && hw_sqrt_exists
1587 && (speed_p || REAL_VALUES_EQUAL (c, dconst1_4))
1588 && !HONOR_SIGNED_ZEROS (mode))
1590 unsigned int max_depth = speed_p
1591 ? PARAM_VALUE (PARAM_MAX_POW_SQRT_DEPTH)
1592 : 2;
1594 tree expand_with_sqrts
1595 = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
1597 if (expand_with_sqrts)
1598 return expand_with_sqrts;
1601 real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
1602 n = real_to_integer (&c2);
1603 real_from_integer (&cint, VOIDmode, n, SIGNED);
1604 c2_is_int = real_identical (&c2, &cint);
1606 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
1608 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
1609 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
1611 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
1612 different from pow(x, 1./3.) due to rounding and behavior with
1613 negative x, we need to constrain this transformation to unsafe
1614 math and positive x or finite math. */
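  /* For instance, pow (x, 5./3.) has n == 5 and becomes
     powi (x, 1) * powi (cbrt (x), 2), i.e. x * cbrt (x) * cbrt (x).  */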
1615 real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
1616 real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
1617 real_round (&c2, mode, &c2);
1618 n = real_to_integer (&c2);
1619 real_from_integer (&cint, VOIDmode, n, SIGNED);
1620 real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
1621 real_convert (&c2, mode, &c2);
1623 if (flag_unsafe_math_optimizations
1624 && cbrtfn
1625 && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
1626 && real_identical (&c2, &c)
1627 && !c2_is_int
1628 && optimize_function_for_speed_p (cfun)
1629 && powi_cost (n / 3) <= POWI_MAX_MULTS)
1631 tree powi_x_ndiv3 = NULL_TREE;
1633 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
1634 possible or profitable, give up. Skip the degenerate case when
1635 abs(n) < 3, where the result is always 1. */
1636 if (absu_hwi (n) >= 3)
1638 powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
1639 abs_hwi (n / 3));
1640 if (!powi_x_ndiv3)
1641 return NULL_TREE;
1644 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
1645 as that creates an unnecessary variable. Instead, just produce
1646 either cbrt(x) or cbrt(x) * cbrt(x). */
1647 cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
1649 if (absu_hwi (n) % 3 == 1)
1650 powi_cbrt_x = cbrt_x;
1651 else
1652 powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1653 cbrt_x, cbrt_x);
1655 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
1656 if (absu_hwi (n) < 3)
1657 result = powi_cbrt_x;
1658 else
1659 result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1660 powi_x_ndiv3, powi_cbrt_x);
1662 /* If n is negative, reciprocate the result. */
1663 if (n < 0)
1664 result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1665 build_real (type, dconst1), result);
1667 return result;
1670 /* No optimizations succeeded. */
1671 return NULL_TREE;
1674 /* ARG is the argument to a cabs builtin call in GSI with location info
1675 LOC. Create a sequence of statements prior to GSI that calculates
1676 sqrt(R*R + I*I), where R and I are the real and imaginary components
1677 of ARG, respectively. Return an expression holding the result. */
1679 static tree
1680 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
1682 tree real_part, imag_part, addend1, addend2, sum, result;
1683 tree type = TREE_TYPE (TREE_TYPE (arg));
1684 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1685 machine_mode mode = TYPE_MODE (type);
1687 if (!flag_unsafe_math_optimizations
1688 || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
1689 || !sqrtfn
1690 || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
1691 return NULL_TREE;
1693 real_part = build_and_insert_ref (gsi, loc, type, "cabs",
1694 REALPART_EXPR, arg);
1695 addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1696 real_part, real_part);
1697 imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
1698 IMAGPART_EXPR, arg);
1699 addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1700 imag_part, imag_part);
1701 sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
1702 result = build_and_insert_call (gsi, loc, sqrtfn, sum);
1704 return result;
1707 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
1708 on the SSA_NAME argument of each of them. Also expand powi(x,n) into
1709 an optimal number of multiplies, when n is a constant. */
1711 namespace {
1713 const pass_data pass_data_cse_sincos =
1715 GIMPLE_PASS, /* type */
1716 "sincos", /* name */
1717 OPTGROUP_NONE, /* optinfo_flags */
1718 TV_NONE, /* tv_id */
1719 PROP_ssa, /* properties_required */
1720 0, /* properties_provided */
1721 0, /* properties_destroyed */
1722 0, /* todo_flags_start */
1723 TODO_update_ssa, /* todo_flags_finish */
1726 class pass_cse_sincos : public gimple_opt_pass
1728 public:
1729 pass_cse_sincos (gcc::context *ctxt)
1730 : gimple_opt_pass (pass_data_cse_sincos, ctxt)
1733 /* opt_pass methods: */
1734 virtual bool gate (function *)
1736 /* We no longer require either sincos or cexp, since powi expansion
1737 piggybacks on this pass. */
1738 return optimize;
1741 virtual unsigned int execute (function *);
1743 }; // class pass_cse_sincos
1745 unsigned int
1746 pass_cse_sincos::execute (function *fun)
1748 basic_block bb;
1749 bool cfg_changed = false;
1751 calculate_dominance_info (CDI_DOMINATORS);
1752 memset (&sincos_stats, 0, sizeof (sincos_stats));
1754 FOR_EACH_BB_FN (bb, fun)
1756 gimple_stmt_iterator gsi;
1757 bool cleanup_eh = false;
1759 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1761 gimple stmt = gsi_stmt (gsi);
1762 tree fndecl;
1764          /* Only the last stmt in a bb could throw; no need to call
1765 gimple_purge_dead_eh_edges if we change something in the middle
1766 of a basic block. */
1767 cleanup_eh = false;
1769 if (is_gimple_call (stmt)
1770 && gimple_call_lhs (stmt)
1771 && (fndecl = gimple_call_fndecl (stmt))
1772 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
1774 tree arg, arg0, arg1, result;
1775 HOST_WIDE_INT n;
1776 location_t loc;
1778 switch (DECL_FUNCTION_CODE (fndecl))
1780 CASE_FLT_FN (BUILT_IN_COS):
1781 CASE_FLT_FN (BUILT_IN_SIN):
1782 CASE_FLT_FN (BUILT_IN_CEXPI):
1783 /* Make sure we have either sincos or cexp. */
1784 if (!targetm.libc_has_function (function_c99_math_complex)
1785 && !targetm.libc_has_function (function_sincos))
1786 break;
1788 arg = gimple_call_arg (stmt, 0);
1789 if (TREE_CODE (arg) == SSA_NAME)
1790 cfg_changed |= execute_cse_sincos_1 (arg);
1791 break;
1793 CASE_FLT_FN (BUILT_IN_POW):
1794 arg0 = gimple_call_arg (stmt, 0);
1795 arg1 = gimple_call_arg (stmt, 1);
1797 loc = gimple_location (stmt);
1798 result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
1800 if (result)
1802 tree lhs = gimple_get_lhs (stmt);
1803 gassign *new_stmt = gimple_build_assign (lhs, result);
1804 gimple_set_location (new_stmt, loc);
1805 unlink_stmt_vdef (stmt);
1806 gsi_replace (&gsi, new_stmt, true);
1807 cleanup_eh = true;
1808 if (gimple_vdef (stmt))
1809 release_ssa_name (gimple_vdef (stmt));
1811 break;
1813 CASE_FLT_FN (BUILT_IN_POWI):
1814 arg0 = gimple_call_arg (stmt, 0);
1815 arg1 = gimple_call_arg (stmt, 1);
1816 loc = gimple_location (stmt);
1818 if (real_minus_onep (arg0))
1820 tree t0, t1, cond, one, minus_one;
1821 gassign *stmt;
1823 t0 = TREE_TYPE (arg0);
1824 t1 = TREE_TYPE (arg1);
1825 one = build_real (t0, dconst1);
1826 minus_one = build_real (t0, dconstm1);
1828 cond = make_temp_ssa_name (t1, NULL, "powi_cond");
1829 stmt = gimple_build_assign (cond, BIT_AND_EXPR,
1830 arg1, build_int_cst (t1, 1));
1831 gimple_set_location (stmt, loc);
1832 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1834 result = make_temp_ssa_name (t0, NULL, "powi");
1835 stmt = gimple_build_assign (result, COND_EXPR, cond,
1836 minus_one, one);
1837 gimple_set_location (stmt, loc);
1838 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1840 else
1842 if (!tree_fits_shwi_p (arg1))
1843 break;
1845 n = tree_to_shwi (arg1);
1846 result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
1849 if (result)
1851 tree lhs = gimple_get_lhs (stmt);
1852 gassign *new_stmt = gimple_build_assign (lhs, result);
1853 gimple_set_location (new_stmt, loc);
1854 unlink_stmt_vdef (stmt);
1855 gsi_replace (&gsi, new_stmt, true);
1856 cleanup_eh = true;
1857 if (gimple_vdef (stmt))
1858 release_ssa_name (gimple_vdef (stmt));
1860 break;
1862 CASE_FLT_FN (BUILT_IN_CABS):
1863 arg0 = gimple_call_arg (stmt, 0);
1864 loc = gimple_location (stmt);
1865 result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
1867 if (result)
1869 tree lhs = gimple_get_lhs (stmt);
1870 gassign *new_stmt = gimple_build_assign (lhs, result);
1871 gimple_set_location (new_stmt, loc);
1872 unlink_stmt_vdef (stmt);
1873 gsi_replace (&gsi, new_stmt, true);
1874 cleanup_eh = true;
1875 if (gimple_vdef (stmt))
1876 release_ssa_name (gimple_vdef (stmt));
1878 break;
1880 default:;
1884 if (cleanup_eh)
1885 cfg_changed |= gimple_purge_dead_eh_edges (bb);
1888 statistics_counter_event (fun, "sincos statements inserted",
1889 sincos_stats.inserted);
1891 free_dominance_info (CDI_DOMINATORS);
1892 return cfg_changed ? TODO_cleanup_cfg : 0;
1895 } // anon namespace
1897 gimple_opt_pass *
1898 make_pass_cse_sincos (gcc::context *ctxt)
1900 return new pass_cse_sincos (ctxt);
1903 /* A symbolic number is used to detect byte permutation and selection
1904 patterns. Therefore the field N contains an artificial number
1905 consisting of octet sized markers:
1907 0 - target byte has the value 0
1908 FF - target byte has an unknown value (e.g. due to sign extension)
1909 1..size - marker value is the target byte index plus one.
1911 To detect permutations on memory sources (arrays and structures), a symbolic
1912 number is also associated with a base address (the array or structure the load
1913 is made from), an offset from the base address and a range which gives the
1914 difference between the highest and lowest accessed memory location needed to
1915 make up such a symbolic number. The range is thus different from size, which
1916 reflects the size of the type of the current expression. Note that for a non
1917 memory source, range holds the same value as size.
1919 For instance, for an array char a[], (short) a[0] | (short) a[3] would have
1920 a size of 2 but a range of 4 while (short) a[0] | ((short) a[0] << 1) would
1921 still have a size of 2 but this time a range of 1. */
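/* As a hedged illustration (assuming BITS_PER_MARKER == 8 as defined below):
   for a plain 32-bit load the markers read 1, 2, 3, 4 from lowest to highest
   order byte, i.e. N is 0x04030201, while a full byte swap of that load gives
   0x01020304.  These match the CMPNOP and CMPXCHG values defined below once
   reduced to a 4-byte size.  */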
1923 struct symbolic_number {
1924 uint64_t n;
1925 tree type;
1926 tree base_addr;
1927 tree offset;
1928 HOST_WIDE_INT bytepos;
1929 tree alias_set;
1930 tree vuse;
1931 unsigned HOST_WIDE_INT range;
1934 #define BITS_PER_MARKER 8
1935 #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
1936 #define MARKER_BYTE_UNKNOWN MARKER_MASK
1937 #define HEAD_MARKER(n, size) \
1938 ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
1940 /* The number which the find_bswap_or_nop_1 result should match in
1941 order to have a nop. The number is masked according to the size of
1942 the symbolic number before using it. */
1943 #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
1944 (uint64_t)0x08070605 << 32 | 0x04030201)
1946 /* The number which the find_bswap_or_nop_1 result should match in
1947 order to have a byte swap. The number is shifted to the right
1948 according to the size of the symbolic number before using it. */
1949 #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
1950 (uint64_t)0x01020304 << 32 | 0x05060708)
1952 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
1953 number N. Return false if the requested operation is not permitted
1954 on a symbolic number. */
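/* For instance (an illustrative sketch only): with a 4-byte symbolic number
   0x04030201, an LSHIFT_EXPR by 8 bits yields 0x03020100, i.e. the lowest
   target byte becomes known to be zero, mirroring what "x << 8" does to the
   bytes of x.  */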
1956 static inline bool
1957 do_shift_rotate (enum tree_code code,
1958 struct symbolic_number *n,
1959 int count)
1961 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
1962 unsigned head_marker;
1964 if (count % BITS_PER_UNIT != 0)
1965 return false;
1966 count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
1968 /* Zero out the extra bits of N in order to avoid them being shifted
1969 into the significant bits. */
1970 if (size < 64 / BITS_PER_MARKER)
1971 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
1973 switch (code)
1975 case LSHIFT_EXPR:
1976 n->n <<= count;
1977 break;
1978 case RSHIFT_EXPR:
1979 head_marker = HEAD_MARKER (n->n, size);
1980 n->n >>= count;
1981 /* Arithmetic shift of signed type: result is dependent on the value. */
1982 if (!TYPE_UNSIGNED (n->type) && head_marker)
1983 for (i = 0; i < count / BITS_PER_MARKER; i++)
1984 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
1985 << ((size - 1 - i) * BITS_PER_MARKER);
1986 break;
1987 case LROTATE_EXPR:
1988 n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
1989 break;
1990 case RROTATE_EXPR:
1991 n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
1992 break;
1993 default:
1994 return false;
1996 /* Zero unused bits for size. */
1997 if (size < 64 / BITS_PER_MARKER)
1998 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
1999 return true;
2002 /* Perform sanity checking for the symbolic number N and the gimple
2003 statement STMT. */
2005 static inline bool
2006 verify_symbolic_number_p (struct symbolic_number *n, gimple stmt)
2008 tree lhs_type;
2010 lhs_type = gimple_expr_type (stmt);
2012 if (TREE_CODE (lhs_type) != INTEGER_TYPE)
2013 return false;
2015 if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
2016 return false;
2018 return true;
2021 /* Initialize the symbolic number N for the bswap pass from the base element
2022 SRC manipulated by the bitwise OR expression. */
2024 static bool
2025 init_symbolic_number (struct symbolic_number *n, tree src)
2027 int size;
2029 n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
2031 /* Set up the symbolic number N by setting each byte marker to a value between
2032 1 and the byte size of rhs1. The highest order byte is set to that size and
2033 the lowest order byte to 1. */
2034 n->type = TREE_TYPE (src);
2035 size = TYPE_PRECISION (n->type);
2036 if (size % BITS_PER_UNIT != 0)
2037 return false;
2038 size /= BITS_PER_UNIT;
2039 if (size > 64 / BITS_PER_MARKER)
2040 return false;
2041 n->range = size;
2042 n->n = CMPNOP;
2044 if (size < 64 / BITS_PER_MARKER)
2045 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
2047 return true;
2050 /* Check whether STMT might be a byte swap or a nop from a memory source and
2051 return the answer. If so, REF is that memory source; the base of the memory
2052 area accessed and the offset of the access from that base are recorded in N. */
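/* For example (illustrative only): for a load from a[3], where A is an array
   of char, BASE_ADDR records A, N->bytepos is 3 and N->range is the size in
   bytes of the loaded type.  */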
2054 bool
2055 find_bswap_or_nop_load (gimple stmt, tree ref, struct symbolic_number *n)
2057 /* A leaf node is an array or component ref. Memorize its base and
2058 offset from the base to compare to other such leaf nodes. */
2059 HOST_WIDE_INT bitsize, bitpos;
2060 machine_mode mode;
2061 int unsignedp, reversep, volatilep;
2062 tree offset, base_addr;
2064 /* Not prepared to handle PDP endian. */
2065 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
2066 return false;
2068 if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
2069 return false;
2071 base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
2072 &unsignedp, &reversep, &volatilep, false);
2074 if (TREE_CODE (base_addr) == MEM_REF)
2076 offset_int bit_offset = 0;
2077 tree off = TREE_OPERAND (base_addr, 1);
2079 if (!integer_zerop (off))
2081 offset_int boff, coff = mem_ref_offset (base_addr);
2082 boff = wi::lshift (coff, LOG2_BITS_PER_UNIT);
2083 bit_offset += boff;
2086 base_addr = TREE_OPERAND (base_addr, 0);
2088 /* Avoid returning a negative bitpos as this may wreak havoc later. */
2089 if (wi::neg_p (bit_offset))
2091 offset_int mask = wi::mask <offset_int> (LOG2_BITS_PER_UNIT, false);
2092 offset_int tem = bit_offset.and_not (mask);
2093 /* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf.
2094 Subtract it from BIT_OFFSET and add it (scaled) to OFFSET. */
2095 bit_offset -= tem;
2096 tem = wi::arshift (tem, LOG2_BITS_PER_UNIT);
2097 if (offset)
2098 offset = size_binop (PLUS_EXPR, offset,
2099 wide_int_to_tree (sizetype, tem));
2100 else
2101 offset = wide_int_to_tree (sizetype, tem);
2104 bitpos += bit_offset.to_shwi ();
2107 if (bitpos % BITS_PER_UNIT)
2108 return false;
2109 if (bitsize % BITS_PER_UNIT)
2110 return false;
2111 if (reversep)
2112 return false;
2114 if (!init_symbolic_number (n, ref))
2115 return false;
2116 n->base_addr = base_addr;
2117 n->offset = offset;
2118 n->bytepos = bitpos / BITS_PER_UNIT;
2119 n->alias_set = reference_alias_ptr_type (ref);
2120 n->vuse = gimple_vuse (stmt);
2121 return true;
2124 /* Compute the symbolic number N representing the result of a bitwise OR on the
2125 two symbolic numbers N1 and N2 whose source statements are respectively
2126 SOURCE_STMT1 and SOURCE_STMT2. */
2128 static gimple
2129 perform_symbolic_merge (gimple source_stmt1, struct symbolic_number *n1,
2130 gimple source_stmt2, struct symbolic_number *n2,
2131 struct symbolic_number *n)
2133 int i, size;
2134 uint64_t mask;
2135 gimple source_stmt;
2136 struct symbolic_number *n_start;
2138 /* Sources are different, cancel bswap if they are not memory locations with
2139 the same base (array, structure, ...). */
2140 if (gimple_assign_rhs1 (source_stmt1) != gimple_assign_rhs1 (source_stmt2))
2142 int64_t inc;
2143 HOST_WIDE_INT start_sub, end_sub, end1, end2, end;
2144 struct symbolic_number *toinc_n_ptr, *n_end;
2146 if (!n1->base_addr || !n2->base_addr
2147 || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
2148 return NULL;
2150 if (!n1->offset != !n2->offset
2151 || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
2152 return NULL;
2154 if (n1->bytepos < n2->bytepos)
2156 n_start = n1;
2157 start_sub = n2->bytepos - n1->bytepos;
2158 source_stmt = source_stmt1;
2160 else
2162 n_start = n2;
2163 start_sub = n1->bytepos - n2->bytepos;
2164 source_stmt = source_stmt2;
2167 /* Find the highest address at which a load is performed and
2168 compute related info. */
2169 end1 = n1->bytepos + (n1->range - 1);
2170 end2 = n2->bytepos + (n2->range - 1);
2171 if (end1 < end2)
2173 end = end2;
2174 end_sub = end2 - end1;
2176 else
2178 end = end1;
2179 end_sub = end1 - end2;
2181 n_end = (end2 > end1) ? n2 : n1;
2183 /* Find the symbolic number whose lsb is the most significant. */
2184 if (BYTES_BIG_ENDIAN)
2185 toinc_n_ptr = (n_end == n1) ? n2 : n1;
2186 else
2187 toinc_n_ptr = (n_start == n1) ? n2 : n1;
2189 n->range = end - n_start->bytepos + 1;
2191 /* Check that the range of memory covered can be represented by
2192 a symbolic number. */
2193 if (n->range > 64 / BITS_PER_MARKER)
2194 return NULL;
2196 /* Reinterpret the byte marks in the symbolic number holding the value of
2197 bigger weight, according to target endianness. */
2198 inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
2199 size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
2200 for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
2202 unsigned marker
2203 = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
2204 if (marker && marker != MARKER_BYTE_UNKNOWN)
2205 toinc_n_ptr->n += inc;
2208 else
2210 n->range = n1->range;
2211 n_start = n1;
2212 source_stmt = source_stmt1;
2215 if (!n1->alias_set
2216 || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
2217 n->alias_set = n1->alias_set;
2218 else
2219 n->alias_set = ptr_type_node;
2220 n->vuse = n_start->vuse;
2221 n->base_addr = n_start->base_addr;
2222 n->offset = n_start->offset;
2223 n->bytepos = n_start->bytepos;
2224 n->type = n_start->type;
2225 size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2227 for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
2229 uint64_t masked1, masked2;
2231 masked1 = n1->n & mask;
2232 masked2 = n2->n & mask;
2233 if (masked1 && masked2 && masked1 != masked2)
2234 return NULL;
2236 n->n = n1->n | n2->n;
2238 return source_stmt;
2241 /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
2242 the operation given by the rhs of STMT on the result. If the operation
2243 could successfully be executed the function returns a gimple stmt whose
2244 rhs's first tree is the expression of the source operand; otherwise it
2245 returns NULL. */
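/* A hedged example of the kind of expression tree this recursion walks
   (hypothetical source, a 16-bit byte swap of X):

     ((x & 0xff) << 8) | ((x >> 8) & 0xff)

   Each BIT_AND_EXPR, shift and conversion updates the byte markers in N,
   while the BIT_IOR_EXPR at the root merges the two symbolic numbers via
   perform_symbolic_merge.  */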
2247 static gimple
2248 find_bswap_or_nop_1 (gimple stmt, struct symbolic_number *n, int limit)
2250 enum tree_code code;
2251 tree rhs1, rhs2 = NULL;
2252 gimple rhs1_stmt, rhs2_stmt, source_stmt1;
2253 enum gimple_rhs_class rhs_class;
2255 if (!limit || !is_gimple_assign (stmt))
2256 return NULL;
2258 rhs1 = gimple_assign_rhs1 (stmt);
2260 if (find_bswap_or_nop_load (stmt, rhs1, n))
2261 return stmt;
2263 if (TREE_CODE (rhs1) != SSA_NAME)
2264 return NULL;
2266 code = gimple_assign_rhs_code (stmt);
2267 rhs_class = gimple_assign_rhs_class (stmt);
2268 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2270 if (rhs_class == GIMPLE_BINARY_RHS)
2271 rhs2 = gimple_assign_rhs2 (stmt);
2273 /* Handle unary rhs and binary rhs with integer constants as second
2274 operand. */
2276 if (rhs_class == GIMPLE_UNARY_RHS
2277 || (rhs_class == GIMPLE_BINARY_RHS
2278 && TREE_CODE (rhs2) == INTEGER_CST))
2280 if (code != BIT_AND_EXPR
2281 && code != LSHIFT_EXPR
2282 && code != RSHIFT_EXPR
2283 && code != LROTATE_EXPR
2284 && code != RROTATE_EXPR
2285 && !CONVERT_EXPR_CODE_P (code))
2286 return NULL;
2288 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
2290 /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
2291 we have to initialize the symbolic number. */
2292 if (!source_stmt1)
2294 if (gimple_assign_load_p (stmt)
2295 || !init_symbolic_number (n, rhs1))
2296 return NULL;
2297 source_stmt1 = stmt;
2300 switch (code)
2302 case BIT_AND_EXPR:
2304 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2305 uint64_t val = int_cst_value (rhs2), mask = 0;
2306 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
2308 /* Only constants masking full bytes are allowed. */
2309 for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
2310 if ((val & tmp) != 0 && (val & tmp) != tmp)
2311 return NULL;
2312 else if (val & tmp)
2313 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
2315 n->n &= mask;
2317 break;
2318 case LSHIFT_EXPR:
2319 case RSHIFT_EXPR:
2320 case LROTATE_EXPR:
2321 case RROTATE_EXPR:
2322 if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
2323 return NULL;
2324 break;
2325 CASE_CONVERT:
2327 int i, type_size, old_type_size;
2328 tree type;
2330 type = gimple_expr_type (stmt);
2331 type_size = TYPE_PRECISION (type);
2332 if (type_size % BITS_PER_UNIT != 0)
2333 return NULL;
2334 type_size /= BITS_PER_UNIT;
2335 if (type_size > 64 / BITS_PER_MARKER)
2336 return NULL;
2338 /* Sign extension: result is dependent on the value. */
2339 old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2340 if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
2341 && HEAD_MARKER (n->n, old_type_size))
2342 for (i = 0; i < type_size - old_type_size; i++)
2343 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
2344 << ((type_size - 1 - i) * BITS_PER_MARKER);
2346 if (type_size < 64 / BITS_PER_MARKER)
2348 /* If STMT casts to a smaller type mask out the bits not
2349 belonging to the target type. */
2350 n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
2352 n->type = type;
2353 if (!n->base_addr)
2354 n->range = type_size;
2356 break;
2357 default:
2358 return NULL;
2360 return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
2363 /* Handle binary rhs. */
2365 if (rhs_class == GIMPLE_BINARY_RHS)
2367 struct symbolic_number n1, n2;
2368 gimple source_stmt, source_stmt2;
2370 if (code != BIT_IOR_EXPR)
2371 return NULL;
2373 if (TREE_CODE (rhs2) != SSA_NAME)
2374 return NULL;
2376 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2378 switch (code)
2380 case BIT_IOR_EXPR:
2381 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
2383 if (!source_stmt1)
2384 return NULL;
2386 source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
2388 if (!source_stmt2)
2389 return NULL;
2391 if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
2392 return NULL;
2394 if (!n1.vuse != !n2.vuse
2395 || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
2396 return NULL;
2398 source_stmt
2399 = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
2401 if (!source_stmt)
2402 return NULL;
2404 if (!verify_symbolic_number_p (n, stmt))
2405 return NULL;
2407 break;
2408 default:
2409 return NULL;
2411 return source_stmt;
2413 return NULL;
2416 /* Check whether STMT completes a bswap implementation or a read in a given
2417 endianness consisting of ORs, SHIFTs and ANDs and set *BSWAP
2418 accordingly. It also sets N to represent the kind of operations
2419 performed: size of the resulting expression and whether it works on
2420 a memory source, and if so its alias set and vuse. Finally, the
2421 function returns a stmt whose rhs's first tree is the source
2422 expression. */
2424 static gimple
2425 find_bswap_or_nop (gimple stmt, struct symbolic_number *n, bool *bswap)
2427 /* The number which the find_bswap_or_nop_1 result should match in order
2428 to have a full byte swap. The number is shifted to the right
2429 according to the size of the symbolic number before using it. */
2430 uint64_t cmpxchg = CMPXCHG;
2431 uint64_t cmpnop = CMPNOP;
2433 gimple source_stmt;
2434 int limit;
2436 /* The last parameter determines the search depth limit. It usually
2437 correlates directly to the number n of bytes to be touched. We
2438 increase that number by log2(n) + 1 here in order to also
2439 cover signed -> unsigned conversions of the src operand as can be seen
2440 in libgcc, and for the initial shift/and operation of the src operand. */
2441 limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
2442 limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
2443 source_stmt = find_bswap_or_nop_1 (stmt, n, limit);
2445 if (!source_stmt)
2446 return NULL;
2448 /* Find real size of result (highest non-zero byte). */
2449 if (n->base_addr)
2451 int rsize;
2452 uint64_t tmpn;
2454 for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
2455 n->range = rsize;
2458 /* Zero out the extra bits of N and CMP*. */
2459 if (n->range < (int) sizeof (int64_t))
2461 uint64_t mask;
2463 mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
2464 cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
2465 cmpnop &= mask;
2468 /* A complete byte swap should make the symbolic number start with
2469 the largest digit in the highest order byte. An unchanged symbolic
2470 number indicates a read with the same endianness as the target architecture. */
2471 if (n->n == cmpnop)
2472 *bswap = false;
2473 else if (n->n == cmpxchg)
2474 *bswap = true;
2475 else
2476 return NULL;
2478 /* Useless bit manipulation performed by code. */
2479 if (!n->base_addr && n->n == cmpnop)
2480 return NULL;
2482 n->range *= BITS_PER_UNIT;
2483 return source_stmt;
2486 namespace {
2488 const pass_data pass_data_optimize_bswap =
2490 GIMPLE_PASS, /* type */
2491 "bswap", /* name */
2492 OPTGROUP_NONE, /* optinfo_flags */
2493 TV_NONE, /* tv_id */
2494 PROP_ssa, /* properties_required */
2495 0, /* properties_provided */
2496 0, /* properties_destroyed */
2497 0, /* todo_flags_start */
2498 0, /* todo_flags_finish */
2501 class pass_optimize_bswap : public gimple_opt_pass
2503 public:
2504 pass_optimize_bswap (gcc::context *ctxt)
2505 : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
2508 /* opt_pass methods: */
2509 virtual bool gate (function *)
2511 return flag_expensive_optimizations && optimize;
2514 virtual unsigned int execute (function *);
2516 }; // class pass_optimize_bswap
2518 /* Perform the bswap optimization: replace the expression computed in the rhs
2519 of CUR_STMT by an equivalent bswap, load or load + bswap expression.
2520 Which of these alternatives replaces the rhs is given by N->base_addr (non
2521 null if a load is needed) and BSWAP. The type, VUSE and alias set of the
2522 load to perform are also given in N while the builtin bswap to invoke is
2523 given in FNDECL. Finally, if a load is involved, SRC_STMT refers to one of
2524 the load statements used to construct the rhs in CUR_STMT and N->range gives
2525 the size of the rhs expression for maintaining some statistics.
2527 Note that if the replacement involves a load, CUR_STMT is moved just after
2528 SRC_STMT to do the load with the same VUSE, which can lead to CUR_STMT
2529 changing basic block. */
2531 static bool
2532 bswap_replace (gimple cur_stmt, gimple src_stmt, tree fndecl, tree bswap_type,
2533 tree load_type, struct symbolic_number *n, bool bswap)
2535 gimple_stmt_iterator gsi;
2536 tree src, tmp, tgt;
2537 gimple bswap_stmt;
2539 gsi = gsi_for_stmt (cur_stmt);
2540 src = gimple_assign_rhs1 (src_stmt);
2541 tgt = gimple_assign_lhs (cur_stmt);
2543 /* Need to load the value from memory first. */
2544 if (n->base_addr)
2546 gimple_stmt_iterator gsi_ins = gsi_for_stmt (src_stmt);
2547 tree addr_expr, addr_tmp, val_expr, val_tmp;
2548 tree load_offset_ptr, aligned_load_type;
2549 gimple addr_stmt, load_stmt;
2550 unsigned align;
2551 HOST_WIDE_INT load_offset = 0;
2553 align = get_object_alignment (src);
2554 /* If the new access is smaller than the original one, we need
2555 to perform a big-endian adjustment. */
2556 if (BYTES_BIG_ENDIAN)
2558 HOST_WIDE_INT bitsize, bitpos;
2559 machine_mode mode;
2560 int unsignedp, reversep, volatilep;
2561 tree offset;
2563 get_inner_reference (src, &bitsize, &bitpos, &offset, &mode,
2564 &unsignedp, &reversep, &volatilep, false);
2565 if (n->range < (unsigned HOST_WIDE_INT) bitsize)
2567 load_offset = (bitsize - n->range) / BITS_PER_UNIT;
2568 unsigned HOST_WIDE_INT l
2569 = (load_offset * BITS_PER_UNIT) & (align - 1);
2570 if (l)
2571 align = l & -l;
2575 if (bswap
2576 && align < GET_MODE_ALIGNMENT (TYPE_MODE (load_type))
2577 && SLOW_UNALIGNED_ACCESS (TYPE_MODE (load_type), align))
2578 return false;
2580 /* Move cur_stmt just before one of the loads of the original
2581 expression to ensure it has the same VUSE. See PR61517 for what
2582 could go wrong. */
2583 gsi_move_before (&gsi, &gsi_ins);
2584 gsi = gsi_for_stmt (cur_stmt);
2586 /* Compute address to load from and cast according to the size
2587 of the load. */
2588 addr_expr = build_fold_addr_expr (unshare_expr (src));
2589 if (is_gimple_mem_ref_addr (addr_expr))
2590 addr_tmp = addr_expr;
2591 else
2593 addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL,
2594 "load_src");
2595 addr_stmt = gimple_build_assign (addr_tmp, addr_expr);
2596 gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT);
2599 /* Perform the load. */
2600 aligned_load_type = load_type;
2601 if (align < TYPE_ALIGN (load_type))
2602 aligned_load_type = build_aligned_type (load_type, align);
2603 load_offset_ptr = build_int_cst (n->alias_set, load_offset);
2604 val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
2605 load_offset_ptr);
2607 if (!bswap)
2609 if (n->range == 16)
2610 nop_stats.found_16bit++;
2611 else if (n->range == 32)
2612 nop_stats.found_32bit++;
2613 else
2615 gcc_assert (n->range == 64);
2616 nop_stats.found_64bit++;
2619 /* Convert the result of load if necessary. */
2620 if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type))
2622 val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
2623 "load_dst");
2624 load_stmt = gimple_build_assign (val_tmp, val_expr);
2625 gimple_set_vuse (load_stmt, n->vuse);
2626 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
2627 gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
2629 else
2631 gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
2632 gimple_set_vuse (cur_stmt, n->vuse);
2634 update_stmt (cur_stmt);
2636 if (dump_file)
2638 fprintf (dump_file,
2639 "%d bit load in target endianness found at: ",
2640 (int) n->range);
2641 print_gimple_stmt (dump_file, cur_stmt, 0, 0);
2643 return true;
2645 else
2647 val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
2648 load_stmt = gimple_build_assign (val_tmp, val_expr);
2649 gimple_set_vuse (load_stmt, n->vuse);
2650 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
2652 src = val_tmp;
2655 if (n->range == 16)
2656 bswap_stats.found_16bit++;
2657 else if (n->range == 32)
2658 bswap_stats.found_32bit++;
2659 else
2661 gcc_assert (n->range == 64);
2662 bswap_stats.found_64bit++;
2665 tmp = src;
2667 /* Convert the src expression if necessary. */
2668 if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
2670 gimple convert_stmt;
2672 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
2673 convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
2674 gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
2677 /* The canonical form for a 16-bit bswap is a rotate expression. Only 16-bit
2678 values are handled this way, as a rotation of a 2N-bit value by N bits is
2679 generally not equivalent to a bswap. Consider for instance 0x01020304 r>> 16,
2680 which gives 0x03040102 while a bswap of that value is 0x04030201. */
2681 if (bswap && n->range == 16)
2683 tree count = build_int_cst (NULL, BITS_PER_UNIT);
2684 src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
2685 bswap_stmt = gimple_build_assign (NULL, src);
2687 else
2688 bswap_stmt = gimple_build_call (fndecl, 1, tmp);
2690 tmp = tgt;
2692 /* Convert the result if necessary. */
2693 if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
2695 gimple convert_stmt;
2697 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
2698 convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
2699 gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
2702 gimple_set_lhs (bswap_stmt, tmp);
2704 if (dump_file)
2706 fprintf (dump_file, "%d bit bswap implementation found at: ",
2707 (int) n->range);
2708 print_gimple_stmt (dump_file, cur_stmt, 0, 0);
2711 gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
2712 gsi_remove (&gsi, true);
2713 return true;
2716 /* Find manual byte swap implementations as well as loads in a given
2717 endianness. Byte swaps are turned into a bswap builtin invocation
2718 while endian loads are converted to a bswap builtin invocation or a
2719 simple load according to the target endianness. */
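/* As a hedged illustration of the source patterns matched here (the helper
   below is hypothetical, not part of GCC):

     uint32_t
     read_be32 (const unsigned char *p)
     {
       return ((uint32_t) p[0] << 24) | ((uint32_t) p[1] << 16)
	      | ((uint32_t) p[2] << 8) | (uint32_t) p[3];
     }

   On a big-endian target this is recognized as a plain 32-bit load; on a
   little-endian target it becomes a load followed by a call to
   __builtin_bswap32, provided BUILT_IN_BSWAP32 and the bswap optab are
   available.  */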
2721 unsigned int
2722 pass_optimize_bswap::execute (function *fun)
2724 basic_block bb;
2725 bool bswap32_p, bswap64_p;
2726 bool changed = false;
2727 tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
2729 if (BITS_PER_UNIT != 8)
2730 return 0;
2732 bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2733 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
2734 bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2735 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
2736 || (bswap32_p && word_mode == SImode)));
2738 /* Determine the argument type of the builtins. The code later on
2739 assumes that the return and argument type are the same. */
2740 if (bswap32_p)
2742 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
2743 bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
2746 if (bswap64_p)
2748 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
2749 bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
2752 memset (&nop_stats, 0, sizeof (nop_stats));
2753 memset (&bswap_stats, 0, sizeof (bswap_stats));
2755 FOR_EACH_BB_FN (bb, fun)
2757 gimple_stmt_iterator gsi;
2759 /* We do a reverse scan for bswap patterns to make sure we get the
2760 widest match. As bswap pattern matching doesn't handle previously
2761 inserted smaller bswap replacements as sub-patterns, the wider
2762 variant wouldn't be detected. */
2763 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
2765 gimple src_stmt, cur_stmt = gsi_stmt (gsi);
2766 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
2767 enum tree_code code;
2768 struct symbolic_number n;
2769 bool bswap;
2771 /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
2772 might be moved to a different basic block by bswap_replace and gsi
2773 must not point to it if that's the case. Moving the gsi_prev
2774 here makes sure that gsi points to the statement previous to
2775 cur_stmt while still making sure that all statements are
2776 considered in this basic block. */
2777 gsi_prev (&gsi);
2779 if (!is_gimple_assign (cur_stmt))
2780 continue;
2782 code = gimple_assign_rhs_code (cur_stmt);
2783 switch (code)
2785 case LROTATE_EXPR:
2786 case RROTATE_EXPR:
2787 if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
2788 || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
2789 % BITS_PER_UNIT)
2790 continue;
2791 /* Fall through. */
2792 case BIT_IOR_EXPR:
2793 break;
2794 default:
2795 continue;
2798 src_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
2800 if (!src_stmt)
2801 continue;
2803 switch (n.range)
2805 case 16:
2806 /* Already in canonical form, nothing to do. */
2807 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
2808 continue;
2809 load_type = bswap_type = uint16_type_node;
2810 break;
2811 case 32:
2812 load_type = uint32_type_node;
2813 if (bswap32_p)
2815 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
2816 bswap_type = bswap32_type;
2818 break;
2819 case 64:
2820 load_type = uint64_type_node;
2821 if (bswap64_p)
2823 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
2824 bswap_type = bswap64_type;
2826 break;
2827 default:
2828 continue;
2831 if (bswap && !fndecl && n.range != 16)
2832 continue;
2834 if (bswap_replace (cur_stmt, src_stmt, fndecl, bswap_type, load_type,
2835 &n, bswap))
2836 changed = true;
2840 statistics_counter_event (fun, "16-bit nop implementations found",
2841 nop_stats.found_16bit);
2842 statistics_counter_event (fun, "32-bit nop implementations found",
2843 nop_stats.found_32bit);
2844 statistics_counter_event (fun, "64-bit nop implementations found",
2845 nop_stats.found_64bit);
2846 statistics_counter_event (fun, "16-bit bswap implementations found",
2847 bswap_stats.found_16bit);
2848 statistics_counter_event (fun, "32-bit bswap implementations found",
2849 bswap_stats.found_32bit);
2850 statistics_counter_event (fun, "64-bit bswap implementations found",
2851 bswap_stats.found_64bit);
2853 return (changed ? TODO_update_ssa : 0);
2856 } // anon namespace
2858 gimple_opt_pass *
2859 make_pass_optimize_bswap (gcc::context *ctxt)
2861 return new pass_optimize_bswap (ctxt);
2864 /* Return true if stmt is a type conversion operation that can be stripped
2865 when used in a widening multiply operation. */
2866 static bool
2867 widening_mult_conversion_strippable_p (tree result_type, gimple stmt)
2869 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2871 if (TREE_CODE (result_type) == INTEGER_TYPE)
2873 tree op_type;
2874 tree inner_op_type;
2876 if (!CONVERT_EXPR_CODE_P (rhs_code))
2877 return false;
2879 op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2881 /* If the type of OP has the same precision as the result, then
2882 we can strip this conversion. The multiply operation will be
2883 selected to create the correct extension as a by-product. */
2884 if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2885 return true;
2887 /* We can also strip a conversion if it preserves the signed-ness of
2888 the operation and doesn't narrow the range. */
2889 inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2891 /* If the inner-most type is unsigned, then we can strip any
2892 intermediate widening operation. If it's signed, then the
2893 intermediate widening operation must also be signed. */
2894 if ((TYPE_UNSIGNED (inner_op_type)
2895 || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2896 && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2897 return true;
2899 return false;
2902 return rhs_code == FIXED_CONVERT_EXPR;
2905 /* Return true if RHS is a suitable operand for a widening multiplication,
2906 assuming a target type of TYPE.
2907 There are two cases:
2909 - RHS makes some value at least twice as wide. Store that value
2910 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2912 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2913 and store NULL in *TYPE_OUT. */
2915 static bool
2916 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2917 tree *new_rhs_out)
2919 gimple stmt;
2920 tree type1, rhs1;
2922 if (TREE_CODE (rhs) == SSA_NAME)
2924 stmt = SSA_NAME_DEF_STMT (rhs);
2925 if (is_gimple_assign (stmt))
2927 if (! widening_mult_conversion_strippable_p (type, stmt))
2928 rhs1 = rhs;
2929 else
2931 rhs1 = gimple_assign_rhs1 (stmt);
2933 if (TREE_CODE (rhs1) == INTEGER_CST)
2935 *new_rhs_out = rhs1;
2936 *type_out = NULL;
2937 return true;
2941 else
2942 rhs1 = rhs;
2944 type1 = TREE_TYPE (rhs1);
2946 if (TREE_CODE (type1) != TREE_CODE (type)
2947 || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2948 return false;
2950 *new_rhs_out = rhs1;
2951 *type_out = type1;
2952 return true;
2955 if (TREE_CODE (rhs) == INTEGER_CST)
2957 *new_rhs_out = rhs;
2958 *type_out = NULL;
2959 return true;
2962 return false;
2965 /* Return true if STMT performs a widening multiplication, assuming the
2966 output type is TYPE. If so, store the unwidened types of the operands
2967 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2968 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2969 and *TYPE2_OUT would give the operands of the multiplication. */
2971 static bool
2972 is_widening_mult_p (gimple stmt,
2973 tree *type1_out, tree *rhs1_out,
2974 tree *type2_out, tree *rhs2_out)
2976 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2978 if (TREE_CODE (type) != INTEGER_TYPE
2979 && TREE_CODE (type) != FIXED_POINT_TYPE)
2980 return false;
2982 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2983 rhs1_out))
2984 return false;
2986 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2987 rhs2_out))
2988 return false;
2990 if (*type1_out == NULL)
2992 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2993 return false;
2994 *type1_out = *type2_out;
2997 if (*type2_out == NULL)
2999 if (!int_fits_type_p (*rhs2_out, *type1_out))
3000 return false;
3001 *type2_out = *type1_out;
3004 /* Ensure that the larger of the two operands comes first. */
3005 if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
3007 std::swap (*type1_out, *type2_out);
3008 std::swap (*rhs1_out, *rhs2_out);
3011 return true;
3014 /* Process a single gimple statement STMT, which has a MULT_EXPR as
3015 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
3016 value is true iff we converted the statement. */
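/* A hedged source-level sketch of what qualifies (hypothetical code, not part
   of GCC):

     long long
     widen (int a, int b)
     {
       return (long long) a * b;
     }

   Both operands are extended from a type at most half the width of the
   result, so is_widening_mult_p succeeds and the MULT_EXPR can be rewritten
   as a WIDEN_MULT_EXPR when a suitable widening multiply optab exists for
   the mode pair.  */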
3018 static bool
3019 convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi)
3021 tree lhs, rhs1, rhs2, type, type1, type2;
3022 enum insn_code handler;
3023 machine_mode to_mode, from_mode, actual_mode;
3024 optab op;
3025 int actual_precision;
3026 location_t loc = gimple_location (stmt);
3027 bool from_unsigned1, from_unsigned2;
3029 lhs = gimple_assign_lhs (stmt);
3030 type = TREE_TYPE (lhs);
3031 if (TREE_CODE (type) != INTEGER_TYPE)
3032 return false;
3034 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
3035 return false;
3037 to_mode = TYPE_MODE (type);
3038 from_mode = TYPE_MODE (type1);
3039 from_unsigned1 = TYPE_UNSIGNED (type1);
3040 from_unsigned2 = TYPE_UNSIGNED (type2);
3042 if (from_unsigned1 && from_unsigned2)
3043 op = umul_widen_optab;
3044 else if (!from_unsigned1 && !from_unsigned2)
3045 op = smul_widen_optab;
3046 else
3047 op = usmul_widen_optab;
3049 handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
3050 0, &actual_mode);
3052 if (handler == CODE_FOR_nothing)
3054 if (op != smul_widen_optab)
3056 /* We can use a signed multiply with unsigned types as long as
3057 there is a wider mode to use, or it is the smaller of the two
3058 types that is unsigned. Note that type1 >= type2, always. */
3059 if ((TYPE_UNSIGNED (type1)
3060 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
3061 || (TYPE_UNSIGNED (type2)
3062 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
3064 from_mode = GET_MODE_WIDER_MODE (from_mode);
3065 if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
3066 return false;
3069 op = smul_widen_optab;
3070 handler = find_widening_optab_handler_and_mode (op, to_mode,
3071 from_mode, 0,
3072 &actual_mode);
3074 if (handler == CODE_FOR_nothing)
3075 return false;
3077 from_unsigned1 = from_unsigned2 = false;
3079 else
3080 return false;
3083 /* Ensure that the inputs to the handler are in the correct precision
3084 for the opcode. This will be the full mode size. */
3085 actual_precision = GET_MODE_PRECISION (actual_mode);
3086 if (2 * actual_precision > TYPE_PRECISION (type))
3087 return false;
3088 if (actual_precision != TYPE_PRECISION (type1)
3089 || from_unsigned1 != TYPE_UNSIGNED (type1))
3090 rhs1 = build_and_insert_cast (gsi, loc,
3091 build_nonstandard_integer_type
3092 (actual_precision, from_unsigned1), rhs1);
3093 if (actual_precision != TYPE_PRECISION (type2)
3094 || from_unsigned2 != TYPE_UNSIGNED (type2))
3095 rhs2 = build_and_insert_cast (gsi, loc,
3096 build_nonstandard_integer_type
3097 (actual_precision, from_unsigned2), rhs2);
3099 /* Handle constants. */
3100 if (TREE_CODE (rhs1) == INTEGER_CST)
3101 rhs1 = fold_convert (type1, rhs1);
3102 if (TREE_CODE (rhs2) == INTEGER_CST)
3103 rhs2 = fold_convert (type2, rhs2);
3105 gimple_assign_set_rhs1 (stmt, rhs1);
3106 gimple_assign_set_rhs2 (stmt, rhs2);
3107 gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
3108 update_stmt (stmt);
3109 widen_mul_stats.widen_mults_inserted++;
3110 return true;
3113 /* Process a single gimple statement STMT, which is found at the
3114 iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
3115 rhs (given by CODE), and try to convert it into a
3116 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
3117 is true iff we converted the statement. */
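/* A hedged sketch of a pattern this can handle (hypothetical code, not part
   of GCC):

     long long
     macc (long long acc, int a, int b)
     {
       return acc + (long long) a * b;
     }

   The inner multiply is a widening multiply with a single use, so the
   PLUS_EXPR can become a WIDEN_MULT_PLUS_EXPR when the target provides the
   corresponding multiply-and-accumulate optab.  */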
3119 static bool
3120 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
3121 enum tree_code code)
3123 gimple rhs1_stmt = NULL, rhs2_stmt = NULL;
3124 gimple conv1_stmt = NULL, conv2_stmt = NULL, conv_stmt;
3125 tree type, type1, type2, optype;
3126 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
3127 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
3128 optab this_optab;
3129 enum tree_code wmult_code;
3130 enum insn_code handler;
3131 machine_mode to_mode, from_mode, actual_mode;
3132 location_t loc = gimple_location (stmt);
3133 int actual_precision;
3134 bool from_unsigned1, from_unsigned2;
3136 lhs = gimple_assign_lhs (stmt);
3137 type = TREE_TYPE (lhs);
3138 if (TREE_CODE (type) != INTEGER_TYPE
3139 && TREE_CODE (type) != FIXED_POINT_TYPE)
3140 return false;
3142 if (code == MINUS_EXPR)
3143 wmult_code = WIDEN_MULT_MINUS_EXPR;
3144 else
3145 wmult_code = WIDEN_MULT_PLUS_EXPR;
3147 rhs1 = gimple_assign_rhs1 (stmt);
3148 rhs2 = gimple_assign_rhs2 (stmt);
3150 if (TREE_CODE (rhs1) == SSA_NAME)
3152 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
3153 if (is_gimple_assign (rhs1_stmt))
3154 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
3157 if (TREE_CODE (rhs2) == SSA_NAME)
3159 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
3160 if (is_gimple_assign (rhs2_stmt))
3161 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
3164 /* Allow for one conversion statement between the multiply
3165 and addition/subtraction statement. If there is more than
3166 one conversion then we assume they would invalidate this
3167 transformation. If that's not the case then they should have
3168 been folded before now. */
3169 if (CONVERT_EXPR_CODE_P (rhs1_code))
3171 conv1_stmt = rhs1_stmt;
3172 rhs1 = gimple_assign_rhs1 (rhs1_stmt);
3173 if (TREE_CODE (rhs1) == SSA_NAME)
3175 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
3176 if (is_gimple_assign (rhs1_stmt))
3177 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
3179 else
3180 return false;
3182 if (CONVERT_EXPR_CODE_P (rhs2_code))
3184 conv2_stmt = rhs2_stmt;
3185 rhs2 = gimple_assign_rhs1 (rhs2_stmt);
3186 if (TREE_CODE (rhs2) == SSA_NAME)
3188 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
3189 if (is_gimple_assign (rhs2_stmt))
3190 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
3192 else
3193 return false;
3196 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
3197 is_widening_mult_p, but we still need the rhs values it returns.
3199 It might also appear that it would be sufficient to use the existing
3200 operands of the widening multiply, but that would limit the choice of
3201 multiply-and-accumulate instructions.
3203 If the widened-multiplication result has more than one use, it is
3204 probably wiser not to do the conversion. */
3205 if (code == PLUS_EXPR
3206 && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
3208 if (!has_single_use (rhs1)
3209 || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
3210 &type2, &mult_rhs2))
3211 return false;
3212 add_rhs = rhs2;
3213 conv_stmt = conv1_stmt;
3215 else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
3217 if (!has_single_use (rhs2)
3218 || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
3219 &type2, &mult_rhs2))
3220 return false;
3221 add_rhs = rhs1;
3222 conv_stmt = conv2_stmt;
3224 else
3225 return false;
3227 to_mode = TYPE_MODE (type);
3228 from_mode = TYPE_MODE (type1);
3229 from_unsigned1 = TYPE_UNSIGNED (type1);
3230 from_unsigned2 = TYPE_UNSIGNED (type2);
3231 optype = type1;
3233 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
3234 if (from_unsigned1 != from_unsigned2)
3236 if (!INTEGRAL_TYPE_P (type))
3237 return false;
3238 /* We can use a signed multiply with unsigned types as long as
3239 there is a wider mode to use, or it is the smaller of the two
3240 types that is unsigned. Note that type1 >= type2, always. */
3241 if ((from_unsigned1
3242 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
3243 || (from_unsigned2
3244 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
3246 from_mode = GET_MODE_WIDER_MODE (from_mode);
3247 if (GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
3248 return false;
3251 from_unsigned1 = from_unsigned2 = false;
3252 optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
3253 false);
3256 /* If there was a conversion between the multiply and addition
3257 then we need to make sure it fits a multiply-and-accumulate.
3258 There should be a single mode change which does not change the
3259 value. */
3260 if (conv_stmt)
3262 /* We use the original, unmodified data types for this. */
3263 tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
3264 tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
3265 int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
3266 bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
3268 if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
3270 /* Conversion is a truncate. */
3271 if (TYPE_PRECISION (to_type) < data_size)
3272 return false;
3274 else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
3276 /* Conversion is an extend. Check it's the right sort. */
3277 if (TYPE_UNSIGNED (from_type) != is_unsigned
3278 && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
3279 return false;
3281 /* else convert is a no-op for our purposes. */
3284 /* Verify that the machine can perform a widening multiply
3285 accumulate in this mode/signedness combination, otherwise
3286 this transformation is likely to pessimize code. */
3287 this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
3288 handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
3289 from_mode, 0, &actual_mode);
3291 if (handler == CODE_FOR_nothing)
3292 return false;
3294 /* Ensure that the inputs to the handler are in the correct precision
3295 for the opcode. This will be the full mode size. */
3296 actual_precision = GET_MODE_PRECISION (actual_mode);
3297 if (actual_precision != TYPE_PRECISION (type1)
3298 || from_unsigned1 != TYPE_UNSIGNED (type1))
3299 mult_rhs1 = build_and_insert_cast (gsi, loc,
3300 build_nonstandard_integer_type
3301 (actual_precision, from_unsigned1),
3302 mult_rhs1);
3303 if (actual_precision != TYPE_PRECISION (type2)
3304 || from_unsigned2 != TYPE_UNSIGNED (type2))
3305 mult_rhs2 = build_and_insert_cast (gsi, loc,
3306 build_nonstandard_integer_type
3307 (actual_precision, from_unsigned2),
3308 mult_rhs2);
3310 if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
3311 add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
3313 /* Handle constants. */
3314 if (TREE_CODE (mult_rhs1) == INTEGER_CST)
3315 mult_rhs1 = fold_convert (type1, mult_rhs1);
3316 if (TREE_CODE (mult_rhs2) == INTEGER_CST)
3317 mult_rhs2 = fold_convert (type2, mult_rhs2);
3319 gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
3320 add_rhs);
3321 update_stmt (gsi_stmt (*gsi));
3322 widen_mul_stats.maccs_inserted++;
3323 return true;
3326 /* Combine the multiplication at MUL_STMT with operands OP1 and OP2
3327 with uses in additions and subtractions to form fused multiply-add
3328 operations. Returns true if successful and MUL_STMT should be removed. */
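/* A hedged illustration (hypothetical source, not part of GCC): when FP
   contraction is enabled,

     double
     mad (double a, double b, double c)
     {
       return a * b + c;
     }

   has its MULT_EXPR and PLUS_EXPR fused into a single FMA_EXPR, provided
   fma_optab is available for the mode and the multiplication result has no
   other uses.  */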
3330 static bool
3331 convert_mult_to_fma (gimple mul_stmt, tree op1, tree op2)
3333 tree mul_result = gimple_get_lhs (mul_stmt);
3334 tree type = TREE_TYPE (mul_result);
3335 gimple use_stmt, neguse_stmt;
3336 gassign *fma_stmt;
3337 use_operand_p use_p;
3338 imm_use_iterator imm_iter;
3340 if (FLOAT_TYPE_P (type)
3341 && flag_fp_contract_mode == FP_CONTRACT_OFF)
3342 return false;
3344 /* We don't want to do bitfield reduction ops. */
3345 if (INTEGRAL_TYPE_P (type)
3346 && (TYPE_PRECISION (type)
3347 != GET_MODE_PRECISION (TYPE_MODE (type))))
3348 return false;
3350 /* If the target doesn't support it, don't generate it. We assume that
3351 if fma isn't available then fms, fnma or fnms are not either. */
3352 if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
3353 return false;
3355 /* If the multiplication has zero uses, it is kept around probably because
3356 of -fnon-call-exceptions. Don't optimize it away in that case,
3357 that is DCE's job. */
3358 if (has_zero_uses (mul_result))
3359 return false;
3361 /* Make sure that the multiplication statement becomes dead after
3362 the transformation, that is, that all uses are transformed to FMAs.
3363 This means we assume that an FMA operation has the same cost
3364 as an addition. */
3365 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3367 enum tree_code use_code;
3368 tree result = mul_result;
3369 bool negate_p = false;
3371 use_stmt = USE_STMT (use_p);
3373 if (is_gimple_debug (use_stmt))
3374 continue;
3376 /* For now restrict these operations to single basic blocks. In theory
3377 we would want to support sinking the multiplication in
3378 m = a*b;
3379 if ()
3380 ma = m + c;
3381 else
3382 d = m;
3383 to form a fma in the then block and sink the multiplication to the
3384 else block. */
3385 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3386 return false;
3388 if (!is_gimple_assign (use_stmt))
3389 return false;
3391 use_code = gimple_assign_rhs_code (use_stmt);
3393 /* A negate on the multiplication leads to FNMA. */
3394 if (use_code == NEGATE_EXPR)
3396 ssa_op_iter iter;
3397 use_operand_p usep;
3399 result = gimple_assign_lhs (use_stmt);
3401 /* Make sure the negate statement becomes dead with this
3402 single transformation. */
3403 if (!single_imm_use (gimple_assign_lhs (use_stmt),
3404 &use_p, &neguse_stmt))
3405 return false;
3407 /* Make sure the multiplication isn't also used on that stmt. */
3408 FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3409 if (USE_FROM_PTR (usep) == mul_result)
3410 return false;
3412 /* Re-validate. */
3413 use_stmt = neguse_stmt;
3414 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3415 return false;
3416 if (!is_gimple_assign (use_stmt))
3417 return false;
3419 use_code = gimple_assign_rhs_code (use_stmt);
3420 negate_p = true;
3423 switch (use_code)
3425 case MINUS_EXPR:
3426 if (gimple_assign_rhs2 (use_stmt) == result)
3427 negate_p = !negate_p;
3428 break;
3429 case PLUS_EXPR:
3430 break;
3431 default:
3432 /* FMA can only be formed from PLUS and MINUS. */
3433 return false;
3436 /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
3437 by a MULT_EXPR that we'll visit later, we might be able to
3438 get a more profitable match with fnma.
3439 OTOH, if we don't, a negate / fma pair likely has lower latency
3440 than a mult / subtract pair. */
3441 if (use_code == MINUS_EXPR && !negate_p
3442 && gimple_assign_rhs1 (use_stmt) == result
3443 && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing
3444 && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
3446 tree rhs2 = gimple_assign_rhs2 (use_stmt);
3448 if (TREE_CODE (rhs2) == SSA_NAME)
3450 gimple stmt2 = SSA_NAME_DEF_STMT (rhs2);
3451 if (has_single_use (rhs2)
3452 && is_gimple_assign (stmt2)
3453 && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3454 return false;
3458 /* We can't handle a * b + a * b. */
3459 if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
3460 return false;
3462 /* While it is possible to validate whether or not the exact form
3463 that we've recognized is available in the backend, the assumption
3464 is that the transformation is never a loss. For instance, suppose
3465 the target only has the plain FMA pattern available. Consider
3466 a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which
3467 is still two operations. Consider -(a*b)-c -> fma(-a,b,-c): we
3468 still have 3 operations, but in the FMA form the two NEGs are
3469 independent and could be run in parallel. */
3472 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3474 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3475 enum tree_code use_code;
3476 tree addop, mulop1 = op1, result = mul_result;
3477 bool negate_p = false;
3479 if (is_gimple_debug (use_stmt))
3480 continue;
3482 use_code = gimple_assign_rhs_code (use_stmt);
3483 if (use_code == NEGATE_EXPR)
3485 result = gimple_assign_lhs (use_stmt);
3486 single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3487 gsi_remove (&gsi, true);
3488 release_defs (use_stmt);
3490 use_stmt = neguse_stmt;
3491 gsi = gsi_for_stmt (use_stmt);
3492 use_code = gimple_assign_rhs_code (use_stmt);
3493 negate_p = true;
3496 if (gimple_assign_rhs1 (use_stmt) == result)
3498 addop = gimple_assign_rhs2 (use_stmt);
3499 /* a * b - c -> a * b + (-c) */
3500 if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
3501 addop = force_gimple_operand_gsi (&gsi,
3502 build1 (NEGATE_EXPR,
3503 type, addop),
3504 true, NULL_TREE, true,
3505 GSI_SAME_STMT);
3507 else
3509 addop = gimple_assign_rhs1 (use_stmt);
3510 /* a - b * c -> (-b) * c + a */
3511 if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
3512 negate_p = !negate_p;
3515 if (negate_p)
3516 mulop1 = force_gimple_operand_gsi (&gsi,
3517 build1 (NEGATE_EXPR,
3518 type, mulop1),
3519 true, NULL_TREE, true,
3520 GSI_SAME_STMT);
3522 fma_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt),
3523 FMA_EXPR, mulop1, op2, addop);
3524 gsi_replace (&gsi, fma_stmt, true);
3525 widen_mul_stats.fmas_inserted++;
3528 return true;
3531 /* Find integer multiplications where the operands are extended from
3532 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
3533 where appropriate. */
3535 namespace {
3537 const pass_data pass_data_optimize_widening_mul =
3539 GIMPLE_PASS, /* type */
3540 "widening_mul", /* name */
3541 OPTGROUP_NONE, /* optinfo_flags */
3542 TV_NONE, /* tv_id */
3543 PROP_ssa, /* properties_required */
3544 0, /* properties_provided */
3545 0, /* properties_destroyed */
3546 0, /* todo_flags_start */
3547 TODO_update_ssa, /* todo_flags_finish */
3550 class pass_optimize_widening_mul : public gimple_opt_pass
3552 public:
3553 pass_optimize_widening_mul (gcc::context *ctxt)
3554 : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
3557 /* opt_pass methods: */
3558 virtual bool gate (function *)
3560 return flag_expensive_optimizations && optimize;
3563 virtual unsigned int execute (function *);
3565 }; // class pass_optimize_widening_mul
3567 unsigned int
3568 pass_optimize_widening_mul::execute (function *fun)
3570 basic_block bb;
3571 bool cfg_changed = false;
3573 memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
3575 FOR_EACH_BB_FN (bb, fun)
3577 gimple_stmt_iterator gsi;
3579 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
3581 gimple stmt = gsi_stmt (gsi);
3582 enum tree_code code;
3584 if (is_gimple_assign (stmt))
3586 code = gimple_assign_rhs_code (stmt);
3587 switch (code)
3589 case MULT_EXPR:
3590 if (!convert_mult_to_widen (stmt, &gsi)
3591 && convert_mult_to_fma (stmt,
3592 gimple_assign_rhs1 (stmt),
3593 gimple_assign_rhs2 (stmt)))
3595 gsi_remove (&gsi, true);
3596 release_defs (stmt);
3597 continue;
3599 break;
3601 case PLUS_EXPR:
3602 case MINUS_EXPR:
3603 convert_plusminus_to_widen (&gsi, stmt, code);
3604 break;
3606 default:;
3609 else if (is_gimple_call (stmt)
3610 && gimple_call_lhs (stmt))
3612 tree fndecl = gimple_call_fndecl (stmt);
3613 if (fndecl
3614 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
3616 switch (DECL_FUNCTION_CODE (fndecl))
3618 case BUILT_IN_POWF:
3619 case BUILT_IN_POW:
3620 case BUILT_IN_POWL:
3621 if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
3622 && REAL_VALUES_EQUAL
3623 (TREE_REAL_CST (gimple_call_arg (stmt, 1)),
3624 dconst2)
3625 && convert_mult_to_fma (stmt,
3626 gimple_call_arg (stmt, 0),
3627 gimple_call_arg (stmt, 0)))
3629 unlink_stmt_vdef (stmt);
3630 if (gsi_remove (&gsi, true)
3631 && gimple_purge_dead_eh_edges (bb))
3632 cfg_changed = true;
3633 release_defs (stmt);
3634 continue;
3636 break;
3638 default:;
3642 gsi_next (&gsi);
3646 statistics_counter_event (fun, "widening multiplications inserted",
3647 widen_mul_stats.widen_mults_inserted);
3648 statistics_counter_event (fun, "widening maccs inserted",
3649 widen_mul_stats.maccs_inserted);
3650 statistics_counter_event (fun, "fused multiply-adds inserted",
3651 widen_mul_stats.fmas_inserted);
3653 return cfg_changed ? TODO_cleanup_cfg : 0;
3656 } // anon namespace
3658 gimple_opt_pass *
3659 make_pass_optimize_widening_mul (gcc::context *ctxt)
3661 return new pass_optimize_widening_mul (ctxt);