2015-06-11 Paul Thomas <pault@gcc.gnu.org>
[official-gcc.git] / gcc / tree-ssa-math-opts.c
blob d46839cc5e3aa5a2ca47465d8b86a0603b7abc1c
1 /* Global, SSA-based optimizations using mathematical identities.
2 Copyright (C) 2005-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* One of the mini-passes in this file tries to CSE reciprocal
21 operations. These are common in sequences such as this one:
23 modulus = sqrt(x*x + y*y + z*z);
24 x = x / modulus;
25 y = y / modulus;
26 z = z / modulus;
28 that can be optimized to
30 modulus = sqrt(x*x + y*y + z*z);
31 rmodulus = 1.0 / modulus;
32 x = x * rmodulus;
33 y = y * rmodulus;
34 z = z * rmodulus;
36 We do this for loop invariant divisors, and with this pass whenever
37 we notice that a division has the same divisor multiple times.
39 Of course, like in PRE, we don't insert a division if a dominator
40 already has one. However, this cannot be done as an extension of
41 PRE for several reasons.
43 First of all, experiments have shown that the
44 transformation is not always useful if there are only two divisions
45 by the same divisor. This is probably because modern processors
46 can pipeline the divisions; on older, in-order processors it should
47 still be effective to optimize two divisions by the same number.
48 We make this a param, and it shall be called N in the remainder of
49 this comment.
51 Second, if trapping math is active, we have less freedom on where
52 to insert divisions: we can only do so in basic blocks that already
53 contain one. (If divisions don't trap, we can instead insert
54 divisions elsewhere, which will be in blocks that are common dominators
55 of those that have the division.)
57 We really don't want to compute the reciprocal unless a division will
58 be found. To do this, we won't insert the division in a basic block
59 that has fewer than N divisions *post-dominating* it.
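   For example, in

     if (cond)
       a = b / d;

   the division does not post-dominate the block containing the branch, so
   we do not insert 1.0 / d there: if COND were false, the reciprocal would
   be computed but never used.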
61 The algorithm constructs a subset of the dominator tree, holding the
62 blocks containing the divisions and their common dominators,
63 and walks it twice. The first walk is in post-order, and it annotates
64 each block with the number of divisions that post-dominate it: this
65 gives information on where divisions can be inserted profitably.
66 The second walk is in pre-order, and it inserts divisions as explained
67 above, and replaces divisions by multiplications.
69 In the best case, the cost of the pass is O(n_statements). In the
70 worst-case, the cost is due to creating the dominator tree subset,
71 with a cost of O(n_basic_blocks ^ 2); however this can only happen
72 for n_statements / n_basic_blocks statements. So, the amortized cost
73 of creating the dominator tree subset is O(n_basic_blocks) and the
74 worst-case cost of the pass is O(n_statements * n_basic_blocks).
76 More practically, the cost will be small because there are few
77 divisions, and they tend to be in the same basic block, so insert_bb
78 is called very few times.
80 If we did this using domwalk.c, an efficient implementation would have
81 to work on all the variables in a single pass, because we could not
82 work on just a subset of the dominator tree, as we do now, and the
83 cost would also be something like O(n_statements * n_basic_blocks).
84 The data structures would be more complex in order to work on all the
85 variables in a single pass. */
87 #include "config.h"
88 #include "system.h"
89 #include "coretypes.h"
90 #include "tm.h"
91 #include "flags.h"
92 #include "input.h"
93 #include "alias.h"
94 #include "symtab.h"
95 #include "tree.h"
96 #include "fold-const.h"
97 #include "predict.h"
98 #include "hard-reg-set.h"
99 #include "function.h"
100 #include "dominance.h"
101 #include "cfg.h"
102 #include "basic-block.h"
103 #include "tree-ssa-alias.h"
104 #include "internal-fn.h"
105 #include "gimple-fold.h"
106 #include "gimple-expr.h"
107 #include "is-a.h"
108 #include "gimple.h"
109 #include "gimple-iterator.h"
110 #include "gimplify.h"
111 #include "gimplify-me.h"
112 #include "stor-layout.h"
113 #include "gimple-ssa.h"
114 #include "tree-cfg.h"
115 #include "tree-phinodes.h"
116 #include "ssa-iterators.h"
117 #include "stringpool.h"
118 #include "tree-ssanames.h"
119 #include "rtl.h"
120 #include "insn-config.h"
121 #include "expmed.h"
122 #include "dojump.h"
123 #include "explow.h"
124 #include "calls.h"
125 #include "emit-rtl.h"
126 #include "varasm.h"
127 #include "stmt.h"
128 #include "expr.h"
129 #include "tree-dfa.h"
130 #include "tree-ssa.h"
131 #include "tree-pass.h"
132 #include "alloc-pool.h"
133 #include "target.h"
134 #include "gimple-pretty-print.h"
135 #include "builtins.h"
136 #include "params.h"
138 /* FIXME: RTL headers have to be included here for optabs. */
139 #include "rtl.h" /* Because optabs.h wants enum rtx_code. */
140 #include "expr.h" /* Because optabs.h wants sepops. */
141 #include "insn-codes.h"
142 #include "optabs.h"
144 /* This structure represents one basic block that either computes a
145 division, or is a common dominator for basic blocks that compute a
146 division. */
147 struct occurrence {
148 /* The basic block represented by this structure. */
149 basic_block bb;
151 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
152 inserted in BB. */
153 tree recip_def;
155 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
156 was inserted in BB. */
157 gimple recip_def_stmt;
159 /* Pointer to a list of "struct occurrence"s for blocks dominated
160 by BB. */
161 struct occurrence *children;
163 /* Pointer to the next "struct occurrence" in the list of blocks
164 sharing a common dominator. */
165 struct occurrence *next;
167 /* The number of divisions that are in BB before compute_merit. The
168 number of divisions that are in BB or post-dominate it after
169 compute_merit. */
170 int num_divisions;
172 /* True if the basic block has a division, false if it is a common
173 dominator for basic blocks that do. If it is false and trapping
174 math is active, BB is not a candidate for inserting a reciprocal. */
175 bool bb_has_division;
178 static struct
180 /* Number of 1.0/X ops inserted. */
181 int rdivs_inserted;
183 /* Number of 1.0/FUNC ops inserted. */
184 int rfuncs_inserted;
185 } reciprocal_stats;
187 static struct
189 /* Number of cexpi calls inserted. */
190 int inserted;
191 } sincos_stats;
193 static struct
195 /* Number of hand-written 16-bit nop / bswaps found. */
196 int found_16bit;
198 /* Number of hand-written 32-bit nop / bswaps found. */
199 int found_32bit;
201 /* Number of hand-written 64-bit nop / bswaps found. */
202 int found_64bit;
203 } nop_stats, bswap_stats;
205 static struct
207 /* Number of widening multiplication ops inserted. */
208 int widen_mults_inserted;
210 /* Number of integer multiply-and-accumulate ops inserted. */
211 int maccs_inserted;
213 /* Number of fp fused multiply-add ops inserted. */
214 int fmas_inserted;
215 } widen_mul_stats;
217 /* The instance of "struct occurrence" representing the highest
218 interesting block in the dominator tree. */
219 static struct occurrence *occ_head;
221 /* Allocation pool for getting instances of "struct occurrence". */
222 static pool_allocator<occurrence> *occ_pool;
226 /* Allocate and return a new struct occurrence for basic block BB,
227 whose children list is headed by CHILDREN. */
228 static struct occurrence *
229 occ_new (basic_block bb, struct occurrence *children)
231 struct occurrence *occ;
233 bb->aux = occ = occ_pool->allocate ();
234 memset (occ, 0, sizeof (struct occurrence));
236 occ->bb = bb;
237 occ->children = children;
238 return occ;
242 /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
243 list of "struct occurrence"s, one per basic block, having IDOM as
244 their common dominator.
246 We try to insert NEW_OCC as deep as possible in the tree, and we also
247 insert any other block that is a common dominator for BB and one
248 block already in the tree. */
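/* For example, if the tree below IDOM already contains a node for block B1,
   and NEW_OCC represents a block B2 such that B1 and B2 have a nearest
   common dominator DOM strictly below IDOM, a new node for DOM is created
   with the nodes for B1 and B2 as its two children.  */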
250 static void
251 insert_bb (struct occurrence *new_occ, basic_block idom,
252 struct occurrence **p_head)
254 struct occurrence *occ, **p_occ;
256 for (p_occ = p_head; (occ = *p_occ) != NULL; )
258 basic_block bb = new_occ->bb, occ_bb = occ->bb;
259 basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
260 if (dom == bb)
262 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
263 from its list. */
264 *p_occ = occ->next;
265 occ->next = new_occ->children;
266 new_occ->children = occ;
268 /* Try the next block (it may as well be dominated by BB). */
271 else if (dom == occ_bb)
273 /* OCC_BB dominates BB. Tail recurse to look deeper. */
274 insert_bb (new_occ, dom, &occ->children);
275 return;
278 else if (dom != idom)
280 gcc_assert (!dom->aux);
282 /* There is a dominator between IDOM and BB, add it and make
283 two children out of NEW_OCC and OCC. First, remove OCC from
284 its list. */
285 *p_occ = occ->next;
286 new_occ->next = occ;
287 occ->next = NULL;
289 /* None of the previous blocks has DOM as a dominator: if we tail
290 recursed, we would reexamine them uselessly. Just switch BB with
291 DOM, and go on looking for blocks dominated by DOM. */
292 new_occ = occ_new (dom, new_occ);
295 else
297 /* Nothing special, go on with the next element. */
298 p_occ = &occ->next;
302 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
303 new_occ->next = *p_head;
304 *p_head = new_occ;
307 /* Register that we found a division in BB. */
309 static inline void
310 register_division_in (basic_block bb)
312 struct occurrence *occ;
314 occ = (struct occurrence *) bb->aux;
315 if (!occ)
317 occ = occ_new (bb, NULL);
318 insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
321 occ->bb_has_division = true;
322 occ->num_divisions++;
326 /* Compute the number of divisions that postdominate each block in OCC and
327 its children. */
329 static void
330 compute_merit (struct occurrence *occ)
332 struct occurrence *occ_child;
333 basic_block dom = occ->bb;
335 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
337 basic_block bb;
338 if (occ_child->children)
339 compute_merit (occ_child);
341 if (flag_exceptions)
342 bb = single_noncomplex_succ (dom);
343 else
344 bb = dom;
346 if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
347 occ->num_divisions += occ_child->num_divisions;
352 /* Return whether USE_STMT is a floating-point division by DEF. */
353 static inline bool
354 is_division_by (gimple use_stmt, tree def)
356 return is_gimple_assign (use_stmt)
357 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
358 && gimple_assign_rhs2 (use_stmt) == def
359 /* Do not recognize x / x as a valid division, as we would get
360 confused later by replacing all immediate uses of x in such
361 a stmt. */
362 && gimple_assign_rhs1 (use_stmt) != def;
365 /* Walk the subset of the dominator tree rooted at OCC, setting the
366 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
367 the given basic block. The field may be left NULL, of course,
368 if it is not possible or profitable to do the optimization.
370 DEF_GSI is an iterator pointing at the statement defining DEF.
371 If RECIP_DEF is set, a dominator already has a computation that can
372 be used. */
374 static void
375 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
376 tree def, tree recip_def, int threshold)
378 tree type;
379 gassign *new_stmt;
380 gimple_stmt_iterator gsi;
381 struct occurrence *occ_child;
383 if (!recip_def
384 && (occ->bb_has_division || !flag_trapping_math)
385 && occ->num_divisions >= threshold)
387 /* Make a variable with the replacement and substitute it. */
388 type = TREE_TYPE (def);
389 recip_def = create_tmp_reg (type, "reciptmp");
390 new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
391 build_one_cst (type), def);
393 if (occ->bb_has_division)
395 /* Case 1: insert before an existing division. */
396 gsi = gsi_after_labels (occ->bb);
397 while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
398 gsi_next (&gsi);
400 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
402 else if (def_gsi && occ->bb == def_gsi->bb)
404 /* Case 2: insert right after the definition. Note that this will
405 never happen if the definition statement can throw, because in
406 that case the sole successor of the statement's basic block will
407 dominate all the uses as well. */
408 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
410 else
412 /* Case 3: insert in a basic block not containing defs/uses. */
413 gsi = gsi_after_labels (occ->bb);
414 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
417 reciprocal_stats.rdivs_inserted++;
419 occ->recip_def_stmt = new_stmt;
422 occ->recip_def = recip_def;
423 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
424 insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold);
428 /* Replace the division at USE_P with a multiplication by the reciprocal, if
429 possible. */
431 static inline void
432 replace_reciprocal (use_operand_p use_p)
434 gimple use_stmt = USE_STMT (use_p);
435 basic_block bb = gimple_bb (use_stmt);
436 struct occurrence *occ = (struct occurrence *) bb->aux;
438 if (optimize_bb_for_speed_p (bb)
439 && occ->recip_def && use_stmt != occ->recip_def_stmt)
441 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
442 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
443 SET_USE (use_p, occ->recip_def);
444 fold_stmt_inplace (&gsi);
445 update_stmt (use_stmt);
450 /* Free OCC and return one more "struct occurrence" to be freed. */
452 static struct occurrence *
453 free_bb (struct occurrence *occ)
455 struct occurrence *child, *next;
457 /* First get the two pointers hanging off OCC. */
458 next = occ->next;
459 child = occ->children;
460 occ->bb->aux = NULL;
461 occ_pool->remove (occ);
463 /* Now ensure that we don't recurse unless it is necessary. */
464 if (!child)
465 return next;
466 else
468 while (next)
469 next = free_bb (next);
471 return child;
476 /* Look for floating-point divisions among DEF's uses, and try to
477 replace them by multiplications with the reciprocal. Add
478 as many statements computing the reciprocal as needed.
480 DEF must be a GIMPLE register of a floating-point type. */
482 static void
483 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
485 use_operand_p use_p;
486 imm_use_iterator use_iter;
487 struct occurrence *occ;
488 int count = 0, threshold;
490 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));
492 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
494 gimple use_stmt = USE_STMT (use_p);
495 if (is_division_by (use_stmt, def))
497 register_division_in (gimple_bb (use_stmt));
498 count++;
502 /* Do the expensive part only if we can hope to optimize something. */
503 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
504 if (count >= threshold)
506 gimple use_stmt;
507 for (occ = occ_head; occ; occ = occ->next)
509 compute_merit (occ);
510 insert_reciprocals (def_gsi, occ, def, NULL, threshold);
513 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
515 if (is_division_by (use_stmt, def))
517 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
518 replace_reciprocal (use_p);
523 for (occ = occ_head; occ; )
524 occ = free_bb (occ);
526 occ_head = NULL;
529 /* Go through all the floating-point SSA_NAMEs, and call
530 execute_cse_reciprocals_1 on each of them. */
531 namespace {
533 const pass_data pass_data_cse_reciprocals =
535 GIMPLE_PASS, /* type */
536 "recip", /* name */
537 OPTGROUP_NONE, /* optinfo_flags */
538 TV_NONE, /* tv_id */
539 PROP_ssa, /* properties_required */
540 0, /* properties_provided */
541 0, /* properties_destroyed */
542 0, /* todo_flags_start */
543 TODO_update_ssa, /* todo_flags_finish */
546 class pass_cse_reciprocals : public gimple_opt_pass
548 public:
549 pass_cse_reciprocals (gcc::context *ctxt)
550 : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
553 /* opt_pass methods: */
554 virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
555 virtual unsigned int execute (function *);
557 }; // class pass_cse_reciprocals
559 unsigned int
560 pass_cse_reciprocals::execute (function *fun)
562 basic_block bb;
563 tree arg;
565 occ_pool = new pool_allocator<occurrence>
566 ("dominators for recip", n_basic_blocks_for_fn (fun) / 3 + 1);
568 memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
569 calculate_dominance_info (CDI_DOMINATORS);
570 calculate_dominance_info (CDI_POST_DOMINATORS);
572 #ifdef ENABLE_CHECKING
573 FOR_EACH_BB_FN (bb, fun)
574 gcc_assert (!bb->aux);
575 #endif
577 for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
578 if (FLOAT_TYPE_P (TREE_TYPE (arg))
579 && is_gimple_reg (arg))
581 tree name = ssa_default_def (fun, arg);
582 if (name)
583 execute_cse_reciprocals_1 (NULL, name);
586 FOR_EACH_BB_FN (bb, fun)
588 tree def;
590 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
591 gsi_next (&gsi))
593 gphi *phi = gsi.phi ();
594 def = PHI_RESULT (phi);
595 if (! virtual_operand_p (def)
596 && FLOAT_TYPE_P (TREE_TYPE (def)))
597 execute_cse_reciprocals_1 (NULL, def);
600 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
601 gsi_next (&gsi))
603 gimple stmt = gsi_stmt (gsi);
605 if (gimple_has_lhs (stmt)
606 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
607 && FLOAT_TYPE_P (TREE_TYPE (def))
608 && TREE_CODE (def) == SSA_NAME)
609 execute_cse_reciprocals_1 (&gsi, def);
612 if (optimize_bb_for_size_p (bb))
613 continue;
615 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
616 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
617 gsi_next (&gsi))
619 gimple stmt = gsi_stmt (gsi);
620 tree fndecl;
622 if (is_gimple_assign (stmt)
623 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
625 tree arg1 = gimple_assign_rhs2 (stmt);
626 gimple stmt1;
628 if (TREE_CODE (arg1) != SSA_NAME)
629 continue;
631 stmt1 = SSA_NAME_DEF_STMT (arg1);
633 if (is_gimple_call (stmt1)
634 && gimple_call_lhs (stmt1)
635 && (fndecl = gimple_call_fndecl (stmt1))
636 && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
637 || DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD))
639 enum built_in_function code;
640 bool md_code, fail;
641 imm_use_iterator ui;
642 use_operand_p use_p;
644 code = DECL_FUNCTION_CODE (fndecl);
645 md_code = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD;
647 fndecl = targetm.builtin_reciprocal (code, md_code, false);
648 if (!fndecl)
649 continue;
651 /* Check that all uses of the SSA name are divisions,
652 otherwise replacing the defining statement will do
653 the wrong thing. */
654 fail = false;
655 FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
657 gimple stmt2 = USE_STMT (use_p);
658 if (is_gimple_debug (stmt2))
659 continue;
660 if (!is_gimple_assign (stmt2)
661 || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
662 || gimple_assign_rhs1 (stmt2) == arg1
663 || gimple_assign_rhs2 (stmt2) != arg1)
665 fail = true;
666 break;
669 if (fail)
670 continue;
672 gimple_replace_ssa_lhs (stmt1, arg1);
673 gimple_call_set_fndecl (stmt1, fndecl);
674 update_stmt (stmt1);
675 reciprocal_stats.rfuncs_inserted++;
677 FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
679 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
680 gimple_assign_set_rhs_code (stmt, MULT_EXPR);
681 fold_stmt_inplace (&gsi);
682 update_stmt (stmt);
689 statistics_counter_event (fun, "reciprocal divs inserted",
690 reciprocal_stats.rdivs_inserted);
691 statistics_counter_event (fun, "reciprocal functions inserted",
692 reciprocal_stats.rfuncs_inserted);
694 free_dominance_info (CDI_DOMINATORS);
695 free_dominance_info (CDI_POST_DOMINATORS);
696 delete occ_pool;
697 return 0;
700 } // anon namespace
702 gimple_opt_pass *
703 make_pass_cse_reciprocals (gcc::context *ctxt)
705 return new pass_cse_reciprocals (ctxt);
708 /* Records an occurrence at statement USE_STMT in the vector of trees
709 STMTS if it is dominated by *TOP_BB or dominates it, or if *TOP_BB
710 is not yet initialized. Returns true if the occurrence was pushed on
711 the vector. Adjusts *TOP_BB to be the basic block dominating all
712 statements in the vector. */
714 static bool
715 maybe_record_sincos (vec<gimple> *stmts,
716 basic_block *top_bb, gimple use_stmt)
718 basic_block use_bb = gimple_bb (use_stmt);
719 if (*top_bb
720 && (*top_bb == use_bb
721 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
722 stmts->safe_push (use_stmt);
723 else if (!*top_bb
724 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
726 stmts->safe_push (use_stmt);
727 *top_bb = use_bb;
729 else
730 return false;
732 return true;
735 /* Look for sin, cos and cexpi calls with the same argument NAME and
736 create a single call to cexpi CSEing the result in this case.
737 We first walk over all immediate uses of the argument collecting
738 statements that we can CSE in a vector and in a second pass replace
739 the statement rhs with a REALPART or IMAGPART expression on the
740 result of the cexpi call we insert before the use statement that
741 dominates all other candidates. */
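/* A sketch of the intended transformation (the temporary name is only
   illustrative):

     s = sin (x);                   t = cexpi (x);
     c = cos (x);          -->      s = IMAGPART_EXPR <t>;
                                    c = REALPART_EXPR <t>;

   assuming a cexpi implementation is available for the type of x.  */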
743 static bool
744 execute_cse_sincos_1 (tree name)
746 gimple_stmt_iterator gsi;
747 imm_use_iterator use_iter;
748 tree fndecl, res, type;
749 gimple def_stmt, use_stmt, stmt;
750 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
751 auto_vec<gimple> stmts;
752 basic_block top_bb = NULL;
753 int i;
754 bool cfg_changed = false;
756 type = TREE_TYPE (name);
757 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
759 if (gimple_code (use_stmt) != GIMPLE_CALL
760 || !gimple_call_lhs (use_stmt)
761 || !(fndecl = gimple_call_fndecl (use_stmt))
762 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
763 continue;
765 switch (DECL_FUNCTION_CODE (fndecl))
767 CASE_FLT_FN (BUILT_IN_COS):
768 seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
769 break;
771 CASE_FLT_FN (BUILT_IN_SIN):
772 seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
773 break;
775 CASE_FLT_FN (BUILT_IN_CEXPI):
776 seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
777 break;
779 default:;
783 if (seen_cos + seen_sin + seen_cexpi <= 1)
784 return false;
786 /* Simply insert cexpi at the beginning of top_bb but not earlier than
787 the name def statement. */
788 fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
789 if (!fndecl)
790 return false;
791 stmt = gimple_build_call (fndecl, 1, name);
792 res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
793 gimple_call_set_lhs (stmt, res);
795 def_stmt = SSA_NAME_DEF_STMT (name);
796 if (!SSA_NAME_IS_DEFAULT_DEF (name)
797 && gimple_code (def_stmt) != GIMPLE_PHI
798 && gimple_bb (def_stmt) == top_bb)
800 gsi = gsi_for_stmt (def_stmt);
801 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
803 else
805 gsi = gsi_after_labels (top_bb);
806 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
808 sincos_stats.inserted++;
810 /* And adjust the recorded old call sites. */
811 for (i = 0; stmts.iterate (i, &use_stmt); ++i)
813 tree rhs = NULL;
814 fndecl = gimple_call_fndecl (use_stmt);
816 switch (DECL_FUNCTION_CODE (fndecl))
818 CASE_FLT_FN (BUILT_IN_COS):
819 rhs = fold_build1 (REALPART_EXPR, type, res);
820 break;
822 CASE_FLT_FN (BUILT_IN_SIN):
823 rhs = fold_build1 (IMAGPART_EXPR, type, res);
824 break;
826 CASE_FLT_FN (BUILT_IN_CEXPI):
827 rhs = res;
828 break;
830 default:;
831 gcc_unreachable ();
834 /* Replace call with a copy. */
835 stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
837 gsi = gsi_for_stmt (use_stmt);
838 gsi_replace (&gsi, stmt, true);
839 if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
840 cfg_changed = true;
843 return cfg_changed;
846 /* To evaluate powi(x,n), the floating point value x raised to the
847 constant integer exponent n, we use a hybrid algorithm that
848 combines the "window method" with look-up tables. For an
849 introduction to exponentiation algorithms and "addition chains",
850 see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
851 "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
852 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
853 Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
855 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
856 multiplications to inline before calling the system library's pow
857 function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
858 so this default never requires calling pow, powf or powl. */
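/* (The bound follows from square-and-multiply: at most bits(n) - 1 squarings
   plus at most bits(n) - 1 extra multiplies for the set bits of n.)  */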
860 #ifndef POWI_MAX_MULTS
861 #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
862 #endif
864 /* The size of the "optimal power tree" lookup table. All
865 exponents less than this value are simply looked up in the
866 powi_table below. This threshold is also used to size the
867 cache of pseudo registers that hold intermediate results. */
868 #define POWI_TABLE_SIZE 256
870 /* The size, in bits, of the window used in the "window method"
871 exponentiation algorithm. This is equivalent to a radix of
872 (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
873 #define POWI_WINDOW_SIZE 3
875 /* The following table is an efficient representation of an
876 "optimal power tree". For each value, i, the corresponding
877 value, j, in the table states that an optimal evaluation
878 sequence for calculating pow(x,i) can be found by evaluating
879 pow(x,j)*pow(x,i-j). An optimal power tree for the first
880 100 integers is given in Knuth's "Seminumerical Algorithms". */
882 static const unsigned char powi_table[POWI_TABLE_SIZE] =
884 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
885 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
886 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
887 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
888 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
889 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
890 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
891 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
892 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
893 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
894 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
895 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
896 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
897 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
898 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
899 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
900 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
901 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
902 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
903 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
904 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
905 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
906 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
907 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
908 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
909 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
910 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
911 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
912 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
913 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
914 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
915 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
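/* For example, powi_table[15] is 9, so x**15 is evaluated as x**9 * x**6;
   expanding recursively (x**9 = x**6 * x**3, x**6 = x**3 * x**3,
   x**3 = x**2 * x, x**2 = x * x) needs only five multiplications.  */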
919 /* Return the number of multiplications required to calculate
920 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
921 subroutine of powi_cost. CACHE is an array indicating
922 which exponents have already been calculated. */
924 static int
925 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
927 /* If we've already calculated this exponent, then this evaluation
928 doesn't require any additional multiplications. */
929 if (cache[n])
930 return 0;
932 cache[n] = true;
933 return powi_lookup_cost (n - powi_table[n], cache)
934 + powi_lookup_cost (powi_table[n], cache) + 1;
937 /* Return the number of multiplications required to calculate
938 powi(x,n) for an arbitrary x, given the exponent N. This
939 function needs to be kept in sync with powi_as_mults below. */
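/* For instance, powi_cost (257) is 9: the loop below consumes the low
   POWI_WINDOW_SIZE bits of the exponent (digit 1) at a cost of
   powi_lookup_cost (1) + POWI_WINDOW_SIZE + 1 == 4, leaving val == 32,
   and powi_lookup_cost (32) accounts for the remaining 5 squarings.  */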
941 static int
942 powi_cost (HOST_WIDE_INT n)
944 bool cache[POWI_TABLE_SIZE];
945 unsigned HOST_WIDE_INT digit;
946 unsigned HOST_WIDE_INT val;
947 int result;
949 if (n == 0)
950 return 0;
952 /* Ignore the reciprocal when calculating the cost. */
953 val = (n < 0) ? -n : n;
955 /* Initialize the exponent cache. */
956 memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
957 cache[1] = true;
959 result = 0;
961 while (val >= POWI_TABLE_SIZE)
963 if (val & 1)
965 digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
966 result += powi_lookup_cost (digit, cache)
967 + POWI_WINDOW_SIZE + 1;
968 val >>= POWI_WINDOW_SIZE;
970 else
972 val >>= 1;
973 result++;
977 return result + powi_lookup_cost (val, cache);
980 /* Recursive subroutine of powi_as_mults. This function takes the
981 array, CACHE, of already calculated exponents and an exponent N and
982 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
984 static tree
985 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
986 HOST_WIDE_INT n, tree *cache)
988 tree op0, op1, ssa_target;
989 unsigned HOST_WIDE_INT digit;
990 gassign *mult_stmt;
992 if (n < POWI_TABLE_SIZE && cache[n])
993 return cache[n];
995 ssa_target = make_temp_ssa_name (type, NULL, "powmult");
997 if (n < POWI_TABLE_SIZE)
999 cache[n] = ssa_target;
1000 op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1001 op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1003 else if (n & 1)
1005 digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1006 op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1007 op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1009 else
1011 op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1012 op1 = op0;
1015 mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1016 gimple_set_location (mult_stmt, loc);
1017 gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1019 return ssa_target;
1022 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1023 This function needs to be kept in sync with powi_cost above. */
1025 static tree
1026 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1027 tree arg0, HOST_WIDE_INT n)
1029 tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1030 gassign *div_stmt;
1031 tree target;
1033 if (n == 0)
1034 return build_real (type, dconst1);
1036 memset (cache, 0, sizeof (cache));
1037 cache[1] = arg0;
1039 result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
1040 if (n >= 0)
1041 return result;
1043 /* If the original exponent was negative, reciprocate the result. */
1044 target = make_temp_ssa_name (type, NULL, "powmult");
1045 div_stmt = gimple_build_assign (target, RDIV_EXPR,
1046 build_real (type, dconst1), result);
1047 gimple_set_location (div_stmt, loc);
1048 gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1050 return target;
1053 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1054 location info LOC. If the arguments are appropriate, create an
1055 equivalent sequence of statements prior to GSI using an optimal
1056 number of multiplications, and return an expression holding the
1057 result. */
1059 static tree
1060 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1061 tree arg0, HOST_WIDE_INT n)
1063 /* Avoid largest negative number. */
1064 if (n != -n
1065 && ((n >= -1 && n <= 2)
1066 || (optimize_function_for_speed_p (cfun)
1067 && powi_cost (n) <= POWI_MAX_MULTS)))
1068 return powi_as_mults (gsi, loc, arg0, n);
1070 return NULL_TREE;
1073 /* Build a gimple call statement that calls FN with argument ARG.
1074 Set the lhs of the call statement to a fresh SSA name. Insert the
1075 statement prior to GSI's current position, and return the fresh
1076 SSA name. */
1078 static tree
1079 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1080 tree fn, tree arg)
1082 gcall *call_stmt;
1083 tree ssa_target;
1085 call_stmt = gimple_build_call (fn, 1, arg);
1086 ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1087 gimple_set_lhs (call_stmt, ssa_target);
1088 gimple_set_location (call_stmt, loc);
1089 gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1091 return ssa_target;
1094 /* Build a gimple binary operation with the given CODE and arguments
1095 ARG0, ARG1, assigning the result to a new SSA name called NAME.
1096 Insert the statement prior to GSI's current position, and
1097 return the fresh SSA name. */
1099 static tree
1100 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1101 const char *name, enum tree_code code,
1102 tree arg0, tree arg1)
1104 tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1105 gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1106 gimple_set_location (stmt, loc);
1107 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1108 return result;
1111 /* Build a gimple reference operation with the given CODE and argument
1112 ARG, assigning the result to a new SSA name of TYPE with NAME.
1113 Insert the statement prior to GSI's current position, and return
1114 the fresh SSA name. */
1116 static inline tree
1117 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1118 const char *name, enum tree_code code, tree arg0)
1120 tree result = make_temp_ssa_name (type, NULL, name);
1121 gimple stmt = gimple_build_assign (result, build1 (code, type, arg0));
1122 gimple_set_location (stmt, loc);
1123 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1124 return result;
1127 /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1128 prior to GSI's current position, and return the fresh SSA name. */
1130 static tree
1131 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1132 tree type, tree val)
1134 tree result = make_ssa_name (type);
1135 gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1136 gimple_set_location (stmt, loc);
1137 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1138 return result;
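/* Bookkeeping for synthesizing pow (x, c) as chains of square roots:
   FACTORS holds the a[i] flags of the 0.5-power series, DEEPEST the length
   of the longest sqrt chain used and NUM_MULTS the number of multiplications
   needed to combine the chains (see representable_as_half_series_p).  */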
1141 struct pow_synth_sqrt_info
1143 bool *factors;
1144 unsigned int deepest;
1145 unsigned int num_mults;
1148 /* Return true iff the real value C can be represented as a
1149 sum of powers of 0.5 up to N. That is:
1150 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1151 Record in INFO the various parameters of the synthesis algorithm such
1152 as the factors a[i], the maximum 0.5 power and the number of
1153 multiplications that will be required. */
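/* For example, C == 0.625 == 0.5**1 + 0.5**3 is representable, with
   factors {1, 0, 1}, deepest == 3 and num_mults == 1, corresponding to
   sqrt (x) * sqrt (sqrt (sqrt (x))).  */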
1155 bool
1156 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1157 struct pow_synth_sqrt_info *info)
1159 REAL_VALUE_TYPE factor = dconsthalf;
1160 REAL_VALUE_TYPE remainder = c;
1162 info->deepest = 0;
1163 info->num_mults = 0;
1164 memset (info->factors, 0, n * sizeof (bool));
1166 for (unsigned i = 0; i < n; i++)
1168 REAL_VALUE_TYPE res;
1170 /* If something inexact happened bail out now. */
1171 if (REAL_ARITHMETIC (res, MINUS_EXPR, remainder, factor))
1172 return false;
1174 /* We have hit zero. The number is representable as a sum
1175 of powers of 0.5. */
1176 if (REAL_VALUES_EQUAL (res, dconst0))
1178 info->factors[i] = true;
1179 info->deepest = i + 1;
1180 return true;
1182 else if (!REAL_VALUE_NEGATIVE (res))
1184 remainder = res;
1185 info->factors[i] = true;
1186 info->num_mults++;
1188 else
1189 info->factors[i] = false;
1191 REAL_ARITHMETIC (factor, MULT_EXPR, factor, dconsthalf);
1193 return false;
1196 /* Return the tree corresponding to FN being applied
1197 to ARG N times at GSI and LOC.
1198 Look up previous results from CACHE if need be.
1199 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1201 static tree
1202 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1203 tree fn, location_t loc, tree *cache)
1205 tree res = cache[n];
1206 if (!res)
1208 tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1209 res = build_and_insert_call (gsi, loc, fn, prev);
1210 cache[n] = res;
1213 return res;
1216 /* Print to STREAM the repeated application of function FNAME to ARG
1217 N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1218 "foo (foo (x))". */
1220 static void
1221 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1222 unsigned int n)
1224 if (n == 0)
1225 fprintf (stream, "%s", arg);
1226 else
1228 fprintf (stream, "%s (", fname);
1229 print_nested_fn (stream, fname, arg, n - 1);
1230 fprintf (stream, ")");
1234 /* Print to STREAM the fractional sequence of sqrt chains
1235 applied to ARG, described by INFO. Used for the dump file. */
1237 static void
1238 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1239 struct pow_synth_sqrt_info *info)
1241 for (unsigned int i = 0; i < info->deepest; i++)
1243 bool is_set = info->factors[i];
1244 if (is_set)
1246 print_nested_fn (stream, "sqrt", arg, i + 1);
1247 if (i != info->deepest - 1)
1248 fprintf (stream, " * ");
1253 /* Print to STREAM a representation of raising ARG to an integer
1254 power N. Used for the dump file. */
1256 static void
1257 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1259 if (n > 1)
1260 fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1261 else if (n == 1)
1262 fprintf (stream, "%s", arg);
1265 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1266 square roots. Place at GSI and LOC. Limit the maximum depth
1267 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1268 result of the expanded sequence or NULL_TREE if the expansion failed.
1270 This routine assumes that ARG1 is a real number with a fractional part
1271 (the integer exponent case will have been handled earlier in
1272 gimple_expand_builtin_pow).
1274 For ARG1 > 0.0:
1275 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1276 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1277 FRAC_PART == ARG1 - WHOLE_PART:
1278 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1279 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1280 if it can be expressed as such, that is if FRAC_PART satisfies:
1281 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1282 where integer a[i] is either 0 or 1.
1284 Example:
1285 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1286 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1288 For ARG1 < 0.0 there are two approaches:
1289 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1290 is calculated as above.
1292 Example:
1293 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1294 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1296 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1297 FRAC_PART := ARG1 - WHOLE_PART
1298 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1299 Example:
1300 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1301 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1303 For ARG1 < 0.0 we choose between (A) and (B) depending on
1304 how many multiplications we'd have to do.
1305 So, for the example in (B): POW (x, -5.875), if we were to
1306 follow algorithm (A) we would produce:
1307 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1308 which contains more multiplications than approach (B).
1310 Hopefully, this approach will eliminate potentially expensive POW library
1311 calls when unsafe floating point math is enabled and allow the compiler to
1312 further optimise the multiplies, square roots and divides produced by this
1313 function. */
1315 static tree
1316 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1317 tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1319 tree type = TREE_TYPE (arg0);
1320 machine_mode mode = TYPE_MODE (type);
1321 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1322 bool one_over = true;
1324 if (!sqrtfn)
1325 return NULL_TREE;
1327 if (TREE_CODE (arg1) != REAL_CST)
1328 return NULL_TREE;
1330 REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1332 gcc_assert (max_depth > 0);
1333 tree *cache = XALLOCAVEC (tree, max_depth + 1);
1335 struct pow_synth_sqrt_info synth_info;
1336 synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1337 synth_info.deepest = 0;
1338 synth_info.num_mults = 0;
1340 bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1341 REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1343 /* The whole and fractional parts of exp. */
1344 REAL_VALUE_TYPE whole_part;
1345 REAL_VALUE_TYPE frac_part;
1347 real_floor (&whole_part, mode, &exp);
1348 REAL_ARITHMETIC (frac_part, MINUS_EXPR, exp, whole_part);
1351 REAL_VALUE_TYPE ceil_whole = dconst0;
1352 REAL_VALUE_TYPE ceil_fract = dconst0;
1354 if (neg_exp)
1356 real_ceil (&ceil_whole, mode, &exp);
1357 REAL_ARITHMETIC (ceil_fract, MINUS_EXPR, ceil_whole, exp);
1360 if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1361 return NULL_TREE;
1363 /* Check whether it's more profitable to not use 1.0 / ... */
1364 if (neg_exp)
1366 struct pow_synth_sqrt_info alt_synth_info;
1367 alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1368 alt_synth_info.deepest = 0;
1369 alt_synth_info.num_mults = 0;
1371 if (representable_as_half_series_p (ceil_fract, max_depth,
1372 &alt_synth_info)
1373 && alt_synth_info.deepest <= synth_info.deepest
1374 && alt_synth_info.num_mults < synth_info.num_mults)
1376 whole_part = ceil_whole;
1377 frac_part = ceil_fract;
1378 synth_info.deepest = alt_synth_info.deepest;
1379 synth_info.num_mults = alt_synth_info.num_mults;
1380 memcpy (synth_info.factors, alt_synth_info.factors,
1381 (max_depth + 1) * sizeof (bool));
1382 one_over = false;
1386 HOST_WIDE_INT n = real_to_integer (&whole_part);
1387 REAL_VALUE_TYPE cint;
1388 real_from_integer (&cint, VOIDmode, n, SIGNED);
1390 if (!real_identical (&whole_part, &cint))
1391 return NULL_TREE;
1393 if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1394 return NULL_TREE;
1396 memset (cache, 0, (max_depth + 1) * sizeof (tree));
1398 tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1400 /* Calculate the integer part of the exponent. */
1401 if (n > 1)
1403 integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1404 if (!integer_res)
1405 return NULL_TREE;
1408 if (dump_file)
1410 char string[64];
1412 real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1413 fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1415 if (neg_exp)
1417 if (one_over)
1419 fprintf (dump_file, "1.0 / (");
1420 dump_integer_part (dump_file, "x", n);
1421 if (n > 0)
1422 fprintf (dump_file, " * ");
1423 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1424 fprintf (dump_file, ")");
1426 else
1428 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1429 fprintf (dump_file, " / (");
1430 dump_integer_part (dump_file, "x", n);
1431 fprintf (dump_file, ")");
1434 else
1436 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1437 if (n > 0)
1438 fprintf (dump_file, " * ");
1439 dump_integer_part (dump_file, "x", n);
1442 fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1446 tree fract_res = NULL_TREE;
1447 cache[0] = arg0;
1449 /* Calculate the fractional part of the exponent. */
1450 for (unsigned i = 0; i < synth_info.deepest; i++)
1452 if (synth_info.factors[i])
1454 tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1456 if (!fract_res)
1457 fract_res = sqrt_chain;
1459 else
1460 fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1461 fract_res, sqrt_chain);
1465 tree res = NULL_TREE;
1467 if (neg_exp)
1469 if (one_over)
1471 if (n > 0)
1472 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1473 fract_res, integer_res);
1474 else
1475 res = fract_res;
1477 res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1478 build_real (type, dconst1), res);
1480 else
1482 res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1483 fract_res, integer_res);
1486 else
1487 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1488 fract_res, integer_res);
1489 return res;
1492 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1493 with location info LOC. If possible, create an equivalent and
1494 less expensive sequence of statements prior to GSI, and return an
1495 expression holding the result. */
1497 static tree
1498 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
1499 tree arg0, tree arg1)
1501 REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
1502 REAL_VALUE_TYPE c2, dconst3;
1503 HOST_WIDE_INT n;
1504 tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
1505 machine_mode mode;
1506 bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
1507 bool hw_sqrt_exists, c_is_int, c2_is_int;
1509 dconst1_4 = dconst1;
1510 SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
1512 /* If the exponent isn't a constant, there's nothing of interest
1513 to be done. */
1514 if (TREE_CODE (arg1) != REAL_CST)
1515 return NULL_TREE;
1517 /* If the exponent is equivalent to an integer, expand to an optimal
1518 multiplication sequence when profitable. */
1519 c = TREE_REAL_CST (arg1);
1520 n = real_to_integer (&c);
1521 real_from_integer (&cint, VOIDmode, n, SIGNED);
1522 c_is_int = real_identical (&c, &cint);
1524 if (c_is_int
1525 && ((n >= -1 && n <= 2)
1526 || (flag_unsafe_math_optimizations
1527 && speed_p
1528 && powi_cost (n) <= POWI_MAX_MULTS)))
1529 return gimple_expand_builtin_powi (gsi, loc, arg0, n);
1531 /* Attempt various optimizations using sqrt and cbrt. */
1532 type = TREE_TYPE (arg0);
1533 mode = TYPE_MODE (type);
1534 sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1536 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
1537 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
1538 sqrt(-0) = -0. */
1539 if (sqrtfn
1540 && REAL_VALUES_EQUAL (c, dconsthalf)
1541 && !HONOR_SIGNED_ZEROS (mode))
1542 return build_and_insert_call (gsi, loc, sqrtfn, arg0);
1544 hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
1546 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
1547 optimizations since 1./3. is not exactly representable. If x
1548 is negative and finite, the correct value of pow(x,1./3.) is
1549 a NaN with the "invalid" exception raised, because the value
1550 of 1./3. actually has an even denominator. The correct value
1551 of cbrt(x) is a negative real value. */
1552 cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
1553 dconst1_3 = real_value_truncate (mode, dconst_third ());
1555 if (flag_unsafe_math_optimizations
1556 && cbrtfn
1557 && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
1558 && REAL_VALUES_EQUAL (c, dconst1_3))
1559 return build_and_insert_call (gsi, loc, cbrtfn, arg0);
1561 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
1562 if we don't have a hardware sqrt insn. */
1563 dconst1_6 = dconst1_3;
1564 SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
1566 if (flag_unsafe_math_optimizations
1567 && sqrtfn
1568 && cbrtfn
1569 && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
1570 && speed_p
1571 && hw_sqrt_exists
1572 && REAL_VALUES_EQUAL (c, dconst1_6))
1574 /* sqrt(x) */
1575 sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
1577 /* cbrt(sqrt(x)) */
1578 return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
1582 /* Attempt to expand the POW as a product of square root chains.
1583 Expand the 0.25 case even when optimising for size. */
1584 if (flag_unsafe_math_optimizations
1585 && sqrtfn
1586 && hw_sqrt_exists
1587 && (speed_p || REAL_VALUES_EQUAL (c, dconst1_4))
1588 && !HONOR_SIGNED_ZEROS (mode))
1590 unsigned int max_depth = speed_p
1591 ? PARAM_VALUE (PARAM_MAX_POW_SQRT_DEPTH)
1592 : 2;
1594 tree expand_with_sqrts
1595 = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
1597 if (expand_with_sqrts)
1598 return expand_with_sqrts;
1601 real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
1602 n = real_to_integer (&c2);
1603 real_from_integer (&cint, VOIDmode, n, SIGNED);
1604 c2_is_int = real_identical (&c2, &cint);
1606 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
1608 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
1609 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
1611 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
1612 different from pow(x, 1./3.) due to rounding and behavior with
1613 negative x, we need to constrain this transformation to unsafe
1614 math and positive x or finite math. */
1615 real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
1616 real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
1617 real_round (&c2, mode, &c2);
1618 n = real_to_integer (&c2);
1619 real_from_integer (&cint, VOIDmode, n, SIGNED);
1620 real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
1621 real_convert (&c2, mode, &c2);
1623 if (flag_unsafe_math_optimizations
1624 && cbrtfn
1625 && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
1626 && real_identical (&c2, &c)
1627 && !c2_is_int
1628 && optimize_function_for_speed_p (cfun)
1629 && powi_cost (n / 3) <= POWI_MAX_MULTS)
1631 tree powi_x_ndiv3 = NULL_TREE;
1633 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
1634 possible or profitable, give up. Skip the degenerate case when
1635 abs(n) < 3, where the result is always 1. */
1636 if (absu_hwi (n) >= 3)
1638 powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
1639 abs_hwi (n / 3));
1640 if (!powi_x_ndiv3)
1641 return NULL_TREE;
1644 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
1645 as that creates an unnecessary variable. Instead, just produce
1646 either cbrt(x) or cbrt(x) * cbrt(x). */
1647 cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
1649 if (absu_hwi (n) % 3 == 1)
1650 powi_cbrt_x = cbrt_x;
1651 else
1652 powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1653 cbrt_x, cbrt_x);
1655 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
1656 if (absu_hwi (n) < 3)
1657 result = powi_cbrt_x;
1658 else
1659 result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1660 powi_x_ndiv3, powi_cbrt_x);
1662 /* If n is negative, reciprocate the result. */
1663 if (n < 0)
1664 result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1665 build_real (type, dconst1), result);
1667 return result;
1670 /* No optimizations succeeded. */
1671 return NULL_TREE;
1674 /* ARG is the argument to a cabs builtin call in GSI with location info
1675 LOC. Create a sequence of statements prior to GSI that calculates
1676 sqrt(R*R + I*I), where R and I are the real and imaginary components
1677 of ARG, respectively. Return an expression holding the result. */
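/* A sketch of the emitted sequence (the temporary names are only
   illustrative):

     cabs_r = REALPART_EXPR <arg>;
     cabs_rr = cabs_r * cabs_r;
     cabs_i = IMAGPART_EXPR <arg>;
     cabs_ii = cabs_i * cabs_i;
     cabs_sum = cabs_rr + cabs_ii;
     result = sqrt (cabs_sum);  */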
1679 static tree
1680 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
1682 tree real_part, imag_part, addend1, addend2, sum, result;
1683 tree type = TREE_TYPE (TREE_TYPE (arg));
1684 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1685 machine_mode mode = TYPE_MODE (type);
1687 if (!flag_unsafe_math_optimizations
1688 || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
1689 || !sqrtfn
1690 || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
1691 return NULL_TREE;
1693 real_part = build_and_insert_ref (gsi, loc, type, "cabs",
1694 REALPART_EXPR, arg);
1695 addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1696 real_part, real_part);
1697 imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
1698 IMAGPART_EXPR, arg);
1699 addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1700 imag_part, imag_part);
1701 sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
1702 result = build_and_insert_call (gsi, loc, sqrtfn, sum);
1704 return result;
1707 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
1708 on the SSA_NAME argument of each of them. Also expand powi(x,n) into
1709 an optimal number of multiplies, when n is a constant. */
1711 namespace {
1713 const pass_data pass_data_cse_sincos =
1715 GIMPLE_PASS, /* type */
1716 "sincos", /* name */
1717 OPTGROUP_NONE, /* optinfo_flags */
1718 TV_NONE, /* tv_id */
1719 PROP_ssa, /* properties_required */
1720 0, /* properties_provided */
1721 0, /* properties_destroyed */
1722 0, /* todo_flags_start */
1723 TODO_update_ssa, /* todo_flags_finish */
1726 class pass_cse_sincos : public gimple_opt_pass
1728 public:
1729 pass_cse_sincos (gcc::context *ctxt)
1730 : gimple_opt_pass (pass_data_cse_sincos, ctxt)
1733 /* opt_pass methods: */
1734 virtual bool gate (function *)
1736 /* We no longer require either sincos or cexp, since powi expansion
1737 piggybacks on this pass. */
1738 return optimize;
1741 virtual unsigned int execute (function *);
1743 }; // class pass_cse_sincos
1745 unsigned int
1746 pass_cse_sincos::execute (function *fun)
1748 basic_block bb;
1749 bool cfg_changed = false;
1751 calculate_dominance_info (CDI_DOMINATORS);
1752 memset (&sincos_stats, 0, sizeof (sincos_stats));
1754 FOR_EACH_BB_FN (bb, fun)
1756 gimple_stmt_iterator gsi;
1757 bool cleanup_eh = false;
1759 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1761 gimple stmt = gsi_stmt (gsi);
1762 tree fndecl;
1764 /* Only the last stmt in a bb could throw; no need to call
1765 gimple_purge_dead_eh_edges if we change something in the middle
1766 of a basic block. */
1767 cleanup_eh = false;
1769 if (is_gimple_call (stmt)
1770 && gimple_call_lhs (stmt)
1771 && (fndecl = gimple_call_fndecl (stmt))
1772 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
1774 tree arg, arg0, arg1, result;
1775 HOST_WIDE_INT n;
1776 location_t loc;
1778 switch (DECL_FUNCTION_CODE (fndecl))
1780 CASE_FLT_FN (BUILT_IN_COS):
1781 CASE_FLT_FN (BUILT_IN_SIN):
1782 CASE_FLT_FN (BUILT_IN_CEXPI):
1783 /* Make sure we have either sincos or cexp. */
1784 if (!targetm.libc_has_function (function_c99_math_complex)
1785 && !targetm.libc_has_function (function_sincos))
1786 break;
1788 arg = gimple_call_arg (stmt, 0);
1789 if (TREE_CODE (arg) == SSA_NAME)
1790 cfg_changed |= execute_cse_sincos_1 (arg);
1791 break;
1793 CASE_FLT_FN (BUILT_IN_POW):
1794 arg0 = gimple_call_arg (stmt, 0);
1795 arg1 = gimple_call_arg (stmt, 1);
1797 loc = gimple_location (stmt);
1798 result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
1800 if (result)
1802 tree lhs = gimple_get_lhs (stmt);
1803 gassign *new_stmt = gimple_build_assign (lhs, result);
1804 gimple_set_location (new_stmt, loc);
1805 unlink_stmt_vdef (stmt);
1806 gsi_replace (&gsi, new_stmt, true);
1807 cleanup_eh = true;
1808 if (gimple_vdef (stmt))
1809 release_ssa_name (gimple_vdef (stmt));
1811 break;
1813 CASE_FLT_FN (BUILT_IN_POWI):
1814 arg0 = gimple_call_arg (stmt, 0);
1815 arg1 = gimple_call_arg (stmt, 1);
1816 loc = gimple_location (stmt);
1818 if (real_minus_onep (arg0))
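/* powi (-1, n) is 1.0 for even n and -1.0 for odd n, so select
   between the two constants on the low bit of the exponent instead
   of expanding a multiplication chain. */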
1820 tree t0, t1, cond, one, minus_one;
1821 gassign *stmt;
1823 t0 = TREE_TYPE (arg0);
1824 t1 = TREE_TYPE (arg1);
1825 one = build_real (t0, dconst1);
1826 minus_one = build_real (t0, dconstm1);
1828 cond = make_temp_ssa_name (t1, NULL, "powi_cond");
1829 stmt = gimple_build_assign (cond, BIT_AND_EXPR,
1830 arg1, build_int_cst (t1, 1));
1831 gimple_set_location (stmt, loc);
1832 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1834 result = make_temp_ssa_name (t0, NULL, "powi");
1835 stmt = gimple_build_assign (result, COND_EXPR, cond,
1836 minus_one, one);
1837 gimple_set_location (stmt, loc);
1838 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1840 else
1842 if (!tree_fits_shwi_p (arg1))
1843 break;
1845 n = tree_to_shwi (arg1);
1846 result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
1849 if (result)
1851 tree lhs = gimple_get_lhs (stmt);
1852 gassign *new_stmt = gimple_build_assign (lhs, result);
1853 gimple_set_location (new_stmt, loc);
1854 unlink_stmt_vdef (stmt);
1855 gsi_replace (&gsi, new_stmt, true);
1856 cleanup_eh = true;
1857 if (gimple_vdef (stmt))
1858 release_ssa_name (gimple_vdef (stmt));
1860 break;
1862 CASE_FLT_FN (BUILT_IN_CABS):
1863 arg0 = gimple_call_arg (stmt, 0);
1864 loc = gimple_location (stmt);
1865 result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
1867 if (result)
1869 tree lhs = gimple_get_lhs (stmt);
1870 gassign *new_stmt = gimple_build_assign (lhs, result);
1871 gimple_set_location (new_stmt, loc);
1872 unlink_stmt_vdef (stmt);
1873 gsi_replace (&gsi, new_stmt, true);
1874 cleanup_eh = true;
1875 if (gimple_vdef (stmt))
1876 release_ssa_name (gimple_vdef (stmt));
1878 break;
1880 default:;
1884 if (cleanup_eh)
1885 cfg_changed |= gimple_purge_dead_eh_edges (bb);
1888 statistics_counter_event (fun, "sincos statements inserted",
1889 sincos_stats.inserted);
1891 free_dominance_info (CDI_DOMINATORS);
1892 return cfg_changed ? TODO_cleanup_cfg : 0;
1895 } // anon namespace
1897 gimple_opt_pass *
1898 make_pass_cse_sincos (gcc::context *ctxt)
1900 return new pass_cse_sincos (ctxt);
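/* Illustrative sketch (editorial, not part of the original sources): at the
   C level the pass above rewrites, roughly,

     s = sinf (x);
     c = cosf (x);

   into a single call computing both values (via cexpi/sincos), and expands

     r = __builtin_powi (-1.0, n);

   into the branchless form built by the code above:

     powi_cond = n & 1;
     r = powi_cond ? -1.0 : 1.0;

   The exact temporaries shown here are only for illustration.  */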
1903 /* A symbolic number is used to detect byte permutation and selection
1904 patterns. Therefore the field N contains an artificial number
1905 consisting of octet sized markers:
1907 0 - target byte has the value 0
1908 FF - target byte has an unknown value (eg. due to sign extension)
1909 1..size - marker value is the target byte index minus one.
1911 To detect permutations on memory sources (arrays and structures), a symbolic
1912 number is also associated with a base address (the array or structure the load is
1913 made from), an offset from the base address and a range which gives the
1914 difference between the highest and lowest accessed memory location to make
1915 such a symbolic number. The range is thus different from size which reflects
1916 the size of the type of the current expression. Note that for a non-memory source,
1917 range holds the same value as size.
1919 For instance, for an array char a[], (short) a[0] | (short) a[3] would have
1920 a size of 2 but a range of 4 while (short) a[0] | ((short) a[0] << 1) would
1921 still have a size of 2 but this time a range of 1. */
1923 struct symbolic_number {
1924 uint64_t n;
1925 tree type;
1926 tree base_addr;
1927 tree offset;
1928 HOST_WIDE_INT bytepos;
1929 tree alias_set;
1930 tree vuse;
1931 unsigned HOST_WIDE_INT range;
1934 #define BITS_PER_MARKER 8
1935 #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
1936 #define MARKER_BYTE_UNKNOWN MARKER_MASK
1937 #define HEAD_MARKER(n, size) \
1938 ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
1940 /* The number which the find_bswap_or_nop_1 result should match in
1941 order to have a nop. The number is masked according to the size of
1942 the symbolic number before using it. */
1943 #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
1944 (uint64_t)0x08070605 << 32 | 0x04030201)
1946 /* The number which the find_bswap_or_nop_1 result should match in
1947 order to have a byte swap. The number is masked according to the
1948 size of the symbolic number before using it. */
1949 #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
1950 (uint64_t)0x01020304 << 32 | 0x05060708)
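/* For illustration (editorial sketch): with BITS_PER_MARKER == 8, a 4-byte
   symbolic number that reads memory "as is" matches the masked CMPNOP
   pattern

     n->n == 0x04030201

   whereas a full 4-byte swap matches the shifted CMPXCHG pattern

     n->n == 0x01020304.  */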
1952 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
1953 number N. Return false if the requested operation is not permitted
1954 on a symbolic number. */
1956 static inline bool
1957 do_shift_rotate (enum tree_code code,
1958 struct symbolic_number *n,
1959 int count)
1961 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
1962 unsigned head_marker;
1964 if (count % BITS_PER_UNIT != 0)
1965 return false;
1966 count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
1968 /* Zero out the extra bits of N in order to avoid them being shifted
1969 into the significant bits. */
1970 if (size < 64 / BITS_PER_MARKER)
1971 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
1973 switch (code)
1975 case LSHIFT_EXPR:
1976 n->n <<= count;
1977 break;
1978 case RSHIFT_EXPR:
1979 head_marker = HEAD_MARKER (n->n, size);
1980 n->n >>= count;
1981 /* Arithmetic shift of signed type: result is dependent on the value. */
1982 if (!TYPE_UNSIGNED (n->type) && head_marker)
1983 for (i = 0; i < count / BITS_PER_MARKER; i++)
1984 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
1985 << ((size - 1 - i) * BITS_PER_MARKER);
1986 break;
1987 case LROTATE_EXPR:
1988 n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
1989 break;
1990 case RROTATE_EXPR:
1991 n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
1992 break;
1993 default:
1994 return false;
1996 /* Zero unused bits for size. */
1997 if (size < 64 / BITS_PER_MARKER)
1998 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
1999 return true;
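/* Example (editorial): for a 32-bit unsigned symbolic number with
   n->n == 0x04030201, an LSHIFT_EXPR by 8 bits becomes a shift by one
   marker, giving

     n->n == 0x03020100

   i.e. the low byte is now known to be zero and the remaining markers have
   moved up by one byte.  */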
2002 /* Perform sanity checking for the symbolic number N and the gimple
2003 statement STMT. */
2005 static inline bool
2006 verify_symbolic_number_p (struct symbolic_number *n, gimple stmt)
2008 tree lhs_type;
2010 lhs_type = gimple_expr_type (stmt);
2012 if (TREE_CODE (lhs_type) != INTEGER_TYPE)
2013 return false;
2015 if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
2016 return false;
2018 return true;
2021 /* Initialize the symbolic number N for the bswap pass from the base element
2022 SRC manipulated by the bitwise OR expression. */
2024 static bool
2025 init_symbolic_number (struct symbolic_number *n, tree src)
2027 int size;
2029 n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
2031 /* Set up the symbolic number N by setting each byte to a value between 1 and
2032 the byte size of rhs1. The highest order byte is set to n->size and the
2033 lowest order byte to 1. */
2034 n->type = TREE_TYPE (src);
2035 size = TYPE_PRECISION (n->type);
2036 if (size % BITS_PER_UNIT != 0)
2037 return false;
2038 size /= BITS_PER_UNIT;
2039 if (size > 64 / BITS_PER_MARKER)
2040 return false;
2041 n->range = size;
2042 n->n = CMPNOP;
2044 if (size < 64 / BITS_PER_MARKER)
2045 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
2047 return true;
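/* Example (editorial): for a 16-bit source, init_symbolic_number sets
   n->type to that 16-bit type, n->range to 2 and n->n to the masked CMPNOP
   value 0x0201, i.e. marker 1 in the least significant byte and marker 2 in
   the most significant one.  */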
2050 /* Check if STMT might be a byte swap or a nop from a memory source and return
2051 the answer. If so, REF is that memory source and the base of the memory area
2052 accessed and the offset of the access from that base are recorded in N. */
2054 bool
2055 find_bswap_or_nop_load (gimple stmt, tree ref, struct symbolic_number *n)
2057 /* Leaf node is an array or component ref. Memorize its base and
2058 offset from base to compare to other such leaf node. */
2059 HOST_WIDE_INT bitsize, bitpos;
2060 machine_mode mode;
2061 int unsignedp, volatilep;
2062 tree offset, base_addr;
2064 /* Not prepared to handle PDP endian. */
2065 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
2066 return false;
2068 if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
2069 return false;
2071 base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
2072 &unsignedp, &volatilep, false);
2074 if (TREE_CODE (base_addr) == MEM_REF)
2076 offset_int bit_offset = 0;
2077 tree off = TREE_OPERAND (base_addr, 1);
2079 if (!integer_zerop (off))
2081 offset_int boff, coff = mem_ref_offset (base_addr);
2082 boff = wi::lshift (coff, LOG2_BITS_PER_UNIT);
2083 bit_offset += boff;
2086 base_addr = TREE_OPERAND (base_addr, 0);
2088 /* Avoid returning a negative bitpos as this may wreak havoc later. */
2089 if (wi::neg_p (bit_offset))
2091 offset_int mask = wi::mask <offset_int> (LOG2_BITS_PER_UNIT, false);
2092 offset_int tem = bit_offset.and_not (mask);
2093 /* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf.
2094 Subtract it from BIT_OFFSET and add it (scaled) to OFFSET. */
2095 bit_offset -= tem;
2096 tem = wi::arshift (tem, LOG2_BITS_PER_UNIT);
2097 if (offset)
2098 offset = size_binop (PLUS_EXPR, offset,
2099 wide_int_to_tree (sizetype, tem));
2100 else
2101 offset = wide_int_to_tree (sizetype, tem);
2104 bitpos += bit_offset.to_shwi ();
2107 if (bitpos % BITS_PER_UNIT)
2108 return false;
2109 if (bitsize % BITS_PER_UNIT)
2110 return false;
2112 if (!init_symbolic_number (n, ref))
2113 return false;
2114 n->base_addr = base_addr;
2115 n->offset = offset;
2116 n->bytepos = bitpos / BITS_PER_UNIT;
2117 n->alias_set = reference_alias_ptr_type (ref);
2118 n->vuse = gimple_vuse (stmt);
2119 return true;
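/* Example (editorial): for a load of a[2] with "unsigned char a[4]", the
   function records base_addr = a, bytepos = 2, a one-byte range and the
   single-byte symbolic number n->n == 0x01, so later merges can tell which
   memory byte this load contributes.  */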
2122 /* Compute the symbolic number N representing the result of a bitwise OR on the
2123 two symbolic numbers N1 and N2 whose source statements are respectively
2124 SOURCE_STMT1 and SOURCE_STMT2. */
2126 static gimple
2127 perform_symbolic_merge (gimple source_stmt1, struct symbolic_number *n1,
2128 gimple source_stmt2, struct symbolic_number *n2,
2129 struct symbolic_number *n)
2131 int i, size;
2132 uint64_t mask;
2133 gimple source_stmt;
2134 struct symbolic_number *n_start;
2136 /* Sources are different, cancel bswap if they are not memory locations with
2137 the same base (array, structure, ...). */
2138 if (gimple_assign_rhs1 (source_stmt1) != gimple_assign_rhs1 (source_stmt2))
2140 int64_t inc;
2141 HOST_WIDE_INT start_sub, end_sub, end1, end2, end;
2142 struct symbolic_number *toinc_n_ptr, *n_end;
2144 if (!n1->base_addr || !n2->base_addr
2145 || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
2146 return NULL;
2148 if (!n1->offset != !n2->offset
2149 || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
2150 return NULL;
2152 if (n1->bytepos < n2->bytepos)
2154 n_start = n1;
2155 start_sub = n2->bytepos - n1->bytepos;
2156 source_stmt = source_stmt1;
2158 else
2160 n_start = n2;
2161 start_sub = n1->bytepos - n2->bytepos;
2162 source_stmt = source_stmt2;
2165 /* Find the highest address at which a load is performed and
2166 compute related info. */
2167 end1 = n1->bytepos + (n1->range - 1);
2168 end2 = n2->bytepos + (n2->range - 1);
2169 if (end1 < end2)
2171 end = end2;
2172 end_sub = end2 - end1;
2174 else
2176 end = end1;
2177 end_sub = end1 - end2;
2179 n_end = (end2 > end1) ? n2 : n1;
2181 /* Find symbolic number whose lsb is the most significant. */
2182 if (BYTES_BIG_ENDIAN)
2183 toinc_n_ptr = (n_end == n1) ? n2 : n1;
2184 else
2185 toinc_n_ptr = (n_start == n1) ? n2 : n1;
2187 n->range = end - n_start->bytepos + 1;
2189 /* Check that the range of memory covered can be represented by
2190 a symbolic number. */
2191 if (n->range > 64 / BITS_PER_MARKER)
2192 return NULL;
2194 /* Reinterpret byte marks in symbolic number holding the value of
2195 bigger weight according to target endianness. */
2196 inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
2197 size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
2198 for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
2200 unsigned marker
2201 = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
2202 if (marker && marker != MARKER_BYTE_UNKNOWN)
2203 toinc_n_ptr->n += inc;
2206 else
2208 n->range = n1->range;
2209 n_start = n1;
2210 source_stmt = source_stmt1;
2213 if (!n1->alias_set
2214 || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
2215 n->alias_set = n1->alias_set;
2216 else
2217 n->alias_set = ptr_type_node;
2218 n->vuse = n_start->vuse;
2219 n->base_addr = n_start->base_addr;
2220 n->offset = n_start->offset;
2221 n->bytepos = n_start->bytepos;
2222 n->type = n_start->type;
2223 size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2225 for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
2227 uint64_t masked1, masked2;
2229 masked1 = n1->n & mask;
2230 masked2 = n2->n & mask;
2231 if (masked1 && masked2 && masked1 != masked2)
2232 return NULL;
2234 n->n = n1->n | n2->n;
2236 return source_stmt;
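/* Example (editorial): merging the symbolic numbers of the two one-byte
   loads in

     ((uint16_t) a[0] << 8) | (uint16_t) a[1]

   yields, on a little-endian target, n->range == 2 and n->n == 0x0102,
   which find_bswap_or_nop later recognizes as a 16-bit byte swap of the
   memory word at a.  */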
2239 /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
2240 the operation given by the rhs of STMT on the result. If the operation
2241 could be executed successfully, the function returns a gimple stmt whose
2242 rhs's first tree is the expression of the source operand, and NULL
2243 otherwise. */
2245 static gimple
2246 find_bswap_or_nop_1 (gimple stmt, struct symbolic_number *n, int limit)
2248 enum tree_code code;
2249 tree rhs1, rhs2 = NULL;
2250 gimple rhs1_stmt, rhs2_stmt, source_stmt1;
2251 enum gimple_rhs_class rhs_class;
2253 if (!limit || !is_gimple_assign (stmt))
2254 return NULL;
2256 rhs1 = gimple_assign_rhs1 (stmt);
2258 if (find_bswap_or_nop_load (stmt, rhs1, n))
2259 return stmt;
2261 if (TREE_CODE (rhs1) != SSA_NAME)
2262 return NULL;
2264 code = gimple_assign_rhs_code (stmt);
2265 rhs_class = gimple_assign_rhs_class (stmt);
2266 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2268 if (rhs_class == GIMPLE_BINARY_RHS)
2269 rhs2 = gimple_assign_rhs2 (stmt);
2271 /* Handle unary rhs and binary rhs with integer constants as second
2272 operand. */
2274 if (rhs_class == GIMPLE_UNARY_RHS
2275 || (rhs_class == GIMPLE_BINARY_RHS
2276 && TREE_CODE (rhs2) == INTEGER_CST))
2278 if (code != BIT_AND_EXPR
2279 && code != LSHIFT_EXPR
2280 && code != RSHIFT_EXPR
2281 && code != LROTATE_EXPR
2282 && code != RROTATE_EXPR
2283 && !CONVERT_EXPR_CODE_P (code))
2284 return NULL;
2286 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
2288 /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
2289 we have to initialize the symbolic number. */
2290 if (!source_stmt1)
2292 if (gimple_assign_load_p (stmt)
2293 || !init_symbolic_number (n, rhs1))
2294 return NULL;
2295 source_stmt1 = stmt;
2298 switch (code)
2300 case BIT_AND_EXPR:
2302 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2303 uint64_t val = int_cst_value (rhs2), mask = 0;
2304 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
2306 /* Only constants masking full bytes are allowed. */
2307 for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
2308 if ((val & tmp) != 0 && (val & tmp) != tmp)
2309 return NULL;
2310 else if (val & tmp)
2311 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
2313 n->n &= mask;
2315 break;
2316 case LSHIFT_EXPR:
2317 case RSHIFT_EXPR:
2318 case LROTATE_EXPR:
2319 case RROTATE_EXPR:
2320 if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
2321 return NULL;
2322 break;
2323 CASE_CONVERT:
2325 int i, type_size, old_type_size;
2326 tree type;
2328 type = gimple_expr_type (stmt);
2329 type_size = TYPE_PRECISION (type);
2330 if (type_size % BITS_PER_UNIT != 0)
2331 return NULL;
2332 type_size /= BITS_PER_UNIT;
2333 if (type_size > 64 / BITS_PER_MARKER)
2334 return NULL;
2336 /* Sign extension: result is dependent on the value. */
2337 old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2338 if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
2339 && HEAD_MARKER (n->n, old_type_size))
2340 for (i = 0; i < type_size - old_type_size; i++)
2341 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
2342 << ((type_size - 1 - i) * BITS_PER_MARKER);
2344 if (type_size < 64 / BITS_PER_MARKER)
2346 /* If STMT casts to a smaller type mask out the bits not
2347 belonging to the target type. */
2348 n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
2350 n->type = type;
2351 if (!n->base_addr)
2352 n->range = type_size;
2354 break;
2355 default:
2356 return NULL;
2358 return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
2361 /* Handle binary rhs. */
2363 if (rhs_class == GIMPLE_BINARY_RHS)
2365 struct symbolic_number n1, n2;
2366 gimple source_stmt, source_stmt2;
2368 if (code != BIT_IOR_EXPR)
2369 return NULL;
2371 if (TREE_CODE (rhs2) != SSA_NAME)
2372 return NULL;
2374 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2376 switch (code)
2378 case BIT_IOR_EXPR:
2379 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
2381 if (!source_stmt1)
2382 return NULL;
2384 source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
2386 if (!source_stmt2)
2387 return NULL;
2389 if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
2390 return NULL;
2392 if (!n1.vuse != !n2.vuse
2393 || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
2394 return NULL;
2396 source_stmt
2397 = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
2399 if (!source_stmt)
2400 return NULL;
2402 if (!verify_symbolic_number_p (n, stmt))
2403 return NULL;
2405 break;
2406 default:
2407 return NULL;
2409 return source_stmt;
2411 return NULL;
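/* Example (editorial): the walker above can follow a classic open-coded
   byte swap such as

     uint32_t y = (x << 24)
                  | ((x & 0x0000ff00) << 8)
                  | ((x >> 8) & 0x0000ff00)
                  | (x >> 24);

   because every step is an OR, a shift/rotate, a full-byte AND mask or a
   conversion operating on the same source value x.  */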
2414 /* Check if STMT completes a bswap implementation or a read in a given
2415 endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
2416 accordingly. It also sets N to represent the kind of operations
2417 performed: size of the resulting expression and whether it works on
2418 a memory source, and if so its alias set and vuse. Finally, the
2419 function returns a stmt whose rhs's first tree is the source
2420 expression. */
2422 static gimple
2423 find_bswap_or_nop (gimple stmt, struct symbolic_number *n, bool *bswap)
2425 /* The number which the find_bswap_or_nop_1 result should match in order
2426 to have a full byte swap. The number is shifted to the right
2427 according to the size of the symbolic number before using it. */
2428 uint64_t cmpxchg = CMPXCHG;
2429 uint64_t cmpnop = CMPNOP;
2431 gimple source_stmt;
2432 int limit;
2434 /* The last parameter determines the search depth limit. It usually
2435 correlates directly to the number n of bytes to be touched. We
2436 increase that number by log2(n) + 1 here in order to also
2437 cover signed -> unsigned conversions of the src operand as can be seen
2438 in libgcc, and an initial shift/and operation of the src operand. */
2439 limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
2440 limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
2441 source_stmt = find_bswap_or_nop_1 (stmt, n, limit);
2443 if (!source_stmt)
2444 return NULL;
2446 /* Find real size of result (highest non-zero byte). */
2447 if (n->base_addr)
2449 int rsize;
2450 uint64_t tmpn;
2452 for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
2453 n->range = rsize;
2456 /* Zero out the extra bits of N and CMP*. */
2457 if (n->range < (int) sizeof (int64_t))
2459 uint64_t mask;
2461 mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
2462 cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
2463 cmpnop &= mask;
2466 /* A complete byte swap should make the symbolic number start with
2467 the largest digit in the highest order byte. An unchanged symbolic
2468 number indicates a read with the same endianness as the target architecture. */
2469 if (n->n == cmpnop)
2470 *bswap = false;
2471 else if (n->n == cmpxchg)
2472 *bswap = true;
2473 else
2474 return NULL;
2476 /* Useless bit manipulation performed by code. */
2477 if (!n->base_addr && n->n == cmpnop)
2478 return NULL;
2480 n->range *= BITS_PER_UNIT;
2481 return source_stmt;
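/* Example (editorial): for a 2-byte result the comparison values above are
   reduced to cmpnop == 0x0201 and cmpxchg == 0x0102, so a merged symbolic
   number equal to 0x0102 is reported as a byte swap, and n->range becomes
   16 (bits) on return.  */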
2484 namespace {
2486 const pass_data pass_data_optimize_bswap =
2488 GIMPLE_PASS, /* type */
2489 "bswap", /* name */
2490 OPTGROUP_NONE, /* optinfo_flags */
2491 TV_NONE, /* tv_id */
2492 PROP_ssa, /* properties_required */
2493 0, /* properties_provided */
2494 0, /* properties_destroyed */
2495 0, /* todo_flags_start */
2496 0, /* todo_flags_finish */
2499 class pass_optimize_bswap : public gimple_opt_pass
2501 public:
2502 pass_optimize_bswap (gcc::context *ctxt)
2503 : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
2506 /* opt_pass methods: */
2507 virtual bool gate (function *)
2509 return flag_expensive_optimizations && optimize;
2512 virtual unsigned int execute (function *);
2514 }; // class pass_optimize_bswap
2516 /* Perform the bswap optimization: replace the expression computed in the rhs
2517 of CUR_STMT by an equivalent bswap, load or load + bswap expression.
2518 Which of these alternatives replaces the rhs is given by N->base_addr (non
2519 null if a load is needed) and BSWAP. The type, VUSE and alias set of the
2520 load to perform are also given in N, while the builtin bswap to invoke is
2521 given in FNDECL. Finally, if a load is involved, SRC_STMT refers to one of the
2522 load statements involved to construct the rhs in CUR_STMT and N->range gives
2523 the size of the rhs expression for maintaining some statistics.
2525 Note that if the replacement involves a load, CUR_STMT is moved just after
2526 SRC_STMT to do the load with the same VUSE, which can lead to CUR_STMT
2527 changing basic block. */
2529 static bool
2530 bswap_replace (gimple cur_stmt, gimple src_stmt, tree fndecl, tree bswap_type,
2531 tree load_type, struct symbolic_number *n, bool bswap)
2533 gimple_stmt_iterator gsi;
2534 tree src, tmp, tgt;
2535 gimple bswap_stmt;
2537 gsi = gsi_for_stmt (cur_stmt);
2538 src = gimple_assign_rhs1 (src_stmt);
2539 tgt = gimple_assign_lhs (cur_stmt);
2541 /* Need to load the value from memory first. */
2542 if (n->base_addr)
2544 gimple_stmt_iterator gsi_ins = gsi_for_stmt (src_stmt);
2545 tree addr_expr, addr_tmp, val_expr, val_tmp;
2546 tree load_offset_ptr, aligned_load_type;
2547 gimple addr_stmt, load_stmt;
2548 unsigned align;
2549 HOST_WIDE_INT load_offset = 0;
2551 align = get_object_alignment (src);
2552 /* If the new access is smaller than the original one, we need
2553 to perform big endian adjustment. */
2554 if (BYTES_BIG_ENDIAN)
2556 HOST_WIDE_INT bitsize, bitpos;
2557 machine_mode mode;
2558 int unsignedp, volatilep;
2559 tree offset;
2561 get_inner_reference (src, &bitsize, &bitpos, &offset, &mode,
2562 &unsignedp, &volatilep, false);
2563 if (n->range < (unsigned HOST_WIDE_INT) bitsize)
2565 load_offset = (bitsize - n->range) / BITS_PER_UNIT;
2566 unsigned HOST_WIDE_INT l
2567 = (load_offset * BITS_PER_UNIT) & (align - 1);
2568 if (l)
2569 align = l & -l;
2573 if (bswap
2574 && align < GET_MODE_ALIGNMENT (TYPE_MODE (load_type))
2575 && SLOW_UNALIGNED_ACCESS (TYPE_MODE (load_type), align))
2576 return false;
2578 /* Move cur_stmt just before one of the loads of the original
2579 to ensure it has the same VUSE. See PR61517 for what could
2580 go wrong. */
2581 gsi_move_before (&gsi, &gsi_ins);
2582 gsi = gsi_for_stmt (cur_stmt);
2584 /* Compute address to load from and cast according to the size
2585 of the load. */
2586 addr_expr = build_fold_addr_expr (unshare_expr (src));
2587 if (is_gimple_mem_ref_addr (addr_expr))
2588 addr_tmp = addr_expr;
2589 else
2591 addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL,
2592 "load_src");
2593 addr_stmt = gimple_build_assign (addr_tmp, addr_expr);
2594 gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT);
2597 /* Perform the load. */
2598 aligned_load_type = load_type;
2599 if (align < TYPE_ALIGN (load_type))
2600 aligned_load_type = build_aligned_type (load_type, align);
2601 load_offset_ptr = build_int_cst (n->alias_set, load_offset);
2602 val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
2603 load_offset_ptr);
2605 if (!bswap)
2607 if (n->range == 16)
2608 nop_stats.found_16bit++;
2609 else if (n->range == 32)
2610 nop_stats.found_32bit++;
2611 else
2613 gcc_assert (n->range == 64);
2614 nop_stats.found_64bit++;
2617 /* Convert the result of the load if necessary. */
2618 if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type))
2620 val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
2621 "load_dst");
2622 load_stmt = gimple_build_assign (val_tmp, val_expr);
2623 gimple_set_vuse (load_stmt, n->vuse);
2624 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
2625 gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
2627 else
2629 gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
2630 gimple_set_vuse (cur_stmt, n->vuse);
2632 update_stmt (cur_stmt);
2634 if (dump_file)
2636 fprintf (dump_file,
2637 "%d bit load in target endianness found at: ",
2638 (int) n->range);
2639 print_gimple_stmt (dump_file, cur_stmt, 0, 0);
2641 return true;
2643 else
2645 val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
2646 load_stmt = gimple_build_assign (val_tmp, val_expr);
2647 gimple_set_vuse (load_stmt, n->vuse);
2648 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
2650 src = val_tmp;
2653 if (n->range == 16)
2654 bswap_stats.found_16bit++;
2655 else if (n->range == 32)
2656 bswap_stats.found_32bit++;
2657 else
2659 gcc_assert (n->range == 64);
2660 bswap_stats.found_64bit++;
2663 tmp = src;
2665 /* Convert the src expression if necessary. */
2666 if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
2668 gimple convert_stmt;
2670 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
2671 convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
2672 gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
2675 /* The canonical form for a 16-bit bswap is a rotate expression. Only 16-bit
2676 values are handled this way, as a rotation of a 2N-bit value by N bits is
2677 generally not equivalent to a bswap. Consider for instance 0x01020304 r>> 16,
2678 which gives 0x03040102, while a bswap of that value is 0x04030201. */
2679 if (bswap && n->range == 16)
2681 tree count = build_int_cst (NULL, BITS_PER_UNIT);
2682 src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
2683 bswap_stmt = gimple_build_assign (NULL, src);
2685 else
2686 bswap_stmt = gimple_build_call (fndecl, 1, tmp);
2688 tmp = tgt;
2690 /* Convert the result if necessary. */
2691 if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
2693 gimple convert_stmt;
2695 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
2696 convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
2697 gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
2700 gimple_set_lhs (bswap_stmt, tmp);
2702 if (dump_file)
2704 fprintf (dump_file, "%d bit bswap implementation found at: ",
2705 (int) n->range);
2706 print_gimple_stmt (dump_file, cur_stmt, 0, 0);
2709 gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
2710 gsi_remove (&gsi, true);
2711 return true;
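/* Example (editorial sketch): after the detection above, a statement
   computing the 32-bit open-coded swap of a loaded word is rewritten,
   roughly, as

     load_dst = MEM[(uint32_t *) &a];
     result = __builtin_bswap32 (load_dst);

   while a detected 16-bit swap is emitted as the canonical rotate

     result = x r<< 8;

   (exact temporaries and types depend on the input).  */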
2714 /* Find manual byte swap implementations as well as loads in a given
2715 endianness. Byte swaps are turned into a bswap builtin invocation,
2716 while endian loads are converted to a bswap builtin invocation or a
2717 simple load according to the target endianness. */
2719 unsigned int
2720 pass_optimize_bswap::execute (function *fun)
2722 basic_block bb;
2723 bool bswap32_p, bswap64_p;
2724 bool changed = false;
2725 tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
2727 if (BITS_PER_UNIT != 8)
2728 return 0;
2730 bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2731 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
2732 bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2733 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
2734 || (bswap32_p && word_mode == SImode)));
2736 /* Determine the argument type of the builtins. The code later on
2737 assumes that the return and argument type are the same. */
2738 if (bswap32_p)
2740 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
2741 bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
2744 if (bswap64_p)
2746 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
2747 bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
2750 memset (&nop_stats, 0, sizeof (nop_stats));
2751 memset (&bswap_stats, 0, sizeof (bswap_stats));
2753 FOR_EACH_BB_FN (bb, fun)
2755 gimple_stmt_iterator gsi;
2757 /* We do a reverse scan for bswap patterns to make sure we get the
2758 widest match. As bswap pattern matching doesn't handle previously
2759 inserted smaller bswap replacements as sub-patterns, the wider
2760 variant wouldn't be detected. */
2761 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
2763 gimple src_stmt, cur_stmt = gsi_stmt (gsi);
2764 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
2765 enum tree_code code;
2766 struct symbolic_number n;
2767 bool bswap;
2769 /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
2770 might be moved to a different basic block by bswap_replace and gsi
2771 must not point to it if that's the case. Doing the gsi_prev
2772 here makes sure that gsi points to the statement previous to
2773 cur_stmt while still making sure that all statements in this
2774 basic block are considered. */
2775 gsi_prev (&gsi);
2777 if (!is_gimple_assign (cur_stmt))
2778 continue;
2780 code = gimple_assign_rhs_code (cur_stmt);
2781 switch (code)
2783 case LROTATE_EXPR:
2784 case RROTATE_EXPR:
2785 if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
2786 || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
2787 % BITS_PER_UNIT)
2788 continue;
2789 /* Fall through. */
2790 case BIT_IOR_EXPR:
2791 break;
2792 default:
2793 continue;
2796 src_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
2798 if (!src_stmt)
2799 continue;
2801 switch (n.range)
2803 case 16:
2804 /* Already in canonical form, nothing to do. */
2805 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
2806 continue;
2807 load_type = bswap_type = uint16_type_node;
2808 break;
2809 case 32:
2810 load_type = uint32_type_node;
2811 if (bswap32_p)
2813 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
2814 bswap_type = bswap32_type;
2816 break;
2817 case 64:
2818 load_type = uint64_type_node;
2819 if (bswap64_p)
2821 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
2822 bswap_type = bswap64_type;
2824 break;
2825 default:
2826 continue;
2829 if (bswap && !fndecl && n.range != 16)
2830 continue;
2832 if (bswap_replace (cur_stmt, src_stmt, fndecl, bswap_type, load_type,
2833 &n, bswap))
2834 changed = true;
2838 statistics_counter_event (fun, "16-bit nop implementations found",
2839 nop_stats.found_16bit);
2840 statistics_counter_event (fun, "32-bit nop implementations found",
2841 nop_stats.found_32bit);
2842 statistics_counter_event (fun, "64-bit nop implementations found",
2843 nop_stats.found_64bit);
2844 statistics_counter_event (fun, "16-bit bswap implementations found",
2845 bswap_stats.found_16bit);
2846 statistics_counter_event (fun, "32-bit bswap implementations found",
2847 bswap_stats.found_32bit);
2848 statistics_counter_event (fun, "64-bit bswap implementations found",
2849 bswap_stats.found_64bit);
2851 return (changed ? TODO_update_ssa : 0);
2854 } // anon namespace
2856 gimple_opt_pass *
2857 make_pass_optimize_bswap (gcc::context *ctxt)
2859 return new pass_optimize_bswap (ctxt);
2862 /* Return true if stmt is a type conversion operation that can be stripped
2863 when used in a widening multiply operation. */
2864 static bool
2865 widening_mult_conversion_strippable_p (tree result_type, gimple stmt)
2867 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2869 if (TREE_CODE (result_type) == INTEGER_TYPE)
2871 tree op_type;
2872 tree inner_op_type;
2874 if (!CONVERT_EXPR_CODE_P (rhs_code))
2875 return false;
2877 op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2879 /* If the type of OP has the same precision as the result, then
2880 we can strip this conversion. The multiply operation will be
2881 selected to create the correct extension as a by-product. */
2882 if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2883 return true;
2885 /* We can also strip a conversion if it preserves the signed-ness of
2886 the operation and doesn't narrow the range. */
2887 inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2889 /* If the inner-most type is unsigned, then we can strip any
2890 intermediate widening operation. If it's signed, then the
2891 intermediate widening operation must also be signed. */
2892 if ((TYPE_UNSIGNED (inner_op_type)
2893 || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2894 && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2895 return true;
2897 return false;
2900 return rhs_code == FIXED_CONVERT_EXPR;
2903 /* Return true if RHS is a suitable operand for a widening multiplication,
2904 assuming a target type of TYPE.
2905 There are two cases:
2907 - RHS makes some value at least twice as wide. Store that value
2908 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2910 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2911 and set *TYPE_OUT to NULL. */
2913 static bool
2914 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2915 tree *new_rhs_out)
2917 gimple stmt;
2918 tree type1, rhs1;
2920 if (TREE_CODE (rhs) == SSA_NAME)
2922 stmt = SSA_NAME_DEF_STMT (rhs);
2923 if (is_gimple_assign (stmt))
2925 if (! widening_mult_conversion_strippable_p (type, stmt))
2926 rhs1 = rhs;
2927 else
2929 rhs1 = gimple_assign_rhs1 (stmt);
2931 if (TREE_CODE (rhs1) == INTEGER_CST)
2933 *new_rhs_out = rhs1;
2934 *type_out = NULL;
2935 return true;
2939 else
2940 rhs1 = rhs;
2942 type1 = TREE_TYPE (rhs1);
2944 if (TREE_CODE (type1) != TREE_CODE (type)
2945 || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2946 return false;
2948 *new_rhs_out = rhs1;
2949 *type_out = type1;
2950 return true;
2953 if (TREE_CODE (rhs) == INTEGER_CST)
2955 *new_rhs_out = rhs;
2956 *type_out = NULL;
2957 return true;
2960 return false;
2963 /* Return true if STMT performs a widening multiplication, assuming the
2964 output type is TYPE. If so, store the unwidened types of the operands
2965 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2966 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2967 and *TYPE2_OUT would give the operands of the multiplication. */
2969 static bool
2970 is_widening_mult_p (gimple stmt,
2971 tree *type1_out, tree *rhs1_out,
2972 tree *type2_out, tree *rhs2_out)
2974 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2976 if (TREE_CODE (type) != INTEGER_TYPE
2977 && TREE_CODE (type) != FIXED_POINT_TYPE)
2978 return false;
2980 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2981 rhs1_out))
2982 return false;
2984 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2985 rhs2_out))
2986 return false;
2988 if (*type1_out == NULL)
2990 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2991 return false;
2992 *type1_out = *type2_out;
2995 if (*type2_out == NULL)
2997 if (!int_fits_type_p (*rhs2_out, *type1_out))
2998 return false;
2999 *type2_out = *type1_out;
3002 /* Ensure that the larger of the two operands comes first. */
3003 if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
3005 std::swap (*type1_out, *type2_out);
3006 std::swap (*rhs1_out, *rhs2_out);
3009 return true;
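/* Example (editorial): for

     long long f (int a, int b) { return (long long) a * (long long) b; }

   both conversions are strippable, so is_widening_mult_p returns true with
   *type1_out == *type2_out == int and the unwidened operands in *rhs1_out
   and *rhs2_out. A constant operand, as in (long long) a * 1000, is also
   accepted as long as the constant fits in the other operand's type.  */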
3012 /* Process a single gimple statement STMT, which has a MULT_EXPR as
3013 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
3014 value is true iff we converted the statement. */
3016 static bool
3017 convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi)
3019 tree lhs, rhs1, rhs2, type, type1, type2;
3020 enum insn_code handler;
3021 machine_mode to_mode, from_mode, actual_mode;
3022 optab op;
3023 int actual_precision;
3024 location_t loc = gimple_location (stmt);
3025 bool from_unsigned1, from_unsigned2;
3027 lhs = gimple_assign_lhs (stmt);
3028 type = TREE_TYPE (lhs);
3029 if (TREE_CODE (type) != INTEGER_TYPE)
3030 return false;
3032 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
3033 return false;
3035 to_mode = TYPE_MODE (type);
3036 from_mode = TYPE_MODE (type1);
3037 from_unsigned1 = TYPE_UNSIGNED (type1);
3038 from_unsigned2 = TYPE_UNSIGNED (type2);
3040 if (from_unsigned1 && from_unsigned2)
3041 op = umul_widen_optab;
3042 else if (!from_unsigned1 && !from_unsigned2)
3043 op = smul_widen_optab;
3044 else
3045 op = usmul_widen_optab;
3047 handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
3048 0, &actual_mode);
3050 if (handler == CODE_FOR_nothing)
3052 if (op != smul_widen_optab)
3054 /* We can use a signed multiply with unsigned types as long as
3055 there is a wider mode to use, or it is the smaller of the two
3056 types that is unsigned. Note that type1 >= type2, always. */
3057 if ((TYPE_UNSIGNED (type1)
3058 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
3059 || (TYPE_UNSIGNED (type2)
3060 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
3062 from_mode = GET_MODE_WIDER_MODE (from_mode);
3063 if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
3064 return false;
3067 op = smul_widen_optab;
3068 handler = find_widening_optab_handler_and_mode (op, to_mode,
3069 from_mode, 0,
3070 &actual_mode);
3072 if (handler == CODE_FOR_nothing)
3073 return false;
3075 from_unsigned1 = from_unsigned2 = false;
3077 else
3078 return false;
3081 /* Ensure that the inputs to the handler are in the correct precision
3082 for the opcode. This will be the full mode size. */
3083 actual_precision = GET_MODE_PRECISION (actual_mode);
3084 if (2 * actual_precision > TYPE_PRECISION (type))
3085 return false;
3086 if (actual_precision != TYPE_PRECISION (type1)
3087 || from_unsigned1 != TYPE_UNSIGNED (type1))
3088 rhs1 = build_and_insert_cast (gsi, loc,
3089 build_nonstandard_integer_type
3090 (actual_precision, from_unsigned1), rhs1);
3091 if (actual_precision != TYPE_PRECISION (type2)
3092 || from_unsigned2 != TYPE_UNSIGNED (type2))
3093 rhs2 = build_and_insert_cast (gsi, loc,
3094 build_nonstandard_integer_type
3095 (actual_precision, from_unsigned2), rhs2);
3097 /* Handle constants. */
3098 if (TREE_CODE (rhs1) == INTEGER_CST)
3099 rhs1 = fold_convert (type1, rhs1);
3100 if (TREE_CODE (rhs2) == INTEGER_CST)
3101 rhs2 = fold_convert (type2, rhs2);
3103 gimple_assign_set_rhs1 (stmt, rhs1);
3104 gimple_assign_set_rhs2 (stmt, rhs2);
3105 gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
3106 update_stmt (stmt);
3107 widen_mul_stats.widen_mults_inserted++;
3108 return true;
3111 /* Process a single gimple statement STMT, which is found at the
3112 iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
3113 rhs (given by CODE), and try to convert it into a
3114 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
3115 is true iff we converted the statement. */
3117 static bool
3118 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
3119 enum tree_code code)
3121 gimple rhs1_stmt = NULL, rhs2_stmt = NULL;
3122 gimple conv1_stmt = NULL, conv2_stmt = NULL, conv_stmt;
3123 tree type, type1, type2, optype;
3124 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
3125 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
3126 optab this_optab;
3127 enum tree_code wmult_code;
3128 enum insn_code handler;
3129 machine_mode to_mode, from_mode, actual_mode;
3130 location_t loc = gimple_location (stmt);
3131 int actual_precision;
3132 bool from_unsigned1, from_unsigned2;
3134 lhs = gimple_assign_lhs (stmt);
3135 type = TREE_TYPE (lhs);
3136 if (TREE_CODE (type) != INTEGER_TYPE
3137 && TREE_CODE (type) != FIXED_POINT_TYPE)
3138 return false;
3140 if (code == MINUS_EXPR)
3141 wmult_code = WIDEN_MULT_MINUS_EXPR;
3142 else
3143 wmult_code = WIDEN_MULT_PLUS_EXPR;
3145 rhs1 = gimple_assign_rhs1 (stmt);
3146 rhs2 = gimple_assign_rhs2 (stmt);
3148 if (TREE_CODE (rhs1) == SSA_NAME)
3150 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
3151 if (is_gimple_assign (rhs1_stmt))
3152 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
3155 if (TREE_CODE (rhs2) == SSA_NAME)
3157 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
3158 if (is_gimple_assign (rhs2_stmt))
3159 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
3162 /* Allow for one conversion statement between the multiply
3163 and addition/subtraction statement. If there is more than
3164 one conversion then we assume they would invalidate this
3165 transformation. If that's not the case then they should have
3166 been folded before now. */
3167 if (CONVERT_EXPR_CODE_P (rhs1_code))
3169 conv1_stmt = rhs1_stmt;
3170 rhs1 = gimple_assign_rhs1 (rhs1_stmt);
3171 if (TREE_CODE (rhs1) == SSA_NAME)
3173 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
3174 if (is_gimple_assign (rhs1_stmt))
3175 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
3177 else
3178 return false;
3180 if (CONVERT_EXPR_CODE_P (rhs2_code))
3182 conv2_stmt = rhs2_stmt;
3183 rhs2 = gimple_assign_rhs1 (rhs2_stmt);
3184 if (TREE_CODE (rhs2) == SSA_NAME)
3186 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
3187 if (is_gimple_assign (rhs2_stmt))
3188 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
3190 else
3191 return false;
3194 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
3195 is_widening_mult_p, but we still need the rhs values it returns.
3197 It might also appear that it would be sufficient to use the existing
3198 operands of the widening multiply, but that would limit the choice of
3199 multiply-and-accumulate instructions.
3201 If the widened-multiplication result has more than one use, it is
3202 probably wiser not to do the conversion. */
3203 if (code == PLUS_EXPR
3204 && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
3206 if (!has_single_use (rhs1)
3207 || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
3208 &type2, &mult_rhs2))
3209 return false;
3210 add_rhs = rhs2;
3211 conv_stmt = conv1_stmt;
3213 else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
3215 if (!has_single_use (rhs2)
3216 || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
3217 &type2, &mult_rhs2))
3218 return false;
3219 add_rhs = rhs1;
3220 conv_stmt = conv2_stmt;
3222 else
3223 return false;
3225 to_mode = TYPE_MODE (type);
3226 from_mode = TYPE_MODE (type1);
3227 from_unsigned1 = TYPE_UNSIGNED (type1);
3228 from_unsigned2 = TYPE_UNSIGNED (type2);
3229 optype = type1;
3231 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
3232 if (from_unsigned1 != from_unsigned2)
3234 if (!INTEGRAL_TYPE_P (type))
3235 return false;
3236 /* We can use a signed multiply with unsigned types as long as
3237 there is a wider mode to use, or it is the smaller of the two
3238 types that is unsigned. Note that type1 >= type2, always. */
3239 if ((from_unsigned1
3240 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
3241 || (from_unsigned2
3242 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
3244 from_mode = GET_MODE_WIDER_MODE (from_mode);
3245 if (GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
3246 return false;
3249 from_unsigned1 = from_unsigned2 = false;
3250 optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
3251 false);
3254 /* If there was a conversion between the multiply and addition
3255 then we need to make sure it fits a multiply-and-accumulate.
3256 There should be a single mode change which does not change the
3257 value. */
3258 if (conv_stmt)
3260 /* We use the original, unmodified data types for this. */
3261 tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
3262 tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
3263 int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
3264 bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
3266 if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
3268 /* Conversion is a truncate. */
3269 if (TYPE_PRECISION (to_type) < data_size)
3270 return false;
3272 else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
3274 /* Conversion is an extend. Check it's the right sort. */
3275 if (TYPE_UNSIGNED (from_type) != is_unsigned
3276 && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
3277 return false;
3279 /* else convert is a no-op for our purposes. */
3282 /* Verify that the machine can perform a widening multiply
3283 accumulate in this mode/signedness combination, otherwise
3284 this transformation is likely to pessimize code. */
3285 this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
3286 handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
3287 from_mode, 0, &actual_mode);
3289 if (handler == CODE_FOR_nothing)
3290 return false;
3292 /* Ensure that the inputs to the handler are in the correct precision
3293 for the opcode. This will be the full mode size. */
3294 actual_precision = GET_MODE_PRECISION (actual_mode);
3295 if (actual_precision != TYPE_PRECISION (type1)
3296 || from_unsigned1 != TYPE_UNSIGNED (type1))
3297 mult_rhs1 = build_and_insert_cast (gsi, loc,
3298 build_nonstandard_integer_type
3299 (actual_precision, from_unsigned1),
3300 mult_rhs1);
3301 if (actual_precision != TYPE_PRECISION (type2)
3302 || from_unsigned2 != TYPE_UNSIGNED (type2))
3303 mult_rhs2 = build_and_insert_cast (gsi, loc,
3304 build_nonstandard_integer_type
3305 (actual_precision, from_unsigned2),
3306 mult_rhs2);
3308 if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
3309 add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
3311 /* Handle constants. */
3312 if (TREE_CODE (mult_rhs1) == INTEGER_CST)
3313 mult_rhs1 = fold_convert (type1, mult_rhs1);
3314 if (TREE_CODE (mult_rhs2) == INTEGER_CST)
3315 mult_rhs2 = fold_convert (type2, mult_rhs2);
3317 gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
3318 add_rhs);
3319 update_stmt (gsi_stmt (*gsi));
3320 widen_mul_stats.maccs_inserted++;
3321 return true;
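/* Example (editorial): on a target providing a widening multiply-accumulate
   pattern, the sequence

     long long t = (long long) a * (long long) b;   (a and b are int)
     acc = acc + t;

   is rewritten so that the addition becomes a WIDEN_MULT_PLUS_EXPR taking
   the unwidened int operands directly; a MINUS_EXPR similarly becomes a
   WIDEN_MULT_MINUS_EXPR.  */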
3324 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3325 with uses in additions and subtractions to form fused multiply-add
3326 operations. Returns true if successful and MUL_STMT should be removed. */
3328 static bool
3329 convert_mult_to_fma (gimple mul_stmt, tree op1, tree op2)
3331 tree mul_result = gimple_get_lhs (mul_stmt);
3332 tree type = TREE_TYPE (mul_result);
3333 gimple use_stmt, neguse_stmt;
3334 gassign *fma_stmt;
3335 use_operand_p use_p;
3336 imm_use_iterator imm_iter;
3338 if (FLOAT_TYPE_P (type)
3339 && flag_fp_contract_mode == FP_CONTRACT_OFF)
3340 return false;
3342 /* We don't want to do bitfield reduction ops. */
3343 if (INTEGRAL_TYPE_P (type)
3344 && (TYPE_PRECISION (type)
3345 != GET_MODE_PRECISION (TYPE_MODE (type))))
3346 return false;
3348 /* If the target doesn't support it, don't generate it. We assume that
3349 if fma isn't available then fms, fnma or fnms are not either. */
3350 if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
3351 return false;
3353 /* If the multiplication has zero uses, it is kept around probably because
3354 of -fnon-call-exceptions. Don't optimize it away in that case,
3355 that is DCE's job. */
3356 if (has_zero_uses (mul_result))
3357 return false;
3359 /* Make sure that the multiplication statement becomes dead after
3360 the transformation, i.e. that all uses are transformed to FMAs.
3361 This means we assume that an FMA operation has the same cost
3362 as an addition. */
3363 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3365 enum tree_code use_code;
3366 tree result = mul_result;
3367 bool negate_p = false;
3369 use_stmt = USE_STMT (use_p);
3371 if (is_gimple_debug (use_stmt))
3372 continue;
3374 /* For now restrict this operation to single basic blocks. In theory
3375 we would want to support sinking the multiplication in
3376 m = a*b;
3377 if ()
3378 ma = m + c;
3379 else
3380 d = m;
3381 to form a fma in the then block and sink the multiplication to the
3382 else block. */
3383 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3384 return false;
3386 if (!is_gimple_assign (use_stmt))
3387 return false;
3389 use_code = gimple_assign_rhs_code (use_stmt);
3391 /* A negate on the multiplication leads to FNMA. */
3392 if (use_code == NEGATE_EXPR)
3394 ssa_op_iter iter;
3395 use_operand_p usep;
3397 result = gimple_assign_lhs (use_stmt);
3399 /* Make sure the negate statement becomes dead with this
3400 single transformation. */
3401 if (!single_imm_use (gimple_assign_lhs (use_stmt),
3402 &use_p, &neguse_stmt))
3403 return false;
3405 /* Make sure the multiplication isn't also used on that stmt. */
3406 FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3407 if (USE_FROM_PTR (usep) == mul_result)
3408 return false;
3410 /* Re-validate. */
3411 use_stmt = neguse_stmt;
3412 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3413 return false;
3414 if (!is_gimple_assign (use_stmt))
3415 return false;
3417 use_code = gimple_assign_rhs_code (use_stmt);
3418 negate_p = true;
3421 switch (use_code)
3423 case MINUS_EXPR:
3424 if (gimple_assign_rhs2 (use_stmt) == result)
3425 negate_p = !negate_p;
3426 break;
3427 case PLUS_EXPR:
3428 break;
3429 default:
3430 /* FMA can only be formed from PLUS and MINUS. */
3431 return false;
3434 /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
3435 by a MULT_EXPR that we'll visit later, we might be able to
3436 get a more profitable match with fnma.
3437 OTOH, if we don't, a negate / fma pair has likely lower latency
3438 than a mult / subtract pair. */
3439 if (use_code == MINUS_EXPR && !negate_p
3440 && gimple_assign_rhs1 (use_stmt) == result
3441 && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing
3442 && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
3444 tree rhs2 = gimple_assign_rhs2 (use_stmt);
3446 if (TREE_CODE (rhs2) == SSA_NAME)
3448 gimple stmt2 = SSA_NAME_DEF_STMT (rhs2);
3449 if (has_single_use (rhs2)
3450 && is_gimple_assign (stmt2)
3451 && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3452 return false;
3456 /* We can't handle a * b + a * b. */
3457 if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
3458 return false;
3460 /* While it is possible to validate whether or not the exact form
3461 that we've recognized is available in the backend, the assumption
3462 is that the transformation is never a loss. For instance, suppose
3463 the target only has the plain FMA pattern available. Consider
3464 a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which
3465 is still two operations. Consider -(a*b)-c -> fma(-a,b,-c): we
3466 still have 3 operations, but in the FMA form the two NEGs are
3467 independent and could be run in parallel. */
3470 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3472 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3473 enum tree_code use_code;
3474 tree addop, mulop1 = op1, result = mul_result;
3475 bool negate_p = false;
3477 if (is_gimple_debug (use_stmt))
3478 continue;
3480 use_code = gimple_assign_rhs_code (use_stmt);
3481 if (use_code == NEGATE_EXPR)
3483 result = gimple_assign_lhs (use_stmt);
3484 single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3485 gsi_remove (&gsi, true);
3486 release_defs (use_stmt);
3488 use_stmt = neguse_stmt;
3489 gsi = gsi_for_stmt (use_stmt);
3490 use_code = gimple_assign_rhs_code (use_stmt);
3491 negate_p = true;
3494 if (gimple_assign_rhs1 (use_stmt) == result)
3496 addop = gimple_assign_rhs2 (use_stmt);
3497 /* a * b - c -> a * b + (-c) */
3498 if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
3499 addop = force_gimple_operand_gsi (&gsi,
3500 build1 (NEGATE_EXPR,
3501 type, addop),
3502 true, NULL_TREE, true,
3503 GSI_SAME_STMT);
3505 else
3507 addop = gimple_assign_rhs1 (use_stmt);
3508 /* a - b * c -> (-b) * c + a */
3509 if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
3510 negate_p = !negate_p;
3513 if (negate_p)
3514 mulop1 = force_gimple_operand_gsi (&gsi,
3515 build1 (NEGATE_EXPR,
3516 type, mulop1),
3517 true, NULL_TREE, true,
3518 GSI_SAME_STMT);
3520 fma_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt),
3521 FMA_EXPR, mulop1, op2, addop);
3522 gsi_replace (&gsi, fma_stmt, true);
3523 widen_mul_stats.fmas_inserted++;
3526 return true;
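/* Example (editorial): when FP contraction is enabled and the target has an
   fma pattern, the pair

     double t = a * b;
     double r = t + c;

   is fused into a single FMA_EXPR, i.e. fma (a, b, c); subtractions and
   negated uses lead to the variants built above, which insert a NEGATE_EXPR
   on either the addend or the first multiplicand.  */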
3529 /* Find integer multiplications where the operands are extended from
3530 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
3531 where appropriate. */
3533 namespace {
3535 const pass_data pass_data_optimize_widening_mul =
3537 GIMPLE_PASS, /* type */
3538 "widening_mul", /* name */
3539 OPTGROUP_NONE, /* optinfo_flags */
3540 TV_NONE, /* tv_id */
3541 PROP_ssa, /* properties_required */
3542 0, /* properties_provided */
3543 0, /* properties_destroyed */
3544 0, /* todo_flags_start */
3545 TODO_update_ssa, /* todo_flags_finish */
3548 class pass_optimize_widening_mul : public gimple_opt_pass
3550 public:
3551 pass_optimize_widening_mul (gcc::context *ctxt)
3552 : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
3555 /* opt_pass methods: */
3556 virtual bool gate (function *)
3558 return flag_expensive_optimizations && optimize;
3561 virtual unsigned int execute (function *);
3563 }; // class pass_optimize_widening_mul
3565 unsigned int
3566 pass_optimize_widening_mul::execute (function *fun)
3568 basic_block bb;
3569 bool cfg_changed = false;
3571 memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
3573 FOR_EACH_BB_FN (bb, fun)
3575 gimple_stmt_iterator gsi;
3577 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
3579 gimple stmt = gsi_stmt (gsi);
3580 enum tree_code code;
3582 if (is_gimple_assign (stmt))
3584 code = gimple_assign_rhs_code (stmt);
3585 switch (code)
3587 case MULT_EXPR:
3588 if (!convert_mult_to_widen (stmt, &gsi)
3589 && convert_mult_to_fma (stmt,
3590 gimple_assign_rhs1 (stmt),
3591 gimple_assign_rhs2 (stmt)))
3593 gsi_remove (&gsi, true);
3594 release_defs (stmt);
3595 continue;
3597 break;
3599 case PLUS_EXPR:
3600 case MINUS_EXPR:
3601 convert_plusminus_to_widen (&gsi, stmt, code);
3602 break;
3604 default:;
3607 else if (is_gimple_call (stmt)
3608 && gimple_call_lhs (stmt))
3610 tree fndecl = gimple_call_fndecl (stmt);
3611 if (fndecl
3612 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
3614 switch (DECL_FUNCTION_CODE (fndecl))
3616 case BUILT_IN_POWF:
3617 case BUILT_IN_POW:
3618 case BUILT_IN_POWL:
3619 if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
3620 && REAL_VALUES_EQUAL
3621 (TREE_REAL_CST (gimple_call_arg (stmt, 1)),
3622 dconst2)
3623 && convert_mult_to_fma (stmt,
3624 gimple_call_arg (stmt, 0),
3625 gimple_call_arg (stmt, 0)))
3627 unlink_stmt_vdef (stmt);
3628 if (gsi_remove (&gsi, true)
3629 && gimple_purge_dead_eh_edges (bb))
3630 cfg_changed = true;
3631 release_defs (stmt);
3632 continue;
3634 break;
3636 default:;
3640 gsi_next (&gsi);
3644 statistics_counter_event (fun, "widening multiplications inserted",
3645 widen_mul_stats.widen_mults_inserted);
3646 statistics_counter_event (fun, "widening maccs inserted",
3647 widen_mul_stats.maccs_inserted);
3648 statistics_counter_event (fun, "fused multiply-adds inserted",
3649 widen_mul_stats.fmas_inserted);
3651 return cfg_changed ? TODO_cleanup_cfg : 0;
3654 } // anon namespace
3656 gimple_opt_pass *
3657 make_pass_optimize_widening_mul (gcc::context *ctxt)
3659 return new pass_optimize_widening_mul (ctxt);