gcc/tree-ssa-math-opts.c

   1 /* Global, SSA-based optimizations using mathematical identities.
   2    Copyright (C) 2005-2015 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* Currently, the only mini-pass in this file tries to CSE reciprocal
  21    operations.  These are common in sequences such as this one:
  22
  23         modulus = sqrt(x*x + y*y + z*z);
  24         x = x / modulus;
  25         y = y / modulus;
  26         z = z / modulus;
  27
  28    that can be optimized to
  29
  30         modulus = sqrt(x*x + y*y + z*z);
  31         rmodulus = 1.0 / modulus;
  32         x = x * rmodulus;
  33         y = y * rmodulus;
  34         z = z * rmodulus;
  35
  36    We do this for loop invariant divisors, and with this pass whenever
  37    we notice that a division has the same divisor multiple times.
  38
  39    Of course, like in PRE, we don't insert a division if a dominator
  40    already has one.  However, this cannot be done as an extension of
  41    PRE for several reasons.
  42
  43    First of all, with some experiments it was found out that the
  44    transformation is not always useful if there are only two divisions
  45    by the same divisor.  This is probably because modern processors
  46    can pipeline the divisions; on older, in-order processors it should
  47    still be effective to optimize two divisions by the same number.
  48    We make this a param, and it shall be called N in the remainder of
  49    this comment.
  50
  51    Second, if trapping math is active, we have less freedom on where
  52    to insert divisions: we can only do so in basic blocks that already
  53    contain one.  (If divisions don't trap, instead, we can insert
  54    divisions elsewhere, which will be in blocks that are common dominators
  55    of those that have the division).
  56
  57    We really don't want to compute the reciprocal unless a division will
  58    be found.  To do this, we won't insert the division in a basic block
  59    that has less than N divisions *post-dominating* it.
  60
  61    The algorithm constructs a subset of the dominator tree, holding the
  62    blocks containing the divisions and the common dominators to them,
  63    and walks it twice.  The first walk is in post-order, and it annotates
  64    each block with the number of divisions that post-dominate it: this
  65    gives information on where divisions can be inserted profitably.
  66    The second walk is in pre-order, and it inserts divisions as explained
  67    above, and replaces divisions by multiplications.
  68
  69    In the best case, the cost of the pass is O(n_statements).  In the
  70    worst-case, the cost is due to creating the dominator tree subset,
  71    with a cost of O(n_basic_blocks ^ 2); however this can only happen
  72    for n_statements / n_basic_blocks statements.  So, the amortized cost
  73    of creating the dominator tree subset is O(n_basic_blocks) and the
  74    worst-case cost of the pass is O(n_statements * n_basic_blocks).
  75
  76    More practically, the cost will be small because there are few
  77    divisions, and they tend to be in the same basic block, so insert_bb
  78    is called very few times.
  79
  80    If we did this using domwalk.c, an efficient implementation would have
  81    to work on all the variables in a single pass, because we could not
  82    work on just a subset of the dominator tree, as we do now, and the
  83    cost would also be something like O(n_statements * n_basic_blocks).
  84    The data structures would be more complex in order to work on all the
  85    variables in a single pass.  */
  86
  87 #include "config.h"
  88 #include "system.h"
  89 #include "coretypes.h"
  90 #include "tm.h"
  91 #include "flags.h"
  92 #include "hash-set.h"
  93 #include "machmode.h"
  94 #include "vec.h"
  95 #include "double-int.h"
  96 #include "input.h"
  97 #include "alias.h"
  98 #include "symtab.h"
  99 #include "wide-int.h"
 100 #include "inchash.h"
 101 #include "tree.h"
 102 #include "fold-const.h"
 103 #include "predict.h"
 104 #include "hard-reg-set.h"
 105 #include "function.h"
 106 #include "dominance.h"
 107 #include "cfg.h"
 108 #include "basic-block.h"
 109 #include "tree-ssa-alias.h"
 110 #include "internal-fn.h"
 111 #include "gimple-fold.h"
 112 #include "gimple-expr.h"
 113 #include "is-a.h"
 114 #include "gimple.h"
 115 #include "gimple-iterator.h"
 116 #include "gimplify.h"
 117 #include "gimplify-me.h"
 118 #include "stor-layout.h"
 119 #include "gimple-ssa.h"
 120 #include "tree-cfg.h"
 121 #include "tree-phinodes.h"
 122 #include "ssa-iterators.h"
 123 #include "stringpool.h"
 124 #include "tree-ssanames.h"
 125 #include "hashtab.h"
 126 #include "rtl.h"
 127 #include "statistics.h"
 128 #include "real.h"
 129 #include "fixed-value.h"
 130 #include "insn-config.h"
 131 #include "expmed.h"
 132 #include "dojump.h"
 133 #include "explow.h"
 134 #include "calls.h"
 135 #include "emit-rtl.h"
 136 #include "varasm.h"
 137 #include "stmt.h"
 138 #include "expr.h"
 139 #include "tree-dfa.h"
 140 #include "tree-ssa.h"
 141 #include "tree-pass.h"
 142 #include "alloc-pool.h"
 143 #include "target.h"
 144 #include "gimple-pretty-print.h"
 145 #include "builtins.h"
 146 #include "params.h"
 147
 148 /* FIXME: RTL headers have to be included here for optabs.  */
 149 #include "rtl.h"                /* Because optabs.h wants enum rtx_code.  */
 150 #include "expr.h"               /* Because optabs.h wants sepops.  */
 151 #include "insn-codes.h"
 152 #include "optabs.h"
 153
 154 /* This structure represents one basic block that either computes a
 155    division, or is a common dominator for basic blocks that compute a
 156    division.  Instances come from occ_pool (see occ_new, which also
        stores the instance in BB->aux) and are returned to it by free_bb.  */
 157 struct occurrence {
 158   /* The basic block represented by this structure.  */
 159   basic_block bb;
 160
 161   /* If non-NULL, the SSA_NAME holding a reciprocal that can be used in
 162      BB.  insert_reciprocals stores here either the reciprocal it just
          inserted in BB or the one handed down from the parent occurrence.  */
 163   tree recip_def;
 164
 165   /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
 166      was inserted in BB.  replace_reciprocal compares against it so that
          the inserted statement itself is never rewritten.  */
 167   gimple recip_def_stmt;
 168
 169   /* Pointer to a list of "struct occurrence"s for blocks dominated
 170      by BB.  The list is linked through the NEXT field.  */
 171   struct occurrence *children;
 172
 173   /* Pointer to the next "struct occurrence" in the list of blocks
 174      sharing a common dominator.  */
 175   struct occurrence *next;
 176
 177   /* The number of divisions that are in BB before compute_merit.  The
 178      number of divisions that are in BB or post-dominate it after
 179      compute_merit.  */
 180   int num_divisions;
 181
 182   /* True if the basic block has a division, false if it is a common
 183      dominator for basic blocks that do.  If it is false and trapping
 184      math is active, BB is not a candidate for inserting a reciprocal.  */
 185   bool bb_has_division;
 186 };
 187
 188 static struct
 189 {
 190   /* Number of 1.0/X ops inserted (bumped by insert_reciprocals each
          time it emits a reciprocal statement).  */
 191   int rdivs_inserted;
 192
 193   /* Number of 1.0/FUNC ops inserted, i.e. calls switched to the
          target's reciprocal builtin by pass_cse_reciprocals::execute.  */
 194   int rfuncs_inserted;
 195 } reciprocal_stats;
 196
 197 static struct
 198 {
 199   /* Number of cexpi calls inserted (bumped once per call emitted by
          execute_cse_sincos_1).  */
 200   int inserted;
 201 } sincos_stats;
 202
 203 static struct
 204 {
 205   /* Number of hand-written 16-bit nop / bswaps found.  Two instances of
          this struct exist, nop_stats and bswap_stats.
          NOTE(review): the code updating them is outside this part of the
          file; presumably one instance counts each kind of pattern --
          confirm.  */
 206   int found_16bit;
 207
 208   /* Number of hand-written 32-bit nop / bswaps found.  */
 209   int found_32bit;
 210
 211   /* Number of hand-written 64-bit nop / bswaps found.  */
 212   int found_64bit;
 213 } nop_stats, bswap_stats;
 214
 215 static struct
 216 {
 217   /* Number of widening multiplication ops inserted.
          NOTE(review): none of the counters in this struct is updated in
          the part of the file visible here; the users live further down.  */
 218   int widen_mults_inserted;
 219
 220   /* Number of integer multiply-and-accumulate ops inserted.  */
 221   int maccs_inserted;
 222
 223   /* Number of fp fused multiply-add ops inserted.  */
 224   int fmas_inserted;
 225 } widen_mul_stats;
 226
 227 /* The instance of "struct occurrence" representing the highest
 228    interesting block in the dominator tree.  It heads the top-level list
        that register_division_in hands to insert_bb, and it is reset to
        NULL at the end of execute_cse_reciprocals_1.  */
 229 static struct occurrence *occ_head;
 230
 231 /* Allocation pool for getting instances of "struct occurrence".  Created
        and destroyed by pass_cse_reciprocals::execute.  */
 232 static alloc_pool occ_pool;
 233
 234
 235
 236 /* Allocate and return a new struct occurrence for basic block BB, and
 237    whose children list is headed by CHILDREN.  */
 238 static struct occurrence *
 239 occ_new (basic_block bb, struct occurrence *children)
 240 {
 241   struct occurrence *occ;
 242
 243   bb->aux = occ = (struct occurrence *) pool_alloc (occ_pool);
 244   memset (occ, 0, sizeof (struct occurrence));
 245
 246   occ->bb = bb;
 247   occ->children = children;
 248   return occ;
 249 }
 250
 251
 252 /* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
 253    list of "struct occurrence"s, one per basic block, having IDOM as
 254    their common dominator.  In the comments below, BB is NEW_OCC->bb.
 255
 256    We try to insert NEW_OCC as deep as possible in the tree, and we also
 257    insert any other block that is a common dominator for BB and one
 258    block already in the tree.  */
 259
 260 static void
 261 insert_bb (struct occurrence *new_occ, basic_block idom,
 262            struct occurrence **p_head)
 263 {
 264   struct occurrence *occ, **p_occ;
 265
       /* P_OCC always addresses the link that points at OCC, so that OCC can
          be unlinked by storing through it.  The loop has no increment: each
          branch either unlinks OCC (which advances implicitly), returns, or
          steps P_OCC itself.  */
 266   for (p_occ = p_head; (occ = *p_occ) != NULL; )
 267     {
 268       basic_block bb = new_occ->bb, occ_bb = occ->bb;
 269       basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
 270       if (dom == bb)
 271         {
 272           /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
 273              from its list.  */
 274           *p_occ = occ->next;
 275           occ->next = new_occ->children;
 276           new_occ->children = occ;
 277
 278           /* Try the next block (it may as well be dominated by BB).  */
 279         }
 280
 281       else if (dom == occ_bb)
 282         {
 283           /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
 284           insert_bb (new_occ, dom, &occ->children);
 285           return;
 286         }
 287
 288       else if (dom != idom)
 289         {
               /* DOM is a block that has no occurrence yet.  */
 290           gcc_assert (!dom->aux);
 291
 292           /* There is a dominator between IDOM and BB, add it and make
 293              two children out of NEW_OCC and OCC.  First, remove OCC from
 294              its list.  */
 295           *p_occ = occ->next;
 296           new_occ->next = occ;
 297           occ->next = NULL;
 298
 299           /* None of the previous blocks has DOM as a dominator: if we tail
 300              recursed, we would reexamine them uselessly. Just switch BB with
 301              DOM, and go on looking for blocks dominated by DOM.  */
 302           new_occ = occ_new (dom, new_occ);
 303         }
 304
 305       else
 306         {
 307           /* Nothing special, go on with the next element.  */
 308           p_occ = &occ->next;
 309         }
 310     }
 311
 312   /* No occurrence in the list dominates BB.  Link NEW_OCC (which may by
          now be a common dominator created above) at the head of the list
          hanging off IDOM.  */
 313   new_occ->next = *p_head;
 314   *p_head = new_occ;
 315 }
 316
 317 /* Register that we found a division in BB.  */
 318
 319 static inline void
 320 register_division_in (basic_block bb)
 321 {
 322   struct occurrence *occ;
 323
 324   occ = (struct occurrence *) bb->aux;
 325   if (!occ)
 326     {
 327       occ = occ_new (bb, NULL);
 328       insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
 329     }
 330
 331   occ->bb_has_division = true;
 332   occ->num_divisions++;
 333 }
 334
 335
 336 /* Compute the number of divisions that postdominate each block in OCC and
 337    its children.  */
 338
 339 static void
 340 compute_merit (struct occurrence *occ)
 341 {
 342   struct occurrence *occ_child;
 343   basic_block dom = occ->bb;
 344
 345   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
 346     {
 347       basic_block bb;
 348       if (occ_child->children)
 349         compute_merit (occ_child);
 350
 351       if (flag_exceptions)
 352         bb = single_noncomplex_succ (dom);
 353       else
 354         bb = dom;
 355
 356       if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
 357         occ->num_divisions += occ_child->num_divisions;
 358     }
 359 }
 360
 361
 362 /* Return whether USE_STMT is a floating-point division by DEF.  */
 363 static inline bool
 364 is_division_by (gimple use_stmt, tree def)
 365 {
 366   return is_gimple_assign (use_stmt)
 367          && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
 368          && gimple_assign_rhs2 (use_stmt) == def
 369          /* Do not recognize x / x as valid division, as we are getting
 370             confused later by replacing all immediate uses x in such
 371             a stmt.  */
 372          && gimple_assign_rhs1 (use_stmt) != def;
 373 }
 374
 375 /* Walk the subset of the dominator tree rooted at OCC, setting the
 376    RECIP_DEF field to a definition of 1.0 / DEF that can be used in
 377    the given basic block.  The field may be left NULL, of course,
 378    if it is not possible or profitable to do the optimization.
 379
 380    DEF_BSI is an iterator pointing at the statement defining DEF.
 381    If RECIP_DEF is set, a dominator already has a computation that can
 382    be used.  */
 383
 384 static void
 385 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
 386                     tree def, tree recip_def, int threshold)
 387 {
 388   tree type;
 389   gassign *new_stmt;
 390   gimple_stmt_iterator gsi;
 391   struct occurrence *occ_child;
 392
 393   if (!recip_def
 394       && (occ->bb_has_division || !flag_trapping_math)
 395       && occ->num_divisions >= threshold)
 396     {
 397       /* Make a variable with the replacement and substitute it.  */
 398       type = TREE_TYPE (def);
 399       recip_def = create_tmp_reg (type, "reciptmp");
 400       new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
 401                                       build_one_cst (type), def);
 402
 403       if (occ->bb_has_division)
 404         {
 405           /* Case 1: insert before an existing division.  */
 406           gsi = gsi_after_labels (occ->bb);
 407           while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
 408             gsi_next (&gsi);
 409
 410           gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
 411         }
 412       else if (def_gsi && occ->bb == def_gsi->bb)
 413         {
 414           /* Case 2: insert right after the definition.  Note that this will
 415              never happen if the definition statement can throw, because in
 416              that case the sole successor of the statement's basic block will
 417              dominate all the uses as well.  */
 418           gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
 419         }
 420       else
 421         {
 422           /* Case 3: insert in a basic block not containing defs/uses.  */
 423           gsi = gsi_after_labels (occ->bb);
 424           gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
 425         }
 426
 427       reciprocal_stats.rdivs_inserted++;
 428
 429       occ->recip_def_stmt = new_stmt;
 430     }
 431
 432   occ->recip_def = recip_def;
 433   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
 434     insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold);
 435 }
 436
 437
 438 /* Replace the division at USE_P with a multiplication by the reciprocal, if
 439    possible.  */
 440
 441 static inline void
 442 replace_reciprocal (use_operand_p use_p)
 443 {
 444   gimple use_stmt = USE_STMT (use_p);
 445   basic_block bb = gimple_bb (use_stmt);
 446   struct occurrence *occ = (struct occurrence *) bb->aux;
 447
 448   if (optimize_bb_for_speed_p (bb)
 449       && occ->recip_def && use_stmt != occ->recip_def_stmt)
 450     {
 451       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
 452       gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
 453       SET_USE (use_p, occ->recip_def);
 454       fold_stmt_inplace (&gsi);
 455       update_stmt (use_stmt);
 456     }
 457 }
 458
 459
 460 /* Free OCC and return one more "struct occurrence" to be freed.  */
 461
 462 static struct occurrence *
 463 free_bb (struct occurrence *occ)
 464 {
 465   struct occurrence *child, *next;
 466
 467   /* First get the two pointers hanging off OCC.  */
 468   next = occ->next;
 469   child = occ->children;
 470   occ->bb->aux = NULL;
 471   pool_free (occ_pool, occ);
 472
 473   /* Now ensure that we don't recurse unless it is necessary.  */
 474   if (!child)
 475     return next;
 476   else
 477     {
 478       while (next)
 479         next = free_bb (next);
 480
 481       return child;
 482     }
 483 }
 484
 485
 486 /* Look for floating-point divisions among DEF's uses, and try to
 487    replace them by multiplications with the reciprocal.  Add
 488    as many statements computing the reciprocal as needed.
 489
 490    DEF must be a GIMPLE register of a floating-point type.  */
 491
 492 static void
 493 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
 494 {
 495   use_operand_p use_p;
 496   imm_use_iterator use_iter;
 497   struct occurrence *occ;
 498   int count = 0, threshold;
 499
 500   gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));
 501
 502   FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
 503     {
 504       gimple use_stmt = USE_STMT (use_p);
 505       if (is_division_by (use_stmt, def))
 506         {
 507           register_division_in (gimple_bb (use_stmt));
 508           count++;
 509         }
 510     }
 511
 512   /* Do the expensive part only if we can hope to optimize something.  */
 513   threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
 514   if (count >= threshold)
 515     {
 516       gimple use_stmt;
 517       for (occ = occ_head; occ; occ = occ->next)
 518         {
 519           compute_merit (occ);
 520           insert_reciprocals (def_gsi, occ, def, NULL, threshold);
 521         }
 522
 523       FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
 524         {
 525           if (is_division_by (use_stmt, def))
 526             {
 527               FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
 528                 replace_reciprocal (use_p);
 529             }
 530         }
 531     }
 532
 533   for (occ = occ_head; occ; )
 534     occ = free_bb (occ);
 535
 536   occ_head = NULL;
 537 }
 538
 539 /* Go through all the floating-point SSA_NAMEs, and call
 540    execute_cse_reciprocals_1 on each of them.  */
 541 namespace {
 542
 543 const pass_data pass_data_cse_reciprocals =
 544 {
       /* Descriptor passed to the gimple_opt_pass constructor by
          pass_cse_reciprocals.  The trailing comment on each initializer
          names the pass_data field it sets.  */
 545   GIMPLE_PASS, /* type */
 546   "recip", /* name */
 547   OPTGROUP_NONE, /* optinfo_flags */
 548   TV_NONE, /* tv_id */
 549   PROP_ssa, /* properties_required */
 550   0, /* properties_provided */
 551   0, /* properties_destroyed */
 552   0, /* todo_flags_start */
 553   TODO_update_ssa, /* todo_flags_finish */
 554 };
 555
 556 class pass_cse_reciprocals : public gimple_opt_pass
 557 {
 558 public:
       /* Construct the pass for context CTXT, described by
          pass_data_cse_reciprocals.  */
 559   pass_cse_reciprocals (gcc::context *ctxt)
 560     : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
 561   {}
 562
 563   /* opt_pass methods: gate returns true only when both optimize and
          flag_reciprocal_math are nonzero; execute is defined out of line
          below.  */
 564   virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
 565   virtual unsigned int execute (function *);
 566
 567 }; // class pass_cse_reciprocals
 568
 569 unsigned int
 570 pass_cse_reciprocals::execute (function *fun)
 571 {
       /* Entry point of the pass for function FUN.  It first runs
          execute_cse_reciprocals_1 on every floating-point default
          definition of an argument, PHI result and statement result.  Then,
          in blocks not optimized for size, it rewrites a / func (b) into
          a * rfunc (b) when the target supplies a reciprocal builtin.
          Always returns 0.  */
 572   basic_block bb;
 573   tree arg;
 574
       /* The pool is released again at the end of this function.  */
 575   occ_pool = create_alloc_pool ("dominators for recip",
 576                                 sizeof (struct occurrence),
 577                                 n_basic_blocks_for_fn (fun) / 3 + 1);
 578
 579   memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
 580   calculate_dominance_info (CDI_DOMINATORS);
 581   calculate_dominance_info (CDI_POST_DOMINATORS);
 582
       /* bb->aux maps a block to its "struct occurrence" (see occ_new and
          register_division_in), so it must be clear on entry.  */
 583 #ifdef ENABLE_CHECKING
 584   FOR_EACH_BB_FN (bb, fun)
 585     gcc_assert (!bb->aux);
 586 #endif
 587
       /* Function arguments: work on their default definitions.  No
          statement iterator is available for them, so NULL is passed.  */
 588   for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
 589     if (FLOAT_TYPE_P (TREE_TYPE (arg))
 590         && is_gimple_reg (arg))
 591       {
 592         tree name = ssa_default_def (fun, arg);
 593         if (name)
 594           execute_cse_reciprocals_1 (NULL, name);
 595       }
 596
 597   FOR_EACH_BB_FN (bb, fun)
 598     {
 599       tree def;
 600
           /* Non-virtual floating-point PHI results, again without a
              statement iterator.  */
 601       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
 602            gsi_next (&gsi))
 603         {
 604           gphi *phi = gsi.phi ();
 605           def = PHI_RESULT (phi);
 606           if (! virtual_operand_p (def)
 607               && FLOAT_TYPE_P (TREE_TYPE (def)))
 608             execute_cse_reciprocals_1 (NULL, def);
 609         }
 610
           /* Floating-point SSA names defined by ordinary statements.  The
              iterator is passed along so that insert_reciprocals can place
              a reciprocal right after the definition.  */
 611       for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
 612            gsi_next (&gsi))
 613         {
 614           gimple stmt = gsi_stmt (gsi);
 615
 616           if (gimple_has_lhs (stmt)
 617               && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
 618               && FLOAT_TYPE_P (TREE_TYPE (def))
 619               && TREE_CODE (def) == SSA_NAME)
 620             execute_cse_reciprocals_1 (&gsi, def);
 621         }
 622
           /* The builtin-reciprocal rewrite below is skipped for blocks
              optimized for size.  */
 623       if (optimize_bb_for_size_p (bb))
 624         continue;
 625
 626       /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
 627       for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
 628            gsi_next (&gsi))
 629         {
 630           gimple stmt = gsi_stmt (gsi);
 631           tree fndecl;
 632
 633           if (is_gimple_assign (stmt)
 634               && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
 635             {
 636               tree arg1 = gimple_assign_rhs2 (stmt);
 637               gimple stmt1;
 638
 639               if (TREE_CODE (arg1) != SSA_NAME)
 640                 continue;
 641
                   /* STMT1 defines the divisor; it must be a call with a
                      result to a normal or machine-dependent builtin.  */
 642               stmt1 = SSA_NAME_DEF_STMT (arg1);
 643
 644               if (is_gimple_call (stmt1)
 645                   && gimple_call_lhs (stmt1)
 646                   && (fndecl = gimple_call_fndecl (stmt1))
 647                   && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
 648                       || DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD))
 649                 {
 650                   enum built_in_function code;
 651                   bool md_code, fail;
 652                   imm_use_iterator ui;
 653                   use_operand_p use_p;
 654
 655                   code = DECL_FUNCTION_CODE (fndecl);
 656                   md_code = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD;
 657
                       /* From here on FNDECL is the reciprocal builtin
                          chosen by the target, if it offers one.  */
 658                   fndecl = targetm.builtin_reciprocal (code, md_code, false);
 659                   if (!fndecl)
 660                     continue;
 661
 662                   /* Check that all uses of the SSA name are divisions,
 663                      otherwise replacing the defining statement will do
 664                      the wrong thing.  Debug statements are ignored and
                          uses of ARG1 as a dividend count as failures.  */
 665                   fail = false;
 666                   FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
 667                     {
 668                       gimple stmt2 = USE_STMT (use_p);
 669                       if (is_gimple_debug (stmt2))
 670                         continue;
 671                       if (!is_gimple_assign (stmt2)
 672                           || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
 673                           || gimple_assign_rhs1 (stmt2) == arg1
 674                           || gimple_assign_rhs2 (stmt2) != arg1)
 675                         {
 676                           fail = true;
 677                           break;
 678                         }
 679                     }
 680                   if (fail)
 681                     continue;
 682
                       /* Make STMT1 call the reciprocal builtin.
                          NOTE(review): gimple_replace_ssa_lhs is called with
                          the existing LHS; presumably this is what deals with
                          the debug uses skipped above -- confirm.  */
 683                   gimple_replace_ssa_lhs (stmt1, arg1);
 684                   gimple_call_set_fndecl (stmt1, fndecl);
 685                   update_stmt (stmt1);
 686                   reciprocal_stats.rfuncs_inserted++;
 687
                       /* Every division by ARG1 becomes a multiplication.
                          This loop reuses (and overwrites) the outer STMT
                          variable and declares a GSI that shadows the loop
                          iterator; the outer STMT is not read again in this
                          iteration.  */
 688                   FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
 689                     {
 690                       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
 691                       gimple_assign_set_rhs_code (stmt, MULT_EXPR);
 692                       fold_stmt_inplace (&gsi);
 693                       update_stmt (stmt);
 694                     }
 695                 }
 696             }
 697         }
 698     }
 699
 700   statistics_counter_event (fun, "reciprocal divs inserted",
 701                             reciprocal_stats.rdivs_inserted);
 702   statistics_counter_event (fun, "reciprocal functions inserted",
 703                             reciprocal_stats.rfuncs_inserted);
 704
 705   free_dominance_info (CDI_DOMINATORS);
 706   free_dominance_info (CDI_POST_DOMINATORS);
 707   free_alloc_pool (occ_pool);
 708   return 0;
 709 }
 710
 711 } // anon namespace
 712
 713 gimple_opt_pass *
 714 make_pass_cse_reciprocals (gcc::context *ctxt)
 715 {
 716   return new pass_cse_reciprocals (ctxt);
 717 }
 718
 719 /* Records an occurrence at statement USE_STMT in the vector of trees
 720    STMTS if it is dominated by *TOP_BB or dominates it or this basic block
 721    is not yet initialized.  Returns true if the occurrence was pushed on
 722    the vector.  Adjusts *TOP_BB to be the basic block dominating all
 723    statements in the vector.  */
 724
 725 static bool
 726 maybe_record_sincos (vec<gimple> *stmts,
 727                      basic_block *top_bb, gimple use_stmt)
 728 {
 729   basic_block use_bb = gimple_bb (use_stmt);
 730   if (*top_bb
 731       && (*top_bb == use_bb
 732           || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
 733     stmts->safe_push (use_stmt);
 734   else if (!*top_bb
 735            || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
 736     {
 737       stmts->safe_push (use_stmt);
 738       *top_bb = use_bb;
 739     }
 740   else
 741     return false;
 742
 743   return true;
 744 }
 745
 746 /* Look for sin, cos and cexpi calls with the same argument NAME and
 747    create a single call to cexpi CSEing the result in this case.
 748    We first walk over all immediate uses of the argument collecting
 749    statements that we can CSE in a vector and in a second pass replace
 750    the statement rhs with a REALPART or IMAGPART expression on the
 751    result of the cexpi call we insert before the use statement that
 752    dominates all other candidates.  */
 753
 754 static bool
 755 execute_cse_sincos_1 (tree name)
 756 {
 757   gimple_stmt_iterator gsi;
 758   imm_use_iterator use_iter;
 759   tree fndecl, res, type;
 760   gimple def_stmt, use_stmt, stmt;
 761   int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
 762   auto_vec<gimple> stmts;
 763   basic_block top_bb = NULL;
 764   int i;
 765   bool cfg_changed = false;
 766
 767   type = TREE_TYPE (name);
 768   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
 769     {
 770       if (gimple_code (use_stmt) != GIMPLE_CALL
 771           || !gimple_call_lhs (use_stmt)
 772           || !(fndecl = gimple_call_fndecl (use_stmt))
 773           || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
 774         continue;
 775
 776       switch (DECL_FUNCTION_CODE (fndecl))
 777         {
 778         CASE_FLT_FN (BUILT_IN_COS):
 779           seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
 780           break;
 781
 782         CASE_FLT_FN (BUILT_IN_SIN):
 783           seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
 784           break;
 785
 786         CASE_FLT_FN (BUILT_IN_CEXPI):
 787           seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
 788           break;
 789
 790         default:;
 791         }
 792     }
 793
 794   if (seen_cos + seen_sin + seen_cexpi <= 1)
 795     return false;
 796
 797   /* Simply insert cexpi at the beginning of top_bb but not earlier than
 798      the name def statement.  */
 799   fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
 800   if (!fndecl)
 801     return false;
 802   stmt = gimple_build_call (fndecl, 1, name);
 803   res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
 804   gimple_call_set_lhs (stmt, res);
 805
 806   def_stmt = SSA_NAME_DEF_STMT (name);
 807   if (!SSA_NAME_IS_DEFAULT_DEF (name)
 808       && gimple_code (def_stmt) != GIMPLE_PHI
 809       && gimple_bb (def_stmt) == top_bb)
 810     {
 811       gsi = gsi_for_stmt (def_stmt);
 812       gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
 813     }
 814   else
 815     {
 816       gsi = gsi_after_labels (top_bb);
 817       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
 818     }
 819   sincos_stats.inserted++;
 820
 821   /* And adjust the recorded old call sites.  */
 822   for (i = 0; stmts.iterate (i, &use_stmt); ++i)
 823     {
 824       tree rhs = NULL;
 825       fndecl = gimple_call_fndecl (use_stmt);
 826
 827       switch (DECL_FUNCTION_CODE (fndecl))
 828         {
 829         CASE_FLT_FN (BUILT_IN_COS):
 830           rhs = fold_build1 (REALPART_EXPR, type, res);
 831           break;
 832
 833         CASE_FLT_FN (BUILT_IN_SIN):
 834           rhs = fold_build1 (IMAGPART_EXPR, type, res);
 835           break;
 836
 837         CASE_FLT_FN (BUILT_IN_CEXPI):
 838           rhs = res;
 839           break;
 840
 841         default:;
 842           gcc_unreachable ();
 843         }
 844
 845         /* Replace call with a copy.  */
 846         stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
 847
 848         gsi = gsi_for_stmt (use_stmt);
 849         gsi_replace (&gsi, stmt, true);
 850         if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
 851           cfg_changed = true;
 852     }
 853
 854   return cfg_changed;
 855 }
 856
 857 /* To evaluate powi(x,n), the floating point value x raised to the
 858    constant integer exponent n, we use a hybrid algorithm that
 859    combines the "window method" with look-up tables.  For an
 860    introduction to exponentiation algorithms and "addition chains",
 861    see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
 862    "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
 863    3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
 864    Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */
 865
 866 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
 867    multiplications to inline before calling the system library's pow
 868    function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
 869    so this default never requires calling pow, powf or powl.  The
        #ifndef guard leaves any earlier definition of the macro in place.  */
 870
 871 #ifndef POWI_MAX_MULTS
 872 #define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
 873 #endif
 874
 875 /* The size of the "optimal power tree" lookup table.  All
 876    exponents less than this value are simply looked up in the
 877    powi_table below.  This threshold is also used to size the
 878    cache of pseudo registers that hold intermediate results.  Note that
        powi_table stores its entries as unsigned char.  */
 879 #define POWI_TABLE_SIZE 256
 880
 881 /* The size, in bits, of the window used in the "window method"
 882    exponentiation algorithm.  This is equivalent to a radix of
 883    (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
 884 #define POWI_WINDOW_SIZE 3
 885
 886 /* The following table is an efficient representation of an
 887    "optimal power tree".  For each value, i, the corresponding
 888    value, j, in the table states that an optimal evaluation
 889    sequence for calculating pow(x,i) can be found by evaluating
 890    pow(x,j)*pow(x,i-j).  An optimal power tree for the first
 891    100 integers is given in Knuth's "Seminumerical algorithms".  */
 892
 893 static const unsigned char powi_table[POWI_TABLE_SIZE] =
 894   {
 895       0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
 896       4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
 897       8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
 898      12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
 899      16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
 900      20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
 901      24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
 902      28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
 903      32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
 904      36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
 905      40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
 906      44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
 907      48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
 908      52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
 909      56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
 910      60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
 911      64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
 912      68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
 913      72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
 914      76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
 915      80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
 916      84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
 917      88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
 918      92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
 919      96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
 920     100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
 921     104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
 922     108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
 923     112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
 924     116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
 925     120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
 926     124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
 927   };
 928
 929
 930 /* Count the multiplications still needed to compute powi(x,n) by
 931    following powi_table, for N smaller than POWI_TABLE_SIZE.  Helper
 932    for powi_cost.  CACHE[i] is true once exponent i is available;
 933    every exponent visited here is marked as available.  */
 934
 935 static int
 936 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
 937 {
 938   /* An exponent that is already available costs nothing more.  */
 939   if (cache[n])
 940     return 0;
 941   /* x**n = x**(n-split) * x**split: both operands plus one multiply.  */
 942   const unsigned HOST_WIDE_INT split = powi_table[n];
 943   cache[n] = true;
 944   int cost = powi_lookup_cost (n - split, cache);
 945   return cost + powi_lookup_cost (split, cache) + 1;
 946 }
 947
 948 /* Return the number of multiplications required to calculate
 949    powi(x,n) for an arbitrary x, given the exponent N.  Any N is
 950    accepted.  Keep this in sync with powi_as_mults below.  */
 951
 952 static int
 953 powi_cost (HOST_WIDE_INT n)
 954 {
 955   bool cache[POWI_TABLE_SIZE];
 956   unsigned HOST_WIDE_INT digit;
 957   unsigned HOST_WIDE_INT val;
 958   int result;
 959
 960   if (n == 0)
 961     return 0;
 962
 963   /* Ignore the reciprocal when calculating the cost.  absu_hwi yields
 964      the magnitude without overflowing for the most negative N.  */
 965   val = absu_hwi (n);
 966
 967   /* Initialize the exponent cache: only x**1 is available.  */
 968   memset (cache, 0, sizeof (cache));
 969   cache[1] = true;
 970   result = 0;
 971
 972   while (val >= POWI_TABLE_SIZE)
 973     {
 974       if (val & 1)
 975         {
 976           digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
 977           result += powi_lookup_cost (digit, cache)
 978                     + POWI_WINDOW_SIZE + 1;
 979           val >>= POWI_WINDOW_SIZE;
 980         }
 981       else
 982         {
 983           val >>= 1;
 984           result++;
 985         }
 986     }
 987
 988   return result + powi_lookup_cost (val, cache);
 989 }
 990
 991 /* Recursive worker for powi_as_mults.  Emit before GSI, with location
 992    LOC, the multiplications computing CACHE[1]**N in type TYPE, reusing
 993    exponents already recorded in CACHE, and return the resulting value.  */
 994
 995 static tree
 996 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
 997                  HOST_WIDE_INT n, tree *cache)
 998 {
 999   const bool in_table = n < POWI_TABLE_SIZE;
1000   tree lhs_op, rhs_op;
1001
1002   /* Exponents below POWI_TABLE_SIZE are built once and then reused.  */
1003   if (in_table && cache[n])
1004     return cache[n];
1005
1006   tree product = make_temp_ssa_name (type, NULL, "powmult");
1007
1008   if (in_table)
1009     {
1010       cache[n] = product;
1011       lhs_op = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1012       rhs_op = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1013     }
1014   else if ((n & 1) != 0)
1015     {
1016       unsigned HOST_WIDE_INT low_bits = n & ((1 << POWI_WINDOW_SIZE) - 1);
1017       lhs_op = powi_as_mults_1 (gsi, loc, type, n - low_bits, cache);
1018       rhs_op = powi_as_mults_1 (gsi, loc, type, low_bits, cache);
1019     }
1020   else
1021     {
1022       /* Even and outside the table: square the half power.  */
1023       lhs_op = rhs_op = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1024     }
1025
1026   gassign *mul = gimple_build_assign (product, MULT_EXPR, lhs_op, rhs_op);
1027   gimple_set_location (mul, loc);
1028   gsi_insert_before (gsi, mul, GSI_SAME_STMT);
1029
1030   return product;
1031 }
1032
1033 /* Expand ARG0**N into multiplications of ARG0 inserted before GSI with
1034    location LOC.  Must be kept in sync with powi_cost above.  */
1035
1036 static tree
1037 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1038                tree arg0, HOST_WIDE_INT n)
1039 {
1040   tree type = TREE_TYPE (arg0);
1041   tree cache[POWI_TABLE_SIZE];
1042
1043   if (n == 0)
1044     return build_real (type, dconst1);
1045
1046   /* Only ARG0 itself (exponent one) is available to start with.  */
1047   memset (cache, 0, sizeof (cache));
1048   cache[1] = arg0;
1049
1050   const bool negative = n < 0;
1051   tree power = powi_as_mults_1 (gsi, loc, type, negative ? -n : n, cache);
1052   if (!negative)
1053     return power;
1054
1055   /* A negative exponent yields the reciprocal of the positive power.  */
1056   tree recip = make_temp_ssa_name (type, NULL, "powmult");
1057   gassign *div = gimple_build_assign (recip, RDIV_EXPR,
1058                                       build_real (type, dconst1), power);
1059   gimple_set_location (div, loc);
1060   gsi_insert_before (gsi, div, GSI_SAME_STMT);
1061   return recip;
1062 }
1063
1064 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1065    location info LOC.  If the arguments are appropriate, create an
1066    equivalent sequence of statements prior to GSI using an optimal
1067    number of multiplications, and return an expression holding the
1068    result.  Otherwise return NULL_TREE.  */
1069
1070 static tree
1071 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1072                             tree arg0, HOST_WIDE_INT n)
1073 {
1074   /* Reject 0 and the most negative N; negating as unsigned avoids UB.  */
1075   if ((unsigned HOST_WIDE_INT) n != -(unsigned HOST_WIDE_INT) n
1076       && ((n >= -1 && n <= 2)
1077           || (optimize_function_for_speed_p (cfun)
1078               && powi_cost (n) <= POWI_MAX_MULTS)))
1079     return powi_as_mults (gsi, loc, arg0, n);
1080
1081   return NULL_TREE;
1082 }
1083
1084 /* Create a call to FN with the single argument ARG, store its value
1085    in a fresh "powroot" SSA name that has ARG's type, and give the
1086    call location LOC.  The call is inserted prior to GSI's current
1087    position; the fresh SSA name is returned.  */
1088
1089 static tree
1090 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1091                        tree fn, tree arg)
1092 {
1093   gcall *call = gimple_build_call (fn, 1, arg);
1094
1095   /* The value of the call lives in a new temporary of ARG's type.  */
1096   tree lhs = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1097   gimple_set_lhs (call, lhs);
1098
1099   gimple_set_location (call, loc);
1100   gsi_insert_before (gsi, call, GSI_SAME_STMT);
1101
1102   return lhs;
1103 }
1104
1105 /* Build the binary operation ARG0 CODE ARG1 and assign it to a new
1106    SSA name that has ARG0's type and is created from NAME.  Give the
1107    statement location LOC, insert it prior to GSI's current position,
1108    and return the fresh SSA name.  */
1109
1110 static tree
1111 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1112                         const char *name, enum tree_code code,
1113                         tree arg0, tree arg1)
1114 {
1115   tree lhs = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1116   gassign *binop = gimple_build_assign (lhs, code, arg0, arg1);
1117   gimple_set_location (binop, loc);
1118   gsi_insert_before (gsi, binop, GSI_SAME_STMT);
1119   return lhs;
1120 }
1121
1122 /* Build the unary reference CODE applied to ARG0, of type TYPE, and
1123    assign it to a new SSA name of TYPE created from NAME.  Give the
1124    statement location LOC, insert it prior to GSI's current position,
1125    and return the fresh SSA name.  */
1126
1127 static inline tree
1128 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1129                       const char *name, enum tree_code code, tree arg0)
1130 {
1131   tree lhs = make_temp_ssa_name (type, NULL, name);
1132   gimple load = gimple_build_assign (lhs, build1 (code, type, arg0));
1133   gimple_set_location (load, loc);
1134   gsi_insert_before (gsi, load, GSI_SAME_STMT);
1135   return lhs;
1136 }
1137
1138 /* Convert VAL to TYPE with a NOP_EXPR assignment to a new SSA name,
1139    inserted prior to GSI's position with location LOC; return the name.  */
1140
1141 static tree
1142 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1143                        tree type, tree val)
1144 {
1145   tree converted = make_ssa_name (type);
1146   gassign *cast = gimple_build_assign (converted, NOP_EXPR, val);
1147   gimple_set_location (cast, loc);
1148   gsi_insert_before (gsi, cast, GSI_SAME_STMT);
1149   return converted;
1150 }
1151
1152 struct pow_synth_sqrt_info
1153 {
1154   bool *factors;           /* factors[i] is set when 0.5**(i+1) is used.  */
1155   unsigned int deepest;    /* i + 1 for the last factor set on success.  */
1156   unsigned int num_mults;  /* Bumped for every factor set but the last.  */
1157 };
1158
1159 /* Return true iff the real value C is exactly a sum of distinct
1160    powers of 0.5 with exponents in 1..N.  That is:
1161    C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1162    Record in INFO the a[i] (as INFO->factors[i-1]), the largest power
1163    of 0.5 that is used and the number of multiplications needed to
1164    combine the terms.  INFO->factors must have room for N entries.  */
1165
1166 bool
1167 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1168                                  struct pow_synth_sqrt_info *info)
1169 {
1170   REAL_VALUE_TYPE term = dconsthalf;
1171   REAL_VALUE_TYPE rest = c;
1172
1173   memset (info->factors, 0, n * sizeof (bool));
1174   info->num_mults = 0;
1175   info->deepest = 0;
1176
1177   for (unsigned i = 0; i < n; i++)
1178     {
1179       REAL_VALUE_TYPE diff;
1180
1181       /* Give up as soon as a subtraction is inexact.  */
1182       if (REAL_ARITHMETIC (diff, MINUS_EXPR, rest, term))
1183         return false;
1184
1185       /* Nothing is left over: 0.5**(i+1) completes the sum.  */
1186       if (REAL_VALUES_EQUAL (diff, dconst0))
1187         {
1188           info->factors[i] = true;
1189           info->deepest = i + 1;
1190           return true;
1191         }
1192
1193       /* The term is part of the sum unless subtracting it went negative.  */
1194       info->factors[i] = !REAL_VALUE_NEGATIVE (diff);
1195       if (info->factors[i])
1196         {
1197           rest = diff;
1198           info->num_mults++;
1199         }
1200
1201       /* Move on to the next power of 0.5.  */
1202       REAL_ARITHMETIC (term, MULT_EXPR, term, dconsthalf);
1203     }
1204   return false;
1205 }
1206
1207 /* Return the value of FN applied N times to ARG, emitting any calls
1208    that are still missing at GSI with location LOC.
1209    CACHE[k] holds the chain of length k once it has been built.
1210    cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times.  */
1211
1212 static tree
1213 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1214               tree fn, location_t loc, tree *cache)
1215 {
1216   /* A chain of this length was already emitted.  */
1217   if (cache[n])
1218     return cache[n];
1219
1220   /* Otherwise apply FN once more to the chain of length N - 1.  */
1221   tree inner = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1222   cache[n] = build_and_insert_call (gsi, loc, fn, inner);
1223
1224   return cache[n];
1225 }
1226
1227 /* Write to STREAM the text of FNAME applied N times to ARG.  With
1228    FNAME = "foo", ARG = "x" and N = 2 the output is:
1229    "foo (foo (x))".  For N = 0 only ARG is written.  */
1230
1231 static void
1232 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1233                  unsigned int n)
1234 {
1235   /* N openings, the innermost argument, then N closing parentheses.  */
1236   for (unsigned int i = 0; i < n; i++)
1237     fprintf (stream, "%s (", fname);
1238
1239   fprintf (stream, "%s", arg);
1240
1241   for (unsigned int i = 0; i < n; i++)
1242     fprintf (stream, ")");
1243 }
1244
1245 /* Write to STREAM the product of sqrt chains over ARG that INFO
1246    describes, with " * " between the chains.  Used for the dump file.  */
1247
1248 static void
1249 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1250                                 struct pow_synth_sqrt_info *info)
1251 {
1252   for (unsigned int idx = 0; idx < info->deepest; idx++)
1253     {
1254       /* Powers of 0.5 that are not part of the sum print nothing.  */
1255       if (!info->factors[idx])
1256         continue;
1257
1258       print_nested_fn (stream, "sqrt", arg, idx + 1);
1259       if (idx + 1 != info->deepest)
1260         fprintf (stream, " * ");
1261     }
1262 }
1263
1264 /* Write to STREAM how ARG is raised to the integer power N: ARG itself
1265    for N == 1, a powi call for N > 1, nothing otherwise.  Dump file only.  */
1266
1267 static void
1268 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1269 {
1270   if (n == 1)
1271     fprintf (stream, "%s", arg);
1272   else if (n > 1)
1273     fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1274 }
1275
1276 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1277    square roots.  Place at GSI and LOC.  Limit the maximum depth
1278    of the sqrt chains to MAX_DEPTH.  Return the tree holding the
1279    result of the expanded sequence or NULL_TREE if the expansion failed.
1280
1281    This routine assumes that ARG1 is a real number with a fractional part
1282    (the integer exponent case will have been handled earlier in
1283    gimple_expand_builtin_pow).
1284
1285    For ARG1 > 0.0:
1286    * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1287      FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1288                     FRAC_PART == ARG1 - WHOLE_PART:
1289      Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1290      POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1291      if it can be expressed as such, that is if FRAC_PART satisfies:
1292      FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1293      where integer a[i] is either 0 or 1.
1294
1295      Example:
1296      POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1297        --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1298
1299    For ARG1 < 0.0 there are two approaches:
1300    * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1301          is calculated as above.
1302
1303      Example:
1304      POW (x, -5.625) == 1.0 / POW (x, 5.625)
1305        -->  1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1306
1307    * (B) : WHOLE_PART := ceil (abs (ARG1))
1308            FRAC_PART  := WHOLE_PART - abs (ARG1)
1309      and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1310      Example:
1311      POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1312        --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1313
1314    For ARG1 < 0.0 we choose between (A) and (B) depending on
1315    how many multiplications we'd have to do.
1316    So, for the example in (B): POW (x, -5.875), if we were to
1317    follow algorithm (A) we would produce:
1318    1.0 / (POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X))))
1319    which contains more multiplications than approach (B).
1320
1321    Hopefully, this approach will eliminate potentially expensive POW library
1322    calls when unsafe floating point math is enabled and allow the compiler to
1323    further optimise the multiplies, square roots and divides produced by this
1324    function.  */
1325
1326 static tree
1327 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1328                      tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1329 {
1330   tree type = TREE_TYPE (arg0);
1331   machine_mode mode = TYPE_MODE (type);
1332   tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1333   bool one_over = true;
1334
1335   if (!sqrtfn)
1336     return NULL_TREE;
1337
1338   if (TREE_CODE (arg1) != REAL_CST)
1339     return NULL_TREE;
1340
1341   REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1342
1343   gcc_assert (max_depth > 0);
1344   tree *cache = XALLOCAVEC (tree, max_depth + 1);
1345
1346   struct pow_synth_sqrt_info synth_info;
1347   synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1348   synth_info.deepest = 0;
1349   synth_info.num_mults = 0;
1350
1351   bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1352   REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1353
1354   /* The whole and fractional parts of exp.  */
1355   REAL_VALUE_TYPE whole_part;
1356   REAL_VALUE_TYPE frac_part;
1357
1358   real_floor (&whole_part, mode, &exp);
1359   REAL_ARITHMETIC (frac_part, MINUS_EXPR, exp, whole_part);
1360
1361   /* Split used by approach (B); only computed for negative exponents.  */
1362   REAL_VALUE_TYPE ceil_whole = dconst0;
1363   REAL_VALUE_TYPE ceil_fract = dconst0;
1364
1365   if (neg_exp)
1366     {
1367       real_ceil (&ceil_whole, mode, &exp);
1368       REAL_ARITHMETIC (ceil_fract, MINUS_EXPR, ceil_whole, exp);
1369     }
1370
1371   if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1372     return NULL_TREE;
1373
1374   /* Check whether it's more profitable to not use 1.0 / ...  */
1375   if (neg_exp)
1376     {
1377       struct pow_synth_sqrt_info alt_synth_info;
1378       alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1379       alt_synth_info.deepest = 0;
1380       alt_synth_info.num_mults = 0;
1381
1382       if (representable_as_half_series_p (ceil_fract, max_depth,
1383                                            &alt_synth_info)
1384           && alt_synth_info.deepest <= synth_info.deepest
1385           && alt_synth_info.num_mults < synth_info.num_mults)
1386         {
1387           whole_part = ceil_whole;
1388           frac_part = ceil_fract;
1389           synth_info.deepest = alt_synth_info.deepest;
1390           synth_info.num_mults = alt_synth_info.num_mults;
1391           memcpy (synth_info.factors, alt_synth_info.factors,
1392                   (max_depth + 1) * sizeof (bool));
1393           one_over = false;
1394         }
1395     }
1396   /* Give up unless the whole part converts to a HOST_WIDE_INT exactly.  */
1397   HOST_WIDE_INT n = real_to_integer (&whole_part);
1398   REAL_VALUE_TYPE cint;
1399   real_from_integer (&cint, VOIDmode, n, SIGNED);
1400
1401   if (!real_identical (&whole_part, &cint))
1402     return NULL_TREE;
1403
1404   if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1405     return NULL_TREE;
1406
1407   memset (cache, 0, (max_depth + 1) * sizeof (tree));
1408
1409   tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1410
1411   /* Calculate the integer part of the exponent.  */
1412   if (n > 1)
1413     {
1414       integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1415       if (!integer_res)
1416         return NULL_TREE;
1417     }
1418
1419   if (dump_file)
1420     {
1421       char string[64];
1422
1423       real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1424       fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1425
1426       if (neg_exp)
1427         {
1428           if (one_over)
1429             {
1430               fprintf (dump_file, "1.0 / (");
1431               dump_integer_part (dump_file, "x", n);
1432               if (n > 0)
1433                 fprintf (dump_file, " * ");
1434               dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1435               fprintf (dump_file, ")");
1436             }
1437           else
1438             {
1439               dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1440               fprintf (dump_file, " / (");
1441               dump_integer_part (dump_file, "x", n);
1442               fprintf (dump_file, ")");
1443             }
1444         }
1445       else
1446         {
1447           dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1448           if (n > 0)
1449             fprintf (dump_file, " * ");
1450           dump_integer_part (dump_file, "x", n);
1451         }
1452
1453       fprintf (dump_file, "\ndeepest sqrt chain: %u\n", synth_info.deepest);
1454     }
1455
1456
1457   tree fract_res = NULL_TREE;
1458   cache[0] = arg0;
1459
1460   /* Calculate the fractional part of the exponent.  */
1461   for (unsigned i = 0; i < synth_info.deepest; i++)
1462     {
1463       if (synth_info.factors[i])
1464         {
1465           tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1466
1467           if (!fract_res)
1468               fract_res = sqrt_chain;
1469
1470           else
1471             fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1472                                            fract_res, sqrt_chain);
1473         }
1474     }
1475
1476   tree res = NULL_TREE;
1477
1478   if (neg_exp)
1479     {
1480       if (one_over)
1481         {
1482           if (n > 0)
1483             res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1484                                            fract_res, integer_res);
1485           else
1486             res = fract_res;
1487
1488           res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1489                                           build_real (type, dconst1), res);
1490         }
1491       else
1492         {
1493           res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1494                                          fract_res, integer_res);
1495         }
1496     }
1497   else
1498     res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1499                                    fract_res, integer_res);
1500   return res;
1501 }
1502
1503 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1504    with location info LOC.  If possible, create an equivalent and
1505    less expensive sequence of statements prior to GSI, and return an
1506    expression holding the result; otherwise return NULL_TREE.  */
1507
1508 static tree
1509 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
1510                            tree arg0, tree arg1)
1511 {
1512   REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
1513   REAL_VALUE_TYPE c2, dconst3;
1514   HOST_WIDE_INT n;
1515   tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
1516   machine_mode mode;
1517   bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
1518   bool hw_sqrt_exists, c_is_int, c2_is_int;
1519   /* Build the constant 0.25 from 1.0 by lowering its exponent by two.  */
1520   dconst1_4 = dconst1;
1521   SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
1522
1523   /* If the exponent isn't a constant, there's nothing of interest
1524      to be done.  */
1525   if (TREE_CODE (arg1) != REAL_CST)
1526     return NULL_TREE;
1527
1528   /* If the exponent is equivalent to an integer, expand to an optimal
1529      multiplication sequence when profitable.  */
1530   c = TREE_REAL_CST (arg1);
1531   n = real_to_integer (&c);
1532   real_from_integer (&cint, VOIDmode, n, SIGNED);
1533   c_is_int = real_identical (&c, &cint);
1534
1535   if (c_is_int
1536       && ((n >= -1 && n <= 2)
1537           || (flag_unsafe_math_optimizations
1538               && speed_p
1539               && powi_cost (n) <= POWI_MAX_MULTS)))
1540     return gimple_expand_builtin_powi (gsi, loc, arg0, n);
1541
1542   /* Attempt various optimizations using sqrt and cbrt.  */
1543   type = TREE_TYPE (arg0);
1544   mode = TYPE_MODE (type);
1545   sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1546
1547   /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
1548      unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
1549      sqrt(-0) = -0.  */
1550   if (sqrtfn
1551       && REAL_VALUES_EQUAL (c, dconsthalf)
1552       && !HONOR_SIGNED_ZEROS (mode))
1553     return build_and_insert_call (gsi, loc, sqrtfn, arg0);
1554
1555   hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
1556
1557   /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
1558      optimizations since 1./3. is not exactly representable.  If x
1559      is negative and finite, the correct value of pow(x,1./3.) is
1560      a NaN with the "invalid" exception raised, because the value
1561      of 1./3. actually has an even denominator.  The correct value
1562      of cbrt(x) is a negative real value.  */
1563   cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
1564   dconst1_3 = real_value_truncate (mode, dconst_third ());
1565
1566   if (flag_unsafe_math_optimizations
1567       && cbrtfn
1568       && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
1569       && REAL_VALUES_EQUAL (c, dconst1_3))
1570     return build_and_insert_call (gsi, loc, cbrtfn, arg0);
1571
1572   /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
1573      if we don't have a hardware sqrt insn.  */
1574   dconst1_6 = dconst1_3;
1575   SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
1576
1577   if (flag_unsafe_math_optimizations
1578       && sqrtfn
1579       && cbrtfn
1580       && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
1581       && speed_p
1582       && hw_sqrt_exists
1583       && REAL_VALUES_EQUAL (c, dconst1_6))
1584     {
1585       /* sqrt(x)  */
1586       sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
1587
1588       /* cbrt(sqrt(x))  */
1589       return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
1590     }
1591
1592
1593   /* Attempt to expand the POW as a product of square root chains.
1594      Expand the 0.25 case even when optimizing for size.  */
1595   if (flag_unsafe_math_optimizations
1596       && sqrtfn
1597       && hw_sqrt_exists
1598       && (speed_p || REAL_VALUES_EQUAL (c, dconst1_4))
1599       && !HONOR_SIGNED_ZEROS (mode))
1600     {
1601       unsigned int max_depth = speed_p
1602                                 ? PARAM_VALUE (PARAM_MAX_POW_SQRT_DEPTH)
1603                                 : 2;
1604
1605       tree expand_with_sqrts
1606         = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
1607
1608       if (expand_with_sqrts)
1609         return expand_with_sqrts;
1610     }
1611   /* Note whether 2*c is an integer; that excludes the cbrt case below.  */
1612   real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
1613   n = real_to_integer (&c2);
1614   real_from_integer (&cint, VOIDmode, n, SIGNED);
1615   c2_is_int = real_identical (&c2, &cint);
1616
1617   /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
1618
1619      powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
1620      1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.
1621
1622      Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
1623      different from pow(x, 1./3.) due to rounding and behavior with
1624      negative x, we need to constrain this transformation to unsafe
1625      math and positive x or finite math.  */
1626   real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
1627   real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
1628   real_round (&c2, mode, &c2);
1629   n = real_to_integer (&c2);
1630   real_from_integer (&cint, VOIDmode, n, SIGNED);
1631   real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
1632   real_convert (&c2, mode, &c2);
1633
1634   if (flag_unsafe_math_optimizations
1635       && cbrtfn
1636       && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
1637       && real_identical (&c2, &c)
1638       && !c2_is_int
1639       && optimize_function_for_speed_p (cfun)
1640       && powi_cost (n / 3) <= POWI_MAX_MULTS)
1641     {
1642       tree powi_x_ndiv3 = NULL_TREE;
1643
1644       /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
1645          possible or profitable, give up.  Skip the degenerate case when
1646          abs(n) < 3, where the result is always 1.  */
1647       if (absu_hwi (n) >= 3)
1648         {
1649           powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
1650                                                      abs_hwi (n / 3));
1651           if (!powi_x_ndiv3)
1652             return NULL_TREE;
1653         }
1654
1655       /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
1656          as that creates an unnecessary variable.  Instead, just produce
1657          either cbrt(x) or cbrt(x) * cbrt(x).  */
1658       cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
1659
1660       if (absu_hwi (n) % 3 == 1)
1661         powi_cbrt_x = cbrt_x;
1662       else
1663         powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1664                                               cbrt_x, cbrt_x);
1665
1666       /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
1667       if (absu_hwi (n) < 3)
1668         result = powi_cbrt_x;
1669       else
1670         result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1671                                          powi_x_ndiv3, powi_cbrt_x);
1672
1673       /* If n is negative, reciprocate the result.  */
1674       if (n < 0)
1675         result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1676                                          build_real (type, dconst1), result);
1677
1678       return result;
1679     }
1680
1681   /* No optimizations succeeded.  */
1682   return NULL_TREE;
1683 }
1684
1685 /* ARG is the argument to a cabs builtin call in GSI with location info
1686    LOC.  When profitable, emit before GSI the computation of
1687    sqrt(R*R + I*I), R and I being the real and imaginary components of
1688    ARG, and return the value holding it.  Otherwise return NULL_TREE.  */
1689
1690 static tree
1691 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
1692 {
1693   tree type = TREE_TYPE (TREE_TYPE (arg));
1694   tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1695   machine_mode mode = TYPE_MODE (type);
1696
1697   /* Requires unsafe math, a block optimized for speed, a sqrt builtin
1698      for TYPE and a sqrt optab handler for MODE.  */
1699   if (!(flag_unsafe_math_optimizations
1700         && optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
1701         && sqrtfn
1702         && optab_handler (sqrt_optab, mode) != CODE_FOR_nothing))
1703     return NULL_TREE;
1704
1705   /* Square the real part, then the imaginary part.  */
1706   tree re = build_and_insert_ref (gsi, loc, type, "cabs", REALPART_EXPR, arg);
1707   tree re_sq = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR, re, re);
1708   tree im = build_and_insert_ref (gsi, loc, type, "cabs", IMAGPART_EXPR, arg);
1709   tree im_sq = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR, im, im);
1710
1711   /* The magnitude is sqrt (R*R + I*I).  */
1712   tree sum_sq = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR,
1713                                         re_sq, im_sq);
1714
1715   return build_and_insert_call (gsi, loc, sqrtfn, sum_sq);
1716 }
1717
1718 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
1719    on the SSA_NAME argument of each of them.  Also expand powi(x,n) into
1720    an optimal number of multiplies, when n is a constant.  */
1721
1722 namespace {
1723
1724 const pass_data pass_data_cse_sincos =
1725 {
1726   GIMPLE_PASS, /* type */
1727   "sincos", /* name */
1728   OPTGROUP_NONE, /* optinfo_flags */
1729   TV_NONE, /* tv_id */
1730   PROP_ssa, /* properties_required */
1731   0, /* properties_provided */
1732   0, /* properties_destroyed */
1733   0, /* todo_flags_start */
1734   TODO_update_ssa, /* todo_flags_finish */
1735 };
1736
1737 class pass_cse_sincos : public gimple_opt_pass
1738 {
1739 public:
1740   pass_cse_sincos (gcc::context *ctxt)
1741     : gimple_opt_pass (pass_data_cse_sincos, ctxt)
1742   {}
1743
1744   /* opt_pass methods: */
1745   virtual bool gate (function *)
1746     {
1747       /* We no longer require either sincos or cexp, since powi expansion
1748          piggybacks on this pass.  */
1749       return optimize;
1750     }
1751
1752   virtual unsigned int execute (function *);
1753
1754 }; // class pass_cse_sincos
1755
1756 unsigned int
1757 pass_cse_sincos::execute (function *fun)
1758 {
1759   basic_block bb;
1760   bool cfg_changed = false;
1761
1762   calculate_dominance_info (CDI_DOMINATORS);
1763   memset (&sincos_stats, 0, sizeof (sincos_stats));
1764
1765   FOR_EACH_BB_FN (bb, fun)
1766     {
1767       gimple_stmt_iterator gsi;
1768       bool cleanup_eh = false;
1769
1770       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1771         {
1772           gimple stmt = gsi_stmt (gsi);
1773           tree fndecl;
1774
1775           /* Only the last stmt in a bb could throw, no need to call
1776              gimple_purge_dead_eh_edges if we change something in the middle
1777              of a basic block.  */
1778           cleanup_eh = false;
1779
1780           if (is_gimple_call (stmt)
1781               && gimple_call_lhs (stmt)
1782               && (fndecl = gimple_call_fndecl (stmt))
1783               && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
1784             {
1785               tree arg, arg0, arg1, result;
1786               HOST_WIDE_INT n;
1787               location_t loc;
1788
1789               switch (DECL_FUNCTION_CODE (fndecl))
1790                 {
1791                 CASE_FLT_FN (BUILT_IN_COS):
1792                 CASE_FLT_FN (BUILT_IN_SIN):
1793                 CASE_FLT_FN (BUILT_IN_CEXPI):
1794                   /* Make sure we have either sincos or cexp.  */
1795                   if (!targetm.libc_has_function (function_c99_math_complex)
1796                       && !targetm.libc_has_function (function_sincos))
1797                     break;
1798
1799                   arg = gimple_call_arg (stmt, 0);
1800                   if (TREE_CODE (arg) == SSA_NAME)
1801                     cfg_changed |= execute_cse_sincos_1 (arg);
1802                   break;
1803
1804                 CASE_FLT_FN (BUILT_IN_POW):
1805                   arg0 = gimple_call_arg (stmt, 0);
1806                   arg1 = gimple_call_arg (stmt, 1);
1807
1808                   loc = gimple_location (stmt);
1809                   result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
1810
1811                   if (result)
1812                     {
1813                       tree lhs = gimple_get_lhs (stmt);
1814                       gassign *new_stmt = gimple_build_assign (lhs, result);
1815                       gimple_set_location (new_stmt, loc);
1816                       unlink_stmt_vdef (stmt);
1817                       gsi_replace (&gsi, new_stmt, true);
1818                       cleanup_eh = true;
1819                       if (gimple_vdef (stmt))
1820                         release_ssa_name (gimple_vdef (stmt));
1821                     }
1822                   break;
1823
1824                 CASE_FLT_FN (BUILT_IN_POWI):
1825                   arg0 = gimple_call_arg (stmt, 0);
1826                   arg1 = gimple_call_arg (stmt, 1);
1827                   loc = gimple_location (stmt);
1828
1829                   if (real_minus_onep (arg0))
1830                     {
1831                       tree t0, t1, cond, one, minus_one;
1832                       gassign *stmt;
1833
1834                       t0 = TREE_TYPE (arg0);
1835                       t1 = TREE_TYPE (arg1);
1836                       one = build_real (t0, dconst1);
1837                       minus_one = build_real (t0, dconstm1);
1838
1839                       cond = make_temp_ssa_name (t1, NULL, "powi_cond");
1840                       stmt = gimple_build_assign (cond, BIT_AND_EXPR,
1841                                                   arg1, build_int_cst (t1, 1));
1842                       gimple_set_location (stmt, loc);
1843                       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1844
1845                       result = make_temp_ssa_name (t0, NULL, "powi");
1846                       stmt = gimple_build_assign (result, COND_EXPR, cond,
1847                                                   minus_one, one);
1848                       gimple_set_location (stmt, loc);
1849                       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1850                     }
1851                   else
1852                     {
1853                       if (!tree_fits_shwi_p (arg1))
1854                         break;
1855
1856                       n = tree_to_shwi (arg1);
1857                       result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
1858                     }
1859
1860                   if (result)
1861                     {
1862                       tree lhs = gimple_get_lhs (stmt);
1863                       gassign *new_stmt = gimple_build_assign (lhs, result);
1864                       gimple_set_location (new_stmt, loc);
1865                       unlink_stmt_vdef (stmt);
1866                       gsi_replace (&gsi, new_stmt, true);
1867                       cleanup_eh = true;
1868                       if (gimple_vdef (stmt))
1869                         release_ssa_name (gimple_vdef (stmt));
1870                     }
1871                   break;
1872
1873                 CASE_FLT_FN (BUILT_IN_CABS):
1874                   arg0 = gimple_call_arg (stmt, 0);
1875                   loc = gimple_location (stmt);
1876                   result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
1877
1878                   if (result)
1879                     {
1880                       tree lhs = gimple_get_lhs (stmt);
1881                       gassign *new_stmt = gimple_build_assign (lhs, result);
1882                       gimple_set_location (new_stmt, loc);
1883                       unlink_stmt_vdef (stmt);
1884                       gsi_replace (&gsi, new_stmt, true);
1885                       cleanup_eh = true;
1886                       if (gimple_vdef (stmt))
1887                         release_ssa_name (gimple_vdef (stmt));
1888                     }
1889                   break;
1890
1891                 default:;
1892                 }
1893             }
1894         }
1895       if (cleanup_eh)
1896         cfg_changed |= gimple_purge_dead_eh_edges (bb);
1897     }
1898
1899   statistics_counter_event (fun, "sincos statements inserted",
1900                             sincos_stats.inserted);
1901
1902   free_dominance_info (CDI_DOMINATORS);
1903   return cfg_changed ? TODO_cleanup_cfg : 0;
1904 }
1905
1906 } // anon namespace
1907
1908 gimple_opt_pass *
1909 make_pass_cse_sincos (gcc::context *ctxt)
1910 {
1911   return new pass_cse_sincos (ctxt);
1912 }
1913
1914 /* A symbolic number is used to detect byte permutation and selection
1915    patterns.  Therefore the field N contains an artificial number
1916    consisting of octet sized markers:
1917
1918    0    - target byte has the value 0
1919    FF   - target byte has an unknown value (eg. due to sign extension)
1920    1..size - marker value is the target byte index minus one.
1921
1922    To detect permutations on memory sources (arrays and structures), a symbolic
1923    number is also associated a base address (the array or structure the load is
1924    made from), an offset from the base address and a range which gives the
1925    difference between the highest and lowest accessed memory location to make
1926    such a symbolic number. The range is thus different from size which reflects
1927    the size of the type of current expression. Note that for non memory source,
1928    range holds the same value as size.
1929
1930    For instance, for an array char a[], (short) a[0] | (short) a[3] would have
1931    a size of 2 but a range of 4 while (short) a[0] | ((short) a[0] << 1) would
1932    still have a size of 2 but this time a range of 1.  */
1933
1934 struct symbolic_number {
1935   uint64_t n;
1936   tree type;
1937   tree base_addr;
1938   tree offset;
1939   HOST_WIDE_INT bytepos;
1940   tree alias_set;
1941   tree vuse;
1942   unsigned HOST_WIDE_INT range;
1943 };
1944
/* Number of bits used by one marker of a symbolic number.  */
#define BITS_PER_MARKER 8

/* Value of a marker with all its bits set.  */
#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)

/* Marker standing for a byte whose value is unknown.  */
#define MARKER_BYTE_UNKNOWN MARKER_MASK

/* Extract, in place, the highest order marker of the symbolic number N
   made of SIZE markers.  The result is zero iff that marker is zero.  */
#define HEAD_MARKER(n, size) \
  ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))

/* The number which the find_bswap_or_nop_1 result should match in
   order to have a nop.  The number is masked according to the size of
   the symbolic number before using it.  */
#define CMPNOP \
  (sizeof (int64_t) < 8 ? 0 : (uint64_t)0x08070605 << 32 | 0x04030201)

/* The number which the find_bswap_or_nop_1 result should match in
   order to have a byte swap.  The number is masked according to the
   size of the symbolic number before using it.  */
#define CMPXCHG \
  (sizeof (int64_t) < 8 ? 0 : (uint64_t)0x01020304 << 32 | 0x05060708)
1962
1963 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
1964    number N.  Return false if the requested operation is not permitted
1965    on a symbolic number.  */
1966
1967 static inline bool
1968 do_shift_rotate (enum tree_code code,
1969                  struct symbolic_number *n,
1970                  int count)
1971 {
1972   int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
1973   unsigned head_marker;
1974
1975   if (count % BITS_PER_UNIT != 0)
1976     return false;
1977   count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
1978
1979   /* Zero out the extra bits of N in order to avoid them being shifted
1980      into the significant bits.  */
1981   if (size < 64 / BITS_PER_MARKER)
1982     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
1983
1984   switch (code)
1985     {
1986     case LSHIFT_EXPR:
1987       n->n <<= count;
1988       break;
1989     case RSHIFT_EXPR:
1990       head_marker = HEAD_MARKER (n->n, size);
1991       n->n >>= count;
1992       /* Arithmetic shift of signed type: result is dependent on the value.  */
1993       if (!TYPE_UNSIGNED (n->type) && head_marker)
1994         for (i = 0; i < count / BITS_PER_MARKER; i++)
1995           n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
1996                   << ((size - 1 - i) * BITS_PER_MARKER);
1997       break;
1998     case LROTATE_EXPR:
1999       n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
2000       break;
2001     case RROTATE_EXPR:
2002       n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
2003       break;
2004     default:
2005       return false;
2006     }
2007   /* Zero unused bits for size.  */
2008   if (size < 64 / BITS_PER_MARKER)
2009     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
2010   return true;
2011 }
2012
2013 /* Perform sanity checking for the symbolic number N and the gimple
2014    statement STMT.  */
2015
2016 static inline bool
2017 verify_symbolic_number_p (struct symbolic_number *n, gimple stmt)
2018 {
2019   tree lhs_type;
2020
2021   lhs_type = gimple_expr_type (stmt);
2022
2023   if (TREE_CODE (lhs_type) != INTEGER_TYPE)
2024     return false;
2025
2026   if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
2027     return false;
2028
2029   return true;
2030 }
2031
2032 /* Initialize the symbolic number N for the bswap pass from the base element
2033    SRC manipulated by the bitwise OR expression.  */
2034
2035 static bool
2036 init_symbolic_number (struct symbolic_number *n, tree src)
2037 {
2038   int size;
2039
2040   n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
2041
2042   /* Set up the symbolic number N by setting each byte to a value between 1 and
2043      the byte size of rhs1.  The highest order byte is set to n->size and the
2044      lowest order byte to 1.  */
2045   n->type = TREE_TYPE (src);
2046   size = TYPE_PRECISION (n->type);
2047   if (size % BITS_PER_UNIT != 0)
2048     return false;
2049   size /= BITS_PER_UNIT;
2050   if (size > 64 / BITS_PER_MARKER)
2051     return false;
2052   n->range = size;
2053   n->n = CMPNOP;
2054
2055   if (size < 64 / BITS_PER_MARKER)
2056     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
2057
2058   return true;
2059 }
2060
2061 /* Check if STMT might be a byte swap or a nop from a memory source and returns
2062    the answer. If so, REF is that memory source and the base of the memory area
2063    accessed and the offset of the access from that base are recorded in N.  */
2064
2065 bool
2066 find_bswap_or_nop_load (gimple stmt, tree ref, struct symbolic_number *n)
2067 {
2068   /* Leaf node is an array or component ref. Memorize its base and
2069      offset from base to compare to other such leaf node.  */
2070   HOST_WIDE_INT bitsize, bitpos;
2071   machine_mode mode;
2072   int unsignedp, volatilep;
2073   tree offset, base_addr;
2074
2075   /* Not prepared to handle PDP endian.  */
2076   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
2077     return false;
2078
2079   if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
2080     return false;
2081
2082   base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
2083                                    &unsignedp, &volatilep, false);
2084
2085   if (TREE_CODE (base_addr) == MEM_REF)
2086     {
2087       offset_int bit_offset = 0;
2088       tree off = TREE_OPERAND (base_addr, 1);
2089
2090       if (!integer_zerop (off))
2091         {
2092           offset_int boff, coff = mem_ref_offset (base_addr);
2093           boff = wi::lshift (coff, LOG2_BITS_PER_UNIT);
2094           bit_offset += boff;
2095         }
2096
2097       base_addr = TREE_OPERAND (base_addr, 0);
2098
2099       /* Avoid returning a negative bitpos as this may wreak havoc later.  */
2100       if (wi::neg_p (bit_offset))
2101         {
2102           offset_int mask = wi::mask <offset_int> (LOG2_BITS_PER_UNIT, false);
2103           offset_int tem = bit_offset.and_not (mask);
2104           /* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf.
2105              Subtract it to BIT_OFFSET and add it (scaled) to OFFSET.  */
2106           bit_offset -= tem;
2107           tem = wi::arshift (tem, LOG2_BITS_PER_UNIT);
2108           if (offset)
2109             offset = size_binop (PLUS_EXPR, offset,
2110                                     wide_int_to_tree (sizetype, tem));
2111           else
2112             offset = wide_int_to_tree (sizetype, tem);
2113         }
2114
2115       bitpos += bit_offset.to_shwi ();
2116     }
2117
2118   if (bitpos % BITS_PER_UNIT)
2119     return false;
2120   if (bitsize % BITS_PER_UNIT)
2121     return false;
2122
2123   if (!init_symbolic_number (n, ref))
2124     return false;
2125   n->base_addr = base_addr;
2126   n->offset = offset;
2127   n->bytepos = bitpos / BITS_PER_UNIT;
2128   n->alias_set = reference_alias_ptr_type (ref);
2129   n->vuse = gimple_vuse (stmt);
2130   return true;
2131 }
2132
2133 /* Compute the symbolic number N representing the result of a bitwise OR on 2
2134    symbolic number N1 and N2 whose source statements are respectively
2135    SOURCE_STMT1 and SOURCE_STMT2.  */
2136
2137 static gimple
2138 perform_symbolic_merge (gimple source_stmt1, struct symbolic_number *n1,
2139                         gimple source_stmt2, struct symbolic_number *n2,
2140                         struct symbolic_number *n)
2141 {
2142   int i, size;
2143   uint64_t mask;
2144   gimple source_stmt;
2145   struct symbolic_number *n_start;
2146
2147   /* Sources are different, cancel bswap if they are not memory location with
2148      the same base (array, structure, ...).  */
2149   if (gimple_assign_rhs1 (source_stmt1) != gimple_assign_rhs1 (source_stmt2))
2150     {
2151       int64_t inc;
2152       HOST_WIDE_INT start_sub, end_sub, end1, end2, end;
2153       struct symbolic_number *toinc_n_ptr, *n_end;
2154
2155       if (!n1->base_addr || !n2->base_addr
2156           || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
2157         return NULL;
2158
2159       if (!n1->offset != !n2->offset
2160           || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
2161         return NULL;
2162
2163       if (n1->bytepos < n2->bytepos)
2164         {
2165           n_start = n1;
2166           start_sub = n2->bytepos - n1->bytepos;
2167           source_stmt = source_stmt1;
2168         }
2169       else
2170         {
2171           n_start = n2;
2172           start_sub = n1->bytepos - n2->bytepos;
2173           source_stmt = source_stmt2;
2174         }
2175
2176       /* Find the highest address at which a load is performed and
2177          compute related info.  */
2178       end1 = n1->bytepos + (n1->range - 1);
2179       end2 = n2->bytepos + (n2->range - 1);
2180       if (end1 < end2)
2181         {
2182           end = end2;
2183           end_sub = end2 - end1;
2184         }
2185       else
2186         {
2187           end = end1;
2188           end_sub = end1 - end2;
2189         }
2190       n_end = (end2 > end1) ? n2 : n1;
2191
2192       /* Find symbolic number whose lsb is the most significant.  */
2193       if (BYTES_BIG_ENDIAN)
2194         toinc_n_ptr = (n_end == n1) ? n2 : n1;
2195       else
2196         toinc_n_ptr = (n_start == n1) ? n2 : n1;
2197
2198       n->range = end - n_start->bytepos + 1;
2199
2200       /* Check that the range of memory covered can be represented by
2201          a symbolic number.  */
2202       if (n->range > 64 / BITS_PER_MARKER)
2203         return NULL;
2204
2205       /* Reinterpret byte marks in symbolic number holding the value of
2206          bigger weight according to target endianness.  */
2207       inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
2208       size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
2209       for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
2210         {
2211           unsigned marker
2212             = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
2213           if (marker && marker != MARKER_BYTE_UNKNOWN)
2214             toinc_n_ptr->n += inc;
2215         }
2216     }
2217   else
2218     {
2219       n->range = n1->range;
2220       n_start = n1;
2221       source_stmt = source_stmt1;
2222     }
2223
2224   if (!n1->alias_set
2225       || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
2226     n->alias_set = n1->alias_set;
2227   else
2228     n->alias_set = ptr_type_node;
2229   n->vuse = n_start->vuse;
2230   n->base_addr = n_start->base_addr;
2231   n->offset = n_start->offset;
2232   n->bytepos = n_start->bytepos;
2233   n->type = n_start->type;
2234   size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2235
2236   for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
2237     {
2238       uint64_t masked1, masked2;
2239
2240       masked1 = n1->n & mask;
2241       masked2 = n2->n & mask;
2242       if (masked1 && masked2 && masked1 != masked2)
2243         return NULL;
2244     }
2245   n->n = n1->n | n2->n;
2246
2247   return source_stmt;
2248 }
2249
2250 /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
2251    the operation given by the rhs of STMT on the result.  If the operation
2252    could successfully be executed the function returns a gimple stmt whose
2253    rhs's first tree is the expression of the source operand and NULL
2254    otherwise.  */
2255
2256 static gimple
2257 find_bswap_or_nop_1 (gimple stmt, struct symbolic_number *n, int limit)
2258 {
2259   enum tree_code code;
2260   tree rhs1, rhs2 = NULL;
2261   gimple rhs1_stmt, rhs2_stmt, source_stmt1;
2262   enum gimple_rhs_class rhs_class;
2263
2264   if (!limit || !is_gimple_assign (stmt))
2265     return NULL;
2266
2267   rhs1 = gimple_assign_rhs1 (stmt);
2268
2269   if (find_bswap_or_nop_load (stmt, rhs1, n))
2270     return stmt;
2271
2272   if (TREE_CODE (rhs1) != SSA_NAME)
2273     return NULL;
2274
2275   code = gimple_assign_rhs_code (stmt);
2276   rhs_class = gimple_assign_rhs_class (stmt);
2277   rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2278
2279   if (rhs_class == GIMPLE_BINARY_RHS)
2280     rhs2 = gimple_assign_rhs2 (stmt);
2281
2282   /* Handle unary rhs and binary rhs with integer constants as second
2283      operand.  */
2284
2285   if (rhs_class == GIMPLE_UNARY_RHS
2286       || (rhs_class == GIMPLE_BINARY_RHS
2287           && TREE_CODE (rhs2) == INTEGER_CST))
2288     {
2289       if (code != BIT_AND_EXPR
2290           && code != LSHIFT_EXPR
2291           && code != RSHIFT_EXPR
2292           && code != LROTATE_EXPR
2293           && code != RROTATE_EXPR
2294           && !CONVERT_EXPR_CODE_P (code))
2295         return NULL;
2296
2297       source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
2298
2299       /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
2300          we have to initialize the symbolic number.  */
2301       if (!source_stmt1)
2302         {
2303           if (gimple_assign_load_p (stmt)
2304               || !init_symbolic_number (n, rhs1))
2305             return NULL;
2306           source_stmt1 = stmt;
2307         }
2308
2309       switch (code)
2310         {
2311         case BIT_AND_EXPR:
2312           {
2313             int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2314             uint64_t val = int_cst_value (rhs2), mask = 0;
2315             uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
2316
2317             /* Only constants masking full bytes are allowed.  */
2318             for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
2319               if ((val & tmp) != 0 && (val & tmp) != tmp)
2320                 return NULL;
2321               else if (val & tmp)
2322                 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
2323
2324             n->n &= mask;
2325           }
2326           break;
2327         case LSHIFT_EXPR:
2328         case RSHIFT_EXPR:
2329         case LROTATE_EXPR:
2330         case RROTATE_EXPR:
2331           if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
2332             return NULL;
2333           break;
2334         CASE_CONVERT:
2335           {
2336             int i, type_size, old_type_size;
2337             tree type;
2338
2339             type = gimple_expr_type (stmt);
2340             type_size = TYPE_PRECISION (type);
2341             if (type_size % BITS_PER_UNIT != 0)
2342               return NULL;
2343             type_size /= BITS_PER_UNIT;
2344             if (type_size > 64 / BITS_PER_MARKER)
2345               return NULL;
2346
2347             /* Sign extension: result is dependent on the value.  */
2348             old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2349             if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
2350                 && HEAD_MARKER (n->n, old_type_size))
2351               for (i = 0; i < type_size - old_type_size; i++)
2352                 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
2353                         << ((type_size - 1 - i) * BITS_PER_MARKER);
2354
2355             if (type_size < 64 / BITS_PER_MARKER)
2356               {
2357                 /* If STMT casts to a smaller type mask out the bits not
2358                    belonging to the target type.  */
2359                 n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
2360               }
2361             n->type = type;
2362             if (!n->base_addr)
2363               n->range = type_size;
2364           }
2365           break;
2366         default:
2367           return NULL;
2368         };
2369       return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
2370     }
2371
2372   /* Handle binary rhs.  */
2373
2374   if (rhs_class == GIMPLE_BINARY_RHS)
2375     {
2376       struct symbolic_number n1, n2;
2377       gimple source_stmt, source_stmt2;
2378
2379       if (code != BIT_IOR_EXPR)
2380         return NULL;
2381
2382       if (TREE_CODE (rhs2) != SSA_NAME)
2383         return NULL;
2384
2385       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2386
2387       switch (code)
2388         {
2389         case BIT_IOR_EXPR:
2390           source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
2391
2392           if (!source_stmt1)
2393             return NULL;
2394
2395           source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
2396
2397           if (!source_stmt2)
2398             return NULL;
2399
2400           if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
2401             return NULL;
2402
2403           if (!n1.vuse != !n2.vuse
2404               || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
2405             return NULL;
2406
2407           source_stmt
2408             = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
2409
2410           if (!source_stmt)
2411             return NULL;
2412
2413           if (!verify_symbolic_number_p (n, stmt))
2414             return NULL;
2415
2416           break;
2417         default:
2418           return NULL;
2419         }
2420       return source_stmt;
2421     }
2422   return NULL;
2423 }
2424
2425 /* Check if STMT completes a bswap implementation or a read in a given
2426    endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
2427    accordingly.  It also sets N to represent the kind of operations
2428    performed: size of the resulting expression and whether it works on
2429    a memory source, and if so alias-set and vuse.  At last, the
2430    function returns a stmt whose rhs's first tree is the source
2431    expression.  */
2432
2433 static gimple
2434 find_bswap_or_nop (gimple stmt, struct symbolic_number *n, bool *bswap)
2435 {
2436 /* The number which the find_bswap_or_nop_1 result should match in order
2437    to have a full byte swap.  The number is shifted to the right
2438    according to the size of the symbolic number before using it.  */
2439   uint64_t cmpxchg = CMPXCHG;
2440   uint64_t cmpnop = CMPNOP;
2441
2442   gimple source_stmt;
2443   int limit;
2444
2445   /* The last parameter determines the depth search limit.  It usually
2446      correlates directly to the number n of bytes to be touched.  We
2447      increase that number by log2(n) + 1 here in order to also
2448      cover signed -> unsigned conversions of the src operand as can be seen
2449      in libgcc, and for initial shift/and operation of the src operand.  */
2450   limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
2451   limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
2452   source_stmt = find_bswap_or_nop_1 (stmt, n, limit);
2453
2454   if (!source_stmt)
2455     return NULL;
2456
2457   /* Find real size of result (highest non-zero byte).  */
2458   if (n->base_addr)
2459     {
2460       int rsize;
2461       uint64_t tmpn;
2462
2463       for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
2464       n->range = rsize;
2465     }
2466
2467   /* Zero out the extra bits of N and CMP*.  */
2468   if (n->range < (int) sizeof (int64_t))
2469     {
2470       uint64_t mask;
2471
2472       mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
2473       cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
2474       cmpnop &= mask;
2475     }
2476
2477   /* A complete byte swap should make the symbolic number to start with
2478      the largest digit in the highest order byte. Unchanged symbolic
2479      number indicates a read with same endianness as target architecture.  */
2480   if (n->n == cmpnop)
2481     *bswap = false;
2482   else if (n->n == cmpxchg)
2483     *bswap = true;
2484   else
2485     return NULL;
2486
2487   /* Useless bit manipulation performed by code.  */
2488   if (!n->base_addr && n->n == cmpnop)
2489     return NULL;
2490
2491   n->range *= BITS_PER_UNIT;
2492   return source_stmt;
2493 }
2494
2495 namespace {
2496
2497 const pass_data pass_data_optimize_bswap =
2498 {
2499   GIMPLE_PASS, /* type */
2500   "bswap", /* name */
2501   OPTGROUP_NONE, /* optinfo_flags */
2502   TV_NONE, /* tv_id */
2503   PROP_ssa, /* properties_required */
2504   0, /* properties_provided */
2505   0, /* properties_destroyed */
2506   0, /* todo_flags_start */
2507   0, /* todo_flags_finish */
2508 };
2509
2510 class pass_optimize_bswap : public gimple_opt_pass
2511 {
2512 public:
2513   pass_optimize_bswap (gcc::context *ctxt)
2514     : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
2515   {}
2516
2517   /* opt_pass methods: */
2518   virtual bool gate (function *)
2519     {
2520       return flag_expensive_optimizations && optimize;
2521     }
2522
2523   virtual unsigned int execute (function *);
2524
2525 }; // class pass_optimize_bswap
2526
/* Perform the bswap optimization: replace the expression computed in the rhs
   of CUR_STMT by an equivalent bswap, load or load + bswap expression.
   Which of these alternatives replaces the rhs is given by N->base_addr (non
   null if a load is needed) and BSWAP.  The type, VUSE and alias-set of the
   load to perform are also given in N while the builtin bswap to invoke is
   given in FNDECL.  Finally, if a load is involved, SRC_STMT refers to one of
   the load statements involved to construct the rhs in CUR_STMT and N->range
   gives the size of the rhs expression for maintaining some statistics.

   Note that if the replacement involves a load, CUR_STMT is moved just before
   SRC_STMT to do the load with the same VUSE, which can lead to CUR_STMT
   changing basic block.  */
2539
2540 static bool
2541 bswap_replace (gimple cur_stmt, gimple src_stmt, tree fndecl, tree bswap_type,
2542                tree load_type, struct symbolic_number *n, bool bswap)
2543 {
       /* Returns false, with CUR_STMT left untouched, only when a byte-swapping
          load is needed and the target reports the under-aligned access as
          slow; every other path rewrites CUR_STMT and returns true.  */
2544   gimple_stmt_iterator gsi;
2545   tree src, tmp, tgt;
2546   gimple bswap_stmt;
2547
       /* GSI designates CUR_STMT.  SRC starts out as the first rhs operand of
          SRC_STMT and TGT is the lhs that must receive the final value.  */
2548   gsi = gsi_for_stmt (cur_stmt);
2549   src = gimple_assign_rhs1 (src_stmt);
2550   tgt = gimple_assign_lhs (cur_stmt);
2551
2552   /* Need to load the value from memory first.  */
2553   if (n->base_addr)
2554     {
2555       gimple_stmt_iterator gsi_ins = gsi_for_stmt (src_stmt);
2556       tree addr_expr, addr_tmp, val_expr, val_tmp;
2557       tree load_offset_ptr, aligned_load_type;
2558       gimple addr_stmt, load_stmt;
2559       unsigned align;
2560       HOST_WIDE_INT load_offset = 0;
2561
2562       align = get_object_alignment (src);
2563       /* If the new access is smaller than the original one, we need
2564          to perform big endian adjustment.  */
2565       if (BYTES_BIG_ENDIAN)
2566         {
2567           HOST_WIDE_INT bitsize, bitpos;
2568           machine_mode mode;
2569           int unsignedp, volatilep;
2570           tree offset;
2571
2572           get_inner_reference (src, &bitsize, &bitpos, &offset, &mode,
2573                                &unsignedp, &volatilep, false);
2574           if (n->range < (unsigned HOST_WIDE_INT) bitsize)
2575             {
                   /* Skip the leading (bitsize - N->range) bits of the original
                      access.  If that offset is not a multiple of ALIGN, the
                      alignment still guaranteed is the lowest set bit of the
                      misalignment, which is what L & -L computes.  */
2576               load_offset = (bitsize - n->range) / BITS_PER_UNIT;
2577               unsigned HOST_WIDE_INT l
2578                 = (load_offset * BITS_PER_UNIT) & (align - 1);
2579               if (l)
2580                 align = l & -l;
2581             }
2582         }
2583
           /* Bail out before anything has been modified if a bswap is needed
              and the load would be under-aligned for its mode on a target
              that flags such accesses as slow.  */
2584       if (bswap
2585           && align < GET_MODE_ALIGNMENT (TYPE_MODE (load_type))
2586           && SLOW_UNALIGNED_ACCESS (TYPE_MODE (load_type), align))
2587         return false;
2588
2589       /* Move cur_stmt just before one of the loads of the original
2590          to ensure it has the same VUSE.  See PR61517 for what could
2591          go wrong.  */
2592       gsi_move_before (&gsi, &gsi_ins);
           /* Re-acquire the iterator now that CUR_STMT has been moved.  */
2593       gsi = gsi_for_stmt (cur_stmt);
2594
2595       /* Compute address to load from and cast according to the size
2596          of the load.  */
2597       addr_expr = build_fold_addr_expr (unshare_expr (src));
2598       if (is_gimple_mem_ref_addr (addr_expr))
2599         addr_tmp = addr_expr;
2600       else
2601         {
               /* The address is not directly usable as a MEM_REF operand, so
                  compute it into a fresh SSA name first.  */
2602           addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL,
2603                                          "load_src");
2604           addr_stmt = gimple_build_assign (addr_tmp, addr_expr);
2605           gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT);
2606         }
2607
2608       /* Perform the load.  */
2609       aligned_load_type = load_type;
           /* Use a variant of LOAD_TYPE carrying the (lower) known alignment
              when the access is not known to be naturally aligned.  */
2610       if (align < TYPE_ALIGN (load_type))
2611         aligned_load_type = build_aligned_type (load_type, align);
2612       load_offset_ptr = build_int_cst (n->alias_set, load_offset);
2613       val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
2614                               load_offset_ptr);
2615
           /* No byte swap needed: the expression is a plain load in target
              endianness.  Rewrite CUR_STMT in place and return.  */
2616       if (!bswap)
2617         {
2618           if (n->range == 16)
2619             nop_stats.found_16bit++;
2620           else if (n->range == 32)
2621             nop_stats.found_32bit++;
2622           else
2623             {
2624               gcc_assert (n->range == 64);
2625               nop_stats.found_64bit++;
2626             }
2627
2628           /* Convert the result of load if necessary.  */
2629           if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type))
2630             {
                   /* Load into a temporary and turn CUR_STMT into a conversion
                      of that temporary.  */
2631               val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
2632                                             "load_dst");
2633               load_stmt = gimple_build_assign (val_tmp, val_expr);
2634               gimple_set_vuse (load_stmt, n->vuse);
2635               gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
2636               gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
2637             }
2638           else
2639             {
                   /* Types agree: CUR_STMT itself becomes the load.  */
2640               gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
2641               gimple_set_vuse (cur_stmt, n->vuse);
2642             }
2643           update_stmt (cur_stmt);
2644
2645           if (dump_file)
2646             {
2647               fprintf (dump_file,
2648                        "%d bit load in target endianness found at: ",
2649                        (int) n->range);
2650               print_gimple_stmt (dump_file, cur_stmt, 0, 0);
2651             }
2652           return true;
2653         }
2654       else
2655         {
               /* A bswap follows: load into a temporary that is inserted
                  before CUR_STMT.  */
2656           val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
2657           load_stmt = gimple_build_assign (val_tmp, val_expr);
2658           gimple_set_vuse (load_stmt, n->vuse);
2659           gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
2660         }
           /* Only reached with BSWAP set: the loaded value is the source of
              the byte swap built below.  */
2661       src = val_tmp;
2662     }
2663
2664   if (n->range == 16)
2665     bswap_stats.found_16bit++;
2666   else if (n->range == 32)
2667     bswap_stats.found_32bit++;
2668   else
2669     {
2670       gcc_assert (n->range == 64);
2671       bswap_stats.found_64bit++;
2672     }
2673
2674   tmp = src;
2675
2676   /* Convert the src expression if necessary.  */
2677   if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
2678     {
2679       gimple convert_stmt;
2680
2681       tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
2682       convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
2683       gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
2684     }
2685
2686   /* Canonical form for 16 bit bswap is a rotate expression.  Only 16bit values
2687      are considered as rotation of 2N bit values by N bits is generally not
2688      equivalent to a bswap.  Consider for instance 0x01020304 r>> 16 which
2689      gives 0x03040102 while a bswap for that value is 0x04030201.  */
2690   if (bswap && n->range == 16)
2691     {
2692       tree count = build_int_cst (NULL, BITS_PER_UNIT);
2693       src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
           /* The lhs is filled in by gimple_set_lhs further down.  */
2694       bswap_stmt = gimple_build_assign (NULL, src);
2695     }
2696   else
2697     bswap_stmt = gimple_build_call (fndecl, 1, tmp);
2698
       /* TMP now names the lhs of the bswap statement: TGT itself, or a fresh
          temporary that is then converted into TGT.  */
2699   tmp = tgt;
2700
2701   /* Convert the result if necessary.  */
2702   if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
2703     {
2704       gimple convert_stmt;
2705
2706       tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
2707       convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
2708       gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
2709     }
2710
2711   gimple_set_lhs (bswap_stmt, tmp);
2712
2713   if (dump_file)
2714     {
2715       fprintf (dump_file, "%d bit bswap implementation found at: ",
2716                (int) n->range);
2717       print_gimple_stmt (dump_file, cur_stmt, 0, 0);
2718     }
2719
       /* Both insertions use GSI_SAME_STMT, so GSI still designates CUR_STMT:
          the bswap lands immediately after it, i.e. ahead of the result
          conversion inserted above, and CUR_STMT is then removed.  */
2720   gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
2721   gsi_remove (&gsi, true);
2722   return true;
2723 }
2724
2725 /* Find manual byte swap implementations as well as loads in a given
2726    endianness.  Byte swaps are turned into a bswap builtin invocation
2727    while endian loads are converted to a bswap builtin invocation or a
2728    simple load according to the target endianness.
        Scans every basic block of FUN and returns TODO_update_ssa if at
        least one statement was replaced, 0 otherwise.  */
2729
2730 unsigned int
2731 pass_optimize_bswap::execute (function *fun)
2732 {
2733   basic_block bb;
2734   bool bswap32_p, bswap64_p;
2735   bool changed = false;
2736   tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
2737
       /* The pass is only run for 8-bit bytes.  */
2738   if (BITS_PER_UNIT != 8)
2739     return 0;
2740
       /* A 32-bit bswap is usable when the builtin is declared and the target
          has an SImode bswap pattern.  A 64-bit bswap is usable when the
          builtin is declared and either a DImode pattern exists or the word
          mode is SImode and the 32-bit bswap is usable.  */
2741   bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2742                && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
2743   bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2744                && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
2745                    || (bswap32_p && word_mode == SImode)));
2746
2747   /* Determine the argument type of the builtins.  The code later on
2748      assumes that the return and argument type are the same.  */
2749   if (bswap32_p)
2750     {
2751       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
2752       bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
2753     }
2754
2755   if (bswap64_p)
2756     {
2757       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
2758       bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
2759     }
2760
       /* Reset the per-function statistics reported at the end.  */
2761   memset (&nop_stats, 0, sizeof (nop_stats));
2762   memset (&bswap_stats, 0, sizeof (bswap_stats));
2763
2764   FOR_EACH_BB_FN (bb, fun)
2765     {
2766       gimple_stmt_iterator gsi;
2767
2768       /* We do a reverse scan for bswap patterns to make sure we get the
2769          widest match.  As bswap pattern matching doesn't handle previously
2770          inserted smaller bswap replacements as sub-patterns, the wider
2771          variant wouldn't be detected.  */
2772       for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
2773         {
2774           gimple src_stmt, cur_stmt = gsi_stmt (gsi);
2775           tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
2776           enum tree_code code;
2777           struct symbolic_number n;
2778           bool bswap;
2779
2780           /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
2781              might be moved to a different basic block by bswap_replace and gsi
2782              must not point to it if that's the case.  Moving the gsi_prev
2783              here makes sure that gsi points to the statement previous to
2784              cur_stmt while still making sure that all statements are
2785              considered in this basic block.  */
2786           gsi_prev (&gsi);
2787
2788           if (!is_gimple_assign (cur_stmt))
2789             continue;
2790
               /* Only bitwise ORs and rotates by a constant whole number of
                  bytes are considered as the root of a pattern.  */
2791           code = gimple_assign_rhs_code (cur_stmt);
2792           switch (code)
2793             {
2794             case LROTATE_EXPR:
2795             case RROTATE_EXPR:
2796               if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
2797                   || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
2798                      % BITS_PER_UNIT)
2799                 continue;
2800               /* Fall through.  */
2801             case BIT_IOR_EXPR:
2802               break;
2803             default:
2804               continue;
2805             }
2806
               /* A null result means no pattern was recognized.  Otherwise N
                  describes the match and BSWAP tells whether a byte swap is
                  needed.  */
2807           src_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
2808
2809           if (!src_stmt)
2810             continue;
2811
               /* Pick the load type and, where available, the builtin and its
                  argument type for the size of the match.  Sizes other than
                  16, 32 and 64 bits are ignored.  */
2812           switch (n.range)
2813             {
2814             case 16:
2815               /* Already in canonical form, nothing to do.  */
2816               if (code == LROTATE_EXPR || code == RROTATE_EXPR)
2817                 continue;
2818               load_type = bswap_type = uint16_type_node;
2819               break;
2820             case 32:
2821               load_type = uint32_type_node;
2822               if (bswap32_p)
2823                 {
2824                   fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
2825                   bswap_type = bswap32_type;
2826                 }
2827               break;
2828             case 64:
2829               load_type = uint64_type_node;
2830               if (bswap64_p)
2831                 {
2832                   fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
2833                   bswap_type = bswap64_type;
2834                 }
2835               break;
2836             default:
2837               continue;
2838             }
2839
               /* A 32- or 64-bit byte swap can only be emitted through the
                  builtin, so give up without one.  The 16-bit case never needs
                  FNDECL since bswap_replace emits a rotate for it.  */
2840           if (bswap && !fndecl && n.range != 16)
2841             continue;
2842
2843           if (bswap_replace (cur_stmt, src_stmt, fndecl, bswap_type, load_type,
2844                              &n, bswap))
2845             changed = true;
2846         }
2847     }
2848
2849   statistics_counter_event (fun, "16-bit nop implementations found",
2850                             nop_stats.found_16bit);
2851   statistics_counter_event (fun, "32-bit nop implementations found",
2852                             nop_stats.found_32bit);
2853   statistics_counter_event (fun, "64-bit nop implementations found",
2854                             nop_stats.found_64bit);
2855   statistics_counter_event (fun, "16-bit bswap implementations found",
2856                             bswap_stats.found_16bit);
2857   statistics_counter_event (fun, "32-bit bswap implementations found",
2858                             bswap_stats.found_32bit);
2859   statistics_counter_event (fun, "64-bit bswap implementations found",
2860                             bswap_stats.found_64bit);
2861
       /* Request an SSA update only if some statement was replaced.  */
2862   return (changed ? TODO_update_ssa : 0);
2863 }
2864
2865 } // anon namespace
2866
     /* Allocate and return a new pass_optimize_bswap object constructed with
        CTXT.  */
2867 gimple_opt_pass *
2868 make_pass_optimize_bswap (gcc::context *ctxt)
2869 {
2870   return new pass_optimize_bswap (ctxt);
2871 }
2872
2873 /* Return true if STMT is a type conversion operation that can be stripped
2874    when used in a widening multiply operation whose result has type
        RESULT_TYPE.  STMT must be a GIMPLE assignment since its rhs code is
        read unconditionally.  For an INTEGER_TYPE result only conversions
        (CONVERT_EXPR_CODE_P) qualify; for any other result type the answer
        is whether STMT is a FIXED_CONVERT_EXPR.  */
2875 static bool
2876 widening_mult_conversion_strippable_p (tree result_type, gimple stmt)
2877 {
2878   enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2879
2880   if (TREE_CODE (result_type) == INTEGER_TYPE)
2881     {
           /* OP_TYPE is the type the conversion produces, INNER_OP_TYPE the
              type it converts from.  */
2882       tree op_type;
2883       tree inner_op_type;
2884
2885       if (!CONVERT_EXPR_CODE_P (rhs_code))
2886         return false;
2887
2888       op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2889
2890       /* If the type of OP has the same precision as the result, then
2891          we can strip this conversion.  The multiply operation will be
2892          selected to create the correct extension as a by-product.  */
2893       if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2894         return true;
2895
2896       /* We can also strip a conversion if it preserves the signed-ness of
2897          the operation and doesn't narrow the range.  */
2898       inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2899
2900       /* If the inner-most type is unsigned, then we can strip any
2901          intermediate widening operation.  If it's signed, then the
2902          intermediate widening operation must also be signed.  */
2903       if ((TYPE_UNSIGNED (inner_op_type)
2904            || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2905           && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2906         return true;
2907
2908       return false;
2909     }
2910
2911   return rhs_code == FIXED_CONVERT_EXPR;
2912 }
2913
2914 /* Return true if RHS is a suitable operand for a widening multiplication,
2915    assuming a target type of TYPE.
2916    There are two cases:
2917
2918      - RHS is an SSA name whose value, possibly after looking through one
2919        strippable conversion, has a type of the same kind as TYPE and at
          most half its precision.  Store that value in *NEW_RHS_OUT and its
          type in *TYPE_OUT.
2920
2921      - RHS is an integer constant, directly or as the operand of a strippable
2922        conversion.  Store that value in *NEW_RHS_OUT and set *TYPE_OUT to
          NULL.

        The outputs are only meaningful when true is returned.  */
2923
2924 static bool
2925 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2926                         tree *new_rhs_out)
2927 {
2928   gimple stmt;
2929   tree type1, rhs1;
2930
2931   if (TREE_CODE (rhs) == SSA_NAME)
2932     {
2933       stmt = SSA_NAME_DEF_STMT (rhs);
2934       if (is_gimple_assign (stmt))
2935         {
               /* Keep RHS itself unless its definition is a conversion that
                  may be looked through.  */
2936           if (! widening_mult_conversion_strippable_p (type, stmt))
2937             rhs1 = rhs;
2938           else
2939             {
2940               rhs1 = gimple_assign_rhs1 (stmt);
2941
                   /* A converted constant is reported like a bare constant.  */
2942               if (TREE_CODE (rhs1) == INTEGER_CST)
2943                 {
2944                   *new_rhs_out = rhs1;
2945                   *type_out = NULL;
2946                   return true;
2947                 }
2948             }
2949         }
2950       else
2951         rhs1 = rhs;
2952
2953       type1 = TREE_TYPE (rhs1);
2954
           /* The operand type must have the same tree code as TYPE and at
              most half its precision.  */
2955       if (TREE_CODE (type1) != TREE_CODE (type)
2956           || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2957         return false;
2958
2959       *new_rhs_out = rhs1;
2960       *type_out = type1;
2961       return true;
2962     }
2963
2964   if (TREE_CODE (rhs) == INTEGER_CST)
2965     {
2966       *new_rhs_out = rhs;
2967       *type_out = NULL;
2968       return true;
2969     }
2970
2971   return false;
2972 }
2973
2974 /* Return true if STMT performs a widening multiplication, assuming the
2975    output type is TYPE.  If so, store the unwidened types of the operands
2976    in *TYPE1_OUT and *TYPE2_OUT respectively.  Also fill *RHS1_OUT and
2977    *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2978    and *TYPE2_OUT would give the operands of the multiplication.
        TYPE here is the type of STMT's lhs, which must be an INTEGER_TYPE or
        a FIXED_POINT_TYPE.  On success the operands are ordered so that
        *TYPE1_OUT has at least the precision of *TYPE2_OUT.  */
2979
2980 static bool
2981 is_widening_mult_p (gimple stmt,
2982                     tree *type1_out, tree *rhs1_out,
2983                     tree *type2_out, tree *rhs2_out)
2984 {
2985   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2986
2987   if (TREE_CODE (type) != INTEGER_TYPE
2988       && TREE_CODE (type) != FIXED_POINT_TYPE)
2989     return false;
2990
2991   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2992                                rhs1_out))
2993     return false;
2994
2995   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2996                                rhs2_out))
2997     return false;
2998
       /* A NULL type marks a constant operand.  It adopts the type of the
          other operand provided the constant fits in it; two constant
          operands are rejected.  */
2999   if (*type1_out == NULL)
3000     {
3001       if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
3002         return false;
3003       *type1_out = *type2_out;
3004     }
3005
3006   if (*type2_out == NULL)
3007     {
3008       if (!int_fits_type_p (*rhs2_out, *type1_out))
3009         return false;
3010       *type2_out = *type1_out;
3011     }
3012
3013   /* Ensure that the larger of the two operands comes first. */
3014   if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
3015     {
3016       std::swap (*type1_out, *type2_out);
3017       std::swap (*rhs1_out, *rhs2_out);
3018     }
3019
3020   return true;
3021 }
3022
3023 /* Process a single gimple statement STMT, which has a MULT_EXPR as
3024    its rhs, and try to convert it into a WIDEN_MULT_EXPR.  The return
3025    value is true iff we converted the statement.  GSI is the iterator
        used to insert any operand casts that are needed.  Only INTEGER_TYPE
        results are handled.  */
3026
3027 static bool
3028 convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi)
3029 {
3030   tree lhs, rhs1, rhs2, type, type1, type2;
3031   enum insn_code handler;
3032   machine_mode to_mode, from_mode, actual_mode;
3033   optab op;
3034   int actual_precision;
3035   location_t loc = gimple_location (stmt);
3036   bool from_unsigned1, from_unsigned2;
3037
3038   lhs = gimple_assign_lhs (stmt);
3039   type = TREE_TYPE (lhs);
3040   if (TREE_CODE (type) != INTEGER_TYPE)
3041     return false;
3042
3043   if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
3044     return false;
3045
3046   to_mode = TYPE_MODE (type);
3047   from_mode = TYPE_MODE (type1);
3048   from_unsigned1 = TYPE_UNSIGNED (type1);
3049   from_unsigned2 = TYPE_UNSIGNED (type2);
3050
       /* Select the widening multiply optab from the operand signedness:
          unsigned, signed or mixed.  */
3051   if (from_unsigned1 && from_unsigned2)
3052     op = umul_widen_optab;
3053   else if (!from_unsigned1 && !from_unsigned2)
3054     op = smul_widen_optab;
3055   else
3056     op = usmul_widen_optab;
3057
3058   handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
3059                                                   0, &actual_mode);
3060
3061   if (handler == CODE_FOR_nothing)
3062     {
           /* No direct handler.  For an unsigned or mixed multiply, retry
              with the signed optab; a missing signed handler is final.  */
3063       if (op != smul_widen_optab)
3064         {
3065           /* We can use a signed multiply with unsigned types as long as
3066              there is a wider mode to use, or it is the smaller of the two
3067              types that is unsigned.  Note that type1 >= type2, always.  */
3068           if ((TYPE_UNSIGNED (type1)
3069                && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
3070               || (TYPE_UNSIGNED (type2)
3071                   && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
3072             {
                   /* An unsigned operand fills FROM_MODE completely, so step
                      to the next wider mode, which must still be narrower
                      than the result mode.  */
3073               from_mode = GET_MODE_WIDER_MODE (from_mode);
3074               if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
3075                 return false;
3076             }
3077
3078           op = smul_widen_optab;
3079           handler = find_widening_optab_handler_and_mode (op, to_mode,
3080                                                           from_mode, 0,
3081                                                           &actual_mode);
3082
3083           if (handler == CODE_FOR_nothing)
3084             return false;
3085
               /* Both operands are cast to signed types below.  */
3086           from_unsigned1 = from_unsigned2 = false;
3087         }
3088       else
3089         return false;
3090     }
3091
3092   /* Ensure that the inputs to the handler are in the correct precision
3093      for the opcode.  This will be the full mode size.  */
3094   actual_precision = GET_MODE_PRECISION (actual_mode);
       /* The result must be at least twice as wide as the mode the handler
          actually operates on.  */
3095   if (2 * actual_precision > TYPE_PRECISION (type))
3096     return false;
3097   if (actual_precision != TYPE_PRECISION (type1)
3098       || from_unsigned1 != TYPE_UNSIGNED (type1))
3099     rhs1 = build_and_insert_cast (gsi, loc,
3100                                   build_nonstandard_integer_type
3101                                     (actual_precision, from_unsigned1), rhs1);
3102   if (actual_precision != TYPE_PRECISION (type2)
3103       || from_unsigned2 != TYPE_UNSIGNED (type2))
3104     rhs2 = build_and_insert_cast (gsi, loc,
3105                                   build_nonstandard_integer_type
3106                                     (actual_precision, from_unsigned2), rhs2);
3107
3108   /* Handle constants.  */
3109   if (TREE_CODE (rhs1) == INTEGER_CST)
3110     rhs1 = fold_convert (type1, rhs1);
3111   if (TREE_CODE (rhs2) == INTEGER_CST)
3112     rhs2 = fold_convert (type2, rhs2);
3113
       /* Rewrite STMT in place as a widening multiply of the narrow
          operands.  */
3114   gimple_assign_set_rhs1 (stmt, rhs1);
3115   gimple_assign_set_rhs2 (stmt, rhs2);
3116   gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
3117   update_stmt (stmt);
3118   widen_mul_stats.widen_mults_inserted++;
3119   return true;
3120 }
3121
3122 /* Process a single gimple statement STMT, which is found at the
3123    iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
3124    rhs (given by CODE), and try to convert it into a
3125    WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
3126    is true iff we converted the statement.  */
3127
3128 static bool
3129 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
3130                             enum tree_code code)
3131 {
3132   gimple rhs1_stmt = NULL, rhs2_stmt = NULL;
3133   gimple conv1_stmt = NULL, conv2_stmt = NULL, conv_stmt;
3134   tree type, type1, type2, optype;
3135   tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
3136   enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
3137   optab this_optab;
3138   enum tree_code wmult_code;
3139   enum insn_code handler;
3140   machine_mode to_mode, from_mode, actual_mode;
3141   location_t loc = gimple_location (stmt);
3142   int actual_precision;
3143   bool from_unsigned1, from_unsigned2;
3144
3145   lhs = gimple_assign_lhs (stmt);
3146   type = TREE_TYPE (lhs);
3147   if (TREE_CODE (type) != INTEGER_TYPE
3148       && TREE_CODE (type) != FIXED_POINT_TYPE)
3149     return false;
3150
3151   if (code == MINUS_EXPR)
3152     wmult_code = WIDEN_MULT_MINUS_EXPR;
3153   else
3154     wmult_code = WIDEN_MULT_PLUS_EXPR;
3155
3156   rhs1 = gimple_assign_rhs1 (stmt);
3157   rhs2 = gimple_assign_rhs2 (stmt);
3158
       /* Record the defining statement and rhs code of each SSA operand.
          The code stays ERROR_MARK for operands that are not defined by an
          assignment.  */
3159   if (TREE_CODE (rhs1) == SSA_NAME)
3160     {
3161       rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
3162       if (is_gimple_assign (rhs1_stmt))
3163         rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
3164     }
3165
3166   if (TREE_CODE (rhs2) == SSA_NAME)
3167     {
3168       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
3169       if (is_gimple_assign (rhs2_stmt))
3170         rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
3171     }
3172
3173   /* Allow for one conversion statement between the multiply
3174      and addition/subtraction statement.  If there are more than
3175      one conversions then we assume they would invalidate this
3176      transformation.  If that's not the case then they should have
3177      been folded before now.  */
3178   if (CONVERT_EXPR_CODE_P (rhs1_code))
3179     {
3180       conv1_stmt = rhs1_stmt;
3181       rhs1 = gimple_assign_rhs1 (rhs1_stmt);
3182       if (TREE_CODE (rhs1) == SSA_NAME)
3183         {
3184           rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
3185           if (is_gimple_assign (rhs1_stmt))
3186             rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
3187         }
3188       else
3189         return false;
3190     }
3191   if (CONVERT_EXPR_CODE_P (rhs2_code))
3192     {
3193       conv2_stmt = rhs2_stmt;
3194       rhs2 = gimple_assign_rhs1 (rhs2_stmt);
3195       if (TREE_CODE (rhs2) == SSA_NAME)
3196         {
3197           rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
3198           if (is_gimple_assign (rhs2_stmt))
3199             rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
3200         }
3201       else
3202         return false;
3203     }
3204
3205   /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
3206      is_widening_mult_p, but we still need the rhs returns.
3207
3208      It might also appear that it would be sufficient to use the existing
3209      operands of the widening multiply, but that would limit the choice of
3210      multiply-and-accumulate instructions.
3211
3212      If the widened-multiplication result has more than one uses, it is
3213      probably wiser not to do the conversion.

          The first operand is only considered as the multiplication for an
          addition, so for a subtraction only the subtrahend may be the
          multiplication.  */
3214   if (code == PLUS_EXPR
3215       && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
3216     {
3217       if (!has_single_use (rhs1)
3218           || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
3219                                   &type2, &mult_rhs2))
3220         return false;
3221       add_rhs = rhs2;
3222       conv_stmt = conv1_stmt;
3223     }
3224   else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
3225     {
3226       if (!has_single_use (rhs2)
3227           || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
3228                                   &type2, &mult_rhs2))
3229         return false;
3230       add_rhs = rhs1;
3231       conv_stmt = conv2_stmt;
3232     }
3233   else
3234     return false;
3235
3236   to_mode = TYPE_MODE (type);
3237   from_mode = TYPE_MODE (type1);
3238   from_unsigned1 = TYPE_UNSIGNED (type1);
3239   from_unsigned2 = TYPE_UNSIGNED (type2);
3240   optype = type1;
3241
3242   /* There's no such thing as a mixed sign madd yet, so use a wider mode.  */
3243   if (from_unsigned1 != from_unsigned2)
3244     {
3245       if (!INTEGRAL_TYPE_P (type))
3246         return false;
3247       /* We can use a signed multiply with unsigned types as long as
3248          there is a wider mode to use, or it is the smaller of the two
3249          types that is unsigned.  Note that type1 >= type2, always.  */
3250       if ((from_unsigned1
3251            && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
3252           || (from_unsigned2
3253               && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
3254         {
               /* The wider mode must still be narrower than the result
                  mode.  */
3255           from_mode = GET_MODE_WIDER_MODE (from_mode);
3256           if (GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
3257             return false;
3258         }
3259
           /* Treat both operands as signed and look up the optab with a
              signed type of FROM_MODE's precision.  */
3260       from_unsigned1 = from_unsigned2 = false;
3261       optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
3262                                                false);
3263     }
3264
3265   /* If there was a conversion between the multiply and addition
3266      then we need to make sure it fits a multiply-and-accumulate.
3267      There should be a single mode change which does not change the
3268      value.  */
3269   if (conv_stmt)
3270     {
3271       /* We use the original, unmodified data types for this.  */
3272       tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
3273       tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
           /* DATA_SIZE is the sum of the two operand precisions.  The
              product counts as unsigned only if both operands are.  */
3274       int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
3275       bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
3276
3277       if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
3278         {
3279           /* Conversion is a truncate.  */
3280           if (TYPE_PRECISION (to_type) < data_size)
3281             return false;
3282         }
3283       else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
3284         {
3285           /* Conversion is an extend.  Check it's the right sort.  */
3286           if (TYPE_UNSIGNED (from_type) != is_unsigned
3287               && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
3288             return false;
3289         }
3290       /* else convert is a no-op for our purposes.  */
3291     }
3292
3293   /* Verify that the machine can perform a widening multiply
3294      accumulate in this mode/signedness combination, otherwise
3295      this transformation is likely to pessimize code.  */
3296   this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
3297   handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
3298                                                   from_mode, 0, &actual_mode);
3299
3300   if (handler == CODE_FOR_nothing)
3301     return false;
3302
3303   /* Ensure that the inputs to the handler are in the correct precision
3304      for the opcode.  This will be the full mode size.  */
3305   actual_precision = GET_MODE_PRECISION (actual_mode);
3306   if (actual_precision != TYPE_PRECISION (type1)
3307       || from_unsigned1 != TYPE_UNSIGNED (type1))
3308     mult_rhs1 = build_and_insert_cast (gsi, loc,
3309                                        build_nonstandard_integer_type
3310                                          (actual_precision, from_unsigned1),
3311                                        mult_rhs1);
3312   if (actual_precision != TYPE_PRECISION (type2)
3313       || from_unsigned2 != TYPE_UNSIGNED (type2))
3314     mult_rhs2 = build_and_insert_cast (gsi, loc,
3315                                        build_nonstandard_integer_type
3316                                          (actual_precision, from_unsigned2),
3317                                        mult_rhs2);
3318
       /* The accumulator operand must have the type of the result.  */
3319   if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
3320     add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
3321
3322   /* Handle constants.  */
3323   if (TREE_CODE (mult_rhs1) == INTEGER_CST)
3324     mult_rhs1 = fold_convert (type1, mult_rhs1);
3325   if (TREE_CODE (mult_rhs2) == INTEGER_CST)
3326     mult_rhs2 = fold_convert (type2, mult_rhs2);
3327
       /* Replace the rhs of the statement at GSI with the fused operation,
          then update whatever statement GSI now designates.  */
3328   gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
3329                                   add_rhs);
3330   update_stmt (gsi_stmt (*gsi));
3331   widen_mul_stats.maccs_inserted++;
3332   return true;
3333 }
3334
3335 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3336    with uses in additions and subtractions to form fused multiply-add
3337    operations.  Returns true if successful and MUL_STMT should be removed.  */
3338
3339 static bool
3340 convert_mult_to_fma (gimple mul_stmt, tree op1, tree op2)
3341 {
       /* MUL_STMT defines the product of OP1 and OP2 (the callers in this
          file pass either a MULT_EXPR assignment with its two operands, or a
          pow call with a constant exponent of 2 and its first argument for
          both operands).  The work is done in two walks over the immediate
          uses of the product: a read-only check that every non-debug use can
          become an FMA, followed by the actual rewrite.  No statement is
          modified when any of the checks fails.  */
3342   tree mul_result = gimple_get_lhs (mul_stmt);
3343   tree type = TREE_TYPE (mul_result);
3344   gimple use_stmt, neguse_stmt;
3345   gassign *fma_stmt;
3346   use_operand_p use_p;
3347   imm_use_iterator imm_iter;
3348
       /* Do not fuse floating-point operations when the FP contraction mode
          is FP_CONTRACT_OFF.  */
3349   if (FLOAT_TYPE_P (type)
3350       && flag_fp_contract_mode == FP_CONTRACT_OFF)
3351     return false;
3352
3353   /* We don't want to do bitfield reduction ops.  */
3354   if (INTEGRAL_TYPE_P (type)
3355       && (TYPE_PRECISION (type)
3356           != GET_MODE_PRECISION (TYPE_MODE (type))))
3357     return false;
3358
3359   /* If the target doesn't support it, don't generate it.  We assume that
3360      if fma isn't available then fms, fnma or fnms are not either.  */
3361   if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
3362     return false;
3363
3364   /* If the multiplication has zero uses, it is kept around probably because
3365      of -fnon-call-exceptions.  Don't optimize it away in that case,
3366      that is DCE's job.  */
3367   if (has_zero_uses (mul_result))
3368     return false;
3369
3370   /* Make sure that the multiplication statement becomes dead after
3371      the transformation, thus that all uses are transformed to FMAs.
3372      This means we assume that an FMA operation has the same cost
3373      as an addition.  */
3374   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3375     {
3376       enum tree_code use_code;
           /* RESULT is the value that appears as an operand of the add or
              subtract: the product itself, or its negation when the use goes
              through a NEGATE_EXPR.  */
3377       tree result = mul_result;
           /* NEGATE_P records whether the product enters the sum negated,
              i.e. it is used through a NEGATE_EXPR or as the subtrahend of a
              MINUS_EXPR; the two together cancel out.  In this loop it only
              feeds the fnma heuristic below.  */
3378       bool negate_p = false;
3379
3380       use_stmt = USE_STMT (use_p);
3381
3382       if (is_gimple_debug (use_stmt))
3383         continue;
3384
3385       /* For now restrict this operation to single basic blocks.  In theory
3386          we would want to support sinking the multiplication in
3387          m = a*b;
3388          if ()
3389            ma = m + c;
3390          else
3391            d = m;
3392          to form a fma in the then block and sink the multiplication to the
3393          else block.  */
3394       if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3395         return false;
3396
3397       if (!is_gimple_assign (use_stmt))
3398         return false;
3399
3400       use_code = gimple_assign_rhs_code (use_stmt);
3401
3402       /* A negate on the multiplication leads to FNMA.  */
3403       if (use_code == NEGATE_EXPR)
3404         {
3405           ssa_op_iter iter;
3406           use_operand_p usep;
3407
3408           result = gimple_assign_lhs (use_stmt);
3409
3410           /* Make sure the negate statement becomes dead with this
3411              single transformation.  */
               /* NOTE(review): this call overwrites USE_P, which is also the
                  loop variable of the enclosing FOR_EACH_IMM_USE_FAST;
                  presumably harmless because the iteration state is kept in
                  IMM_ITER -- confirm against the macro definition.  */
3412           if (!single_imm_use (gimple_assign_lhs (use_stmt),
3413                                &use_p, &neguse_stmt))
3414             return false;
3415
3416           /* Make sure the multiplication isn't also used on that stmt.  */
3417           FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3418             if (USE_FROM_PTR (usep) == mul_result)
3419               return false;
3420
3421           /* Re-validate.  */
               /* From here on the user of the negated value is the statement
                  being checked; it has to pass the same basic-block and
                  assignment tests as a direct use.  */
3422           use_stmt = neguse_stmt;
3423           if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3424             return false;
3425           if (!is_gimple_assign (use_stmt))
3426             return false;
3427
3428           use_code = gimple_assign_rhs_code (use_stmt);
3429           negate_p = true;
3430         }
3431
3432       switch (use_code)
3433         {
3434         case MINUS_EXPR:
               /* The product (or its negation) is the subtrahend.  */
3435           if (gimple_assign_rhs2 (use_stmt) == result)
3436             negate_p = !negate_p;
3437           break;
3438         case PLUS_EXPR:
3439           break;
3440         default:
3441           /* FMA can only be formed from PLUS and MINUS.  */
3442           return false;
3443         }
3444
3445       /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
3446          by a MULT_EXPR that we'll visit later, we might be able to
3447          get a more profitable match with fnma.
3448          OTOH, if we don't, a negate / fma pair has likely lower latency
3449          than a mult / subtract pair.  */
3450       if (use_code == MINUS_EXPR && !negate_p
3451           && gimple_assign_rhs1 (use_stmt) == result
3452           && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing
3453           && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
3454         {
3455           tree rhs2 = gimple_assign_rhs2 (use_stmt);
3456
3457           if (TREE_CODE (rhs2) == SSA_NAME)
3458             {
3459               gimple stmt2 = SSA_NAME_DEF_STMT (rhs2);
3460               if (has_single_use (rhs2)
3461                   && is_gimple_assign (stmt2)
3462                   && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3463               return false;
3464             }
3465         }
3466
3467       /* We can't handle a * b + a * b.  */
3468       if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
3469         return false;
3470
3471       /* While it is possible to validate whether or not the exact form
3472          that we've recognized is available in the backend, the assumption
3473          is that the transformation is never a loss.  For instance, suppose
3474          the target only has the plain FMA pattern available.  Consider
3475          a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which
3476          is still two operations.  Consider -(a*b)-c -> fma(-a,b,-c): we
3477          still have 3 operations, but in the FMA form the two NEGs are
3478          independent and could be run in parallel.  */
3479     }
3480
       /* Every non-debug use has been validated above, so each of them is now
          rewritten into an FMA_EXPR without repeating the checks.  */
3481   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3482     {
3483       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3484       enum tree_code use_code;
3485       tree addop, mulop1 = op1, result = mul_result;
3486       bool negate_p = false;
3487
3488       if (is_gimple_debug (use_stmt))
3489         continue;
3490
3491       use_code = gimple_assign_rhs_code (use_stmt);
3492       if (use_code == NEGATE_EXPR)
3493         {
               /* Remove the negate and continue with its single user, which
                  becomes the statement to rewrite; the negation is folded
                  into the first multiplication operand below.  The return
                  value of single_imm_use is not checked here because the
                  first loop already required it to succeed.  */
3494           result = gimple_assign_lhs (use_stmt);
3495           single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3496           gsi_remove (&gsi, true);
3497           release_defs (use_stmt);
3498
3499           use_stmt = neguse_stmt;
3500           gsi = gsi_for_stmt (use_stmt);
3501           use_code = gimple_assign_rhs_code (use_stmt);
3502           negate_p = true;
3503         }
3504
           /* ADDOP is the operand of the add/subtract that is not the
              product.  */
3505       if (gimple_assign_rhs1 (use_stmt) == result)
3506         {
3507           addop = gimple_assign_rhs2 (use_stmt);
3508           /* a * b - c -> a * b + (-c)  */
3509           if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
3510             addop = force_gimple_operand_gsi (&gsi,
3511                                               build1 (NEGATE_EXPR,
3512                                                       type, addop),
3513                                               true, NULL_TREE, true,
3514                                               GSI_SAME_STMT);
3515         }
3516       else
3517         {
3518           addop = gimple_assign_rhs1 (use_stmt);
3519           /* a - b * c -> (-b) * c + a */
3520           if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
3521             negate_p = !negate_p;
3522         }
3523
           /* A negated product is expressed by negating the first
              multiplication operand; the negation is gimplified through GSI,
              which is positioned at USE_STMT.  */
3524       if (negate_p)
3525         mulop1 = force_gimple_operand_gsi (&gsi,
3526                                            build1 (NEGATE_EXPR,
3527                                                    type, mulop1),
3528                                            true, NULL_TREE, true,
3529                                            GSI_SAME_STMT);
3530
           /* The FMA keeps the lhs of the add/subtract and replaces that
              statement in place.  */
3531       fma_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt),
3532                                       FMA_EXPR, mulop1, op2, addop);
3533       gsi_replace (&gsi, fma_stmt, true);
3534       widen_mul_stats.fmas_inserted++;
3535     }
3536
       /* MUL_STMT itself is left in place; the caller removes it.  */
3537   return true;
3538 }
3539
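3540 /* Find integer multiplications where the operands are extended from
3541    smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
3542    where appropriate.  The same pass also looks for additions and
        subtractions that can become widening multiply-accumulates, and
        fuses multiplications (including pow calls with a constant exponent
        of 2) with the additions and subtractions that use them into
        FMA_EXPRs.  */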
3543
3544 namespace {
3545
     /* Descriptor of the "widening_mul" GIMPLE pass: PROP_ssa is the only
        required property and TODO_update_ssa is requested when the pass
        finishes.  */
3546 const pass_data pass_data_optimize_widening_mul =
3547 {
3548   GIMPLE_PASS, /* type */
3549   "widening_mul", /* name */
3550   OPTGROUP_NONE, /* optinfo_flags */
3551   TV_NONE, /* tv_id */
3552   PROP_ssa, /* properties_required */
3553   0, /* properties_provided */
3554   0, /* properties_destroyed */
3555   0, /* todo_flags_start */
3556   TODO_update_ssa, /* todo_flags_finish */
3557 };
3558
     /* Pass object built from the descriptor above; the per-function work is
        in execute, defined after the class.  */
3559 class pass_optimize_widening_mul : public gimple_opt_pass
3560 {
3561 public:
3562   pass_optimize_widening_mul (gcc::context *ctxt)
3563     : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
3564   {}
3565
3566   /* opt_pass methods: */
       /* The gate is true only when both flag_expensive_optimizations and
          optimize are nonzero.  */
3567   virtual bool gate (function *)
3568     {
3569       return flag_expensive_optimizations && optimize;
3570     }
3571
3572   virtual unsigned int execute (function *);
3573
3574 }; // class pass_optimize_widening_mul
3575
     /* Scan all statements of FUN.  MULT_EXPR assignments are first offered to
        convert_mult_to_widen and, if that fails, to convert_mult_to_fma;
        PLUS_EXPR and MINUS_EXPR assignments are offered to
        convert_plusminus_to_widen; calls to the pow builtins that have a lhs
        and whose second argument is a REAL_CST equal to dconst2 are treated
        as a multiplication of the first argument by itself and offered to
        convert_mult_to_fma.  Returns TODO_cleanup_cfg if purging dead EH
        edges changed the CFG, and 0 otherwise.  */
3576 unsigned int
3577 pass_optimize_widening_mul::execute (function *fun)
3578 {
3579   basic_block bb;
3580   bool cfg_changed = false;
3581
       /* Start from zeroed counters; they are reported at the end.  */
3582   memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
3583
3584   FOR_EACH_BB_FN (bb, fun)
3585     {
3586       gimple_stmt_iterator gsi;
3587
           /* GSI is advanced at the bottom of the loop body; the paths that
              remove the current statement `continue' instead, so gsi_next is
              skipped for them.  */
3588       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
3589         {
3590           gimple stmt = gsi_stmt (gsi);
3591           enum tree_code code;
3592
3593           if (is_gimple_assign (stmt))
3594             {
3595               code = gimple_assign_rhs_code (stmt);
3596               switch (code)
3597                 {
3598                 case MULT_EXPR:
                       /* FMA formation is only tried when no widening
                          multiply was formed.  When convert_mult_to_fma
                          succeeds, the multiplication is removed here.  */
3599                   if (!convert_mult_to_widen (stmt, &gsi)
3600                       && convert_mult_to_fma (stmt,
3601                                               gimple_assign_rhs1 (stmt),
3602                                               gimple_assign_rhs2 (stmt)))
3603                     {
3604                       gsi_remove (&gsi, true);
3605                       release_defs (stmt);
3606                       continue;
3607                     }
3608                   break;
3609
3610                 case PLUS_EXPR:
3611                 case MINUS_EXPR:
                       /* The return value is not needed here.  */
3612                   convert_plusminus_to_widen (&gsi, stmt, code);
3613                   break;
3614
3615                 default:;
3616                 }
3617             }
3618           else if (is_gimple_call (stmt)
3619                    && gimple_call_lhs (stmt))
3620             {
3621               tree fndecl = gimple_call_fndecl (stmt);
3622               if (fndecl
3623                   && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
3624                 {
3625                   switch (DECL_FUNCTION_CODE (fndecl))
3626                     {
3627                       case BUILT_IN_POWF:
3628                       case BUILT_IN_POW:
3629                       case BUILT_IN_POWL:
                             /* A pow call whose exponent is the constant
                                dconst2 is handled as arg0 * arg0.  */
3630                         if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
3631                             && REAL_VALUES_EQUAL
3632                                  (TREE_REAL_CST (gimple_call_arg (stmt, 1)),
3633                                   dconst2)
3634                             && convert_mult_to_fma (stmt,
3635                                                     gimple_call_arg (stmt, 0),
3636                                                     gimple_call_arg (stmt, 0)))
3637                           {
                                 /* STMT is a call, so besides removing it,
                                    unlink_stmt_vdef is called on it first and
                                    dead EH edges of BB are purged afterwards;
                                    a purge that reports a change triggers a
                                    CFG cleanup.  */
3638                             unlink_stmt_vdef (stmt);
3639                             if (gsi_remove (&gsi, true)
3640                                 && gimple_purge_dead_eh_edges (bb))
3641                               cfg_changed = true;
3642                             release_defs (stmt);
3643                             continue;
3644                           }
3645                           break;
3646
3647                       default:;
3648                     }
3649                 }
3650             }
3651           gsi_next (&gsi);
3652         }
3653     }
3654
3655   statistics_counter_event (fun, "widening multiplications inserted",
3656                             widen_mul_stats.widen_mults_inserted);
3657   statistics_counter_event (fun, "widening maccs inserted",
3658                             widen_mul_stats.maccs_inserted);
3659   statistics_counter_event (fun, "fused multiply-adds inserted",
3660                             widen_mul_stats.fmas_inserted);
3661
3662   return cfg_changed ? TODO_cleanup_cfg : 0;
3663 }
3664
3665 } // anon namespace
3666
3667 gimple_opt_pass *
3668 make_pass_optimize_widening_mul (gcc::context *ctxt)
3669 {
       /* Allocate a new instance of the widening_mul pass for CTXT with
          `new' and return it to the caller.  */
3670   return new pass_optimize_widening_mul (ctxt);
3671 }