gcc/tree-ssa-math-opts.c

   1 /* Global, SSA-based optimizations using mathematical identities.
   2    Copyright (C) 2005-2015 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* Currently, the only mini-pass in this file tries to CSE reciprocal
  21    operations.  These are common in sequences such as this one:
  22
  23         modulus = sqrt(x*x + y*y + z*z);
  24         x = x / modulus;
  25         y = y / modulus;
  26         z = z / modulus;
  27
  28    that can be optimized to
  29
  30         modulus = sqrt(x*x + y*y + z*z);
  31         rmodulus = 1.0 / modulus;
  32         x = x * rmodulus;
  33         y = y * rmodulus;
  34         z = z * rmodulus;
  35
  36    We do this for loop invariant divisors, and with this pass whenever
  37    we notice that a division has the same divisor multiple times.
  38
  39    Of course, like in PRE, we don't insert a division if a dominator
  40    already has one.  However, this cannot be done as an extension of
  41    PRE for several reasons.
  42
  43    First of all, with some experiments it was found out that the
  44    transformation is not always useful if there are only two divisions
  45    by the same divisor.  This is probably because modern processors
  46    can pipeline the divisions; on older, in-order processors it should
  47    still be effective to optimize two divisions by the same number.
  48    We make this a param, and it shall be called N in the remainder of
  49    this comment.
  50
  51    Second, if trapping math is active, we have less freedom on where
  52    to insert divisions: we can only do so in basic blocks that already
  53    contain one.  (If divisions don't trap, instead, we can insert
  54    divisions elsewhere, which will be in blocks that are common dominators
  55    of those that have the division).
  56
  57    We really don't want to compute the reciprocal unless a division will
  58    be found.  To do this, we won't insert the division in a basic block
  59    that has less than N divisions *post-dominating* it.
  60
  61    The algorithm constructs a subset of the dominator tree, holding the
  62    blocks containing the divisions and the common dominators to them,
  63    and walks it twice.  The first walk is in post-order, and it annotates
  64    each block with the number of divisions that post-dominate it: this
  65    gives information on where divisions can be inserted profitably.
  66    The second walk is in pre-order, and it inserts divisions as explained
  67    above, and replaces divisions by multiplications.
  68
  69    In the best case, the cost of the pass is O(n_statements).  In the
  70    worst-case, the cost is due to creating the dominator tree subset,
  71    with a cost of O(n_basic_blocks ^ 2); however this can only happen
  72    for n_statements / n_basic_blocks statements.  So, the amortized cost
  73    of creating the dominator tree subset is O(n_basic_blocks) and the
  74    worst-case cost of the pass is O(n_statements * n_basic_blocks).
  75
  76    More practically, the cost will be small because there are few
  77    divisions, and they tend to be in the same basic block, so insert_bb
  78    is called very few times.
  79
  80    If we did this using domwalk.c, an efficient implementation would have
  81    to work on all the variables in a single pass, because we could not
  82    work on just a subset of the dominator tree, as we do now, and the
  83    cost would also be something like O(n_statements * n_basic_blocks).
  84    The data structures would be more complex in order to work on all the
  85    variables in a single pass.  */
  86
  87 #include "config.h"
  88 #include "system.h"
  89 #include "coretypes.h"
  90 #include "tm.h"
  91 #include "flags.h"
  92 #include "alias.h"
  93 #include "symtab.h"
  94 #include "tree.h"
  95 #include "fold-const.h"
  96 #include "predict.h"
  97 #include "hard-reg-set.h"
  98 #include "function.h"
  99 #include "dominance.h"
 100 #include "cfg.h"
 101 #include "basic-block.h"
 102 #include "tree-ssa-alias.h"
 103 #include "internal-fn.h"
 104 #include "gimple-fold.h"
 105 #include "gimple-expr.h"
 106 #include "gimple.h"
 107 #include "gimple-iterator.h"
 108 #include "gimplify.h"
 109 #include "gimplify-me.h"
 110 #include "stor-layout.h"
 111 #include "gimple-ssa.h"
 112 #include "tree-cfg.h"
 113 #include "tree-phinodes.h"
 114 #include "ssa-iterators.h"
 115 #include "stringpool.h"
 116 #include "tree-ssanames.h"
 117 #include "rtl.h"
 118 #include "insn-config.h"
 119 #include "expmed.h"
 120 #include "dojump.h"
 121 #include "explow.h"
 122 #include "calls.h"
 123 #include "emit-rtl.h"
 124 #include "varasm.h"
 125 #include "stmt.h"
 126 #include "expr.h"
 127 #include "tree-dfa.h"
 128 #include "tree-ssa.h"
 129 #include "tree-pass.h"
 130 #include "alloc-pool.h"
 131 #include "target.h"
 132 #include "gimple-pretty-print.h"
 133 #include "builtins.h"
 134 #include "params.h"
 135
 136 /* FIXME: RTL headers have to be included here for optabs.  */
 137 #include "rtl.h"                /* Because optabs.h wants enum rtx_code.  */
 138 #include "expr.h"               /* Because optabs.h wants sepops.  */
 139 #include "insn-codes.h"
 140 #include "optabs.h"
 141
 142 /* This structure represents one basic block that either computes a
 143    division, or is a common dominator for basic block that compute a
 144    division.  */
 145 struct occurrence {
 146   /* The basic block represented by this structure.  */
 147   basic_block bb;
 148
 149   /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
 150      inserted in BB.  */
 151   tree recip_def;
 152
 153   /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
 154      was inserted in BB.  */
 155   gimple recip_def_stmt;
 156
 157   /* Pointer to a list of "struct occurrence"s for blocks dominated
 158      by BB.  */
 159   struct occurrence *children;
 160
 161   /* Pointer to the next "struct occurrence"s in the list of blocks
 162      sharing a common dominator.  */
 163   struct occurrence *next;
 164
 165   /* The number of divisions that are in BB before compute_merit.  The
 166      number of divisions that are in BB or post-dominate it after
 167      compute_merit.  */
 168   int num_divisions;
 169
 170   /* True if the basic block has a division, false if it is a common
 171      dominator for basic blocks that do.  If it is false and trapping
 172      math is active, BB is not a candidate for inserting a reciprocal.  */
 173   bool bb_has_division;
 174 };
 175
 /* Counters kept by the reciprocal pass.  */
 static struct
 {
   /* How many 1.0/X computations have been inserted.  */
   int rdivs_inserted;

   /* How many 1.0/FUNC operations have been inserted, i.e. how many
      calls were switched over to a reciprocal builtin.  */
   int rfuncs_inserted;
 } reciprocal_stats;
 184
 /* Counters kept by the sin/cos CSE code.  */
 static struct
 {
   /* How many cexpi calls have been inserted.  */
   int inserted;
 } sincos_stats;
 190
 /* Counters for hand-written nop / bswap sequences, one set for each of
    NOP_STATS and BSWAP_STATS, split by operand width.  */
 static struct
 {
   /* 16-bit hand-written nop / bswaps found.  */
   int found_16bit;

   /* 32-bit hand-written nop / bswaps found.  */
   int found_32bit;

   /* 64-bit hand-written nop / bswaps found.  */
   int found_64bit;
 } nop_stats, bswap_stats;
 202
 /* Counters for widening-multiplication related insertions.  */
 static struct
 {
   /* Widening multiplication operations inserted.  */
   int widen_mults_inserted;

   /* Integer multiply-and-accumulate operations inserted.  */
   int maccs_inserted;

   /* Floating-point fused multiply-add operations inserted.  */
   int fmas_inserted;
 } widen_mul_stats;
 214
 215 /* The instance of "struct occurrence" representing the highest
 216    interesting block in the dominator tree.  */
 217 static struct occurrence *occ_head;
 218
 219 /* Allocation pool for getting instances of "struct occurrence".  */
 220 static pool_allocator<occurrence> *occ_pool;
 221
 222
 223
 224 /* Allocate and return a new struct occurrence for basic block BB, and
 225    whose children list is headed by CHILDREN.  */
 226 static struct occurrence *
 227 occ_new (basic_block bb, struct occurrence *children)
 228 {
 229   struct occurrence *occ;
 230
 231   bb->aux = occ = occ_pool->allocate ();
 232   memset (occ, 0, sizeof (struct occurrence));
 233
 234   occ->bb = bb;
 235   occ->children = children;
 236   return occ;
 237 }
 238
 239
 240 /* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
 241    list of "struct occurrence"s, one per basic block, having IDOM as
 242    their common dominator.
 243
 244    We try to insert NEW_OCC as deep as possible in the tree, and we also
 245    insert any other block that is a common dominator for BB and one
 246    block already in the tree.  */
 247
 248 static void
 249 insert_bb (struct occurrence *new_occ, basic_block idom,
 250            struct occurrence **p_head)
 251 {
 252   struct occurrence *occ, **p_occ;
 253
 254   for (p_occ = p_head; (occ = *p_occ) != NULL; )
 255     {
 256       basic_block bb = new_occ->bb, occ_bb = occ->bb;
 257       basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
 258       if (dom == bb)
 259         {
 260           /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
 261              from its list.  */
 262           *p_occ = occ->next;
 263           occ->next = new_occ->children;
 264           new_occ->children = occ;
 265
 266           /* Try the next block (it may as well be dominated by BB).  */
 267         }
 268
 269       else if (dom == occ_bb)
 270         {
 271           /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
 272           insert_bb (new_occ, dom, &occ->children);
 273           return;
 274         }
 275
 276       else if (dom != idom)
 277         {
 278           gcc_assert (!dom->aux);
 279
 280           /* There is a dominator between IDOM and BB, add it and make
 281              two children out of NEW_OCC and OCC.  First, remove OCC from
 282              its list.  */
 283           *p_occ = occ->next;
 284           new_occ->next = occ;
 285           occ->next = NULL;
 286
 287           /* None of the previous blocks has DOM as a dominator: if we tail
 288              recursed, we would reexamine them uselessly. Just switch BB with
 289              DOM, and go on looking for blocks dominated by DOM.  */
 290           new_occ = occ_new (dom, new_occ);
 291         }
 292
 293       else
 294         {
 295           /* Nothing special, go on with the next element.  */
 296           p_occ = &occ->next;
 297         }
 298     }
 299
 300   /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
 301   new_occ->next = *p_head;
 302   *p_head = new_occ;
 303 }
 304
 305 /* Register that we found a division in BB.  */
 306
 307 static inline void
 308 register_division_in (basic_block bb)
 309 {
 310   struct occurrence *occ;
 311
 312   occ = (struct occurrence *) bb->aux;
 313   if (!occ)
 314     {
 315       occ = occ_new (bb, NULL);
 316       insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
 317     }
 318
 319   occ->bb_has_division = true;
 320   occ->num_divisions++;
 321 }
 322
 323
 324 /* Compute the number of divisions that postdominate each block in OCC and
 325    its children.  */
 326
 327 static void
 328 compute_merit (struct occurrence *occ)
 329 {
 330   struct occurrence *occ_child;
 331   basic_block dom = occ->bb;
 332
 333   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
 334     {
 335       basic_block bb;
 336       if (occ_child->children)
 337         compute_merit (occ_child);
 338
 339       if (flag_exceptions)
 340         bb = single_noncomplex_succ (dom);
 341       else
 342         bb = dom;
 343
 344       if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
 345         occ->num_divisions += occ_child->num_divisions;
 346     }
 347 }
 348
 349
 350 /* Return whether USE_STMT is a floating-point division by DEF.  */
 351 static inline bool
 352 is_division_by (gimple use_stmt, tree def)
 353 {
 354   return is_gimple_assign (use_stmt)
 355          && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
 356          && gimple_assign_rhs2 (use_stmt) == def
 357          /* Do not recognize x / x as valid division, as we are getting
 358             confused later by replacing all immediate uses x in such
 359             a stmt.  */
 360          && gimple_assign_rhs1 (use_stmt) != def;
 361 }
 362
 363 /* Walk the subset of the dominator tree rooted at OCC, setting the
 364    RECIP_DEF field to a definition of 1.0 / DEF that can be used in
 365    the given basic block.  The field may be left NULL, of course,
 366    if it is not possible or profitable to do the optimization.
 367
 368    DEF_BSI is an iterator pointing at the statement defining DEF.
 369    If RECIP_DEF is set, a dominator already has a computation that can
 370    be used.  */
 371
 372 static void
 373 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
 374                     tree def, tree recip_def, int threshold)
 375 {
 376   tree type;
 377   gassign *new_stmt;
 378   gimple_stmt_iterator gsi;
 379   struct occurrence *occ_child;
 380
 381   if (!recip_def
 382       && (occ->bb_has_division || !flag_trapping_math)
 383       && occ->num_divisions >= threshold)
 384     {
 385       /* Make a variable with the replacement and substitute it.  */
 386       type = TREE_TYPE (def);
 387       recip_def = create_tmp_reg (type, "reciptmp");
 388       new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
 389                                       build_one_cst (type), def);
 390
 391       if (occ->bb_has_division)
 392         {
 393           /* Case 1: insert before an existing division.  */
 394           gsi = gsi_after_labels (occ->bb);
 395           while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
 396             gsi_next (&gsi);
 397
 398           gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
 399         }
 400       else if (def_gsi && occ->bb == def_gsi->bb)
 401         {
 402           /* Case 2: insert right after the definition.  Note that this will
 403              never happen if the definition statement can throw, because in
 404              that case the sole successor of the statement's basic block will
 405              dominate all the uses as well.  */
 406           gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
 407         }
 408       else
 409         {
 410           /* Case 3: insert in a basic block not containing defs/uses.  */
 411           gsi = gsi_after_labels (occ->bb);
 412           gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
 413         }
 414
 415       reciprocal_stats.rdivs_inserted++;
 416
 417       occ->recip_def_stmt = new_stmt;
 418     }
 419
 420   occ->recip_def = recip_def;
 421   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
 422     insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold);
 423 }
 424
 425
 426 /* Replace the division at USE_P with a multiplication by the reciprocal, if
 427    possible.  */
 428
 429 static inline void
 430 replace_reciprocal (use_operand_p use_p)
 431 {
 432   gimple use_stmt = USE_STMT (use_p);
 433   basic_block bb = gimple_bb (use_stmt);
 434   struct occurrence *occ = (struct occurrence *) bb->aux;
 435
 436   if (optimize_bb_for_speed_p (bb)
 437       && occ->recip_def && use_stmt != occ->recip_def_stmt)
 438     {
 439       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
 440       gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
 441       SET_USE (use_p, occ->recip_def);
 442       fold_stmt_inplace (&gsi);
 443       update_stmt (use_stmt);
 444     }
 445 }
 446
 447
 448 /* Free OCC and return one more "struct occurrence" to be freed.  */
 449
 450 static struct occurrence *
 451 free_bb (struct occurrence *occ)
 452 {
 453   struct occurrence *child, *next;
 454
 455   /* First get the two pointers hanging off OCC.  */
 456   next = occ->next;
 457   child = occ->children;
 458   occ->bb->aux = NULL;
 459   occ_pool->remove (occ);
 460
 461   /* Now ensure that we don't recurse unless it is necessary.  */
 462   if (!child)
 463     return next;
 464   else
 465     {
 466       while (next)
 467         next = free_bb (next);
 468
 469       return child;
 470     }
 471 }
 472
 473
 474 /* Look for floating-point divisions among DEF's uses, and try to
 475    replace them by multiplications with the reciprocal.  Add
 476    as many statements computing the reciprocal as needed.
 477
 478    DEF must be a GIMPLE register of a floating-point type.  */
 479
 480 static void
 481 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
 482 {
 483   use_operand_p use_p;
 484   imm_use_iterator use_iter;
 485   struct occurrence *occ;
 486   int count = 0, threshold;
 487
 488   gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));
 489
 490   FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
 491     {
 492       gimple use_stmt = USE_STMT (use_p);
 493       if (is_division_by (use_stmt, def))
 494         {
 495           register_division_in (gimple_bb (use_stmt));
 496           count++;
 497         }
 498     }
 499
 500   /* Do the expensive part only if we can hope to optimize something.  */
 501   threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
 502   if (count >= threshold)
 503     {
 504       gimple use_stmt;
 505       for (occ = occ_head; occ; occ = occ->next)
 506         {
 507           compute_merit (occ);
 508           insert_reciprocals (def_gsi, occ, def, NULL, threshold);
 509         }
 510
 511       FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
 512         {
 513           if (is_division_by (use_stmt, def))
 514             {
 515               FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
 516                 replace_reciprocal (use_p);
 517             }
 518         }
 519     }
 520
 521   for (occ = occ_head; occ; )
 522     occ = free_bb (occ);
 523
 524   occ_head = NULL;
 525 }
 526
 527 /* Go through all the floating-point SSA_NAMEs, and call
 528    execute_cse_reciprocals_1 on each of them.  */
 529 namespace {
 530
 531 const pass_data pass_data_cse_reciprocals =
 532 {
 533   GIMPLE_PASS, /* type */
 534   "recip", /* name */
 535   OPTGROUP_NONE, /* optinfo_flags */
 536   TV_NONE, /* tv_id */
 537   PROP_ssa, /* properties_required */
 538   0, /* properties_provided */
 539   0, /* properties_destroyed */
 540   0, /* todo_flags_start */
 541   TODO_update_ssa, /* todo_flags_finish */
 542 };
 543
 544 class pass_cse_reciprocals : public gimple_opt_pass
 545 {
 546 public:
 547   pass_cse_reciprocals (gcc::context *ctxt)
 548     : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
 549   {}
 550
 551   /* opt_pass methods: */
 552   virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
 553   virtual unsigned int execute (function *);
 554
 555 }; // class pass_cse_reciprocals
 556
 557 unsigned int
 558 pass_cse_reciprocals::execute (function *fun)
 559 {
 560   basic_block bb;
 561   tree arg;
 562
 563   occ_pool = new pool_allocator<occurrence>
 564     ("dominators for recip", n_basic_blocks_for_fn (fun) / 3 + 1);
 565
 566   memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
 567   calculate_dominance_info (CDI_DOMINATORS);
 568   calculate_dominance_info (CDI_POST_DOMINATORS);
 569
 570 #ifdef ENABLE_CHECKING
 571   FOR_EACH_BB_FN (bb, fun)
 572     gcc_assert (!bb->aux);
 573 #endif
 574
 575   for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
 576     if (FLOAT_TYPE_P (TREE_TYPE (arg))
 577         && is_gimple_reg (arg))
 578       {
 579         tree name = ssa_default_def (fun, arg);
 580         if (name)
 581           execute_cse_reciprocals_1 (NULL, name);
 582       }
 583
 584   FOR_EACH_BB_FN (bb, fun)
 585     {
 586       tree def;
 587
 588       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
 589            gsi_next (&gsi))
 590         {
 591           gphi *phi = gsi.phi ();
 592           def = PHI_RESULT (phi);
 593           if (! virtual_operand_p (def)
 594               && FLOAT_TYPE_P (TREE_TYPE (def)))
 595             execute_cse_reciprocals_1 (NULL, def);
 596         }
 597
 598       for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
 599            gsi_next (&gsi))
 600         {
 601           gimple stmt = gsi_stmt (gsi);
 602
 603           if (gimple_has_lhs (stmt)
 604               && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
 605               && FLOAT_TYPE_P (TREE_TYPE (def))
 606               && TREE_CODE (def) == SSA_NAME)
 607             execute_cse_reciprocals_1 (&gsi, def);
 608         }
 609
 610       if (optimize_bb_for_size_p (bb))
 611         continue;
 612
 613       /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
 614       for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
 615            gsi_next (&gsi))
 616         {
 617           gimple stmt = gsi_stmt (gsi);
 618           tree fndecl;
 619
 620           if (is_gimple_assign (stmt)
 621               && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
 622             {
 623               tree arg1 = gimple_assign_rhs2 (stmt);
 624               gimple stmt1;
 625
 626               if (TREE_CODE (arg1) != SSA_NAME)
 627                 continue;
 628
 629               stmt1 = SSA_NAME_DEF_STMT (arg1);
 630
 631               if (is_gimple_call (stmt1)
 632                   && gimple_call_lhs (stmt1)
 633                   && (fndecl = gimple_call_fndecl (stmt1))
 634                   && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
 635                       || DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD))
 636                 {
 637                   enum built_in_function code;
 638                   bool md_code, fail;
 639                   imm_use_iterator ui;
 640                   use_operand_p use_p;
 641
 642                   code = DECL_FUNCTION_CODE (fndecl);
 643                   md_code = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD;
 644
 645                   fndecl = targetm.builtin_reciprocal (code, md_code, false);
 646                   if (!fndecl)
 647                     continue;
 648
 649                   /* Check that all uses of the SSA name are divisions,
 650                      otherwise replacing the defining statement will do
 651                      the wrong thing.  */
 652                   fail = false;
 653                   FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
 654                     {
 655                       gimple stmt2 = USE_STMT (use_p);
 656                       if (is_gimple_debug (stmt2))
 657                         continue;
 658                       if (!is_gimple_assign (stmt2)
 659                           || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
 660                           || gimple_assign_rhs1 (stmt2) == arg1
 661                           || gimple_assign_rhs2 (stmt2) != arg1)
 662                         {
 663                           fail = true;
 664                           break;
 665                         }
 666                     }
 667                   if (fail)
 668                     continue;
 669
 670                   gimple_replace_ssa_lhs (stmt1, arg1);
 671                   gimple_call_set_fndecl (stmt1, fndecl);
 672                   update_stmt (stmt1);
 673                   reciprocal_stats.rfuncs_inserted++;
 674
 675                   FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
 676                     {
 677                       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
 678                       gimple_assign_set_rhs_code (stmt, MULT_EXPR);
 679                       fold_stmt_inplace (&gsi);
 680                       update_stmt (stmt);
 681                     }
 682                 }
 683             }
 684         }
 685     }
 686
 687   statistics_counter_event (fun, "reciprocal divs inserted",
 688                             reciprocal_stats.rdivs_inserted);
 689   statistics_counter_event (fun, "reciprocal functions inserted",
 690                             reciprocal_stats.rfuncs_inserted);
 691
 692   free_dominance_info (CDI_DOMINATORS);
 693   free_dominance_info (CDI_POST_DOMINATORS);
 694   delete occ_pool;
 695   return 0;
 696 }
 697
 698 } // anon namespace
 699
 700 gimple_opt_pass *
 701 make_pass_cse_reciprocals (gcc::context *ctxt)
 702 {
 703   return new pass_cse_reciprocals (ctxt);
 704 }
 705
 706 /* Records an occurrence at statement USE_STMT in the vector of trees
 707    STMTS if it is dominated by *TOP_BB or dominates it or this basic block
 708    is not yet initialized.  Returns true if the occurrence was pushed on
 709    the vector.  Adjusts *TOP_BB to be the basic block dominating all
 710    statements in the vector.  */
 711
 712 static bool
 713 maybe_record_sincos (vec<gimple> *stmts,
 714                      basic_block *top_bb, gimple use_stmt)
 715 {
 716   basic_block use_bb = gimple_bb (use_stmt);
 717   if (*top_bb
 718       && (*top_bb == use_bb
 719           || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
 720     stmts->safe_push (use_stmt);
 721   else if (!*top_bb
 722            || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
 723     {
 724       stmts->safe_push (use_stmt);
 725       *top_bb = use_bb;
 726     }
 727   else
 728     return false;
 729
 730   return true;
 731 }
 732
 733 /* Look for sin, cos and cexpi calls with the same argument NAME and
 734    create a single call to cexpi CSEing the result in this case.
 735    We first walk over all immediate uses of the argument collecting
 736    statements that we can CSE in a vector and in a second pass replace
 737    the statement rhs with a REALPART or IMAGPART expression on the
 738    result of the cexpi call we insert before the use statement that
 739    dominates all other candidates.  */
 740
 741 static bool
 742 execute_cse_sincos_1 (tree name)
 743 {
 744   gimple_stmt_iterator gsi;
 745   imm_use_iterator use_iter;
 746   tree fndecl, res, type;
 747   gimple def_stmt, use_stmt, stmt;
 748   int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
 749   auto_vec<gimple> stmts;
 750   basic_block top_bb = NULL;
 751   int i;
 752   bool cfg_changed = false;
 753
 754   type = TREE_TYPE (name);
 755   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
 756     {
 757       if (gimple_code (use_stmt) != GIMPLE_CALL
 758           || !gimple_call_lhs (use_stmt)
 759           || !(fndecl = gimple_call_fndecl (use_stmt))
 760           || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
 761         continue;
 762
 763       switch (DECL_FUNCTION_CODE (fndecl))
 764         {
 765         CASE_FLT_FN (BUILT_IN_COS):
 766           seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
 767           break;
 768
 769         CASE_FLT_FN (BUILT_IN_SIN):
 770           seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
 771           break;
 772
 773         CASE_FLT_FN (BUILT_IN_CEXPI):
 774           seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
 775           break;
 776
 777         default:;
 778         }
 779     }
 780
 781   if (seen_cos + seen_sin + seen_cexpi <= 1)
 782     return false;
 783
 784   /* Simply insert cexpi at the beginning of top_bb but not earlier than
 785      the name def statement.  */
 786   fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
 787   if (!fndecl)
 788     return false;
 789   stmt = gimple_build_call (fndecl, 1, name);
 790   res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
 791   gimple_call_set_lhs (stmt, res);
 792
 793   def_stmt = SSA_NAME_DEF_STMT (name);
 794   if (!SSA_NAME_IS_DEFAULT_DEF (name)
 795       && gimple_code (def_stmt) != GIMPLE_PHI
 796       && gimple_bb (def_stmt) == top_bb)
 797     {
 798       gsi = gsi_for_stmt (def_stmt);
 799       gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
 800     }
 801   else
 802     {
 803       gsi = gsi_after_labels (top_bb);
 804       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
 805     }
 806   sincos_stats.inserted++;
 807
 808   /* And adjust the recorded old call sites.  */
 809   for (i = 0; stmts.iterate (i, &use_stmt); ++i)
 810     {
 811       tree rhs = NULL;
 812       fndecl = gimple_call_fndecl (use_stmt);
 813
 814       switch (DECL_FUNCTION_CODE (fndecl))
 815         {
 816         CASE_FLT_FN (BUILT_IN_COS):
 817           rhs = fold_build1 (REALPART_EXPR, type, res);
 818           break;
 819
 820         CASE_FLT_FN (BUILT_IN_SIN):
 821           rhs = fold_build1 (IMAGPART_EXPR, type, res);
 822           break;
 823
 824         CASE_FLT_FN (BUILT_IN_CEXPI):
 825           rhs = res;
 826           break;
 827
 828         default:;
 829           gcc_unreachable ();
 830         }
 831
 832         /* Replace call with a copy.  */
 833         stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
 834
 835         gsi = gsi_for_stmt (use_stmt);
 836         gsi_replace (&gsi, stmt, true);
 837         if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
 838           cfg_changed = true;
 839     }
 840
 841   return cfg_changed;
 842 }
 843
 844 /* To evaluate powi(x,n), the floating point value x raised to the
 845    constant integer exponent n, we use a hybrid algorithm that
 846    combines the "window method" with look-up tables.  For an
 847    introduction to exponentiation algorithms and "addition chains",
 848    see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
 849    "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
 850    3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
 851    Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */
 852
 /* Default for POWI_MAX_MULTS, the largest number of multiplications
    that is expanded inline before falling back to the system library's
    pow function.  Since powi(x,n) needs at worst 2*bits(n)-2
    multiplications, this default never requires calling pow, powf or
    powl.  The definition can be overridden by defining the macro
    beforehand.  */

 #ifndef POWI_MAX_MULTS
 #define POWI_MAX_MULTS  (2 * HOST_BITS_PER_WIDE_INT - 2)
 #endif

 /* Number of entries in the "optimal power tree" lookup table: every
    exponent below this value is simply looked up in powi_table.  The
    same threshold sizes the cache of pseudo registers that hold
    intermediate results.  */
 #define POWI_TABLE_SIZE  256

 /* Width, in bits, of the window used by the "window method"
    exponentiation algorithm; this corresponds to a radix of
    (1<<POWI_WINDOW_SIZE) in the matching "m-ary method".  */
 #define POWI_WINDOW_SIZE  3
 872
 873 /* The following table is an efficient representation of an
 874    "optimal power tree".  For each value, i, the corresponding
 875    value, j, in the table states that an optimal evaluation
 876    sequence for calculating pow(x,i) can be found by evaluating
 877    pow(x,j)*pow(x,i-j).  An optimal power tree for the first
 878    100 integers is given in Knuth's "Seminumerical algorithms".  */
 879
 880 static const unsigned char powi_table[POWI_TABLE_SIZE] =
 881   {
 882       0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
 883       4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
 884       8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
 885      12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
 886      16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
 887      20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
 888      24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
 889      28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
 890      32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
 891      36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
 892      40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
 893      44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
 894      48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
 895      52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
 896      56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
 897      60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
 898      64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
 899      68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
 900      72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
 901      76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
 902      80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
 903      84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
 904      88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
 905      92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
 906      96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
 907     100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
 908     104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
 909     108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
 910     112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
 911     116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
 912     120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
 913     124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
 914   };
 915
 916
 917 /* Return the number of multiplications required to calculate
 918    powi(x,n) where n is less than POWI_TABLE_SIZE.  This is a
 919    subroutine of powi_cost.  CACHE is an array indicating
 920    which exponents have already been calculated.  */
 921
 922 static int
 923 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
 924 {
 925   /* If we've already calculated this exponent, then this evaluation
 926      doesn't require any additional multiplications.  */
 927   if (cache[n])
 928     return 0;
 929
 930   cache[n] = true;
 931   return powi_lookup_cost (n - powi_table[n], cache)
 932          + powi_lookup_cost (powi_table[n], cache) + 1;
 933 }
 934
 935 /* Return the number of multiplications required to calculate
 936    powi(x,n) for an arbitrary x, given the exponent N.  This
 937    function needs to be kept in sync with powi_as_mults below.  */
 938
 939 static int
 940 powi_cost (HOST_WIDE_INT n)
 941 {
 942   bool cache[POWI_TABLE_SIZE];
 943   unsigned HOST_WIDE_INT digit;
 944   unsigned HOST_WIDE_INT val;
 945   int result;
 946
 947   if (n == 0)
 948     return 0;
 949
 950   /* Ignore the reciprocal when calculating the cost.  */
 951   val = (n < 0) ? -n : n;
 952
 953   /* Initialize the exponent cache.  */
 954   memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
 955   cache[1] = true;
 956
 957   result = 0;
 958
 959   while (val >= POWI_TABLE_SIZE)
 960     {
 961       if (val & 1)
 962         {
 963           digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
 964           result += powi_lookup_cost (digit, cache)
 965                     + POWI_WINDOW_SIZE + 1;
 966           val >>= POWI_WINDOW_SIZE;
 967         }
 968       else
 969         {
 970           val >>= 1;
 971           result++;
 972         }
 973     }
 974
 975   return result + powi_lookup_cost (val, cache);
 976 }
 977
 978 /* Recursive subroutine of powi_as_mults.  This function takes the
 979    array, CACHE, of already calculated exponents and an exponent N and
 980    returns a tree that corresponds to CACHE[1]**N, with type TYPE.  */
 981
 982 static tree
 983 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
 984                  HOST_WIDE_INT n, tree *cache)
 985 {
 986   tree op0, op1, ssa_target;
 987   unsigned HOST_WIDE_INT digit;
 988   gassign *mult_stmt;
 989
 990   if (n < POWI_TABLE_SIZE && cache[n])
 991     return cache[n];
 992
 993   ssa_target = make_temp_ssa_name (type, NULL, "powmult");
 994
 995   if (n < POWI_TABLE_SIZE)
 996     {
 997       cache[n] = ssa_target;
 998       op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
 999       op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1000     }
1001   else if (n & 1)
1002     {
1003       digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1004       op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1005       op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1006     }
1007   else
1008     {
1009       op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1010       op1 = op0;
1011     }
1012
1013   mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1014   gimple_set_location (mult_stmt, loc);
1015   gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1016
1017   return ssa_target;
1018 }
1019
1020 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1021    This function needs to be kept in sync with powi_cost above.  */
1022
1023 static tree
1024 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1025                tree arg0, HOST_WIDE_INT n)
1026 {
1027   tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1028   gassign *div_stmt;
1029   tree target;
1030
1031   if (n == 0)
1032     return build_real (type, dconst1);
1033
1034   memset (cache, 0,  sizeof (cache));
1035   cache[1] = arg0;
1036
1037   result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
1038   if (n >= 0)
1039     return result;
1040
1041   /* If the original exponent was negative, reciprocate the result.  */
1042   target = make_temp_ssa_name (type, NULL, "powmult");
1043   div_stmt = gimple_build_assign (target, RDIV_EXPR,
1044                                   build_real (type, dconst1), result);
1045   gimple_set_location (div_stmt, loc);
1046   gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1047
1048   return target;
1049 }
1050
1051 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1052    location info LOC.  If the arguments are appropriate, create an
1053    equivalent sequence of statements prior to GSI using an optimal
1054    number of multiplications, and return an expession holding the
1055    result.  */
1056
1057 static tree
1058 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1059                             tree arg0, HOST_WIDE_INT n)
1060 {
1061   /* Avoid largest negative number.  */
1062   if (n != -n
1063       && ((n >= -1 && n <= 2)
1064           || (optimize_function_for_speed_p (cfun)
1065               && powi_cost (n) <= POWI_MAX_MULTS)))
1066     return powi_as_mults (gsi, loc, arg0, n);
1067
1068   return NULL_TREE;
1069 }
1070
1071 /* Build a gimple call statement that calls FN with argument ARG.
1072    Set the lhs of the call statement to a fresh SSA name.  Insert the
1073    statement prior to GSI's current position, and return the fresh
1074    SSA name.  */
1075
1076 static tree
1077 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1078                        tree fn, tree arg)
1079 {
1080   gcall *call_stmt;
1081   tree ssa_target;
1082
1083   call_stmt = gimple_build_call (fn, 1, arg);
1084   ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1085   gimple_set_lhs (call_stmt, ssa_target);
1086   gimple_set_location (call_stmt, loc);
1087   gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1088
1089   return ssa_target;
1090 }
1091
1092 /* Build a gimple binary operation with the given CODE and arguments
1093    ARG0, ARG1, assigning the result to a new SSA name for variable
1094    TARGET.  Insert the statement prior to GSI's current position, and
1095    return the fresh SSA name.*/
1096
1097 static tree
1098 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1099                         const char *name, enum tree_code code,
1100                         tree arg0, tree arg1)
1101 {
1102   tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1103   gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1104   gimple_set_location (stmt, loc);
1105   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1106   return result;
1107 }
1108
1109 /* Build a gimple reference operation with the given CODE and argument
1110    ARG, assigning the result to a new SSA name of TYPE with NAME.
1111    Insert the statement prior to GSI's current position, and return
1112    the fresh SSA name.  */
1113
1114 static inline tree
1115 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1116                       const char *name, enum tree_code code, tree arg0)
1117 {
1118   tree result = make_temp_ssa_name (type, NULL, name);
1119   gimple stmt = gimple_build_assign (result, build1 (code, type, arg0));
1120   gimple_set_location (stmt, loc);
1121   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1122   return result;
1123 }
1124
1125 /* Build a gimple assignment to cast VAL to TYPE.  Insert the statement
1126    prior to GSI's current position, and return the fresh SSA name.  */
1127
1128 static tree
1129 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1130                        tree type, tree val)
1131 {
1132   tree result = make_ssa_name (type);
1133   gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1134   gimple_set_location (stmt, loc);
1135   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1136   return result;
1137 }
1138
/* Description of how a fractional exponent decomposes into a sum of
   powers of 0.5, as filled in by representable_as_half_series_p.  */
struct pow_synth_sqrt_info
{
  /* Caller-provided array; factors[i] is true when 0.5**(i+1) is one
     of the terms of the sum.  */
  bool *factors;
  /* One more than the index of the last term of the sum, i.e. the
     length of the longest sqrt chain needed; 0 when nothing has been
     found.  */
  unsigned int deepest;
  /* Number of terms found before the last one, i.e. the number of
     multiplications needed to combine the terms.  */
  unsigned int num_mults;
};
1145
1146 /* Return true iff the real value C can be represented as a
1147    sum of powers of 0.5 up to N.  That is:
1148    C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1149    Record in INFO the various parameters of the synthesis algorithm such
1150    as the factors a[i], the maximum 0.5 power and the number of
1151    multiplications that will be required.  */
1152
1153 bool
1154 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1155                                  struct pow_synth_sqrt_info *info)
1156 {
1157   REAL_VALUE_TYPE factor = dconsthalf;
1158   REAL_VALUE_TYPE remainder = c;
1159
1160   info->deepest = 0;
1161   info->num_mults = 0;
1162   memset (info->factors, 0, n * sizeof (bool));
1163
1164   for (unsigned i = 0; i < n; i++)
1165     {
1166       REAL_VALUE_TYPE res;
1167
1168       /* If something inexact happened bail out now.  */
1169       if (REAL_ARITHMETIC (res, MINUS_EXPR, remainder, factor))
1170         return false;
1171
1172       /* We have hit zero.  The number is representable as a sum
1173          of powers of 0.5.  */
1174       if (REAL_VALUES_EQUAL (res, dconst0))
1175         {
1176           info->factors[i] = true;
1177           info->deepest = i + 1;
1178           return true;
1179         }
1180       else if (!REAL_VALUE_NEGATIVE (res))
1181         {
1182           remainder = res;
1183           info->factors[i] = true;
1184           info->num_mults++;
1185         }
1186       else
1187         info->factors[i] = false;
1188
1189       REAL_ARITHMETIC (factor, MULT_EXPR, factor, dconsthalf);
1190     }
1191   return false;
1192 }
1193
1194 /* Return the tree corresponding to FN being applied
1195    to ARG N times at GSI and LOC.
1196    Look up previous results from CACHE if need be.
1197    cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times.  */
1198
1199 static tree
1200 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1201               tree fn, location_t loc, tree *cache)
1202 {
1203   tree res = cache[n];
1204   if (!res)
1205     {
1206       tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1207       res = build_and_insert_call (gsi, loc, fn, prev);
1208       cache[n] = res;
1209     }
1210
1211   return res;
1212 }
1213
/* Write to STREAM the text of FNAME applied to ARG N times, nested.
   For example FNAME = "foo", ARG = "x", N = 2 gives "foo (foo (x))",
   and N = 0 gives just "x".  */

static void
print_nested_fn (FILE* stream, const char *fname, const char* arg,
                 unsigned int n)
{
  unsigned int depth;

  /* One opening "FNAME (" per level, outermost first.  */
  for (depth = 0; depth < n; depth++)
    fprintf (stream, "%s (", fname);

  fprintf (stream, "%s", arg);

  /* And the matching closing parentheses.  */
  for (depth = 0; depth < n; depth++)
    fprintf (stream, ")");
}
1231
1232 /* Print to STREAM the fractional sequence of sqrt chains
1233    applied to ARG, described by INFO.  Used for the dump file.  */
1234
1235 static void
1236 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1237                                 struct pow_synth_sqrt_info *info)
1238 {
1239   for (unsigned int i = 0; i < info->deepest; i++)
1240     {
1241       bool is_set = info->factors[i];
1242       if (is_set)
1243         {
1244           print_nested_fn (stream, "sqrt", arg, i + 1);
1245           if (i != info->deepest - 1)
1246             fprintf (stream, " * ");
1247         }
1248     }
1249 }
1250
1251 /* Print to STREAM a representation of raising ARG to an integer
1252    power N.  Used for the dump file.  */
1253
1254 static void
1255 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1256 {
1257   if (n > 1)
1258     fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1259   else if (n == 1)
1260     fprintf (stream, "%s", arg);
1261 }
1262
1263 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1264    square roots.  Place at GSI and LOC.  Limit the maximum depth
1265    of the sqrt chains to MAX_DEPTH.  Return the tree holding the
1266    result of the expanded sequence or NULL_TREE if the expansion failed.
1267
1268    This routine assumes that ARG1 is a real number with a fractional part
1269    (the integer exponent case will have been handled earlier in
1270    gimple_expand_builtin_pow).
1271
1272    For ARG1 > 0.0:
1273    * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1274      FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1275                     FRAC_PART == ARG1 - WHOLE_PART:
1276      Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1277      POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1278      if it can be expressed as such, that is if FRAC_PART satisfies:
1279      FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1280      where integer a[i] is either 0 or 1.
1281
1282      Example:
1283      POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1284        --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1285
1286    For ARG1 < 0.0 there are two approaches:
1287    * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1288          is calculated as above.
1289
1290      Example:
1291      POW (x, -5.625) == 1.0 / POW (x, 5.625)
1292        -->  1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1293
1294    * (B) : WHOLE_PART := - ceil (abs (ARG1))
1295            FRAC_PART  := ARG1 - WHOLE_PART
1296      and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1297      Example:
1298      POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1299        --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1300
1301    For ARG1 < 0.0 we choose between (A) and (B) depending on
1302    how many multiplications we'd have to do.
1303    So, for the example in (B): POW (x, -5.875), if we were to
1304    follow algorithm (A) we would produce:
1305    1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1306    which contains more multiplications than approach (B).
1307
1308    Hopefully, this approach will eliminate potentially expensive POW library
1309    calls when unsafe floating point math is enabled and allow the compiler to
1310    further optimise the multiplies, square roots and divides produced by this
1311    function.  */
1312
1313 static tree
1314 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1315                      tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1316 {
1317   tree type = TREE_TYPE (arg0);
1318   machine_mode mode = TYPE_MODE (type);
1319   tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1320   bool one_over = true;
1321
1322   if (!sqrtfn)
1323     return NULL_TREE;
1324
1325   if (TREE_CODE (arg1) != REAL_CST)
1326     return NULL_TREE;
1327
1328   REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1329
1330   gcc_assert (max_depth > 0);
1331   tree *cache = XALLOCAVEC (tree, max_depth + 1);
1332
1333   struct pow_synth_sqrt_info synth_info;
1334   synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1335   synth_info.deepest = 0;
1336   synth_info.num_mults = 0;
1337
1338   bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1339   REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1340
1341   /* The whole and fractional parts of exp.  */
1342   REAL_VALUE_TYPE whole_part;
1343   REAL_VALUE_TYPE frac_part;
1344
1345   real_floor (&whole_part, mode, &exp);
1346   REAL_ARITHMETIC (frac_part, MINUS_EXPR, exp, whole_part);
1347
1348
1349   REAL_VALUE_TYPE ceil_whole = dconst0;
1350   REAL_VALUE_TYPE ceil_fract = dconst0;
1351
1352   if (neg_exp)
1353     {
1354       real_ceil (&ceil_whole, mode, &exp);
1355       REAL_ARITHMETIC (ceil_fract, MINUS_EXPR, ceil_whole, exp);
1356     }
1357
1358   if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1359     return NULL_TREE;
1360
1361   /* Check whether it's more profitable to not use 1.0 / ...  */
1362   if (neg_exp)
1363     {
1364       struct pow_synth_sqrt_info alt_synth_info;
1365       alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1366       alt_synth_info.deepest = 0;
1367       alt_synth_info.num_mults = 0;
1368
1369       if (representable_as_half_series_p (ceil_fract, max_depth,
1370                                            &alt_synth_info)
1371           && alt_synth_info.deepest <= synth_info.deepest
1372           && alt_synth_info.num_mults < synth_info.num_mults)
1373         {
1374           whole_part = ceil_whole;
1375           frac_part = ceil_fract;
1376           synth_info.deepest = alt_synth_info.deepest;
1377           synth_info.num_mults = alt_synth_info.num_mults;
1378           memcpy (synth_info.factors, alt_synth_info.factors,
1379                   (max_depth + 1) * sizeof (bool));
1380           one_over = false;
1381         }
1382     }
1383
1384   HOST_WIDE_INT n = real_to_integer (&whole_part);
1385   REAL_VALUE_TYPE cint;
1386   real_from_integer (&cint, VOIDmode, n, SIGNED);
1387
1388   if (!real_identical (&whole_part, &cint))
1389     return NULL_TREE;
1390
1391   if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1392     return NULL_TREE;
1393
1394   memset (cache, 0, (max_depth + 1) * sizeof (tree));
1395
1396   tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1397
1398   /* Calculate the integer part of the exponent.  */
1399   if (n > 1)
1400     {
1401       integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1402       if (!integer_res)
1403         return NULL_TREE;
1404     }
1405
1406   if (dump_file)
1407     {
1408       char string[64];
1409
1410       real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1411       fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1412
1413       if (neg_exp)
1414         {
1415           if (one_over)
1416             {
1417               fprintf (dump_file, "1.0 / (");
1418               dump_integer_part (dump_file, "x", n);
1419               if (n > 0)
1420                 fprintf (dump_file, " * ");
1421               dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1422               fprintf (dump_file, ")");
1423             }
1424           else
1425             {
1426               dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1427               fprintf (dump_file, " / (");
1428               dump_integer_part (dump_file, "x", n);
1429               fprintf (dump_file, ")");
1430             }
1431         }
1432       else
1433         {
1434           dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1435           if (n > 0)
1436             fprintf (dump_file, " * ");
1437           dump_integer_part (dump_file, "x", n);
1438         }
1439
1440       fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1441     }
1442
1443
1444   tree fract_res = NULL_TREE;
1445   cache[0] = arg0;
1446
1447   /* Calculate the fractional part of the exponent.  */
1448   for (unsigned i = 0; i < synth_info.deepest; i++)
1449     {
1450       if (synth_info.factors[i])
1451         {
1452           tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1453
1454           if (!fract_res)
1455               fract_res = sqrt_chain;
1456
1457           else
1458             fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1459                                            fract_res, sqrt_chain);
1460         }
1461     }
1462
1463   tree res = NULL_TREE;
1464
1465   if (neg_exp)
1466     {
1467       if (one_over)
1468         {
1469           if (n > 0)
1470             res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1471                                            fract_res, integer_res);
1472           else
1473             res = fract_res;
1474
1475           res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1476                                           build_real (type, dconst1), res);
1477         }
1478       else
1479         {
1480           res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1481                                          fract_res, integer_res);
1482         }
1483     }
1484   else
1485     res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1486                                    fract_res, integer_res);
1487   return res;
1488 }
1489
1490 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1491    with location info LOC.  If possible, create an equivalent and
1492    less expensive sequence of statements prior to GSI, and return an
1493    expession holding the result.  */
1494
1495 static tree
1496 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
1497                            tree arg0, tree arg1)
1498 {
1499   REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
1500   REAL_VALUE_TYPE c2, dconst3;
1501   HOST_WIDE_INT n;
1502   tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
1503   machine_mode mode;
1504   bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
1505   bool hw_sqrt_exists, c_is_int, c2_is_int;
1506
1507   dconst1_4 = dconst1;
1508   SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
1509
1510   /* If the exponent isn't a constant, there's nothing of interest
1511      to be done.  */
1512   if (TREE_CODE (arg1) != REAL_CST)
1513     return NULL_TREE;
1514
1515   /* If the exponent is equivalent to an integer, expand to an optimal
1516      multiplication sequence when profitable.  */
1517   c = TREE_REAL_CST (arg1);
1518   n = real_to_integer (&c);
1519   real_from_integer (&cint, VOIDmode, n, SIGNED);
1520   c_is_int = real_identical (&c, &cint);
1521
1522   if (c_is_int
1523       && ((n >= -1 && n <= 2)
1524           || (flag_unsafe_math_optimizations
1525               && speed_p
1526               && powi_cost (n) <= POWI_MAX_MULTS)))
1527     return gimple_expand_builtin_powi (gsi, loc, arg0, n);
1528
1529   /* Attempt various optimizations using sqrt and cbrt.  */
1530   type = TREE_TYPE (arg0);
1531   mode = TYPE_MODE (type);
1532   sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1533
1534   /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
1535      unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
1536      sqrt(-0) = -0.  */
1537   if (sqrtfn
1538       && REAL_VALUES_EQUAL (c, dconsthalf)
1539       && !HONOR_SIGNED_ZEROS (mode))
1540     return build_and_insert_call (gsi, loc, sqrtfn, arg0);
1541
1542   hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
1543
1544   /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
1545      optimizations since 1./3. is not exactly representable.  If x
1546      is negative and finite, the correct value of pow(x,1./3.) is
1547      a NaN with the "invalid" exception raised, because the value
1548      of 1./3. actually has an even denominator.  The correct value
1549      of cbrt(x) is a negative real value.  */
1550   cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
1551   dconst1_3 = real_value_truncate (mode, dconst_third ());
1552
1553   if (flag_unsafe_math_optimizations
1554       && cbrtfn
1555       && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
1556       && REAL_VALUES_EQUAL (c, dconst1_3))
1557     return build_and_insert_call (gsi, loc, cbrtfn, arg0);
1558
1559   /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
1560      if we don't have a hardware sqrt insn.  */
1561   dconst1_6 = dconst1_3;
1562   SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
1563
1564   if (flag_unsafe_math_optimizations
1565       && sqrtfn
1566       && cbrtfn
1567       && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
1568       && speed_p
1569       && hw_sqrt_exists
1570       && REAL_VALUES_EQUAL (c, dconst1_6))
1571     {
1572       /* sqrt(x)  */
1573       sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
1574
1575       /* cbrt(sqrt(x))  */
1576       return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
1577     }
1578
1579
1580   /* Attempt to expand the POW as a product of square root chains.
1581      Expand the 0.25 case even when otpimising for size.  */
1582   if (flag_unsafe_math_optimizations
1583       && sqrtfn
1584       && hw_sqrt_exists
1585       && (speed_p || REAL_VALUES_EQUAL (c, dconst1_4))
1586       && !HONOR_SIGNED_ZEROS (mode))
1587     {
1588       unsigned int max_depth = speed_p
1589                                 ? PARAM_VALUE (PARAM_MAX_POW_SQRT_DEPTH)
1590                                 : 2;
1591
1592       tree expand_with_sqrts
1593         = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
1594
1595       if (expand_with_sqrts)
1596         return expand_with_sqrts;
1597     }
1598
1599   real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
1600   n = real_to_integer (&c2);
1601   real_from_integer (&cint, VOIDmode, n, SIGNED);
1602   c2_is_int = real_identical (&c2, &cint);
1603
1604   /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
1605
1606      powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
1607      1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.
1608
1609      Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
1610      different from pow(x, 1./3.) due to rounding and behavior with
1611      negative x, we need to constrain this transformation to unsafe
1612      math and positive x or finite math.  */
1613   real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
1614   real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
1615   real_round (&c2, mode, &c2);
1616   n = real_to_integer (&c2);
1617   real_from_integer (&cint, VOIDmode, n, SIGNED);
1618   real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
1619   real_convert (&c2, mode, &c2);
1620
1621   if (flag_unsafe_math_optimizations
1622       && cbrtfn
1623       && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
1624       && real_identical (&c2, &c)
1625       && !c2_is_int
1626       && optimize_function_for_speed_p (cfun)
1627       && powi_cost (n / 3) <= POWI_MAX_MULTS)
1628     {
1629       tree powi_x_ndiv3 = NULL_TREE;
1630
1631       /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
1632          possible or profitable, give up.  Skip the degenerate case when
1633          abs(n) < 3, where the result is always 1.  */
1634       if (absu_hwi (n) >= 3)
1635         {
1636           powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
1637                                                      abs_hwi (n / 3));
1638           if (!powi_x_ndiv3)
1639             return NULL_TREE;
1640         }
1641
1642       /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
1643          as that creates an unnecessary variable.  Instead, just produce
1644          either cbrt(x) or cbrt(x) * cbrt(x).  */
1645       cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
1646
1647       if (absu_hwi (n) % 3 == 1)
1648         powi_cbrt_x = cbrt_x;
1649       else
1650         powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1651                                               cbrt_x, cbrt_x);
1652
1653       /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
1654       if (absu_hwi (n) < 3)
1655         result = powi_cbrt_x;
1656       else
1657         result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1658                                          powi_x_ndiv3, powi_cbrt_x);
1659
1660       /* If n is negative, reciprocate the result.  */
1661       if (n < 0)
1662         result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1663                                          build_real (type, dconst1), result);
1664
1665       return result;
1666     }
1667
1668   /* No optimizations succeeded.  */
1669   return NULL_TREE;
1670 }
1671
1672 /* ARG is the argument to a cabs builtin call in GSI with location info
1673    LOC.  Create a sequence of statements prior to GSI that calculates
1674    sqrt(R*R + I*I), where R and I are the real and imaginary components
1675    of ARG, respectively.  Return an expression holding the result.  */
1676
1677 static tree
1678 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
1679 {
1680   tree real_part, imag_part, addend1, addend2, sum, result;
1681   tree type = TREE_TYPE (TREE_TYPE (arg));
1682   tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1683   machine_mode mode = TYPE_MODE (type);
1684
1685   if (!flag_unsafe_math_optimizations
1686       || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
1687       || !sqrtfn
1688       || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
1689     return NULL_TREE;
1690
1691   real_part = build_and_insert_ref (gsi, loc, type, "cabs",
1692                                     REALPART_EXPR, arg);
1693   addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1694                                     real_part, real_part);
1695   imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
1696                                     IMAGPART_EXPR, arg);
1697   addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1698                                     imag_part, imag_part);
1699   sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
1700   result = build_and_insert_call (gsi, loc, sqrtfn, sum);
1701
1702   return result;
1703 }
1704
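/* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
   on the SSA_NAME argument of each of them.  Also expand powi(x,n) into
   an optimal number of multiplies, when n is a constant, and replace calls
   to pow and cabs by inline sequences whenever gimple_expand_builtin_pow
   or gimple_expand_builtin_cabs succeeds.  */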
1708
1709 namespace {
1710
/* Descriptor of the "sincos" pass: a GIMPLE pass that requires the function
   to be in SSA form, provides and destroys no property, and asks for an
   SSA update once it has run.  */
const pass_data pass_data_cse_sincos =
{
  GIMPLE_PASS, /* type */
  "sincos", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
1723
1724 class pass_cse_sincos : public gimple_opt_pass
1725 {
1726 public:
1727   pass_cse_sincos (gcc::context *ctxt)
1728     : gimple_opt_pass (pass_data_cse_sincos, ctxt)
1729   {}
1730
1731   /* opt_pass methods: */
1732   virtual bool gate (function *)
1733     {
1734       /* We no longer require either sincos or cexp, since powi expansion
1735          piggybacks on this pass.  */
1736       return optimize;
1737     }
1738
1739   virtual unsigned int execute (function *);
1740
1741 }; // class pass_cse_sincos
1742
1743 unsigned int
1744 pass_cse_sincos::execute (function *fun)
1745 {
1746   basic_block bb;
1747   bool cfg_changed = false;
1748
1749   calculate_dominance_info (CDI_DOMINATORS);
1750   memset (&sincos_stats, 0, sizeof (sincos_stats));
1751
1752   FOR_EACH_BB_FN (bb, fun)
1753     {
1754       gimple_stmt_iterator gsi;
1755       bool cleanup_eh = false;
1756
1757       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1758         {
1759           gimple stmt = gsi_stmt (gsi);
1760           tree fndecl;
1761
1762           /* Only the last stmt in a bb could throw, no need to call
1763              gimple_purge_dead_eh_edges if we change something in the middle
1764              of a basic block.  */
1765           cleanup_eh = false;
1766
1767           if (is_gimple_call (stmt)
1768               && gimple_call_lhs (stmt)
1769               && (fndecl = gimple_call_fndecl (stmt))
1770               && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
1771             {
1772               tree arg, arg0, arg1, result;
1773               HOST_WIDE_INT n;
1774               location_t loc;
1775
1776               switch (DECL_FUNCTION_CODE (fndecl))
1777                 {
1778                 CASE_FLT_FN (BUILT_IN_COS):
1779                 CASE_FLT_FN (BUILT_IN_SIN):
1780                 CASE_FLT_FN (BUILT_IN_CEXPI):
1781                   /* Make sure we have either sincos or cexp.  */
1782                   if (!targetm.libc_has_function (function_c99_math_complex)
1783                       && !targetm.libc_has_function (function_sincos))
1784                     break;
1785
1786                   arg = gimple_call_arg (stmt, 0);
1787                   if (TREE_CODE (arg) == SSA_NAME)
1788                     cfg_changed |= execute_cse_sincos_1 (arg);
1789                   break;
1790
1791                 CASE_FLT_FN (BUILT_IN_POW):
1792                   arg0 = gimple_call_arg (stmt, 0);
1793                   arg1 = gimple_call_arg (stmt, 1);
1794
1795                   loc = gimple_location (stmt);
1796                   result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
1797
1798                   if (result)
1799                     {
1800                       tree lhs = gimple_get_lhs (stmt);
1801                       gassign *new_stmt = gimple_build_assign (lhs, result);
1802                       gimple_set_location (new_stmt, loc);
1803                       unlink_stmt_vdef (stmt);
1804                       gsi_replace (&gsi, new_stmt, true);
1805                       cleanup_eh = true;
1806                       if (gimple_vdef (stmt))
1807                         release_ssa_name (gimple_vdef (stmt));
1808                     }
1809                   break;
1810
1811                 CASE_FLT_FN (BUILT_IN_POWI):
1812                   arg0 = gimple_call_arg (stmt, 0);
1813                   arg1 = gimple_call_arg (stmt, 1);
1814                   loc = gimple_location (stmt);
1815
1816                   if (real_minus_onep (arg0))
1817                     {
1818                       tree t0, t1, cond, one, minus_one;
1819                       gassign *stmt;
1820
1821                       t0 = TREE_TYPE (arg0);
1822                       t1 = TREE_TYPE (arg1);
1823                       one = build_real (t0, dconst1);
1824                       minus_one = build_real (t0, dconstm1);
1825
1826                       cond = make_temp_ssa_name (t1, NULL, "powi_cond");
1827                       stmt = gimple_build_assign (cond, BIT_AND_EXPR,
1828                                                   arg1, build_int_cst (t1, 1));
1829                       gimple_set_location (stmt, loc);
1830                       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1831
1832                       result = make_temp_ssa_name (t0, NULL, "powi");
1833                       stmt = gimple_build_assign (result, COND_EXPR, cond,
1834                                                   minus_one, one);
1835                       gimple_set_location (stmt, loc);
1836                       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1837                     }
1838                   else
1839                     {
1840                       if (!tree_fits_shwi_p (arg1))
1841                         break;
1842
1843                       n = tree_to_shwi (arg1);
1844                       result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
1845                     }
1846
1847                   if (result)
1848                     {
1849                       tree lhs = gimple_get_lhs (stmt);
1850                       gassign *new_stmt = gimple_build_assign (lhs, result);
1851                       gimple_set_location (new_stmt, loc);
1852                       unlink_stmt_vdef (stmt);
1853                       gsi_replace (&gsi, new_stmt, true);
1854                       cleanup_eh = true;
1855                       if (gimple_vdef (stmt))
1856                         release_ssa_name (gimple_vdef (stmt));
1857                     }
1858                   break;
1859
1860                 CASE_FLT_FN (BUILT_IN_CABS):
1861                   arg0 = gimple_call_arg (stmt, 0);
1862                   loc = gimple_location (stmt);
1863                   result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
1864
1865                   if (result)
1866                     {
1867                       tree lhs = gimple_get_lhs (stmt);
1868                       gassign *new_stmt = gimple_build_assign (lhs, result);
1869                       gimple_set_location (new_stmt, loc);
1870                       unlink_stmt_vdef (stmt);
1871                       gsi_replace (&gsi, new_stmt, true);
1872                       cleanup_eh = true;
1873                       if (gimple_vdef (stmt))
1874                         release_ssa_name (gimple_vdef (stmt));
1875                     }
1876                   break;
1877
1878                 default:;
1879                 }
1880             }
1881         }
1882       if (cleanup_eh)
1883         cfg_changed |= gimple_purge_dead_eh_edges (bb);
1884     }
1885
1886   statistics_counter_event (fun, "sincos statements inserted",
1887                             sincos_stats.inserted);
1888
1889   free_dominance_info (CDI_DOMINATORS);
1890   return cfg_changed ? TODO_cleanup_cfg : 0;
1891 }
1892
1893 } // anon namespace
1894
1895 gimple_opt_pass *
1896 make_pass_cse_sincos (gcc::context *ctxt)
1897 {
1898   return new pass_cse_sincos (ctxt);
1899 }
1900
/* A symbolic number is used to detect byte permutation and selection
   patterns.  Therefore the field N contains an artificial number
   consisting of octet sized markers:

   0    - target byte has the value 0
   FF   - target byte has an unknown value (eg. due to sign extension)
   1..size - target byte holds the source byte whose index is the marker
             value minus one (marker 1 is the lowest order source byte).

   To detect permutations on memory sources (arrays and structures), a symbolic
   number is also associated with a base address (the array or structure the
   load is made from), an offset from the base address and a range which gives
   the difference between the highest and lowest accessed memory location to
   make such a symbolic number. The range is thus different from size which
   reflects the size of the type of current expression. Note that for non
   memory source, range holds the same value as size.

   For instance, for an array char a[], (short) a[0] | (short) a[3] would have
   a size of 2 but a range of 4 while (short) a[0] | ((short) a[0] << 1) would
   still have a size of 2 but this time a range of 1.  */

struct symbolic_number {
  /* The markers, BITS_PER_MARKER bits per byte of the expression.  */
  uint64_t n;
  /* Type of the expression the number currently describes.  */
  tree type;
  /* Base of the memory accessed, or NULL_TREE for a non memory source.  */
  tree base_addr;
  /* Variable part of the offset from BASE_ADDR, as returned by
     get_inner_reference; may be NULL_TREE.  */
  tree offset;
  /* Constant part of the offset from BASE_ADDR, in bytes.  Only set for
     memory sources.  */
  HOST_WIDE_INT bytepos;
  /* Despite its name this is the pointer type returned by
     reference_alias_ptr_type for the memory source, or NULL_TREE.  */
  tree alias_set;
  /* Virtual use of the load statement, or NULL_TREE.  */
  tree vuse;
  /* Number of bytes between the lowest and highest accessed memory
     location, both included (see above).  */
  unsigned HOST_WIDE_INT range;
};
1931
/* Width in bits of one marker of a symbolic number.  */
#define BITS_PER_MARKER 8

/* Mask selecting the lowest order marker.  */
#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)

/* Marker of a byte whose value is unknown.  */
#define MARKER_BYTE_UNKNOWN MARKER_MASK

/* The highest order marker of the symbolic number N holding SIZE markers,
   left in place (i.e. not shifted down).  */
#define HEAD_MARKER(n, size) \
  ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))

/* The value a find_bswap_or_nop_1 result must have for the expression to
   be a nop.  It is masked according to the size of the symbolic number
   before being compared.  */
#define CMPNOP \
  (sizeof (int64_t) < 8 \
   ? 0 \
   : (((uint64_t) 0x08070605 << 32) | 0x04030201))

/* The value a find_bswap_or_nop_1 result must have for the expression to
   be a byte swap.  It is adjusted according to the size of the symbolic
   number before being compared.  */
#define CMPXCHG \
  (sizeof (int64_t) < 8 \
   ? 0 \
   : (((uint64_t) 0x01020304 << 32) | 0x05060708))
1949
1950 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
1951    number N.  Return false if the requested operation is not permitted
1952    on a symbolic number.  */
1953
1954 static inline bool
1955 do_shift_rotate (enum tree_code code,
1956                  struct symbolic_number *n,
1957                  int count)
1958 {
1959   int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
1960   unsigned head_marker;
1961
1962   if (count % BITS_PER_UNIT != 0)
1963     return false;
1964   count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
1965
1966   /* Zero out the extra bits of N in order to avoid them being shifted
1967      into the significant bits.  */
1968   if (size < 64 / BITS_PER_MARKER)
1969     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
1970
1971   switch (code)
1972     {
1973     case LSHIFT_EXPR:
1974       n->n <<= count;
1975       break;
1976     case RSHIFT_EXPR:
1977       head_marker = HEAD_MARKER (n->n, size);
1978       n->n >>= count;
1979       /* Arithmetic shift of signed type: result is dependent on the value.  */
1980       if (!TYPE_UNSIGNED (n->type) && head_marker)
1981         for (i = 0; i < count / BITS_PER_MARKER; i++)
1982           n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
1983                   << ((size - 1 - i) * BITS_PER_MARKER);
1984       break;
1985     case LROTATE_EXPR:
1986       n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
1987       break;
1988     case RROTATE_EXPR:
1989       n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
1990       break;
1991     default:
1992       return false;
1993     }
1994   /* Zero unused bits for size.  */
1995   if (size < 64 / BITS_PER_MARKER)
1996     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
1997   return true;
1998 }
1999
2000 /* Perform sanity checking for the symbolic number N and the gimple
2001    statement STMT.  */
2002
2003 static inline bool
2004 verify_symbolic_number_p (struct symbolic_number *n, gimple stmt)
2005 {
2006   tree lhs_type;
2007
2008   lhs_type = gimple_expr_type (stmt);
2009
2010   if (TREE_CODE (lhs_type) != INTEGER_TYPE)
2011     return false;
2012
2013   if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
2014     return false;
2015
2016   return true;
2017 }
2018
2019 /* Initialize the symbolic number N for the bswap pass from the base element
2020    SRC manipulated by the bitwise OR expression.  */
2021
2022 static bool
2023 init_symbolic_number (struct symbolic_number *n, tree src)
2024 {
2025   int size;
2026
2027   n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
2028
2029   /* Set up the symbolic number N by setting each byte to a value between 1 and
2030      the byte size of rhs1.  The highest order byte is set to n->size and the
2031      lowest order byte to 1.  */
2032   n->type = TREE_TYPE (src);
2033   size = TYPE_PRECISION (n->type);
2034   if (size % BITS_PER_UNIT != 0)
2035     return false;
2036   size /= BITS_PER_UNIT;
2037   if (size > 64 / BITS_PER_MARKER)
2038     return false;
2039   n->range = size;
2040   n->n = CMPNOP;
2041
2042   if (size < 64 / BITS_PER_MARKER)
2043     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
2044
2045   return true;
2046 }
2047
/* Check if STMT might be a byte swap or a nop from a memory source and return
   the answer.  REF is the memory reference loaded by STMT.  On success N is
   initialized from REF and records the base of the memory area accessed,
   the variable offset and byte position of the access from that base, the
   alias pointer type of REF and the virtual use of STMT.  Return false if
   the target endianness is mixed, if STMT is not a non-volatile load, if
   the access is not made of whole bytes at a byte boundary, or if REF
   cannot be described by a symbolic number.  */

bool
find_bswap_or_nop_load (gimple stmt, tree ref, struct symbolic_number *n)
{
  /* Leaf node is an array or component ref. Memorize its base and
     offset from base to compare to other such leaf node.  */
  HOST_WIDE_INT bitsize, bitpos;
  machine_mode mode;
  int unsignedp, volatilep;
  tree offset, base_addr;

  /* Not prepared to handle PDP endian.  */
  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    return false;

  /* Only plain, non-volatile loads can be leaves.  */
  if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
    return false;

  /* Decompose REF into a base, a variable OFFSET (which may be null) and a
     constant position BITPOS and size BITSIZE in bits.  */
  base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
                                   &unsignedp, &volatilep, false);

  /* Look through a MEM_REF base: use its address operand as base and
     account for its constant offset in BITPOS (and OFFSET, see below).  */
  if (TREE_CODE (base_addr) == MEM_REF)
    {
      offset_int bit_offset = 0;
      tree off = TREE_OPERAND (base_addr, 1);

      if (!integer_zerop (off))
        {
          /* Convert the MEM_REF byte offset to bits.  */
          offset_int boff, coff = mem_ref_offset (base_addr);
          boff = wi::lshift (coff, LOG2_BITS_PER_UNIT);
          bit_offset += boff;
        }

      base_addr = TREE_OPERAND (base_addr, 0);

      /* Avoid returning a negative bitpos as this may wreak havoc later.  */
      if (wi::neg_p (bit_offset))
        {
          offset_int mask = wi::mask <offset_int> (LOG2_BITS_PER_UNIT, false);
          offset_int tem = bit_offset.and_not (mask);
          /* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf.
             Subtract it to BIT_OFFSET and add it (scaled) to OFFSET.  */
          bit_offset -= tem;
          tem = wi::arshift (tem, LOG2_BITS_PER_UNIT);
          if (offset)
            offset = size_binop (PLUS_EXPR, offset,
                                    wide_int_to_tree (sizetype, tem));
          else
            offset = wide_int_to_tree (sizetype, tem);
        }

      bitpos += bit_offset.to_shwi ();
    }

  /* Markers stand for whole bytes, so the access must start at a byte
     boundary and cover whole bytes.  */
  if (bitpos % BITS_PER_UNIT)
    return false;
  if (bitsize % BITS_PER_UNIT)
    return false;

  /* init_symbolic_number clears the memory related fields, so they are
     only filled in afterwards.  */
  if (!init_symbolic_number (n, ref))
    return false;
  n->base_addr = base_addr;
  n->offset = offset;
  n->bytepos = bitpos / BITS_PER_UNIT;
  n->alias_set = reference_alias_ptr_type (ref);
  n->vuse = gimple_vuse (stmt);
  return true;
}
2119
/* Compute the symbolic number N representing the result of a bitwise OR on 2
   symbolic number N1 and N2 whose source statements are respectively
   SOURCE_STMT1 and SOURCE_STMT2.

   Return the source statement to associate with N: SOURCE_STMT1 when both
   statements have the same first rhs operand, otherwise the statement of
   the number starting at the lowest byte position.  Return NULL if the two
   numbers cannot be merged: different sources that are not loads from the
   same base and variable offset, a memory range too large for a symbolic
   number, or a byte for which the two numbers hold different non-zero
   markers.

   Note that the markers of N1 or N2 may be modified in place, and that N
   may be partially written even when NULL is returned.  */

static gimple
perform_symbolic_merge (gimple source_stmt1, struct symbolic_number *n1,
                        gimple source_stmt2, struct symbolic_number *n2,
                        struct symbolic_number *n)
{
  int i, size;
  uint64_t mask;
  gimple source_stmt;
  struct symbolic_number *n_start;

  /* Sources are different, cancel bswap if they are not memory location with
     the same base (array, structure, ...).  */
  if (gimple_assign_rhs1 (source_stmt1) != gimple_assign_rhs1 (source_stmt2))
    {
      int64_t inc;
      HOST_WIDE_INT start_sub, end_sub, end1, end2, end;
      struct symbolic_number *toinc_n_ptr, *n_end;

      if (!n1->base_addr || !n2->base_addr
          || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
        return NULL;

      /* The variable offsets must be both absent or both present and
         equal.  */
      if (!n1->offset != !n2->offset
          || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
        return NULL;

      /* N_START is the number starting at the lowest byte position (N2 if
         both start at the same one) and START_SUB the distance in bytes
         between the two starting positions.  */
      if (n1->bytepos < n2->bytepos)
        {
          n_start = n1;
          start_sub = n2->bytepos - n1->bytepos;
          source_stmt = source_stmt1;
        }
      else
        {
          n_start = n2;
          start_sub = n1->bytepos - n2->bytepos;
          source_stmt = source_stmt2;
        }

      /* Find the highest address at which a load is performed and
         compute related info.  */
      end1 = n1->bytepos + (n1->range - 1);
      end2 = n2->bytepos + (n2->range - 1);
      if (end1 < end2)
        {
          end = end2;
          end_sub = end2 - end1;
        }
      else
        {
          end = end1;
          end_sub = end1 - end2;
        }
      n_end = (end2 > end1) ? n2 : n1;

      /* Find symbolic number whose lsb is the most significant.  */
      if (BYTES_BIG_ENDIAN)
        toinc_n_ptr = (n_end == n1) ? n2 : n1;
      else
        toinc_n_ptr = (n_start == n1) ? n2 : n1;

      n->range = end - n_start->bytepos + 1;

      /* Check that the range of memory covered can be represented by
         a symbolic number.  */
      if (n->range > 64 / BITS_PER_MARKER)
        return NULL;

      /* Reinterpret byte marks in symbolic number holding the value of
         bigger weight according to target endianness.  Every marker that
         is neither zero nor unknown is increased by INC; INC is shifted
         along with I so that it is always added at the position of the
         marker being examined.  */
      inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
      size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
      for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
        {
          unsigned marker
            = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
          if (marker && marker != MARKER_BYTE_UNKNOWN)
            toinc_n_ptr->n += inc;
        }
    }
  else
    {
      /* Same source: nothing to renumber.  */
      n->range = n1->range;
      n_start = n1;
      source_stmt = source_stmt1;
    }

  /* Keep the alias pointer type of N1 if it is absent or compatible with
     the one of N2, else fall back to ptr_type_node.  */
  if (!n1->alias_set
      || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
    n->alias_set = n1->alias_set;
  else
    n->alias_set = ptr_type_node;
  /* The remaining attributes are inherited from N_START.  */
  n->vuse = n_start->vuse;
  n->base_addr = n_start->base_addr;
  n->offset = n_start->offset;
  n->bytepos = n_start->bytepos;
  n->type = n_start->type;
  size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;

  /* A byte may be provided by both numbers only if they agree on its
     marker.  */
  for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
    {
      uint64_t masked1, masked2;

      masked1 = n1->n & mask;
      masked2 = n2->n & mask;
      if (masked1 && masked2 && masked1 != masked2)
        return NULL;
    }
  n->n = n1->n | n2->n;

  return source_stmt;
}
2236
/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
   the operation given by the rhs of STMT on the result.  If the operation
   could successfully be executed the function returns a gimple stmt whose
   rhs's first tree is the expression of the source operand and NULL
   otherwise.

   LIMIT bounds the recursion: it is decremented at each recursive call and
   NULL is returned once it reaches zero.  The operations handled are loads
   (see find_bswap_or_nop_load), conversions, bitwise AND, shifts and
   rotates by an integer constant, and bitwise OR of two SSA names.  */

static gimple
find_bswap_or_nop_1 (gimple stmt, struct symbolic_number *n, int limit)
{
  enum tree_code code;
  tree rhs1, rhs2 = NULL;
  gimple rhs1_stmt, rhs2_stmt, source_stmt1;
  enum gimple_rhs_class rhs_class;

  if (!limit || !is_gimple_assign (stmt))
    return NULL;

  rhs1 = gimple_assign_rhs1 (stmt);

  /* A suitable load is a leaf and is its own source statement.  */
  if (find_bswap_or_nop_load (stmt, rhs1, n))
    return stmt;

  if (TREE_CODE (rhs1) != SSA_NAME)
    return NULL;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = gimple_assign_rhs_class (stmt);
  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);

  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  /* Handle unary rhs and binary rhs with integer constants as second
     operand.  */

  if (rhs_class == GIMPLE_UNARY_RHS
      || (rhs_class == GIMPLE_BINARY_RHS
          && TREE_CODE (rhs2) == INTEGER_CST))
    {
      if (code != BIT_AND_EXPR
          && code != LSHIFT_EXPR
          && code != RSHIFT_EXPR
          && code != LROTATE_EXPR
          && code != RROTATE_EXPR
          && !CONVERT_EXPR_CODE_P (code))
        return NULL;

      source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);

      /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
         we have to initialize the symbolic number.  */
      if (!source_stmt1)
        {
          if (gimple_assign_load_p (stmt)
              || !init_symbolic_number (n, rhs1))
            return NULL;
          source_stmt1 = stmt;
        }

      /* Apply the operation of STMT to the symbolic number.  */
      switch (code)
        {
        case BIT_AND_EXPR:
          {
            int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
            uint64_t val = int_cst_value (rhs2), mask = 0;
            uint64_t tmp = (1 << BITS_PER_UNIT) - 1;

            /* Only constants masking full bytes are allowed.  MASK gets a
               full marker for each byte the constant keeps.  */
            for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
              if ((val & tmp) != 0 && (val & tmp) != tmp)
                return NULL;
              else if (val & tmp)
                mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);

            n->n &= mask;
          }
          break;
        case LSHIFT_EXPR:
        case RSHIFT_EXPR:
        case LROTATE_EXPR:
        case RROTATE_EXPR:
          /* NOTE(review): the constant is narrowed to int here without any
             range check of its own.  */
          if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
            return NULL;
          break;
        CASE_CONVERT:
          {
            int i, type_size, old_type_size;
            tree type;

            /* The target type must be a whole number of bytes and fit in
               a symbolic number.  */
            type = gimple_expr_type (stmt);
            type_size = TYPE_PRECISION (type);
            if (type_size % BITS_PER_UNIT != 0)
              return NULL;
            type_size /= BITS_PER_UNIT;
            if (type_size > 64 / BITS_PER_MARKER)
              return NULL;

            /* Sign extension: result is dependent on the value.  */
            old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
            if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
                && HEAD_MARKER (n->n, old_type_size))
              for (i = 0; i < type_size - old_type_size; i++)
                n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
                        << ((type_size - 1 - i) * BITS_PER_MARKER);

            if (type_size < 64 / BITS_PER_MARKER)
              {
                /* If STMT casts to a smaller type mask out the bits not
                   belonging to the target type.  */
                n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
              }
            n->type = type;
            /* For a memory source the range describes the memory accessed
               and is left alone.  */
            if (!n->base_addr)
              n->range = type_size;
          }
          break;
        default:
          return NULL;
        };
      return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
    }

  /* Handle binary rhs.  */

  if (rhs_class == GIMPLE_BINARY_RHS)
    {
      struct symbolic_number n1, n2;
      gimple source_stmt, source_stmt2;

      if (code != BIT_IOR_EXPR)
        return NULL;

      if (TREE_CODE (rhs2) != SSA_NAME)
        return NULL;

      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);

      switch (code)
        {
        case BIT_IOR_EXPR:
          /* Compute the symbolic number of each operand, then merge them
             into N.  */
          source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);

          if (!source_stmt1)
            return NULL;

          source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);

          if (!source_stmt2)
            return NULL;

          if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
            return NULL;

          /* Both operands must have the same virtual use, or none.  */
          if (!n1.vuse != !n2.vuse
              || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
            return NULL;

          source_stmt
            = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);

          if (!source_stmt)
            return NULL;

          if (!verify_symbolic_number_p (n, stmt))
            return NULL;

          break;
        default:
          return NULL;
        }
      return source_stmt;
    }
  return NULL;
}
2411
2412 /* Check if STMT completes a bswap implementation or a read in a given
2413    endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
2414    accordingly.  It also sets N to represent the kind of operations
2415    performed: size of the resulting expression and whether it works on
2416    a memory source, and if so alias-set and vuse.  At last, the
2417    function returns a stmt whose rhs's first tree is the source
2418    expression.  */
2419
2420 static gimple
2421 find_bswap_or_nop (gimple stmt, struct symbolic_number *n, bool *bswap)
2422 {
2423 /* The number which the find_bswap_or_nop_1 result should match in order
2424    to have a full byte swap.  The number is shifted to the right
2425    according to the size of the symbolic number before using it.  */
2426   uint64_t cmpxchg = CMPXCHG;
2427   uint64_t cmpnop = CMPNOP;
2428
2429   gimple source_stmt;
2430   int limit;
2431
2432   /* The last parameter determines the depth search limit.  It usually
2433      correlates directly to the number n of bytes to be touched.  We
2434      increase that number by log2(n) + 1 here in order to also
2435      cover signed -> unsigned conversions of the src operand as can be seen
2436      in libgcc, and for initial shift/and operation of the src operand.  */
2437   limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
2438   limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
2439   source_stmt = find_bswap_or_nop_1 (stmt, n, limit);
2440
2441   if (!source_stmt)
2442     return NULL;
2443
2444   /* Find real size of result (highest non-zero byte).  */
2445   if (n->base_addr)
2446     {
2447       int rsize;
2448       uint64_t tmpn;
2449
2450       for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
2451       n->range = rsize;
2452     }
2453
2454   /* Zero out the extra bits of N and CMP*.  */
2455   if (n->range < (int) sizeof (int64_t))
2456     {
2457       uint64_t mask;
2458
2459       mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
2460       cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
2461       cmpnop &= mask;
2462     }
2463
2464   /* A complete byte swap should make the symbolic number to start with
2465      the largest digit in the highest order byte. Unchanged symbolic
2466      number indicates a read with same endianness as target architecture.  */
2467   if (n->n == cmpnop)
2468     *bswap = false;
2469   else if (n->n == cmpxchg)
2470     *bswap = true;
2471   else
2472     return NULL;
2473
2474   /* Useless bit manipulation performed by code.  */
2475   if (!n->base_addr && n->n == cmpnop)
2476     return NULL;
2477
2478   n->range *= BITS_PER_UNIT;
2479   return source_stmt;
2480 }
2481
2482 namespace {
2483
/* Descriptor of the "bswap" pass: a GIMPLE pass that requires the function
   to be in SSA form, provides and destroys no property, and requests no
   TODO before or after it runs.  */
const pass_data pass_data_optimize_bswap =
{
  GIMPLE_PASS, /* type */
  "bswap", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
2496
2497 class pass_optimize_bswap : public gimple_opt_pass
2498 {
2499 public:
2500   pass_optimize_bswap (gcc::context *ctxt)
2501     : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
2502   {}
2503
2504   /* opt_pass methods: */
2505   virtual bool gate (function *)
2506     {
2507       return flag_expensive_optimizations && optimize;
2508     }
2509
2510   virtual unsigned int execute (function *);
2511
2512 }; // class pass_optimize_bswap
2513
/* Perform the bswap optimization: replace the expression computed in the rhs
   of CUR_STMT by an equivalent bswap, load or load + bswap expression.
   Which of these alternatives replaces the rhs is given by N->base_addr (non
   null if a load is needed) and BSWAP.  The type, VUSE and alias-set of the
   load to perform are also given in N while the builtin bswap to invoke is
   given in FNDECL.  Finally, if a load is involved, SRC_STMT refers to one of
   the load statements involved to construct the rhs in CUR_STMT and N->range
   gives the size of the rhs expression for maintaining some statistics.

   Note that if the replacement involves a load, CUR_STMT is moved just before
   SRC_STMT to do the load with the same VUSE, which can lead to CUR_STMT
   changing basic block.  */
2526
2527 static bool
2528 bswap_replace (gimple cur_stmt, gimple src_stmt, tree fndecl, tree bswap_type,
2529                tree load_type, struct symbolic_number *n, bool bswap)
2530 {
2531   gimple_stmt_iterator gsi;
2532   tree src, tmp, tgt;
2533   gimple bswap_stmt;
2534
       /* GSI designates CUR_STMT, the statement being rewritten.  SRC is the
          first rhs operand of SRC_STMT and TGT is the lhs of CUR_STMT, which
          has to receive the final value.  */
2535   gsi = gsi_for_stmt (cur_stmt);
2536   src = gimple_assign_rhs1 (src_stmt);
2537   tgt = gimple_assign_lhs (cur_stmt);
2538
2539   /* Need to load the value from memory first.  */
2540   if (n->base_addr)
2541     {
2542       gimple_stmt_iterator gsi_ins = gsi_for_stmt (src_stmt);
2543       tree addr_expr, addr_tmp, val_expr, val_tmp;
2544       tree load_offset_ptr, aligned_load_type;
2545       gimple addr_stmt, load_stmt;
2546       unsigned align;
2547       HOST_WIDE_INT load_offset = 0;
2548
2549       align = get_object_alignment (src);
2550       /* If the new access is smaller than the original one, we need
2551          to perform big endian adjustment.  */
2552       if (BYTES_BIG_ENDIAN)
2553         {
2554           HOST_WIDE_INT bitsize, bitpos;
2555           machine_mode mode;
2556           int unsignedp, volatilep;
2557           tree offset;
2558
2559           get_inner_reference (src, &bitsize, &bitpos, &offset, &mode,
2560                                &unsignedp, &volatilep, false);
2561           if (n->range < (unsigned HOST_WIDE_INT) bitsize)
2562             {
                   /* LOAD_OFFSET is the number of bytes by which the original
                      access is wider than N->range; it is used below as the
                      offset of the new load.  If the corresponding bit offset
                      is not a multiple of ALIGN, ALIGN is reduced to the
                      lowest set bit of the remainder.  */
2563               load_offset = (bitsize - n->range) / BITS_PER_UNIT;
2564               unsigned HOST_WIDE_INT l
2565                 = (load_offset * BITS_PER_UNIT) & (align - 1);
2566               if (l)
2567                 align = l & -l;
2568             }
2569         }
2570
           /* Give up, before anything has been modified, when a byte-swapped
              load would be under-aligned for the mode of LOAD_TYPE and
              SLOW_UNALIGNED_ACCESS holds for it.  This is the only path on
              which the function returns false.  */
2571       if (bswap
2572           && align < GET_MODE_ALIGNMENT (TYPE_MODE (load_type))
2573           && SLOW_UNALIGNED_ACCESS (TYPE_MODE (load_type), align))
2574         return false;
2575
2576       /* Move cur_stmt just before one of the loads of the original
2577          to ensure it has the same VUSE.  See PR61517 for what could
2578          go wrong.  */
2579       gsi_move_before (&gsi, &gsi_ins);
           /* Fetch the iterator again now that cur_stmt has been moved.  */
2580       gsi = gsi_for_stmt (cur_stmt);
2581
2582       /* Compute address to load from and cast according to the size
2583          of the load.  */
2584       addr_expr = build_fold_addr_expr (unshare_expr (src));
2585       if (is_gimple_mem_ref_addr (addr_expr))
2586         addr_tmp = addr_expr;
2587       else
2588         {
               /* The address cannot be used directly as a MEM_REF operand:
                  compute it into a temporary inserted before cur_stmt.  */
2589           addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL,
2590                                          "load_src");
2591           addr_stmt = gimple_build_assign (addr_tmp, addr_expr);
2592           gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT);
2593         }
2594
2595       /* Perform the load.  The access type is LOAD_TYPE, rebuilt with the
              known alignment ALIGN when that is smaller than the alignment of
              LOAD_TYPE; the offset constant is built with type N->alias_set.  */
2596       aligned_load_type = load_type;
2597       if (align < TYPE_ALIGN (load_type))
2598         aligned_load_type = build_aligned_type (load_type, align);
2599       load_offset_ptr = build_int_cst (n->alias_set, load_offset);
2600       val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
2601                               load_offset_ptr);
2602
           /* No byte swap is needed: the rhs of cur_stmt is replaced by the
              load itself and the replacement is counted in nop_stats.  */
2603       if (!bswap)
2604         {
2605           if (n->range == 16)
2606             nop_stats.found_16bit++;
2607           else if (n->range == 32)
2608             nop_stats.found_32bit++;
2609           else
2610             {
2611               gcc_assert (n->range == 64);
2612               nop_stats.found_64bit++;
2613             }
2614
2615           /* Convert the result of load if necessary.  */
2616           if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type))
2617             {
                   /* Load into a temporary before cur_stmt and turn cur_stmt
                      into a NOP_EXPR conversion of that temporary.  */
2618               val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
2619                                             "load_dst");
2620               load_stmt = gimple_build_assign (val_tmp, val_expr);
2621               gimple_set_vuse (load_stmt, n->vuse);
2622               gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
2623               gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
2624             }
2625           else
2626             {
                   /* Types agree: cur_stmt itself becomes the load.  */
2627               gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
2628               gimple_set_vuse (cur_stmt, n->vuse);
2629             }
2630           update_stmt (cur_stmt);
2631
2632           if (dump_file)
2633             {
2634               fprintf (dump_file,
2635                        "%d bit load in target endianness found at: ",
2636                        (int) n->range);
2637               print_gimple_stmt (dump_file, cur_stmt, 0, 0);
2638             }
2639           return true;
2640         }
2641       else
2642         {
               /* A byte swap is still needed: load into a temporary, which
                  becomes the source of the swap built below.  */
2643           val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
2644           load_stmt = gimple_build_assign (val_tmp, val_expr);
2645           gimple_set_vuse (load_stmt, n->vuse);
2646           gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
2647         }
2648       src = val_tmp;
2649     }
2650
       /* From here on a byte swap is emitted; count it by width.  */
2651   if (n->range == 16)
2652     bswap_stats.found_16bit++;
2653   else if (n->range == 32)
2654     bswap_stats.found_32bit++;
2655   else
2656     {
2657       gcc_assert (n->range == 64);
2658       bswap_stats.found_64bit++;
2659     }
2660
2661   tmp = src;
2662
2663   /* Convert the src expression if necessary.  */
2664   if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
2665     {
2666       gimple convert_stmt;
2667
2668       tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
2669       convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
2670       gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
2671     }
2672
2673   /* Canonical form for 16 bit bswap is a rotate expression.  Only 16bit values
2674      are considered as rotation of 2N bit values by N bits is generally not
2675      equivalent to a bswap.  Consider for instance 0x01020304 r>> 16 which
2676      gives 0x03040102 while a bswap for that value is 0x04030201.  */
2677   if (bswap && n->range == 16)
2678     {
2679       tree count = build_int_cst (NULL, BITS_PER_UNIT);
2680       src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
           /* The lhs is filled in by gimple_set_lhs below.  */
2681       bswap_stmt = gimple_build_assign (NULL, src);
2682     }
2683   else
2684     bswap_stmt = gimple_build_call (fndecl, 1, tmp);
2685
       /* By default the new statement defines TGT directly.  */
2686   tmp = tgt;
2687
2688   /* Convert the result if necessary.  In that case the new statement
          defines a temporary of BSWAP_TYPE instead, and a conversion of that
          temporary to TGT is inserted after cur_stmt.  */
2689   if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
2690     {
2691       gimple convert_stmt;
2692
2693       tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
2694       convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
2695       gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
2696     }
2697
2698   gimple_set_lhs (bswap_stmt, tmp);
2699
       /* The dump shows cur_stmt, i.e. the statement about to be replaced.  */
2700   if (dump_file)
2701     {
2702       fprintf (dump_file, "%d bit bswap implementation found at: ",
2703                (int) n->range);
2704       print_gimple_stmt (dump_file, cur_stmt, 0, 0);
2705     }
2706
       /* Insert the new statement after cur_stmt and then remove cur_stmt.
          NOTE(review): presumably GSI_SAME_STMT leaves GSI on cur_stmt, so
          the new statement lands ahead of any result conversion inserted
          above -- confirm against the iterator API.  */
2707   gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
2708   gsi_remove (&gsi, true);
2709   return true;
2710 }
2711
2712 /* Find manual byte swap implementations as well as loads in a given
2713    endianness.  Byte swaps are turned into a bswap builtin invocation
2714    while endian loads are converted to a bswap builtin invocation or a
2715    simple load according to the target endianness.  Return TODO_update_ssa
        if at least one statement of FUN was replaced and 0 otherwise.  */
2716
2717 unsigned int
2718 pass_optimize_bswap::execute (function *fun)
2719 {
2720   basic_block bb;
2721   bool bswap32_p, bswap64_p;
2722   bool changed = false;
2723   tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
2724
       /* Nothing is attempted unless a unit is 8 bits wide.  */
2725   if (BITS_PER_UNIT != 8)
2726     return 0;
2727
       /* A builtin is usable when its declaration is explicitly available
          and optab_handler reports a handler for bswap_optab in the matching
          mode.  The 64-bit builtin is also accepted when the 32-bit one is
          usable and word_mode is SImode.  */
2728   bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2729                && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
2730   bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2731                && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
2732                    || (bswap32_p && word_mode == SImode)));
2733
2734   /* Determine the argument type of the builtins.  The code later on
2735      assumes that the return and argument type are the same.  */
2736   if (bswap32_p)
2737     {
2738       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
2739       bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
2740     }
2741
2742   if (bswap64_p)
2743     {
2744       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
2745       bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
2746     }
2747
       /* Start this function's statistics from zero.  */
2748   memset (&nop_stats, 0, sizeof (nop_stats));
2749   memset (&bswap_stats, 0, sizeof (bswap_stats));
2750
2751   FOR_EACH_BB_FN (bb, fun)
2752     {
2753       gimple_stmt_iterator gsi;
2754
2755       /* We do a reverse scan for bswap patterns to make sure we get the
2756          widest match.  As bswap pattern matching doesn't handle previously
2757          inserted smaller bswap replacements as sub-patterns, the wider
2758          variant wouldn't be detected.  */
2759       for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
2760         {
2761           gimple src_stmt, cur_stmt = gsi_stmt (gsi);
2762           tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
2763           enum tree_code code;
2764           struct symbolic_number n;
2765           bool bswap;
2766
2767           /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
2768              might be moved to a different basic block by bswap_replace and gsi
2769              must not point to it if that's the case.  Moving the gsi_prev
2770              here makes sure that gsi points to the statement previous to
2771              cur_stmt while still making sure that all statements are
2772              considered in this basic block.  */
2773           gsi_prev (&gsi);
2774
2775           if (!is_gimple_assign (cur_stmt))
2776             continue;
2777
               /* Only a bitwise OR, or a rotate by a constant count that is a
                  whole number of units, is considered as the root of a
                  pattern.  The `continue's in this switch and the next one
                  skip to the next statement of the enclosing loop.  */
2778           code = gimple_assign_rhs_code (cur_stmt);
2779           switch (code)
2780             {
2781             case LROTATE_EXPR:
2782             case RROTATE_EXPR:
2783               if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
2784                   || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
2785                      % BITS_PER_UNIT)
2786                 continue;
2787               /* Fall through.  */
2788             case BIT_IOR_EXPR:
2789               break;
2790             default:
2791               continue;
2792             }
2793
               /* A null result means no pattern was found.  Otherwise N
                  describes the value and BSWAP tells whether a byte swap is
                  involved.  */
2794           src_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
2795
2796           if (!src_stmt)
2797             continue;
2798
               /* Choose the load type, and when the builtin of that width is
                  usable, the builtin and its argument type.  The 16-bit case
                  needs no builtin.  Other widths are not handled.  */
2799           switch (n.range)
2800             {
2801             case 16:
2802               /* Already in canonical form, nothing to do.  */
2803               if (code == LROTATE_EXPR || code == RROTATE_EXPR)
2804                 continue;
2805               load_type = bswap_type = uint16_type_node;
2806               break;
2807             case 32:
2808               load_type = uint32_type_node;
2809               if (bswap32_p)
2810                 {
2811                   fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
2812                   bswap_type = bswap32_type;
2813                 }
2814               break;
2815             case 64:
2816               load_type = uint64_type_node;
2817               if (bswap64_p)
2818                 {
2819                   fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
2820                   bswap_type = bswap64_type;
2821                 }
2822               break;
2823             default:
2824               continue;
2825             }
2826
               /* A byte swap wider than 16 bits requires the builtin; a
                  replacement without byte swap does not.  */
2827           if (bswap && !fndecl && n.range != 16)
2828             continue;
2829
2830           if (bswap_replace (cur_stmt, src_stmt, fndecl, bswap_type, load_type,
2831                              &n, bswap))
2832             changed = true;
2833         }
2834     }
2835
       /* Report the per-width counters accumulated by bswap_replace.  */
2836   statistics_counter_event (fun, "16-bit nop implementations found",
2837                             nop_stats.found_16bit);
2838   statistics_counter_event (fun, "32-bit nop implementations found",
2839                             nop_stats.found_32bit);
2840   statistics_counter_event (fun, "64-bit nop implementations found",
2841                             nop_stats.found_64bit);
2842   statistics_counter_event (fun, "16-bit bswap implementations found",
2843                             bswap_stats.found_16bit);
2844   statistics_counter_event (fun, "32-bit bswap implementations found",
2845                             bswap_stats.found_32bit);
2846   statistics_counter_event (fun, "64-bit bswap implementations found",
2847                             bswap_stats.found_64bit);
2848
2849   return (changed ? TODO_update_ssa : 0);
2850 }
2851
2852 } // anon namespace
2853
2854 gimple_opt_pass *
2855 make_pass_optimize_bswap (gcc::context *ctxt)
2856 {
       /* Allocate a pass_optimize_bswap for CTXT; the caller receives it as
          a gimple_opt_pass pointer.  */
2857   gimple_opt_pass *bswap_pass = new pass_optimize_bswap (ctxt);
       return bswap_pass;
2858 }
2859
2860 /* Decide whether STMT, an assignment feeding a widening multiply whose
2861    result has type RESULT_TYPE, is a conversion that may be stripped.  */
2862 static bool
2863 widening_mult_conversion_strippable_p (tree result_type, gimple stmt)
2864 {
2865   const enum tree_code conv_code = gimple_assign_rhs_code (stmt);
2866
2867   /* For a non-integer result only a fixed-point conversion qualifies.  */
2868   if (TREE_CODE (result_type) != INTEGER_TYPE)
2869     return conv_code == FIXED_CONVERT_EXPR;
2870
2871   /* For an integer result STMT has to be a conversion to begin with.  */
2872   if (!CONVERT_EXPR_CODE_P (conv_code))
2873     return false;
2874
2875   tree outer_type = TREE_TYPE (gimple_assign_lhs (stmt));
2876
2877   /* A conversion whose result is exactly as wide as RESULT_TYPE can
2878      always go: the multiply operation will be selected to create the
2879      correct extension as a by-product.  */
2880   if (TYPE_PRECISION (result_type) == TYPE_PRECISION (outer_type))
2881     return true;
2882
2883   /* Otherwise the conversion must preserve the signed-ness of the
2884      operation and must not narrow the range.  */
2885   tree inner_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2886
2887   /* An unsigned source may go through an intermediate type of either
2888      signedness; a signed source needs a signed intermediate type.  */
2889   bool sign_ok = TYPE_UNSIGNED (inner_type);
2890   if (!sign_ok)
2891     sign_ok = TYPE_UNSIGNED (outer_type) == TYPE_UNSIGNED (inner_type);
2892   if (!sign_ok)
2893     return false;
2894
2895   /* Finally the conversion must be a genuine widening.  */
2896   bool widens = TYPE_PRECISION (outer_type) > TYPE_PRECISION (inner_type);
2897
2898   return widens;
2899 }
2900
2901 /* Check whether RHS can be an operand of a widening multiplication whose
2902    result has type TYPE.  Two kinds of operand are accepted:
2903
2904      - An SSA name, seen through its defining conversion when that one is
2905        strippable, whose type has the tree code of TYPE and at most half
2906        its precision.  Store it in *NEW_RHS_OUT and its type in *TYPE_OUT.
2907
2908      - An integer constant, either RHS itself or the source of a strippable
2909        conversion.  Store it in *NEW_RHS_OUT and set *TYPE_OUT to NULL.
2910
2911    Return false without writing to either output for anything else.  */
2912
2913 static bool
2914 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2915                         tree *new_rhs_out)
2916 {
2917   /* A literal constant needs no further inspection.  */
2918   if (TREE_CODE (rhs) == INTEGER_CST)
2919     {
2920       *new_rhs_out = rhs;
2921       *type_out = NULL;
2922       return true;
2923     }
2924
2925   /* Beyond constants, only SSA names are considered.  */
2926   if (TREE_CODE (rhs) != SSA_NAME)
2927     return false;
2928
2929   /* The operand is RHS itself unless its definition is an assignment
2930      performing a conversion that is allowed to be stripped.  */
2931   tree operand = rhs;
2932   gimple def_stmt = SSA_NAME_DEF_STMT (rhs);
2933
2934   if (is_gimple_assign (def_stmt)
2935       && widening_mult_conversion_strippable_p (type, def_stmt))
2936     {
2937       operand = gimple_assign_rhs1 (def_stmt);
2938
2939       /* The conversion was applied to a constant.  */
2940       if (TREE_CODE (operand) == INTEGER_CST)
2941         {
2942           *new_rhs_out = operand;
2943           *type_out = NULL;
2944           return true;
2945         }
2946     }
2947
2948   /* Require the same kind of type as TYPE, narrow enough that twice
2949      its precision still fits in TYPE.  */
2950   tree operand_type = TREE_TYPE (operand);
2951
2952   if (TREE_CODE (operand_type) != TREE_CODE (type)
2953       || TYPE_PRECISION (operand_type) * 2 > TYPE_PRECISION (type))
2954     return false;
2955
2956   *new_rhs_out = operand;
2957   *type_out = operand_type;
2958   return true;
2959 }
2960
2961 /* Return true if both operands of the assignment STMT are suitable for a
2962    widening multiplication producing the type of STMT's lhs.  If so, store
2963    the unwidened operands in *RHS1_OUT and *RHS2_OUT and their types in
2964    *TYPE1_OUT and *TYPE2_OUT, most precise type first; a constant operand
2965    takes the other operand's type.  Outputs may be written on failure.  */
2966
2967 static bool
2968 is_widening_mult_p (gimple stmt,
2969                     tree *type1_out, tree *rhs1_out,
2970                     tree *type2_out, tree *rhs2_out)
2971 {
2972   tree result_type = TREE_TYPE (gimple_assign_lhs (stmt));
2973   const enum tree_code result_code = TREE_CODE (result_type);
2974
2975   if (result_code != INTEGER_TYPE && result_code != FIXED_POINT_TYPE)
2976     return false;
2977
2978   /* Both operands must qualify; the helper fills in the outputs.  */
2979   if (!is_widening_mult_rhs_p (result_type, gimple_assign_rhs1 (stmt),
2980                                type1_out, rhs1_out)
2981       || !is_widening_mult_rhs_p (result_type, gimple_assign_rhs2 (stmt),
2982                                   type2_out, rhs2_out))
2983     return false;
2984
2985   /* A NULL type marks a constant operand: it takes the type of the other
2986      operand, provided that one is not constant too and the value fits.  */
2987   if (*type1_out == NULL)
2988     {
2989       if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2990         return false;
2991       *type1_out = *type2_out;
2992     }
2993   else if (*type2_out == NULL)
2994     {
2995       if (!int_fits_type_p (*rhs2_out, *type1_out))
2996         return false;
2997       *type2_out = *type1_out;
2998     }
2999
3000   /* Put the operand with the larger precision first.  */
3001   if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
3002     {
3003       std::swap (*rhs1_out, *rhs2_out);
3004       std::swap (*type1_out, *type2_out);
3005     }
3006
3007   return true;
3008 }
3009
3010 /* Process a single gimple statement STMT, which has a MULT_EXPR as
3011    its rhs, and try to convert it into a WIDEN_MULT_EXPR.  The return
3012    value is true iff we converted the statement.  GSI is passed to
        build_and_insert_cast when an operand has to be converted first.  */
3013
3014 static bool
3015 convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi)
3016 {
3017   tree lhs, rhs1, rhs2, type, type1, type2;
3018   enum insn_code handler;
3019   machine_mode to_mode, from_mode, actual_mode;
3020   optab op;
3021   int actual_precision;
3022   location_t loc = gimple_location (stmt);
3023   bool from_unsigned1, from_unsigned2;
3024
       /* Only results of INTEGER_TYPE are handled here.  */
3025   lhs = gimple_assign_lhs (stmt);
3026   type = TREE_TYPE (lhs);
3027   if (TREE_CODE (type) != INTEGER_TYPE)
3028     return false;
3029
3030   if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
3031     return false;
3032
3033   to_mode = TYPE_MODE (type);
3034   from_mode = TYPE_MODE (type1);
3035   from_unsigned1 = TYPE_UNSIGNED (type1);
3036   from_unsigned2 = TYPE_UNSIGNED (type2);
3037
       /* Pick the optab from the signedness of the operands: both unsigned,
          both signed, or mixed.  */
3038   if (from_unsigned1 && from_unsigned2)
3039     op = umul_widen_optab;
3040   else if (!from_unsigned1 && !from_unsigned2)
3041     op = smul_widen_optab;
3042   else
3043     op = usmul_widen_optab;
3044
3045   handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
3046                                                   0, &actual_mode);
3047
       /* No handler for that optab: unless it already was the signed one,
          retry with the signed optab, possibly from a wider source mode.  */
3048   if (handler == CODE_FOR_nothing)
3049     {
3050       if (op != smul_widen_optab)
3051         {
3052           /* We can use a signed multiply with unsigned types as long as
3053              there is a wider mode to use, or it is the smaller of the two
3054              types that is unsigned.  Note that type1 >= type2, always.  */
3055           if ((TYPE_UNSIGNED (type1)
3056                && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
3057               || (TYPE_UNSIGNED (type2)
3058                   && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
3059             {
                   /* An unsigned operand fills FROM_MODE completely, so the
                      next wider mode is needed; give up if that mode is not
                      smaller than the result mode.  */
3060               from_mode = GET_MODE_WIDER_MODE (from_mode);
3061               if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
3062                 return false;
3063             }
3064
3065           op = smul_widen_optab;
3066           handler = find_widening_optab_handler_and_mode (op, to_mode,
3067                                                           from_mode, 0,
3068                                                           &actual_mode);
3069
3070           if (handler == CODE_FOR_nothing)
3071             return false;
3072
               /* From now on both operands are to be treated as signed.  */
3073           from_unsigned1 = from_unsigned2 = false;
3074         }
3075       else
3076         return false;
3077     }
3078
3079   /* Ensure that the inputs to the handler are in the correct precision
3080      for the opcode.  This will be the full mode size.  The result type
          must be at least twice as precise as the handler's input mode.  An
          operand is cast when its precision or signedness differs from what
          was settled on above.  */
3081   actual_precision = GET_MODE_PRECISION (actual_mode);
3082   if (2 * actual_precision > TYPE_PRECISION (type))
3083     return false;
3084   if (actual_precision != TYPE_PRECISION (type1)
3085       || from_unsigned1 != TYPE_UNSIGNED (type1))
3086     rhs1 = build_and_insert_cast (gsi, loc,
3087                                   build_nonstandard_integer_type
3088                                     (actual_precision, from_unsigned1), rhs1);
3089   if (actual_precision != TYPE_PRECISION (type2)
3090       || from_unsigned2 != TYPE_UNSIGNED (type2))
3091     rhs2 = build_and_insert_cast (gsi, loc,
3092                                   build_nonstandard_integer_type
3093                                     (actual_precision, from_unsigned2), rhs2);
3094
3095   /* Handle constants.  A constant operand is converted to the operand
          type computed by is_widening_mult_p.  */
3096   if (TREE_CODE (rhs1) == INTEGER_CST)
3097     rhs1 = fold_convert (type1, rhs1);
3098   if (TREE_CODE (rhs2) == INTEGER_CST)
3099     rhs2 = fold_convert (type2, rhs2);
3100
       /* Rewrite STMT in place as a widening multiply of the new operands.  */
3101   gimple_assign_set_rhs1 (stmt, rhs1);
3102   gimple_assign_set_rhs2 (stmt, rhs2);
3103   gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
3104   update_stmt (stmt);
3105   widen_mul_stats.widen_mults_inserted++;
3106   return true;
3107 }
3108
3109 /* Process a single gimple statement STMT, which is found at the
3110    iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
3111    rhs (given by CODE), and try to convert it into a
3112    WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
3113    is true iff we converted the statement.  */
3114
3115 static bool
3116 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
3117                             enum tree_code code)
3118 {
3119   gimple rhs1_stmt = NULL, rhs2_stmt = NULL;
3120   gimple conv1_stmt = NULL, conv2_stmt = NULL, conv_stmt;
3121   tree type, type1, type2, optype;
3122   tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
3123   enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
3124   optab this_optab;
3125   enum tree_code wmult_code;
3126   enum insn_code handler;
3127   machine_mode to_mode, from_mode, actual_mode;
3128   location_t loc = gimple_location (stmt);
3129   int actual_precision;
3130   bool from_unsigned1, from_unsigned2;
3131
3132   lhs = gimple_assign_lhs (stmt);
3133   type = TREE_TYPE (lhs);
3134   if (TREE_CODE (type) != INTEGER_TYPE
3135       && TREE_CODE (type) != FIXED_POINT_TYPE)
3136     return false;
3137
3138   if (code == MINUS_EXPR)
3139     wmult_code = WIDEN_MULT_MINUS_EXPR;
3140   else
3141     wmult_code = WIDEN_MULT_PLUS_EXPR;
3142
3143   rhs1 = gimple_assign_rhs1 (stmt);
3144   rhs2 = gimple_assign_rhs2 (stmt);
3145
       /* Record the defining statement of each SSA operand and, when that
          statement is an assignment, its rhs code.  The codes stay at
          ERROR_MARK otherwise.  */
3146   if (TREE_CODE (rhs1) == SSA_NAME)
3147     {
3148       rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
3149       if (is_gimple_assign (rhs1_stmt))
3150         rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
3151     }
3152
3153   if (TREE_CODE (rhs2) == SSA_NAME)
3154     {
3155       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
3156       if (is_gimple_assign (rhs2_stmt))
3157         rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
3158     }
3159
3160   /* Allow for one conversion statement between the multiply
3161      and addition/subtraction statement.  If there are more than
3162      one conversions then we assume they would invalidate this
3163      transformation.  If that's not the case then they should have
3164      been folded before now.  */
3165   if (CONVERT_EXPR_CODE_P (rhs1_code))
3166     {
3167       conv1_stmt = rhs1_stmt;
3168       rhs1 = gimple_assign_rhs1 (rhs1_stmt);
3169       if (TREE_CODE (rhs1) == SSA_NAME)
3170         {
3171           rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
3172           if (is_gimple_assign (rhs1_stmt))
3173             rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
3174         }
3175       else
3176         return false;
3177     }
3178   if (CONVERT_EXPR_CODE_P (rhs2_code))
3179     {
3180       conv2_stmt = rhs2_stmt;
3181       rhs2 = gimple_assign_rhs1 (rhs2_stmt);
3182       if (TREE_CODE (rhs2) == SSA_NAME)
3183         {
3184           rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
3185           if (is_gimple_assign (rhs2_stmt))
3186             rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
3187         }
3188       else
3189         return false;
3190     }
3191
3192   /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
3193      is_widening_mult_p, but we still need the rhs returns.
3194
3195      It might also appear that it would be sufficient to use the existing
3196      operands of the widening multiply, but that would limit the choice of
3197      multiply-and-accumulate instructions.
3198
3199      If the widened-multiplication result has more than one uses, it is
3200      probably wiser not to do the conversion.
3201
          The first operand is only considered as the multiplication for a
          PLUS_EXPR; for a MINUS_EXPR the multiplication has to be the second
          operand.  ADD_RHS is the remaining operand and CONV_STMT the
          conversion, if any, found on the multiplication side.  */
3201   if (code == PLUS_EXPR
3202       && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
3203     {
3204       if (!has_single_use (rhs1)
3205           || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
3206                                   &type2, &mult_rhs2))
3207         return false;
3208       add_rhs = rhs2;
3209       conv_stmt = conv1_stmt;
3210     }
3211   else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
3212     {
3213       if (!has_single_use (rhs2)
3214           || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
3215                                   &type2, &mult_rhs2))
3216         return false;
3217       add_rhs = rhs1;
3218       conv_stmt = conv2_stmt;
3219     }
3220   else
3221     return false;
3222
3223   to_mode = TYPE_MODE (type);
3224   from_mode = TYPE_MODE (type1);
3225   from_unsigned1 = TYPE_UNSIGNED (type1);
3226   from_unsigned2 = TYPE_UNSIGNED (type2);
3227   optype = type1;
3228
3229   /* There's no such thing as a mixed sign madd yet, so use a wider mode.  */
3230   if (from_unsigned1 != from_unsigned2)
3231     {
3232       if (!INTEGRAL_TYPE_P (type))
3233         return false;
3234       /* We can use a signed multiply with unsigned types as long as
3235          there is a wider mode to use, or it is the smaller of the two
3236          types that is unsigned.  Note that type1 >= type2, always.  */
3237       if ((from_unsigned1
3238            && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
3239           || (from_unsigned2
3240               && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
3241         {
3242           from_mode = GET_MODE_WIDER_MODE (from_mode);
3243           if (GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
3244             return false;
3245         }
3246
           /* Both operands are treated as signed from here on, and the optab
              is looked up for a signed type with the precision of FROM_MODE.  */
3247       from_unsigned1 = from_unsigned2 = false;
3248       optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
3249                                                false);
3250     }
3251
3252   /* If there was a conversion between the multiply and addition
3253      then we need to make sure it fits a multiply-and-accumulate.
3254      There should be a single mode change which does not change the
3255      value.  */
3256   if (conv_stmt)
3257     {
3258       /* We use the original, unmodified data types for this.
              DATA_SIZE is the sum of the two operand precisions, and the
              product counts as unsigned only if both operand types are.  */
3259       tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
3260       tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
3261       int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
3262       bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
3263
3264       if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
3265         {
3266           /* Conversion is a truncate.  */
3267           if (TYPE_PRECISION (to_type) < data_size)
3268             return false;
3269         }
3270       else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
3271         {
3272           /* Conversion is an extend.  Check it's the right sort.  */
3273           if (TYPE_UNSIGNED (from_type) != is_unsigned
3274               && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
3275             return false;
3276         }
3277       /* else convert is a no-op for our purposes.  */
3278     }
3279
3280   /* Verify that the machine can perform a widening multiply
3281      accumulate in this mode/signedness combination, otherwise
3282      this transformation is likely to pessimize code.  */
3283   this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
3284   handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
3285                                                   from_mode, 0, &actual_mode);
3286
3287   if (handler == CODE_FOR_nothing)
3288     return false;
3289
3290   /* Ensure that the inputs to the handler are in the correct precision
3291      for the opcode.  This will be the full mode size.  An operand is
          cast when its precision or signedness differs from what was settled
          on above.  */
3292   actual_precision = GET_MODE_PRECISION (actual_mode);
3293   if (actual_precision != TYPE_PRECISION (type1)
3294       || from_unsigned1 != TYPE_UNSIGNED (type1))
3295     mult_rhs1 = build_and_insert_cast (gsi, loc,
3296                                        build_nonstandard_integer_type
3297                                          (actual_precision, from_unsigned1),
3298                                        mult_rhs1);
3299   if (actual_precision != TYPE_PRECISION (type2)
3300       || from_unsigned2 != TYPE_UNSIGNED (type2))
3301     mult_rhs2 = build_and_insert_cast (gsi, loc,
3302                                        build_nonstandard_integer_type
3303                                          (actual_precision, from_unsigned2),
3304                                        mult_rhs2);
3305
       /* The accumulator operand must have the type of the result.  */
3306   if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
3307     add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
3308
3309   /* Handle constants.  A constant multiplication operand is converted
          to the operand type computed by is_widening_mult_p.  */
3310   if (TREE_CODE (mult_rhs1) == INTEGER_CST)
3311     mult_rhs1 = fold_convert (type1, mult_rhs1);
3312   if (TREE_CODE (mult_rhs2) == INTEGER_CST)
3313     mult_rhs2 = fold_convert (type2, mult_rhs2);
3314
       /* Replace the rhs of the statement at GSI by the three-operand
          widening multiply-accumulate and count the conversion.  */
3315   gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
3316                                   add_rhs);
3317   update_stmt (gsi_stmt (*gsi));
3318   widen_mul_stats.maccs_inserted++;
3319   return true;
3320 }
3321
3322 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3323    with uses in additions and subtractions to form fused multiply-add
3324    operations.  Returns true if successful and MUL_STMT should be removed.  */
3325
3326 static bool
3327 convert_mult_to_fma (gimple mul_stmt, tree op1, tree op2)
3328 {
3329   tree mul_result = gimple_get_lhs (mul_stmt);
3330   tree type = TREE_TYPE (mul_result);
3331   gimple use_stmt, neguse_stmt;
3332   gassign *fma_stmt;
3333   use_operand_p use_p;
3334   imm_use_iterator imm_iter;
3335
3336   if (FLOAT_TYPE_P (type)
3337       && flag_fp_contract_mode == FP_CONTRACT_OFF)
3338     return false;
3339
3340   /* We don't want to do bitfield reduction ops.  */
3341   if (INTEGRAL_TYPE_P (type)
3342       && (TYPE_PRECISION (type)
3343           != GET_MODE_PRECISION (TYPE_MODE (type))))
3344     return false;
3345
3346   /* If the target doesn't support it, don't generate it.  We assume that
3347      if fma isn't available then fms, fnma or fnms are not either.  */
3348   if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
3349     return false;
3350
3351   /* If the multiplication has zero uses, it is kept around probably because
3352      of -fnon-call-exceptions.  Don't optimize it away in that case,
3353      it is DCE job.  */
3354   if (has_zero_uses (mul_result))
3355     return false;
3356
3357   /* Make sure that the multiplication statement becomes dead after
3358      the transformation, thus that all uses are transformed to FMAs.
3359      This means we assume that an FMA operation has the same cost
3360      as an addition.  */
3361   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3362     {
3363       enum tree_code use_code;
3364       tree result = mul_result;
3365       bool negate_p = false;
3366
3367       use_stmt = USE_STMT (use_p);
3368
3369       if (is_gimple_debug (use_stmt))
3370         continue;
3371
3372       /* For now restrict this operations to single basic blocks.  In theory
3373          we would want to support sinking the multiplication in
3374          m = a*b;
3375          if ()
3376            ma = m + c;
3377          else
3378            d = m;
3379          to form a fma in the then block and sink the multiplication to the
3380          else block.  */
3381       if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3382         return false;
3383
3384       if (!is_gimple_assign (use_stmt))
3385         return false;
3386
3387       use_code = gimple_assign_rhs_code (use_stmt);
3388
3389       /* A negate on the multiplication leads to FNMA.  */
3390       if (use_code == NEGATE_EXPR)
3391         {
3392           ssa_op_iter iter;
3393           use_operand_p usep;
3394
3395           result = gimple_assign_lhs (use_stmt);
3396
3397           /* Make sure the negate statement becomes dead with this
3398              single transformation.  */
3399           if (!single_imm_use (gimple_assign_lhs (use_stmt),
3400                                &use_p, &neguse_stmt))
3401             return false;
3402
3403           /* Make sure the multiplication isn't also used on that stmt.  */
3404           FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3405             if (USE_FROM_PTR (usep) == mul_result)
3406               return false;
3407
3408           /* Re-validate.  */
3409           use_stmt = neguse_stmt;
3410           if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3411             return false;
3412           if (!is_gimple_assign (use_stmt))
3413             return false;
3414
3415           use_code = gimple_assign_rhs_code (use_stmt);
3416           negate_p = true;
3417         }
3418
3419       switch (use_code)
3420         {
3421         case MINUS_EXPR:
3422           if (gimple_assign_rhs2 (use_stmt) == result)
3423             negate_p = !negate_p;
3424           break;
3425         case PLUS_EXPR:
3426           break;
3427         default:
3428           /* FMA can only be formed from PLUS and MINUS.  */
3429           return false;
3430         }
3431
3432       /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
3433          by a MULT_EXPR that we'll visit later, we might be able to
3434          get a more profitable match with fnma.
3435          OTOH, if we don't, a negate / fma pair has likely lower latency
3436          than a mult / subtract pair.  */
3437       if (use_code == MINUS_EXPR && !negate_p
3438           && gimple_assign_rhs1 (use_stmt) == result
3439           && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing
3440           && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
3441         {
3442           tree rhs2 = gimple_assign_rhs2 (use_stmt);
3443
3444           if (TREE_CODE (rhs2) == SSA_NAME)
3445             {
3446               gimple stmt2 = SSA_NAME_DEF_STMT (rhs2);
3447               if (has_single_use (rhs2)
3448                   && is_gimple_assign (stmt2)
3449                   && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3450               return false;
3451             }
3452         }
3453
3454       /* We can't handle a * b + a * b.  */
3455       if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
3456         return false;
3457
3458       /* While it is possible to validate whether or not the exact form
3459          that we've recognized is available in the backend, the assumption
3460          is that the transformation is never a loss.  For instance, suppose
3461          the target only has the plain FMA pattern available.  Consider
3462          a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which
3463          is still two operations.  Consider -(a*b)-c -> fma(-a,b,-c): we
3464          still have 3 operations, but in the FMA form the two NEGs are
3465          independent and could be run in parallel.  */
3466     }
3467
3468   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3469     {
3470       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3471       enum tree_code use_code;
3472       tree addop, mulop1 = op1, result = mul_result;
3473       bool negate_p = false;
3474
3475       if (is_gimple_debug (use_stmt))
3476         continue;
3477
3478       use_code = gimple_assign_rhs_code (use_stmt);
3479       if (use_code == NEGATE_EXPR)
3480         {
3481           result = gimple_assign_lhs (use_stmt);
3482           single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3483           gsi_remove (&gsi, true);
3484           release_defs (use_stmt);
3485
3486           use_stmt = neguse_stmt;
3487           gsi = gsi_for_stmt (use_stmt);
3488           use_code = gimple_assign_rhs_code (use_stmt);
3489           negate_p = true;
3490         }
3491
3492       if (gimple_assign_rhs1 (use_stmt) == result)
3493         {
3494           addop = gimple_assign_rhs2 (use_stmt);
3495           /* a * b - c -> a * b + (-c)  */
3496           if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
3497             addop = force_gimple_operand_gsi (&gsi,
3498                                               build1 (NEGATE_EXPR,
3499                                                       type, addop),
3500                                               true, NULL_TREE, true,
3501                                               GSI_SAME_STMT);
3502         }
3503       else
3504         {
3505           addop = gimple_assign_rhs1 (use_stmt);
3506           /* a - b * c -> (-b) * c + a */
3507           if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
3508             negate_p = !negate_p;
3509         }
3510
3511       if (negate_p)
3512         mulop1 = force_gimple_operand_gsi (&gsi,
3513                                            build1 (NEGATE_EXPR,
3514                                                    type, mulop1),
3515                                            true, NULL_TREE, true,
3516                                            GSI_SAME_STMT);
3517
3518       fma_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt),
3519                                       FMA_EXPR, mulop1, op2, addop);
3520       gsi_replace (&gsi, fma_stmt, true);
3521       widen_mul_stats.fmas_inserted++;
3522     }
3523
3524   return true;
3525 }
3526
3527 /* Find integer multiplications where the operands are extended from
3528    smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
3529    where appropriate.  */
3530
3531 namespace {
3532
3533 const pass_data pass_data_optimize_widening_mul =
3534 {
3535   GIMPLE_PASS, /* type */
3536   "widening_mul", /* name */
3537   OPTGROUP_NONE, /* optinfo_flags */
3538   TV_NONE, /* tv_id */
3539   PROP_ssa, /* properties_required */
3540   0, /* properties_provided */
3541   0, /* properties_destroyed */
3542   0, /* todo_flags_start */
3543   TODO_update_ssa, /* todo_flags_finish */
3544 };
3545
3546 class pass_optimize_widening_mul : public gimple_opt_pass
3547 {
3548 public:
3549   pass_optimize_widening_mul (gcc::context *ctxt)
3550     : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
3551   {}
3552
3553   /* opt_pass methods: */
3554   virtual bool gate (function *)
3555     {
3556       return flag_expensive_optimizations && optimize;
3557     }
3558
3559   virtual unsigned int execute (function *);
3560
3561 }; // class pass_optimize_widening_mul
3562
3563 unsigned int
3564 pass_optimize_widening_mul::execute (function *fun)
3565 {
3566   basic_block bb;
3567   bool cfg_changed = false;
3568
3569   memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
3570
3571   FOR_EACH_BB_FN (bb, fun)
3572     {
3573       gimple_stmt_iterator gsi;
3574
3575       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
3576         {
3577           gimple stmt = gsi_stmt (gsi);
3578           enum tree_code code;
3579
3580           if (is_gimple_assign (stmt))
3581             {
3582               code = gimple_assign_rhs_code (stmt);
3583               switch (code)
3584                 {
3585                 case MULT_EXPR:
3586                   if (!convert_mult_to_widen (stmt, &gsi)
3587                       && convert_mult_to_fma (stmt,
3588                                               gimple_assign_rhs1 (stmt),
3589                                               gimple_assign_rhs2 (stmt)))
3590                     {
3591                       gsi_remove (&gsi, true);
3592                       release_defs (stmt);
3593                       continue;
3594                     }
3595                   break;
3596
3597                 case PLUS_EXPR:
3598                 case MINUS_EXPR:
3599                   convert_plusminus_to_widen (&gsi, stmt, code);
3600                   break;
3601
3602                 default:;
3603                 }
3604             }
3605           else if (is_gimple_call (stmt)
3606                    && gimple_call_lhs (stmt))
3607             {
3608               tree fndecl = gimple_call_fndecl (stmt);
3609               if (fndecl
3610                   && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
3611                 {
3612                   switch (DECL_FUNCTION_CODE (fndecl))
3613                     {
3614                       case BUILT_IN_POWF:
3615                       case BUILT_IN_POW:
3616                       case BUILT_IN_POWL:
3617                         if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
3618                             && REAL_VALUES_EQUAL
3619                                  (TREE_REAL_CST (gimple_call_arg (stmt, 1)),
3620                                   dconst2)
3621                             && convert_mult_to_fma (stmt,
3622                                                     gimple_call_arg (stmt, 0),
3623                                                     gimple_call_arg (stmt, 0)))
3624                           {
3625                             unlink_stmt_vdef (stmt);
3626                             if (gsi_remove (&gsi, true)
3627                                 && gimple_purge_dead_eh_edges (bb))
3628                               cfg_changed = true;
3629                             release_defs (stmt);
3630                             continue;
3631                           }
3632                           break;
3633
3634                       default:;
3635                     }
3636                 }
3637             }
3638           gsi_next (&gsi);
3639         }
3640     }
3641
3642   statistics_counter_event (fun, "widening multiplications inserted",
3643                             widen_mul_stats.widen_mults_inserted);
3644   statistics_counter_event (fun, "widening maccs inserted",
3645                             widen_mul_stats.maccs_inserted);
3646   statistics_counter_event (fun, "fused multiply-adds inserted",
3647                             widen_mul_stats.fmas_inserted);
3648
3649   return cfg_changed ? TODO_cleanup_cfg : 0;
3650 }
3651
3652 } // anon namespace
3653
3654 gimple_opt_pass *
3655 make_pass_optimize_widening_mul (gcc::context *ctxt)
3656 {
3657   return new pass_optimize_widening_mul (ctxt);
3658 }