gcc/tree-ssa-math-opts.c
1 /* Global, SSA-based optimizations using mathematical identities.
2 Copyright (C) 2005-2017 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* The first mini-pass in this file tries to CSE reciprocal
21 operations. These are common in sequences such as this one:
23 modulus = sqrt(x*x + y*y + z*z);
24 x = x / modulus;
25 y = y / modulus;
26 z = z / modulus;
28 that can be optimized to
30 modulus = sqrt(x*x + y*y + z*z);
31 rmodulus = 1.0 / modulus;
32 x = x * rmodulus;
33 y = y * rmodulus;
34 z = z * rmodulus;
36 We do this for loop invariant divisors, and with this pass whenever
37 we notice that a division has the same divisor multiple times.
39 Of course, like in PRE, we don't insert a division if a dominator
40 already has one. However, this cannot be done as an extension of
41 PRE for several reasons.
43 First of all, experiments found that the
44 transformation is not always useful if there are only two divisions
45 by the same divisor. This is probably because modern processors
46 can pipeline the divisions; on older, in-order processors it should
47 still be effective to optimize two divisions by the same number.
48 We make this a param, and it shall be called N in the remainder of
49 this comment.
51 Second, if trapping math is active, we have less freedom on where
52 to insert divisions: we can only do so in basic blocks that already
53 contain one. (If divisions don't trap, we can instead insert
54 divisions elsewhere, in blocks that are common dominators
55 of those that have the division.)
57 We really don't want to compute the reciprocal unless a division will
58 be found. To do this, we won't insert the division in a basic block
59 that has fewer than N divisions *post-dominating* it.
61 The algorithm constructs a subset of the dominator tree, holding the
62 blocks containing the divisions and the common dominators to them,
63 and walks it twice. The first walk is in post-order, and it annotates
64 each block with the number of divisions that post-dominate it: this
65 gives information on where divisions can be inserted profitably.
66 The second walk is in pre-order, and it inserts divisions as explained
67 above, and replaces divisions by multiplications.
69 In the best case, the cost of the pass is O(n_statements). In the
70 worst-case, the cost is due to creating the dominator tree subset,
71 with a cost of O(n_basic_blocks ^ 2); however this can only happen
72 for n_statements / n_basic_blocks statements. So, the amortized cost
73 of creating the dominator tree subset is O(n_basic_blocks) and the
74 worst-case cost of the pass is O(n_statements * n_basic_blocks).
76 More practically, the cost will be small because there are few
77 divisions, and they tend to be in the same basic block, so insert_bb
78 is called very few times.
80 If we did this using domwalk.c, an efficient implementation would have
81 to work on all the variables in a single pass, because we could not
82 work on just a subset of the dominator tree, as we do now, and the
83 cost would also be something like O(n_statements * n_basic_blocks).
84 The data structures would be more complex in order to work on all the
85 variables in a single pass. */
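/* Illustrative sketch (not part of the original sources): with
   -freciprocal-math and a target threshold of 2 from
   targetm.min_divisions_for_recip_mul, a straight-line sequence such as

     t1 = a / d;
     ...
     t2 = b / d;

   (the block of the second division post-dominating that of the first)
   is rewritten as

     reciptmp = 1.0 / d;
     t1 = a * reciptmp;
     ...
     t2 = b * reciptmp;

   with the reciprocal inserted immediately before the first division
   (see "Case 1" in insert_reciprocals below).  */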
87 #include "config.h"
88 #include "system.h"
89 #include "coretypes.h"
90 #include "backend.h"
91 #include "target.h"
92 #include "rtl.h"
93 #include "tree.h"
94 #include "gimple.h"
95 #include "predict.h"
96 #include "alloc-pool.h"
97 #include "tree-pass.h"
98 #include "ssa.h"
99 #include "optabs-tree.h"
100 #include "gimple-pretty-print.h"
101 #include "alias.h"
102 #include "fold-const.h"
103 #include "gimple-fold.h"
104 #include "gimple-iterator.h"
105 #include "gimplify.h"
106 #include "gimplify-me.h"
107 #include "stor-layout.h"
108 #include "tree-cfg.h"
109 #include "tree-dfa.h"
110 #include "tree-ssa.h"
111 #include "builtins.h"
112 #include "params.h"
113 #include "internal-fn.h"
114 #include "case-cfn-macros.h"
115 #include "optabs-libfuncs.h"
116 #include "tree-eh.h"
117 #include "targhooks.h"
119 /* This structure represents one basic block that either computes a
120 division, or is a common dominator for basic blocks that compute a
121 division. */
122 struct occurrence {
123 /* The basic block represented by this structure. */
124 basic_block bb;
126 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
127 inserted in BB. */
128 tree recip_def;
130 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
131 was inserted in BB. */
132 gimple *recip_def_stmt;
134 /* Pointer to a list of "struct occurrence"s for blocks dominated
135 by BB. */
136 struct occurrence *children;
138 /* Pointer to the next "struct occurrence" in the list of blocks
139 sharing a common dominator. */
140 struct occurrence *next;
142 /* The number of divisions that are in BB before compute_merit. The
143 number of divisions that are in BB or post-dominate it after
144 compute_merit. */
145 int num_divisions;
147 /* True if the basic block has a division, false if it is a common
148 dominator for basic blocks that do. If it is false and trapping
149 math is active, BB is not a candidate for inserting a reciprocal. */
150 bool bb_has_division;
153 static struct
155 /* Number of 1.0/X ops inserted. */
156 int rdivs_inserted;
158 /* Number of 1.0/FUNC ops inserted. */
159 int rfuncs_inserted;
160 } reciprocal_stats;
162 static struct
164 /* Number of cexpi calls inserted. */
165 int inserted;
166 } sincos_stats;
168 static struct
170 /* Number of hand-written 16-bit nop / bswaps found. */
171 int found_16bit;
173 /* Number of hand-written 32-bit nop / bswaps found. */
174 int found_32bit;
176 /* Number of hand-written 64-bit nop / bswaps found. */
177 int found_64bit;
178 } nop_stats, bswap_stats;
180 static struct
182 /* Number of widening multiplication ops inserted. */
183 int widen_mults_inserted;
185 /* Number of integer multiply-and-accumulate ops inserted. */
186 int maccs_inserted;
188 /* Number of fp fused multiply-add ops inserted. */
189 int fmas_inserted;
191 /* Number of divmod calls inserted. */
192 int divmod_calls_inserted;
193 } widen_mul_stats;
195 /* The instance of "struct occurrence" representing the highest
196 interesting block in the dominator tree. */
197 static struct occurrence *occ_head;
199 /* Allocation pool for getting instances of "struct occurrence". */
200 static object_allocator<occurrence> *occ_pool;
204 /* Allocate and return a new struct occurrence for basic block BB,
205 whose children list is headed by CHILDREN. */
206 static struct occurrence *
207 occ_new (basic_block bb, struct occurrence *children)
209 struct occurrence *occ;
211 bb->aux = occ = occ_pool->allocate ();
212 memset (occ, 0, sizeof (struct occurrence));
214 occ->bb = bb;
215 occ->children = children;
216 return occ;
220 /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
221 list of "struct occurrence"s, one per basic block, having IDOM as
222 their common dominator.
224 We try to insert NEW_OCC as deep as possible in the tree, and we also
225 insert any other block that is a common dominator for BB and one
226 block already in the tree. */
228 static void
229 insert_bb (struct occurrence *new_occ, basic_block idom,
230 struct occurrence **p_head)
232 struct occurrence *occ, **p_occ;
234 for (p_occ = p_head; (occ = *p_occ) != NULL; )
236 basic_block bb = new_occ->bb, occ_bb = occ->bb;
237 basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
238 if (dom == bb)
240 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
241 from its list. */
242 *p_occ = occ->next;
243 occ->next = new_occ->children;
244 new_occ->children = occ;
246 /* Try the next block (it may also be dominated by BB). */
249 else if (dom == occ_bb)
251 /* OCC_BB dominates BB. Tail recurse to look deeper. */
252 insert_bb (new_occ, dom, &occ->children);
253 return;
256 else if (dom != idom)
258 gcc_assert (!dom->aux);
260 /* There is a dominator between IDOM and BB, add it and make
261 two children out of NEW_OCC and OCC. First, remove OCC from
262 its list. */
263 *p_occ = occ->next;
264 new_occ->next = occ;
265 occ->next = NULL;
267 /* None of the previous blocks has DOM as a dominator: if we tail
268 recursed, we would reexamine them uselessly. Just switch BB with
269 DOM, and go on looking for blocks dominated by DOM. */
270 new_occ = occ_new (dom, new_occ);
273 else
275 /* Nothing special, go on with the next element. */
276 p_occ = &occ->next;
280 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
281 new_occ->next = *p_head;
282 *p_head = new_occ;
285 /* Register that we found a division in BB. */
287 static inline void
288 register_division_in (basic_block bb)
290 struct occurrence *occ;
292 occ = (struct occurrence *) bb->aux;
293 if (!occ)
295 occ = occ_new (bb, NULL);
296 insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
299 occ->bb_has_division = true;
300 occ->num_divisions++;
304 /* Compute the number of divisions that postdominate each block in OCC and
305 its children. */
307 static void
308 compute_merit (struct occurrence *occ)
310 struct occurrence *occ_child;
311 basic_block dom = occ->bb;
313 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
315 basic_block bb;
316 if (occ_child->children)
317 compute_merit (occ_child);
319 if (flag_exceptions)
320 bb = single_noncomplex_succ (dom);
321 else
322 bb = dom;
324 if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
325 occ->num_divisions += occ_child->num_divisions;
330 /* Return whether USE_STMT is a floating-point division by DEF. */
331 static inline bool
332 is_division_by (gimple *use_stmt, tree def)
334 return is_gimple_assign (use_stmt)
335 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
336 && gimple_assign_rhs2 (use_stmt) == def
337 /* Do not recognize x / x as valid division, as we are getting
338 confused later by replacing all immediate uses of x in such
339 a stmt. */
340 && gimple_assign_rhs1 (use_stmt) != def;
343 /* Walk the subset of the dominator tree rooted at OCC, setting the
344 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
345 the given basic block. The field may be left NULL, of course,
346 if it is not possible or profitable to do the optimization.
348 DEF_GSI is an iterator pointing at the statement defining DEF.
349 If RECIP_DEF is set, a dominator already has a computation that can
350 be used. */
352 static void
353 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
354 tree def, tree recip_def, int threshold)
356 tree type;
357 gassign *new_stmt;
358 gimple_stmt_iterator gsi;
359 struct occurrence *occ_child;
361 if (!recip_def
362 && (occ->bb_has_division || !flag_trapping_math)
363 && occ->num_divisions >= threshold)
365 /* Make a variable with the replacement and substitute it. */
366 type = TREE_TYPE (def);
367 recip_def = create_tmp_reg (type, "reciptmp");
368 new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
369 build_one_cst (type), def);
371 if (occ->bb_has_division)
373 /* Case 1: insert before an existing division. */
374 gsi = gsi_after_labels (occ->bb);
375 while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
376 gsi_next (&gsi);
378 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
380 else if (def_gsi && occ->bb == def_gsi->bb)
382 /* Case 2: insert right after the definition. Note that this will
383 never happen if the definition statement can throw, because in
384 that case the sole successor of the statement's basic block will
385 dominate all the uses as well. */
386 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
388 else
390 /* Case 3: insert in a basic block not containing defs/uses. */
391 gsi = gsi_after_labels (occ->bb);
392 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
395 reciprocal_stats.rdivs_inserted++;
397 occ->recip_def_stmt = new_stmt;
400 occ->recip_def = recip_def;
401 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
402 insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold);
406 /* Replace the division at USE_P with a multiplication by the reciprocal, if
407 possible. */
409 static inline void
410 replace_reciprocal (use_operand_p use_p)
412 gimple *use_stmt = USE_STMT (use_p);
413 basic_block bb = gimple_bb (use_stmt);
414 struct occurrence *occ = (struct occurrence *) bb->aux;
416 if (optimize_bb_for_speed_p (bb)
417 && occ->recip_def && use_stmt != occ->recip_def_stmt)
419 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
420 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
421 SET_USE (use_p, occ->recip_def);
422 fold_stmt_inplace (&gsi);
423 update_stmt (use_stmt);
428 /* Free OCC and return one more "struct occurrence" to be freed. */
430 static struct occurrence *
431 free_bb (struct occurrence *occ)
433 struct occurrence *child, *next;
435 /* First get the two pointers hanging off OCC. */
436 next = occ->next;
437 child = occ->children;
438 occ->bb->aux = NULL;
439 occ_pool->remove (occ);
441 /* Now ensure that we don't recurse unless it is necessary. */
442 if (!child)
443 return next;
444 else
446 while (next)
447 next = free_bb (next);
449 return child;
454 /* Look for floating-point divisions among DEF's uses, and try to
455 replace them by multiplications with the reciprocal. Add
456 as many statements computing the reciprocal as needed.
458 DEF must be a GIMPLE register of a floating-point type. */
460 static void
461 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
463 use_operand_p use_p;
464 imm_use_iterator use_iter;
465 struct occurrence *occ;
466 int count = 0, threshold;
468 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));
470 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
472 gimple *use_stmt = USE_STMT (use_p);
473 if (is_division_by (use_stmt, def))
475 register_division_in (gimple_bb (use_stmt));
476 count++;
480 /* Do the expensive part only if we can hope to optimize something. */
481 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
482 if (count >= threshold)
484 gimple *use_stmt;
485 for (occ = occ_head; occ; occ = occ->next)
487 compute_merit (occ);
488 insert_reciprocals (def_gsi, occ, def, NULL, threshold);
491 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
493 if (is_division_by (use_stmt, def))
495 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
496 replace_reciprocal (use_p);
501 for (occ = occ_head; occ; )
502 occ = free_bb (occ);
504 occ_head = NULL;
507 /* Return an internal function that implements the reciprocal of CALL,
508 or IFN_LAST if there is no such function that the target supports. */
510 internal_fn
511 internal_fn_reciprocal (gcall *call)
513 internal_fn ifn;
515 switch (gimple_call_combined_fn (call))
517 CASE_CFN_SQRT:
518 CASE_CFN_SQRT_FN:
519 ifn = IFN_RSQRT;
520 break;
522 default:
523 return IFN_LAST;
526 tree_pair types = direct_internal_fn_types (ifn, call);
527 if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
528 return IFN_LAST;
530 return ifn;
533 /* Go through all the floating-point SSA_NAMEs, and call
534 execute_cse_reciprocals_1 on each of them. */
535 namespace {
537 const pass_data pass_data_cse_reciprocals =
539 GIMPLE_PASS, /* type */
540 "recip", /* name */
541 OPTGROUP_NONE, /* optinfo_flags */
542 TV_NONE, /* tv_id */
543 PROP_ssa, /* properties_required */
544 0, /* properties_provided */
545 0, /* properties_destroyed */
546 0, /* todo_flags_start */
547 TODO_update_ssa, /* todo_flags_finish */
550 class pass_cse_reciprocals : public gimple_opt_pass
552 public:
553 pass_cse_reciprocals (gcc::context *ctxt)
554 : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
557 /* opt_pass methods: */
558 virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
559 virtual unsigned int execute (function *);
561 }; // class pass_cse_reciprocals
563 unsigned int
564 pass_cse_reciprocals::execute (function *fun)
566 basic_block bb;
567 tree arg;
569 occ_pool = new object_allocator<occurrence> ("dominators for recip");
571 memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
572 calculate_dominance_info (CDI_DOMINATORS);
573 calculate_dominance_info (CDI_POST_DOMINATORS);
575 if (flag_checking)
576 FOR_EACH_BB_FN (bb, fun)
577 gcc_assert (!bb->aux);
579 for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
580 if (FLOAT_TYPE_P (TREE_TYPE (arg))
581 && is_gimple_reg (arg))
583 tree name = ssa_default_def (fun, arg);
584 if (name)
585 execute_cse_reciprocals_1 (NULL, name);
588 FOR_EACH_BB_FN (bb, fun)
590 tree def;
592 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
593 gsi_next (&gsi))
595 gphi *phi = gsi.phi ();
596 def = PHI_RESULT (phi);
597 if (! virtual_operand_p (def)
598 && FLOAT_TYPE_P (TREE_TYPE (def)))
599 execute_cse_reciprocals_1 (NULL, def);
602 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
603 gsi_next (&gsi))
605 gimple *stmt = gsi_stmt (gsi);
607 if (gimple_has_lhs (stmt)
608 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
609 && FLOAT_TYPE_P (TREE_TYPE (def))
610 && TREE_CODE (def) == SSA_NAME)
611 execute_cse_reciprocals_1 (&gsi, def);
614 if (optimize_bb_for_size_p (bb))
615 continue;
617 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
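/* For illustration (an assumed example, not from the original sources):
   for y = a / sqrt (x), when the target supports IFN_RSQRT or provides a
   reciprocal builtin via targetm.builtin_reciprocal, the call is replaced
   so that effectively t = rsqrt (x); y = a * t; -- provided every use of
   the original sqrt result is such a division.  */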
618 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
619 gsi_next (&gsi))
621 gimple *stmt = gsi_stmt (gsi);
623 if (is_gimple_assign (stmt)
624 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
626 tree arg1 = gimple_assign_rhs2 (stmt);
627 gimple *stmt1;
629 if (TREE_CODE (arg1) != SSA_NAME)
630 continue;
632 stmt1 = SSA_NAME_DEF_STMT (arg1);
634 if (is_gimple_call (stmt1)
635 && gimple_call_lhs (stmt1))
637 bool fail;
638 imm_use_iterator ui;
639 use_operand_p use_p;
640 tree fndecl = NULL_TREE;
642 gcall *call = as_a <gcall *> (stmt1);
643 internal_fn ifn = internal_fn_reciprocal (call);
644 if (ifn == IFN_LAST)
646 fndecl = gimple_call_fndecl (call);
647 if (!fndecl
648 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_MD)
649 continue;
650 fndecl = targetm.builtin_reciprocal (fndecl);
651 if (!fndecl)
652 continue;
655 /* Check that all uses of the SSA name are divisions,
656 otherwise replacing the defining statement will do
657 the wrong thing. */
658 fail = false;
659 FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
661 gimple *stmt2 = USE_STMT (use_p);
662 if (is_gimple_debug (stmt2))
663 continue;
664 if (!is_gimple_assign (stmt2)
665 || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
666 || gimple_assign_rhs1 (stmt2) == arg1
667 || gimple_assign_rhs2 (stmt2) != arg1)
669 fail = true;
670 break;
673 if (fail)
674 continue;
676 gimple_replace_ssa_lhs (call, arg1);
677 if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
679 auto_vec<tree, 4> args;
680 for (unsigned int i = 0;
681 i < gimple_call_num_args (call); i++)
682 args.safe_push (gimple_call_arg (call, i));
683 gcall *stmt2;
684 if (ifn == IFN_LAST)
685 stmt2 = gimple_build_call_vec (fndecl, args);
686 else
687 stmt2 = gimple_build_call_internal_vec (ifn, args);
688 gimple_call_set_lhs (stmt2, arg1);
689 if (gimple_vdef (call))
691 gimple_set_vdef (stmt2, gimple_vdef (call));
692 SSA_NAME_DEF_STMT (gimple_vdef (stmt2)) = stmt2;
694 gimple_call_set_nothrow (stmt2,
695 gimple_call_nothrow_p (call));
696 gimple_set_vuse (stmt2, gimple_vuse (call));
697 gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
698 gsi_replace (&gsi2, stmt2, true);
700 else
702 if (ifn == IFN_LAST)
703 gimple_call_set_fndecl (call, fndecl);
704 else
705 gimple_call_set_internal_fn (call, ifn);
706 update_stmt (call);
708 reciprocal_stats.rfuncs_inserted++;
710 FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
712 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
713 gimple_assign_set_rhs_code (stmt, MULT_EXPR);
714 fold_stmt_inplace (&gsi);
715 update_stmt (stmt);
722 statistics_counter_event (fun, "reciprocal divs inserted",
723 reciprocal_stats.rdivs_inserted);
724 statistics_counter_event (fun, "reciprocal functions inserted",
725 reciprocal_stats.rfuncs_inserted);
727 free_dominance_info (CDI_DOMINATORS);
728 free_dominance_info (CDI_POST_DOMINATORS);
729 delete occ_pool;
730 return 0;
733 } // anon namespace
735 gimple_opt_pass *
736 make_pass_cse_reciprocals (gcc::context *ctxt)
738 return new pass_cse_reciprocals (ctxt);
741 /* Records an occurrence at statement USE_STMT in the vector of statements
742 STMTS if it is dominated by *TOP_BB, dominates it, or *TOP_BB is not
743 yet initialized. Returns true if the occurrence was pushed on
744 the vector. Adjusts *TOP_BB to be the basic block dominating all
745 statements in the vector. */
747 static bool
748 maybe_record_sincos (vec<gimple *> *stmts,
749 basic_block *top_bb, gimple *use_stmt)
751 basic_block use_bb = gimple_bb (use_stmt);
752 if (*top_bb
753 && (*top_bb == use_bb
754 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
755 stmts->safe_push (use_stmt);
756 else if (!*top_bb
757 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
759 stmts->safe_push (use_stmt);
760 *top_bb = use_bb;
762 else
763 return false;
765 return true;
768 /* Look for sin, cos and cexpi calls with the same argument NAME and
769 create a single call to cexpi CSEing the result in this case.
770 We first walk over all immediate uses of the argument collecting
771 statements that we can CSE in a vector and in a second pass replace
772 the statement rhs with a REALPART or IMAGPART expression on the
773 result of the cexpi call we insert before the use statement that
774 dominates all other candidates. */
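/* A minimal sketch of the rewrite (assuming a cexpi implementation is
   available for the type):

     s = sin (x);
     c = cos (x);

   becomes

     sincostmp = cexpi (x);
     s = IMAGPART_EXPR <sincostmp>;
     c = REALPART_EXPR <sincostmp>;

   with the cexpi call placed in TOP_BB, no earlier than the definition
   of x.  */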
776 static bool
777 execute_cse_sincos_1 (tree name)
779 gimple_stmt_iterator gsi;
780 imm_use_iterator use_iter;
781 tree fndecl, res, type;
782 gimple *def_stmt, *use_stmt, *stmt;
783 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
784 auto_vec<gimple *> stmts;
785 basic_block top_bb = NULL;
786 int i;
787 bool cfg_changed = false;
789 type = TREE_TYPE (name);
790 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
792 if (gimple_code (use_stmt) != GIMPLE_CALL
793 || !gimple_call_lhs (use_stmt))
794 continue;
796 switch (gimple_call_combined_fn (use_stmt))
798 CASE_CFN_COS:
799 seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
800 break;
802 CASE_CFN_SIN:
803 seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
804 break;
806 CASE_CFN_CEXPI:
807 seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
808 break;
810 default:;
814 if (seen_cos + seen_sin + seen_cexpi <= 1)
815 return false;
817 /* Simply insert cexpi at the beginning of top_bb but not earlier than
818 the name def statement. */
819 fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
820 if (!fndecl)
821 return false;
822 stmt = gimple_build_call (fndecl, 1, name);
823 res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
824 gimple_call_set_lhs (stmt, res);
826 def_stmt = SSA_NAME_DEF_STMT (name);
827 if (!SSA_NAME_IS_DEFAULT_DEF (name)
828 && gimple_code (def_stmt) != GIMPLE_PHI
829 && gimple_bb (def_stmt) == top_bb)
831 gsi = gsi_for_stmt (def_stmt);
832 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
834 else
836 gsi = gsi_after_labels (top_bb);
837 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
839 sincos_stats.inserted++;
841 /* And adjust the recorded old call sites. */
842 for (i = 0; stmts.iterate (i, &use_stmt); ++i)
844 tree rhs = NULL;
846 switch (gimple_call_combined_fn (use_stmt))
848 CASE_CFN_COS:
849 rhs = fold_build1 (REALPART_EXPR, type, res);
850 break;
852 CASE_CFN_SIN:
853 rhs = fold_build1 (IMAGPART_EXPR, type, res);
854 break;
856 CASE_CFN_CEXPI:
857 rhs = res;
858 break;
860 default:;
861 gcc_unreachable ();
864 /* Replace call with a copy. */
865 stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
867 gsi = gsi_for_stmt (use_stmt);
868 gsi_replace (&gsi, stmt, true);
869 if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
870 cfg_changed = true;
873 return cfg_changed;
876 /* To evaluate powi(x,n), the floating point value x raised to the
877 constant integer exponent n, we use a hybrid algorithm that
878 combines the "window method" with look-up tables. For an
879 introduction to exponentiation algorithms and "addition chains",
880 see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
881 "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
882 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
883 Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
885 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
886 multiplications to inline before calling the system library's pow
887 function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
888 so this default never requires calling pow, powf or powl. */
890 #ifndef POWI_MAX_MULTS
891 #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
892 #endif
894 /* The size of the "optimal power tree" lookup table. All
895 exponents less than this value are simply looked up in the
896 powi_table below. This threshold is also used to size the
897 cache of pseudo registers that hold intermediate results. */
898 #define POWI_TABLE_SIZE 256
900 /* The size, in bits of the window, used in the "window method"
901 exponentiation algorithm. This is equivalent to a radix of
902 (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
903 #define POWI_WINDOW_SIZE 3
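/* Sketch of the identity behind the window method (illustration only):
   for an exponent VAL too large for the table and with its low bit set,

     x**VAL == x**(VAL & 7) * (x**(VAL >> 3)) ** 8

   so the low POWI_WINDOW_SIZE bits are handled by a table lookup and the
   rest by repeated squaring; an even VAL is simply reduced to squaring
   x**(VAL >> 1).  */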
905 /* The following table is an efficient representation of an
906 "optimal power tree". For each value, i, the corresponding
907 value, j, in the table states that an optimal evaluation
908 sequence for calculating pow(x,i) can be found by evaluating
909 pow(x,j)*pow(x,i-j). An optimal power tree for the first
910 100 integers is given in Knuth's "Seminumerical algorithms". */
912 static const unsigned char powi_table[POWI_TABLE_SIZE] =
914 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
915 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
916 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
917 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
918 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
919 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
920 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
921 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
922 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
923 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
924 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
925 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
926 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
927 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
928 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
929 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
930 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
931 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
932 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
933 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
934 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
935 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
936 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
937 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
938 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
939 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
940 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
941 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
942 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
943 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
944 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
945 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
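/* Worked illustration (not in the original sources): powi_table[13] == 10,
   powi_table[10] == 5, powi_table[5] == 3 and powi_table[3] == 2, giving
   the addition chain

     x**2  = x * x
     x**3  = x**2 * x
     x**5  = x**3 * x**2
     x**10 = x**5 * x**5
     x**13 = x**10 * x**3

   i.e. five multiplications, which matches powi_cost (13).  */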
949 /* Return the number of multiplications required to calculate
950 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
951 subroutine of powi_cost. CACHE is an array indicating
952 which exponents have already been calculated. */
954 static int
955 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
957 /* If we've already calculated this exponent, then this evaluation
958 doesn't require any additional multiplications. */
959 if (cache[n])
960 return 0;
962 cache[n] = true;
963 return powi_lookup_cost (n - powi_table[n], cache)
964 + powi_lookup_cost (powi_table[n], cache) + 1;
967 /* Return the number of multiplications required to calculate
968 powi(x,n) for an arbitrary x, given the exponent N. This
969 function needs to be kept in sync with powi_as_mults below. */
971 static int
972 powi_cost (HOST_WIDE_INT n)
974 bool cache[POWI_TABLE_SIZE];
975 unsigned HOST_WIDE_INT digit;
976 unsigned HOST_WIDE_INT val;
977 int result;
979 if (n == 0)
980 return 0;
982 /* Ignore the reciprocal when calculating the cost. */
983 val = (n < 0) ? -n : n;
985 /* Initialize the exponent cache. */
986 memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
987 cache[1] = true;
989 result = 0;
991 while (val >= POWI_TABLE_SIZE)
993 if (val & 1)
995 digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
996 result += powi_lookup_cost (digit, cache)
997 + POWI_WINDOW_SIZE + 1;
998 val >>= POWI_WINDOW_SIZE;
1000 else
1002 val >>= 1;
1003 result++;
1007 return result + powi_lookup_cost (val, cache);
1010 /* Recursive subroutine of powi_as_mults. This function takes the
1011 array, CACHE, of already calculated exponents and an exponent N and
1012 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1014 static tree
1015 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1016 HOST_WIDE_INT n, tree *cache)
1018 tree op0, op1, ssa_target;
1019 unsigned HOST_WIDE_INT digit;
1020 gassign *mult_stmt;
1022 if (n < POWI_TABLE_SIZE && cache[n])
1023 return cache[n];
1025 ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1027 if (n < POWI_TABLE_SIZE)
1029 cache[n] = ssa_target;
1030 op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1031 op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1033 else if (n & 1)
1035 digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1036 op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1037 op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1039 else
1041 op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1042 op1 = op0;
1045 mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1046 gimple_set_location (mult_stmt, loc);
1047 gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1049 return ssa_target;
1052 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1053 This function needs to be kept in sync with powi_cost above. */
1055 static tree
1056 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1057 tree arg0, HOST_WIDE_INT n)
1059 tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1060 gassign *div_stmt;
1061 tree target;
1063 if (n == 0)
1064 return build_real (type, dconst1);
1066 memset (cache, 0, sizeof (cache));
1067 cache[1] = arg0;
1069 result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
1070 if (n >= 0)
1071 return result;
1073 /* If the original exponent was negative, reciprocate the result. */
1074 target = make_temp_ssa_name (type, NULL, "powmult");
1075 div_stmt = gimple_build_assign (target, RDIV_EXPR,
1076 build_real (type, dconst1), result);
1077 gimple_set_location (div_stmt, loc);
1078 gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1080 return target;
1083 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1084 location info LOC. If the arguments are appropriate, create an
1085 equivalent sequence of statements prior to GSI using an optimal
1086 number of multiplications, and return an expression holding the
1087 result. */
1089 static tree
1090 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1091 tree arg0, HOST_WIDE_INT n)
1093 /* Avoid largest negative number. */
1094 if (n != -n
1095 && ((n >= -1 && n <= 2)
1096 || (optimize_function_for_speed_p (cfun)
1097 && powi_cost (n) <= POWI_MAX_MULTS)))
1098 return powi_as_mults (gsi, loc, arg0, n);
1100 return NULL_TREE;
1103 /* Build a gimple call statement that calls FN with argument ARG.
1104 Set the lhs of the call statement to a fresh SSA name. Insert the
1105 statement prior to GSI's current position, and return the fresh
1106 SSA name. */
1108 static tree
1109 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1110 tree fn, tree arg)
1112 gcall *call_stmt;
1113 tree ssa_target;
1115 call_stmt = gimple_build_call (fn, 1, arg);
1116 ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1117 gimple_set_lhs (call_stmt, ssa_target);
1118 gimple_set_location (call_stmt, loc);
1119 gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1121 return ssa_target;
1124 /* Build a gimple binary operation with the given CODE and arguments
1125 ARG0, ARG1, assigning the result to a new SSA name for variable
1126 TARGET. Insert the statement prior to GSI's current position, and
1127 return the fresh SSA name. */
1129 static tree
1130 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1131 const char *name, enum tree_code code,
1132 tree arg0, tree arg1)
1134 tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1135 gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1136 gimple_set_location (stmt, loc);
1137 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1138 return result;
1141 /* Build a gimple reference operation with the given CODE and argument
1142 ARG, assigning the result to a new SSA name of TYPE with NAME.
1143 Insert the statement prior to GSI's current position, and return
1144 the fresh SSA name. */
1146 static inline tree
1147 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1148 const char *name, enum tree_code code, tree arg0)
1150 tree result = make_temp_ssa_name (type, NULL, name);
1151 gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1152 gimple_set_location (stmt, loc);
1153 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1154 return result;
1157 /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1158 prior to GSI's current position, and return the fresh SSA name. */
1160 static tree
1161 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1162 tree type, tree val)
1164 tree result = make_ssa_name (type);
1165 gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1166 gimple_set_location (stmt, loc);
1167 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1168 return result;
1171 struct pow_synth_sqrt_info
1173 bool *factors;
1174 unsigned int deepest;
1175 unsigned int num_mults;
1178 /* Return true iff the real value C can be represented as a
1179 sum of powers of 0.5 up to N. That is:
1180 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1181 Record in INFO the various parameters of the synthesis algorithm such
1182 as the factors a[i], the maximum 0.5 power and the number of
1183 multiplications that will be required. */
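/* Worked illustration (not in the original sources): for C == 0.625 and
   N >= 3,

     0.625 == 0.5**1 + 0.5**3

   so factors[0] and factors[2] are set, DEEPEST becomes 3 and NUM_MULTS
   is 1, corresponding to sqrt (x) * sqrt (sqrt (sqrt (x))).  */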
1185 bool
1186 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1187 struct pow_synth_sqrt_info *info)
1189 REAL_VALUE_TYPE factor = dconsthalf;
1190 REAL_VALUE_TYPE remainder = c;
1192 info->deepest = 0;
1193 info->num_mults = 0;
1194 memset (info->factors, 0, n * sizeof (bool));
1196 for (unsigned i = 0; i < n; i++)
1198 REAL_VALUE_TYPE res;
1200 /* If something inexact happened bail out now. */
1201 if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1202 return false;
1204 /* We have hit zero. The number is representable as a sum
1205 of powers of 0.5. */
1206 if (real_equal (&res, &dconst0))
1208 info->factors[i] = true;
1209 info->deepest = i + 1;
1210 return true;
1212 else if (!REAL_VALUE_NEGATIVE (res))
1214 remainder = res;
1215 info->factors[i] = true;
1216 info->num_mults++;
1218 else
1219 info->factors[i] = false;
1221 real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1223 return false;
1226 /* Return the tree corresponding to FN being applied
1227 to ARG N times at GSI and LOC.
1228 Look up previous results from CACHE if need be.
1229 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1231 static tree
1232 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1233 tree fn, location_t loc, tree *cache)
1235 tree res = cache[n];
1236 if (!res)
1238 tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1239 res = build_and_insert_call (gsi, loc, fn, prev);
1240 cache[n] = res;
1243 return res;
1246 /* Print to STREAM the repeated application of function FNAME to ARG
1247 N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1248 "foo (foo (x))". */
1250 static void
1251 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1252 unsigned int n)
1254 if (n == 0)
1255 fprintf (stream, "%s", arg);
1256 else
1258 fprintf (stream, "%s (", fname);
1259 print_nested_fn (stream, fname, arg, n - 1);
1260 fprintf (stream, ")");
1264 /* Print to STREAM the fractional sequence of sqrt chains
1265 applied to ARG, described by INFO. Used for the dump file. */
1267 static void
1268 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1269 struct pow_synth_sqrt_info *info)
1271 for (unsigned int i = 0; i < info->deepest; i++)
1273 bool is_set = info->factors[i];
1274 if (is_set)
1276 print_nested_fn (stream, "sqrt", arg, i + 1);
1277 if (i != info->deepest - 1)
1278 fprintf (stream, " * ");
1283 /* Print to STREAM a representation of raising ARG to an integer
1284 power N. Used for the dump file. */
1286 static void
1287 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1289 if (n > 1)
1290 fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1291 else if (n == 1)
1292 fprintf (stream, "%s", arg);
1295 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1296 square roots. Place at GSI and LOC. Limit the maximum depth
1297 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1298 result of the expanded sequence or NULL_TREE if the expansion failed.
1300 This routine assumes that ARG1 is a real number with a fractional part
1301 (the integer exponent case will have been handled earlier in
1302 gimple_expand_builtin_pow).
1304 For ARG1 > 0.0:
1305 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1306 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1307 FRAC_PART == ARG1 - WHOLE_PART:
1308 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1309 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1310 if it can be expressed as such, that is if FRAC_PART satisfies:
1311 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1312 where integer a[i] is either 0 or 1.
1314 Example:
1315 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1316 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1318 For ARG1 < 0.0 there are two approaches:
1319 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1320 is calculated as above.
1322 Example:
1323 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1324 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1326 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1327 FRAC_PART := ARG1 - WHOLE_PART
1328 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1329 Example:
1330 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1331 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1333 For ARG1 < 0.0 we choose between (A) and (B) depending on
1334 how many multiplications we'd have to do.
1335 So, for the example in (B): POW (x, -5.875), if we were to
1336 follow algorithm (A) we would produce:
1337 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1338 which contains more multiplications than approach (B).
1340 Hopefully, this approach will eliminate potentially expensive POW library
1341 calls when unsafe floating point math is enabled and allow the compiler to
1342 further optimise the multiplies, square roots and divides produced by this
1343 function. */
1345 static tree
1346 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1347 tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1349 tree type = TREE_TYPE (arg0);
1350 machine_mode mode = TYPE_MODE (type);
1351 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1352 bool one_over = true;
1354 if (!sqrtfn)
1355 return NULL_TREE;
1357 if (TREE_CODE (arg1) != REAL_CST)
1358 return NULL_TREE;
1360 REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1362 gcc_assert (max_depth > 0);
1363 tree *cache = XALLOCAVEC (tree, max_depth + 1);
1365 struct pow_synth_sqrt_info synth_info;
1366 synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1367 synth_info.deepest = 0;
1368 synth_info.num_mults = 0;
1370 bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1371 REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1373 /* The whole and fractional parts of exp. */
1374 REAL_VALUE_TYPE whole_part;
1375 REAL_VALUE_TYPE frac_part;
1377 real_floor (&whole_part, mode, &exp);
1378 real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1381 REAL_VALUE_TYPE ceil_whole = dconst0;
1382 REAL_VALUE_TYPE ceil_fract = dconst0;
1384 if (neg_exp)
1386 real_ceil (&ceil_whole, mode, &exp);
1387 real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1390 if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1391 return NULL_TREE;
1393 /* Check whether it's more profitable to not use 1.0 / ... */
1394 if (neg_exp)
1396 struct pow_synth_sqrt_info alt_synth_info;
1397 alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1398 alt_synth_info.deepest = 0;
1399 alt_synth_info.num_mults = 0;
1401 if (representable_as_half_series_p (ceil_fract, max_depth,
1402 &alt_synth_info)
1403 && alt_synth_info.deepest <= synth_info.deepest
1404 && alt_synth_info.num_mults < synth_info.num_mults)
1406 whole_part = ceil_whole;
1407 frac_part = ceil_fract;
1408 synth_info.deepest = alt_synth_info.deepest;
1409 synth_info.num_mults = alt_synth_info.num_mults;
1410 memcpy (synth_info.factors, alt_synth_info.factors,
1411 (max_depth + 1) * sizeof (bool));
1412 one_over = false;
1416 HOST_WIDE_INT n = real_to_integer (&whole_part);
1417 REAL_VALUE_TYPE cint;
1418 real_from_integer (&cint, VOIDmode, n, SIGNED);
1420 if (!real_identical (&whole_part, &cint))
1421 return NULL_TREE;
1423 if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1424 return NULL_TREE;
1426 memset (cache, 0, (max_depth + 1) * sizeof (tree));
1428 tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1430 /* Calculate the integer part of the exponent. */
1431 if (n > 1)
1433 integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1434 if (!integer_res)
1435 return NULL_TREE;
1438 if (dump_file)
1440 char string[64];
1442 real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1443 fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1445 if (neg_exp)
1447 if (one_over)
1449 fprintf (dump_file, "1.0 / (");
1450 dump_integer_part (dump_file, "x", n);
1451 if (n > 0)
1452 fprintf (dump_file, " * ");
1453 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1454 fprintf (dump_file, ")");
1456 else
1458 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1459 fprintf (dump_file, " / (");
1460 dump_integer_part (dump_file, "x", n);
1461 fprintf (dump_file, ")");
1464 else
1466 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1467 if (n > 0)
1468 fprintf (dump_file, " * ");
1469 dump_integer_part (dump_file, "x", n);
1472 fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1476 tree fract_res = NULL_TREE;
1477 cache[0] = arg0;
1479 /* Calculate the fractional part of the exponent. */
1480 for (unsigned i = 0; i < synth_info.deepest; i++)
1482 if (synth_info.factors[i])
1484 tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1486 if (!fract_res)
1487 fract_res = sqrt_chain;
1489 else
1490 fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1491 fract_res, sqrt_chain);
1495 tree res = NULL_TREE;
1497 if (neg_exp)
1499 if (one_over)
1501 if (n > 0)
1502 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1503 fract_res, integer_res);
1504 else
1505 res = fract_res;
1507 res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1508 build_real (type, dconst1), res);
1510 else
1512 res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1513 fract_res, integer_res);
1516 else
1517 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1518 fract_res, integer_res);
1519 return res;
1522 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1523 with location info LOC. If possible, create an equivalent and
1524 less expensive sequence of statements prior to GSI, and return an
1525 expression holding the result. */
1527 static tree
1528 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
1529 tree arg0, tree arg1)
1531 REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
1532 REAL_VALUE_TYPE c2, dconst3;
1533 HOST_WIDE_INT n;
1534 tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
1535 machine_mode mode;
1536 bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
1537 bool hw_sqrt_exists, c_is_int, c2_is_int;
1539 dconst1_4 = dconst1;
1540 SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
1542 /* If the exponent isn't a constant, there's nothing of interest
1543 to be done. */
1544 if (TREE_CODE (arg1) != REAL_CST)
1545 return NULL_TREE;
1547 /* Don't perform the operation if flag_signaling_nans is on
1548 and the operand is a signaling NaN. */
1549 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
1550 && ((TREE_CODE (arg0) == REAL_CST
1551 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
1552 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
1553 return NULL_TREE;
1555 /* If the exponent is equivalent to an integer, expand to an optimal
1556 multiplication sequence when profitable. */
1557 c = TREE_REAL_CST (arg1);
1558 n = real_to_integer (&c);
1559 real_from_integer (&cint, VOIDmode, n, SIGNED);
1560 c_is_int = real_identical (&c, &cint);
1562 if (c_is_int
1563 && ((n >= -1 && n <= 2)
1564 || (flag_unsafe_math_optimizations
1565 && speed_p
1566 && powi_cost (n) <= POWI_MAX_MULTS)))
1567 return gimple_expand_builtin_powi (gsi, loc, arg0, n);
1569 /* Attempt various optimizations using sqrt and cbrt. */
1570 type = TREE_TYPE (arg0);
1571 mode = TYPE_MODE (type);
1572 sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1574 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
1575 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
1576 sqrt(-0) = -0. */
1577 if (sqrtfn
1578 && real_equal (&c, &dconsthalf)
1579 && !HONOR_SIGNED_ZEROS (mode))
1580 return build_and_insert_call (gsi, loc, sqrtfn, arg0);
1582 hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
1584 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
1585 optimizations since 1./3. is not exactly representable. If x
1586 is negative and finite, the correct value of pow(x,1./3.) is
1587 a NaN with the "invalid" exception raised, because the value
1588 of 1./3. actually has an even denominator. The correct value
1589 of cbrt(x) is a negative real value. */
1590 cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
1591 dconst1_3 = real_value_truncate (mode, dconst_third ());
1593 if (flag_unsafe_math_optimizations
1594 && cbrtfn
1595 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1596 && real_equal (&c, &dconst1_3))
1597 return build_and_insert_call (gsi, loc, cbrtfn, arg0);
1599 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
1600 if we don't have a hardware sqrt insn. */
1601 dconst1_6 = dconst1_3;
1602 SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
1604 if (flag_unsafe_math_optimizations
1605 && sqrtfn
1606 && cbrtfn
1607 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1608 && speed_p
1609 && hw_sqrt_exists
1610 && real_equal (&c, &dconst1_6))
1612 /* sqrt(x) */
1613 sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
1615 /* cbrt(sqrt(x)) */
1616 return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
1620 /* Attempt to expand the POW as a product of square root chains.
1621 Expand the 0.25 case even when optimising for size. */
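/* E.g. pow (x, 0.25) becomes sqrt (sqrt (x)) (illustration only); with a
   hardware sqrt this is cheap enough to be worthwhile even when
   optimizing for size.  */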
1622 if (flag_unsafe_math_optimizations
1623 && sqrtfn
1624 && hw_sqrt_exists
1625 && (speed_p || real_equal (&c, &dconst1_4))
1626 && !HONOR_SIGNED_ZEROS (mode))
1628 unsigned int max_depth = speed_p
1629 ? PARAM_VALUE (PARAM_MAX_POW_SQRT_DEPTH)
1630 : 2;
1632 tree expand_with_sqrts
1633 = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
1635 if (expand_with_sqrts)
1636 return expand_with_sqrts;
1639 real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
1640 n = real_to_integer (&c2);
1641 real_from_integer (&cint, VOIDmode, n, SIGNED);
1642 c2_is_int = real_identical (&c2, &cint);
1644 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
1646 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
1647 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
1649 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
1650 different from pow(x, 1./3.) due to rounding and behavior with
1651 negative x, we need to constrain this transformation to unsafe
1652 math and positive x or finite math. */
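/* Illustrative example (not in the original sources): for
   pow (x, 5.0/3.0), n becomes 5 and the call is synthesized as
   powi (x, 5/3) * powi (cbrt (x), 5%3), i.e. x * cbrt (x) * cbrt (x).  */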
1653 real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
1654 real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
1655 real_round (&c2, mode, &c2);
1656 n = real_to_integer (&c2);
1657 real_from_integer (&cint, VOIDmode, n, SIGNED);
1658 real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
1659 real_convert (&c2, mode, &c2);
1661 if (flag_unsafe_math_optimizations
1662 && cbrtfn
1663 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1664 && real_identical (&c2, &c)
1665 && !c2_is_int
1666 && optimize_function_for_speed_p (cfun)
1667 && powi_cost (n / 3) <= POWI_MAX_MULTS)
1669 tree powi_x_ndiv3 = NULL_TREE;
1671 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
1672 possible or profitable, give up. Skip the degenerate case when
1673 abs(n) < 3, where the result is always 1. */
1674 if (absu_hwi (n) >= 3)
1676 powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
1677 abs_hwi (n / 3));
1678 if (!powi_x_ndiv3)
1679 return NULL_TREE;
1682 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
1683 as that creates an unnecessary variable. Instead, just produce
1684 either cbrt(x) or cbrt(x) * cbrt(x). */
1685 cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
1687 if (absu_hwi (n) % 3 == 1)
1688 powi_cbrt_x = cbrt_x;
1689 else
1690 powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1691 cbrt_x, cbrt_x);
1693 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
1694 if (absu_hwi (n) < 3)
1695 result = powi_cbrt_x;
1696 else
1697 result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1698 powi_x_ndiv3, powi_cbrt_x);
1700 /* If n is negative, reciprocate the result. */
1701 if (n < 0)
1702 result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1703 build_real (type, dconst1), result);
1705 return result;
1708 /* No optimizations succeeded. */
1709 return NULL_TREE;
1712 /* ARG is the argument to a cabs builtin call in GSI with location info
1713 LOC. Create a sequence of statements prior to GSI that calculates
1714 sqrt(R*R + I*I), where R and I are the real and imaginary components
1715 of ARG, respectively. Return an expression holding the result. */
1717 static tree
1718 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
1720 tree real_part, imag_part, addend1, addend2, sum, result;
1721 tree type = TREE_TYPE (TREE_TYPE (arg));
1722 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1723 machine_mode mode = TYPE_MODE (type);
1725 if (!flag_unsafe_math_optimizations
1726 || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
1727 || !sqrtfn
1728 || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
1729 return NULL_TREE;
1731 real_part = build_and_insert_ref (gsi, loc, type, "cabs",
1732 REALPART_EXPR, arg);
1733 addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1734 real_part, real_part);
1735 imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
1736 IMAGPART_EXPR, arg);
1737 addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1738 imag_part, imag_part);
1739 sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
1740 result = build_and_insert_call (gsi, loc, sqrtfn, sum);
1742 return result;
1745 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
1746 on the SSA_NAME argument of each of them. Also expand powi(x,n) into
1747 an optimal number of multiplies, when n is a constant. */
1749 namespace {
1751 const pass_data pass_data_cse_sincos =
1753 GIMPLE_PASS, /* type */
1754 "sincos", /* name */
1755 OPTGROUP_NONE, /* optinfo_flags */
1756 TV_NONE, /* tv_id */
1757 PROP_ssa, /* properties_required */
1758 PROP_gimple_opt_math, /* properties_provided */
1759 0, /* properties_destroyed */
1760 0, /* todo_flags_start */
1761 TODO_update_ssa, /* todo_flags_finish */
1764 class pass_cse_sincos : public gimple_opt_pass
1766 public:
1767 pass_cse_sincos (gcc::context *ctxt)
1768 : gimple_opt_pass (pass_data_cse_sincos, ctxt)
1771 /* opt_pass methods: */
1772 virtual bool gate (function *)
1774 /* We no longer require either sincos or cexp, since powi expansion
1775 piggybacks on this pass. */
1776 return optimize;
1779 virtual unsigned int execute (function *);
1781 }; // class pass_cse_sincos
1783 unsigned int
1784 pass_cse_sincos::execute (function *fun)
1786 basic_block bb;
1787 bool cfg_changed = false;
1789 calculate_dominance_info (CDI_DOMINATORS);
1790 memset (&sincos_stats, 0, sizeof (sincos_stats));
1792 FOR_EACH_BB_FN (bb, fun)
1794 gimple_stmt_iterator gsi;
1795 bool cleanup_eh = false;
1797 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1799 gimple *stmt = gsi_stmt (gsi);
1801 /* Only the last stmt in a bb could throw; there is no need to call
1802 gimple_purge_dead_eh_edges if we change something in the middle
1803 of a basic block. */
1804 cleanup_eh = false;
1806 if (is_gimple_call (stmt)
1807 && gimple_call_lhs (stmt))
1809 tree arg, arg0, arg1, result;
1810 HOST_WIDE_INT n;
1811 location_t loc;
1813 switch (gimple_call_combined_fn (stmt))
1815 CASE_CFN_COS:
1816 CASE_CFN_SIN:
1817 CASE_CFN_CEXPI:
1818 /* Make sure we have either sincos or cexp. */
1819 if (!targetm.libc_has_function (function_c99_math_complex)
1820 && !targetm.libc_has_function (function_sincos))
1821 break;
1823 arg = gimple_call_arg (stmt, 0);
1824 if (TREE_CODE (arg) == SSA_NAME)
1825 cfg_changed |= execute_cse_sincos_1 (arg);
1826 break;
1828 CASE_CFN_POW:
1829 arg0 = gimple_call_arg (stmt, 0);
1830 arg1 = gimple_call_arg (stmt, 1);
1832 loc = gimple_location (stmt);
1833 result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
1835 if (result)
1837 tree lhs = gimple_get_lhs (stmt);
1838 gassign *new_stmt = gimple_build_assign (lhs, result);
1839 gimple_set_location (new_stmt, loc);
1840 unlink_stmt_vdef (stmt);
1841 gsi_replace (&gsi, new_stmt, true);
1842 cleanup_eh = true;
1843 if (gimple_vdef (stmt))
1844 release_ssa_name (gimple_vdef (stmt));
1846 break;
1848 CASE_CFN_POWI:
1849 arg0 = gimple_call_arg (stmt, 0);
1850 arg1 = gimple_call_arg (stmt, 1);
1851 loc = gimple_location (stmt);
1853 if (real_minus_onep (arg0))
1855 tree t0, t1, cond, one, minus_one;
1856 gassign *stmt;
1858 t0 = TREE_TYPE (arg0);
1859 t1 = TREE_TYPE (arg1);
1860 one = build_real (t0, dconst1);
1861 minus_one = build_real (t0, dconstm1);
1863 cond = make_temp_ssa_name (t1, NULL, "powi_cond");
1864 stmt = gimple_build_assign (cond, BIT_AND_EXPR,
1865 arg1, build_int_cst (t1, 1));
1866 gimple_set_location (stmt, loc);
1867 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1869 result = make_temp_ssa_name (t0, NULL, "powi");
1870 stmt = gimple_build_assign (result, COND_EXPR, cond,
1871 minus_one, one);
1872 gimple_set_location (stmt, loc);
1873 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1875 else
1877 if (!tree_fits_shwi_p (arg1))
1878 break;
1880 n = tree_to_shwi (arg1);
1881 result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
1884 if (result)
1886 tree lhs = gimple_get_lhs (stmt);
1887 gassign *new_stmt = gimple_build_assign (lhs, result);
1888 gimple_set_location (new_stmt, loc);
1889 unlink_stmt_vdef (stmt);
1890 gsi_replace (&gsi, new_stmt, true);
1891 cleanup_eh = true;
1892 if (gimple_vdef (stmt))
1893 release_ssa_name (gimple_vdef (stmt));
1895 break;
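/* Illustrative sketch of the real_minus_onep special case above (the
   "powi_cond"/"powi" temporaries are the ones created there, SSA
   numbering invented): for a call __builtin_powi (-1.0, n) the emitted
   GIMPLE is roughly

     powi_cond_1 = n & 1;
     powi_2 = powi_cond_1 ? -1.0 : 1.0;

   i.e. the result only depends on the parity of N, so no loop or
   libcall is needed.  */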
1897 CASE_CFN_CABS:
1898 arg0 = gimple_call_arg (stmt, 0);
1899 loc = gimple_location (stmt);
1900 result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
1902 if (result)
1904 tree lhs = gimple_get_lhs (stmt);
1905 gassign *new_stmt = gimple_build_assign (lhs, result);
1906 gimple_set_location (new_stmt, loc);
1907 unlink_stmt_vdef (stmt);
1908 gsi_replace (&gsi, new_stmt, true);
1909 cleanup_eh = true;
1910 if (gimple_vdef (stmt))
1911 release_ssa_name (gimple_vdef (stmt));
1913 break;
1915 default:;
1919 if (cleanup_eh)
1920 cfg_changed |= gimple_purge_dead_eh_edges (bb);
1923 statistics_counter_event (fun, "sincos statements inserted",
1924 sincos_stats.inserted);
1926 return cfg_changed ? TODO_cleanup_cfg : 0;
1929 } // anon namespace
1931 gimple_opt_pass *
1932 make_pass_cse_sincos (gcc::context *ctxt)
1934 return new pass_cse_sincos (ctxt);
1937 /* A symbolic number structure is used to detect byte permutation and selection
1938 patterns of a source. To achieve that, its field N contains an artificial
1939 number consisting of BITS_PER_MARKER sized markers tracking where each
1940 byte comes from in the source:
1942 0 - target byte has the value 0
1943 FF - target byte has an unknown value (eg. due to sign extension)
1944 1..size - marker value is the byte index in the source (0 for lsb).
1946 To detect permutations on memory sources (arrays and structures), a symbolic
1947 number is also associated:
1948 - a base address BASE_ADDR and an OFFSET giving the address of the source;
1949 - a range which gives the difference between the highest and lowest accessed
1950 memory location to make such a symbolic number;
1951 - the address SRC of the source element of lowest address as a convenience
1952 to easily get BASE_ADDR + offset + lowest bytepos;
1953 - the number N_OPS of expressions bitwise-ORed together, to represent
1954 the approximate cost of the computation.
1956 Note 1: the range is different from size as size reflects the size of the
1957 type of the current expression. For instance, for an array char a[],
1958 (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
1959 (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
1960 time a range of 1.
1962 Note 2: for non-memory sources, range holds the same value as size.
1964 Note 3: SRC points to the SSA_NAME in case of non-memory source. */
1966 struct symbolic_number {
1967 uint64_t n;
1968 tree type;
1969 tree base_addr;
1970 tree offset;
1971 HOST_WIDE_INT bytepos;
1972 tree src;
1973 tree alias_set;
1974 tree vuse;
1975 unsigned HOST_WIDE_INT range;
1976 int n_ops;
1979 #define BITS_PER_MARKER 8
1980 #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
1981 #define MARKER_BYTE_UNKNOWN MARKER_MASK
1982 #define HEAD_MARKER(n, size) \
1983 ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
1985 /* The number which the find_bswap_or_nop_1 result should match in
1986 order to have a nop. The number is masked according to the size of
1987 the symbolic number before using it. */
1988 #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
1989 (uint64_t)0x08070605 << 32 | 0x04030201)
1991 /* The number which the find_bswap_or_nop_1 result should match in
1992 order to have a byte swap. The number is masked according to the
1993 size of the symbolic number before using it. */
1994 #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
1995 (uint64_t)0x01020304 << 32 | 0x05060708)
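/* Illustrative example (little-endian target assumed, not part of the
   original comments): for a 32-bit unsigned value X, a fresh symbolic
   number starts as 0x04030201 (CMPNOP masked to 4 bytes): the marker in
   target byte position i is i + 1, identifying the source byte it comes
   from.  A manual byte swap such as

     ((X >> 24) & 0x000000ff) | ((X >>  8) & 0x0000ff00)
     | ((X <<  8) & 0x00ff0000) | ((X << 24) & 0xff000000)

   shuffles those markers into 0x01020304, which equals CMPXCHG masked to
   4 bytes, so find_bswap_or_nop below classifies the whole expression as
   a byte swap.  */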
1997 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
1998 number N. Return false if the requested operation is not permitted
1999 on a symbolic number. */
2001 static inline bool
2002 do_shift_rotate (enum tree_code code,
2003 struct symbolic_number *n,
2004 int count)
2006 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2007 unsigned head_marker;
2009 if (count % BITS_PER_UNIT != 0)
2010 return false;
2011 count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
2013 /* Zero out the extra bits of N in order to avoid them being shifted
2014 into the significant bits. */
2015 if (size < 64 / BITS_PER_MARKER)
2016 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
2018 switch (code)
2020 case LSHIFT_EXPR:
2021 n->n <<= count;
2022 break;
2023 case RSHIFT_EXPR:
2024 head_marker = HEAD_MARKER (n->n, size);
2025 n->n >>= count;
2026 /* Arithmetic shift of signed type: result is dependent on the value. */
2027 if (!TYPE_UNSIGNED (n->type) && head_marker)
2028 for (i = 0; i < count / BITS_PER_MARKER; i++)
2029 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
2030 << ((size - 1 - i) * BITS_PER_MARKER);
2031 break;
2032 case LROTATE_EXPR:
2033 n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
2034 break;
2035 case RROTATE_EXPR:
2036 n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
2037 break;
2038 default:
2039 return false;
2041 /* Zero unused bits for size. */
2042 if (size < 64 / BITS_PER_MARKER)
2043 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
2044 return true;
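/* Illustrative note on do_shift_rotate above (assuming a 4-byte unsigned
   source with markers as described for struct symbolic_number): starting
   from n->n == 0x04030201,

     X >> 8  gives  0x00040302   (low byte discarded, top byte now 0)
     X << 8  gives  0x03020100   (low byte known to be zero)

   while for a signed source whose top byte marker is set, an arithmetic
   right shift marks the vacated high bytes as MARKER_BYTE_UNKNOWN
   instead of 0.  */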
2047 /* Perform sanity checking for the symbolic number N and the gimple
2048 statement STMT. */
2050 static inline bool
2051 verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
2053 tree lhs_type;
2055 lhs_type = gimple_expr_type (stmt);
2057 if (TREE_CODE (lhs_type) != INTEGER_TYPE)
2058 return false;
2060 if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
2061 return false;
2063 return true;
2066 /* Initialize the symbolic number N for the bswap pass from the base element
2067 SRC manipulated by the bitwise OR expression. */
2069 static bool
2070 init_symbolic_number (struct symbolic_number *n, tree src)
2072 int size;
2074 if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
2075 return false;
2077 n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
2078 n->src = src;
2080 /* Set up the symbolic number N by setting each byte to a value between 1 and
2081 the byte size of rhs1. The highest order byte is set to that size and the
2082 lowest order byte to 1. */
2083 n->type = TREE_TYPE (src);
2084 size = TYPE_PRECISION (n->type);
2085 if (size % BITS_PER_UNIT != 0)
2086 return false;
2087 size /= BITS_PER_UNIT;
2088 if (size > 64 / BITS_PER_MARKER)
2089 return false;
2090 n->range = size;
2091 n->n = CMPNOP;
2092 n->n_ops = 1;
2094 if (size < 64 / BITS_PER_MARKER)
2095 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
2097 return true;
2100 /* Check if STMT might be a byte swap or a nop from a memory source and return
2101 the answer. If so, REF is that memory source and the base of the memory area
2102 accessed and the offset of the access from that base are recorded in N. */
2104 bool
2105 find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
2107 /* Leaf node is an array or component ref. Memorize its base and
2108 offset from base to compare to other such leaf nodes. */
2109 HOST_WIDE_INT bitsize, bitpos;
2110 machine_mode mode;
2111 int unsignedp, reversep, volatilep;
2112 tree offset, base_addr;
2114 /* Not prepared to handle PDP endian. */
2115 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
2116 return false;
2118 if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
2119 return false;
2121 base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
2122 &unsignedp, &reversep, &volatilep);
2124 if (TREE_CODE (base_addr) == MEM_REF)
2126 offset_int bit_offset = 0;
2127 tree off = TREE_OPERAND (base_addr, 1);
2129 if (!integer_zerop (off))
2131 offset_int boff, coff = mem_ref_offset (base_addr);
2132 boff = coff << LOG2_BITS_PER_UNIT;
2133 bit_offset += boff;
2136 base_addr = TREE_OPERAND (base_addr, 0);
2138 /* Avoid returning a negative bitpos as this may wreak havoc later. */
2139 if (wi::neg_p (bit_offset))
2141 offset_int mask = wi::mask <offset_int> (LOG2_BITS_PER_UNIT, false);
2142 offset_int tem = wi::bit_and_not (bit_offset, mask);
2143 /* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf.
2144 Subtract it from BIT_OFFSET and add it (scaled) to OFFSET. */
2145 bit_offset -= tem;
2146 tem >>= LOG2_BITS_PER_UNIT;
2147 if (offset)
2148 offset = size_binop (PLUS_EXPR, offset,
2149 wide_int_to_tree (sizetype, tem));
2150 else
2151 offset = wide_int_to_tree (sizetype, tem);
2154 bitpos += bit_offset.to_shwi ();
2157 if (bitpos % BITS_PER_UNIT)
2158 return false;
2159 if (bitsize % BITS_PER_UNIT)
2160 return false;
2161 if (reversep)
2162 return false;
2164 if (!init_symbolic_number (n, ref))
2165 return false;
2166 n->base_addr = base_addr;
2167 n->offset = offset;
2168 n->bytepos = bitpos / BITS_PER_UNIT;
2169 n->alias_set = reference_alias_ptr_type (ref);
2170 n->vuse = gimple_vuse (stmt);
2171 return true;
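/* Illustrative example for find_bswap_or_nop_load above (hypothetical
   source): for a load from a[3], where A is a global "unsigned char
   a[8]", the recorded fields would be roughly base_addr == a,
   offset == NULL_TREE, bytepos == 3 and src == a[3], so that two such
   loads can later be recognized as accessing adjacent bytes of the same
   object.  */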
2174 /* Compute the symbolic number N representing the result of a bitwise OR on
2175 two symbolic numbers N1 and N2 whose source statements are respectively
2176 SOURCE_STMT1 and SOURCE_STMT2. */
2178 static gimple *
2179 perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
2180 gimple *source_stmt2, struct symbolic_number *n2,
2181 struct symbolic_number *n)
2183 int i, size;
2184 uint64_t mask;
2185 gimple *source_stmt;
2186 struct symbolic_number *n_start;
2188 tree rhs1 = gimple_assign_rhs1 (source_stmt1);
2189 if (TREE_CODE (rhs1) == BIT_FIELD_REF
2190 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
2191 rhs1 = TREE_OPERAND (rhs1, 0);
2192 tree rhs2 = gimple_assign_rhs1 (source_stmt2);
2193 if (TREE_CODE (rhs2) == BIT_FIELD_REF
2194 && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
2195 rhs2 = TREE_OPERAND (rhs2, 0);
2197 /* Sources are different; cancel the bswap if they are not memory locations
2198 with the same base (array, structure, ...). */
2199 if (rhs1 != rhs2)
2201 uint64_t inc;
2202 HOST_WIDE_INT start_sub, end_sub, end1, end2, end;
2203 struct symbolic_number *toinc_n_ptr, *n_end;
2204 basic_block bb1, bb2;
2206 if (!n1->base_addr || !n2->base_addr
2207 || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
2208 return NULL;
2210 if (!n1->offset != !n2->offset
2211 || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
2212 return NULL;
2214 if (n1->bytepos < n2->bytepos)
2216 n_start = n1;
2217 start_sub = n2->bytepos - n1->bytepos;
2219 else
2221 n_start = n2;
2222 start_sub = n1->bytepos - n2->bytepos;
2225 bb1 = gimple_bb (source_stmt1);
2226 bb2 = gimple_bb (source_stmt2);
2227 if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
2228 source_stmt = source_stmt1;
2229 else
2230 source_stmt = source_stmt2;
2232 /* Find the highest address at which a load is performed and
2233 compute related info. */
2234 end1 = n1->bytepos + (n1->range - 1);
2235 end2 = n2->bytepos + (n2->range - 1);
2236 if (end1 < end2)
2238 end = end2;
2239 end_sub = end2 - end1;
2241 else
2243 end = end1;
2244 end_sub = end1 - end2;
2246 n_end = (end2 > end1) ? n2 : n1;
2248 /* Find symbolic number whose lsb is the most significant. */
2249 if (BYTES_BIG_ENDIAN)
2250 toinc_n_ptr = (n_end == n1) ? n2 : n1;
2251 else
2252 toinc_n_ptr = (n_start == n1) ? n2 : n1;
2254 n->range = end - n_start->bytepos + 1;
2256 /* Check that the range of memory covered can be represented by
2257 a symbolic number. */
2258 if (n->range > 64 / BITS_PER_MARKER)
2259 return NULL;
2261 /* Reinterpret byte marks in symbolic number holding the value of
2262 bigger weight according to target endianness. */
2263 inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
2264 size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
2265 for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
2267 unsigned marker
2268 = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
2269 if (marker && marker != MARKER_BYTE_UNKNOWN)
2270 toinc_n_ptr->n += inc;
2273 else
2275 n->range = n1->range;
2276 n_start = n1;
2277 source_stmt = source_stmt1;
2280 if (!n1->alias_set
2281 || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
2282 n->alias_set = n1->alias_set;
2283 else
2284 n->alias_set = ptr_type_node;
2285 n->vuse = n_start->vuse;
2286 n->base_addr = n_start->base_addr;
2287 n->offset = n_start->offset;
2288 n->src = n_start->src;
2289 n->bytepos = n_start->bytepos;
2290 n->type = n_start->type;
2291 size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2293 for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
2295 uint64_t masked1, masked2;
2297 masked1 = n1->n & mask;
2298 masked2 = n2->n & mask;
2299 if (masked1 && masked2 && masked1 != masked2)
2300 return NULL;
2302 n->n = n1->n | n2->n;
2303 n->n_ops = n1->n_ops + n2->n_ops;
2305 return source_stmt;
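/* Illustrative walk-through of perform_symbolic_merge above (assuming a
   little-endian target and "unsigned char a[2]"): when merging the two
   1-byte loads in

     x = a[0] | (a[1] << 8);

   the lower-address number (for a[0]) keeps marker 0x01 at byte 0, while
   the higher-address number (for a[1], already shifted left by 8) has
   marker 0x01 at byte 1 and is incremented by the byte distance 1,
   giving 0x02 there; ORing the two yields 0x0201 over a 2-byte range,
   which matches CMPNOP, i.e. a plain 16-bit load in target
   endianness.  */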
2308 /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
2309 the operation given by the rhs of STMT on the result. If the operation
2310 could successfully be executed, the function returns a gimple stmt whose
2311 rhs's first tree is the expression of the source operand, and NULL
2312 otherwise. */
2314 static gimple *
2315 find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
2317 enum tree_code code;
2318 tree rhs1, rhs2 = NULL;
2319 gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
2320 enum gimple_rhs_class rhs_class;
2322 if (!limit || !is_gimple_assign (stmt))
2323 return NULL;
2325 rhs1 = gimple_assign_rhs1 (stmt);
2327 if (find_bswap_or_nop_load (stmt, rhs1, n))
2328 return stmt;
2330 /* Handle BIT_FIELD_REF. */
2331 if (TREE_CODE (rhs1) == BIT_FIELD_REF
2332 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
2334 unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
2335 unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
2336 if (bitpos % BITS_PER_UNIT == 0
2337 && bitsize % BITS_PER_UNIT == 0
2338 && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
2340 /* Handle big-endian bit numbering in BIT_FIELD_REF. */
2341 if (BYTES_BIG_ENDIAN)
2342 bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
2344 /* Shift. */
2345 if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
2346 return NULL;
2348 /* Mask. */
2349 uint64_t mask = 0;
2350 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
2351 for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
2352 i++, tmp <<= BITS_PER_UNIT)
2353 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
2354 n->n &= mask;
2356 /* Convert. */
2357 n->type = TREE_TYPE (rhs1);
2358 if (!n->base_addr)
2359 n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2361 return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
2364 return NULL;
2367 if (TREE_CODE (rhs1) != SSA_NAME)
2368 return NULL;
2370 code = gimple_assign_rhs_code (stmt);
2371 rhs_class = gimple_assign_rhs_class (stmt);
2372 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2374 if (rhs_class == GIMPLE_BINARY_RHS)
2375 rhs2 = gimple_assign_rhs2 (stmt);
2377 /* Handle unary rhs and binary rhs with integer constants as second
2378 operand. */
2380 if (rhs_class == GIMPLE_UNARY_RHS
2381 || (rhs_class == GIMPLE_BINARY_RHS
2382 && TREE_CODE (rhs2) == INTEGER_CST))
2384 if (code != BIT_AND_EXPR
2385 && code != LSHIFT_EXPR
2386 && code != RSHIFT_EXPR
2387 && code != LROTATE_EXPR
2388 && code != RROTATE_EXPR
2389 && !CONVERT_EXPR_CODE_P (code))
2390 return NULL;
2392 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
2394 /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
2395 we have to initialize the symbolic number. */
2396 if (!source_stmt1)
2398 if (gimple_assign_load_p (stmt)
2399 || !init_symbolic_number (n, rhs1))
2400 return NULL;
2401 source_stmt1 = stmt;
2404 switch (code)
2406 case BIT_AND_EXPR:
2408 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2409 uint64_t val = int_cst_value (rhs2), mask = 0;
2410 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
2412 /* Only constants masking full bytes are allowed. */
2413 for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
2414 if ((val & tmp) != 0 && (val & tmp) != tmp)
2415 return NULL;
2416 else if (val & tmp)
2417 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
2419 n->n &= mask;
2421 break;
2422 case LSHIFT_EXPR:
2423 case RSHIFT_EXPR:
2424 case LROTATE_EXPR:
2425 case RROTATE_EXPR:
2426 if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
2427 return NULL;
2428 break;
2429 CASE_CONVERT:
2431 int i, type_size, old_type_size;
2432 tree type;
2434 type = gimple_expr_type (stmt);
2435 type_size = TYPE_PRECISION (type);
2436 if (type_size % BITS_PER_UNIT != 0)
2437 return NULL;
2438 type_size /= BITS_PER_UNIT;
2439 if (type_size > 64 / BITS_PER_MARKER)
2440 return NULL;
2442 /* Sign extension: result is dependent on the value. */
2443 old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2444 if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
2445 && HEAD_MARKER (n->n, old_type_size))
2446 for (i = 0; i < type_size - old_type_size; i++)
2447 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
2448 << ((type_size - 1 - i) * BITS_PER_MARKER);
2450 if (type_size < 64 / BITS_PER_MARKER)
2452 /* If STMT casts to a smaller type mask out the bits not
2453 belonging to the target type. */
2454 n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
2456 n->type = type;
2457 if (!n->base_addr)
2458 n->range = type_size;
2460 break;
2461 default:
2462 return NULL;
2464 return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
2467 /* Handle binary rhs. */
2469 if (rhs_class == GIMPLE_BINARY_RHS)
2471 struct symbolic_number n1, n2;
2472 gimple *source_stmt, *source_stmt2;
2474 if (code != BIT_IOR_EXPR)
2475 return NULL;
2477 if (TREE_CODE (rhs2) != SSA_NAME)
2478 return NULL;
2480 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2482 switch (code)
2484 case BIT_IOR_EXPR:
2485 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
2487 if (!source_stmt1)
2488 return NULL;
2490 source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
2492 if (!source_stmt2)
2493 return NULL;
2495 if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
2496 return NULL;
2498 if (!n1.vuse != !n2.vuse
2499 || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
2500 return NULL;
2502 source_stmt
2503 = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
2505 if (!source_stmt)
2506 return NULL;
2508 if (!verify_symbolic_number_p (n, stmt))
2509 return NULL;
2511 break;
2512 default:
2513 return NULL;
2515 return source_stmt;
2517 return NULL;
2520 /* Check if STMT completes a bswap implementation or a read in a given
2521 endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
2522 accordingly. It also sets N to represent the kind of operations
2523 performed: size of the resulting expression and whether it works on
2524 a memory source, and if so its alias set and vuse. Finally, the
2525 function returns a stmt whose rhs's first tree is the source
2526 expression. */
2528 static gimple *
2529 find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
2531 unsigned rsize;
2532 uint64_t tmpn, mask;
2533 /* The number which the find_bswap_or_nop_1 result should match in order
2534 to have a full byte swap. The number is shifted to the right
2535 according to the size of the symbolic number before using it. */
2536 uint64_t cmpxchg = CMPXCHG;
2537 uint64_t cmpnop = CMPNOP;
2539 gimple *ins_stmt;
2540 int limit;
2542 /* The last parameter determines the depth search limit. It usually
2543 correlates directly to the number n of bytes to be touched. We
2544 increase that number by log2(n) + 1 here in order to also
2545 cover signed -> unsigned conversions of the src operand as can be seen
2546 in libgcc, and for initial shift/and operation of the src operand. */
2547 limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
2548 limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
2549 ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
2551 if (!ins_stmt)
2552 return NULL;
2554 /* Find real size of result (highest non-zero byte). */
2555 if (n->base_addr)
2556 for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
2557 else
2558 rsize = n->range;
2560 /* Zero out the bits corresponding to untouched bytes in original gimple
2561 expression. */
2562 if (n->range < (int) sizeof (int64_t))
2564 mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
2565 cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
2566 cmpnop &= mask;
2569 /* Zero out the bits corresponding to unused bytes in the result of the
2570 gimple expression. */
2571 if (rsize < n->range)
2573 if (BYTES_BIG_ENDIAN)
2575 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
2576 cmpxchg &= mask;
2577 cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
2579 else
2581 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
2582 cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
2583 cmpnop &= mask;
2585 n->range = rsize;
2588 /* A complete byte swap should make the symbolic number start with
2589 the largest digit in the highest order byte. An unchanged symbolic
2590 number indicates a read with the same endianness as the target architecture. */
2591 if (n->n == cmpnop)
2592 *bswap = false;
2593 else if (n->n == cmpxchg)
2594 *bswap = true;
2595 else
2596 return NULL;
2598 /* Useless bit manipulation performed by code. */
2599 if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
2600 return NULL;
2602 n->range *= BITS_PER_UNIT;
2603 return ins_stmt;
2606 namespace {
2608 const pass_data pass_data_optimize_bswap =
2610 GIMPLE_PASS, /* type */
2611 "bswap", /* name */
2612 OPTGROUP_NONE, /* optinfo_flags */
2613 TV_NONE, /* tv_id */
2614 PROP_ssa, /* properties_required */
2615 0, /* properties_provided */
2616 0, /* properties_destroyed */
2617 0, /* todo_flags_start */
2618 0, /* todo_flags_finish */
2621 class pass_optimize_bswap : public gimple_opt_pass
2623 public:
2624 pass_optimize_bswap (gcc::context *ctxt)
2625 : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
2628 /* opt_pass methods: */
2629 virtual bool gate (function *)
2631 return flag_expensive_optimizations && optimize;
2634 virtual unsigned int execute (function *);
2636 }; // class pass_optimize_bswap
2638 /* Perform the bswap optimization: replace the expression computed in the rhs
2639 of CUR_STMT by an equivalent bswap, load or load + bswap expression.
2640 Which of these alternatives replaces the rhs is given by N->base_addr (non
2641 null if a load is needed) and BSWAP. The type, VUSE and alias set of the
2642 load to perform are also given in N while the builtin bswap to invoke is
2643 given in FNDECL. Finally, if a load is involved, INS_STMT refers to one of
2644 the load statements involved to construct the rhs in CUR_STMT and N->range
2645 gives the size of the rhs expression for maintaining some statistics.
2647 Note that if the replacement involves a load, CUR_STMT is moved just after
2648 INS_STMT to do the load with the same VUSE, which can lead to CUR_STMT
2649 changing basic block. */
2651 static bool
2652 bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl,
2653 tree bswap_type, tree load_type, struct symbolic_number *n,
2654 bool bswap)
2656 gimple_stmt_iterator gsi;
2657 tree src, tmp, tgt;
2658 gimple *bswap_stmt;
2660 gsi = gsi_for_stmt (cur_stmt);
2661 src = n->src;
2662 tgt = gimple_assign_lhs (cur_stmt);
2664 /* Need to load the value from memory first. */
2665 if (n->base_addr)
2667 gimple_stmt_iterator gsi_ins = gsi_for_stmt (ins_stmt);
2668 tree addr_expr, addr_tmp, val_expr, val_tmp;
2669 tree load_offset_ptr, aligned_load_type;
2670 gimple *addr_stmt, *load_stmt;
2671 unsigned align;
2672 HOST_WIDE_INT load_offset = 0;
2673 basic_block ins_bb, cur_bb;
2675 ins_bb = gimple_bb (ins_stmt);
2676 cur_bb = gimple_bb (cur_stmt);
2677 if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
2678 return false;
2680 align = get_object_alignment (src);
2682 /* Move cur_stmt just before one of the original loads
2683 to ensure it has the same VUSE. See PR61517 for what could
2684 go wrong. */
2685 if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
2686 reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
2687 gsi_move_before (&gsi, &gsi_ins);
2688 gsi = gsi_for_stmt (cur_stmt);
2690 /* Compute address to load from and cast according to the size
2691 of the load. */
2692 addr_expr = build_fold_addr_expr (unshare_expr (src));
2693 if (is_gimple_mem_ref_addr (addr_expr))
2694 addr_tmp = addr_expr;
2695 else
2697 addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL,
2698 "load_src");
2699 addr_stmt = gimple_build_assign (addr_tmp, addr_expr);
2700 gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT);
2703 /* Perform the load. */
2704 aligned_load_type = load_type;
2705 if (align < TYPE_ALIGN (load_type))
2706 aligned_load_type = build_aligned_type (load_type, align);
2707 load_offset_ptr = build_int_cst (n->alias_set, load_offset);
2708 val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
2709 load_offset_ptr);
2711 if (!bswap)
2713 if (n->range == 16)
2714 nop_stats.found_16bit++;
2715 else if (n->range == 32)
2716 nop_stats.found_32bit++;
2717 else
2719 gcc_assert (n->range == 64);
2720 nop_stats.found_64bit++;
2723 /* Convert the result of load if necessary. */
2724 if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type))
2726 val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
2727 "load_dst");
2728 load_stmt = gimple_build_assign (val_tmp, val_expr);
2729 gimple_set_vuse (load_stmt, n->vuse);
2730 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
2731 gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
2733 else
2735 gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
2736 gimple_set_vuse (cur_stmt, n->vuse);
2738 update_stmt (cur_stmt);
2740 if (dump_file)
2742 fprintf (dump_file,
2743 "%d bit load in target endianness found at: ",
2744 (int) n->range);
2745 print_gimple_stmt (dump_file, cur_stmt, 0);
2747 return true;
2749 else
2751 val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
2752 load_stmt = gimple_build_assign (val_tmp, val_expr);
2753 gimple_set_vuse (load_stmt, n->vuse);
2754 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
2756 src = val_tmp;
2758 else if (!bswap)
2760 gimple *g;
2761 if (!useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
2763 if (!is_gimple_val (src))
2764 return false;
2765 g = gimple_build_assign (tgt, NOP_EXPR, src);
2767 else
2768 g = gimple_build_assign (tgt, src);
2769 if (n->range == 16)
2770 nop_stats.found_16bit++;
2771 else if (n->range == 32)
2772 nop_stats.found_32bit++;
2773 else
2775 gcc_assert (n->range == 64);
2776 nop_stats.found_64bit++;
2778 if (dump_file)
2780 fprintf (dump_file,
2781 "%d bit reshuffle in target endianness found at: ",
2782 (int) n->range);
2783 print_gimple_stmt (dump_file, cur_stmt, 0);
2785 gsi_replace (&gsi, g, true);
2786 return true;
2788 else if (TREE_CODE (src) == BIT_FIELD_REF)
2789 src = TREE_OPERAND (src, 0);
2791 if (n->range == 16)
2792 bswap_stats.found_16bit++;
2793 else if (n->range == 32)
2794 bswap_stats.found_32bit++;
2795 else
2797 gcc_assert (n->range == 64);
2798 bswap_stats.found_64bit++;
2801 tmp = src;
2803 /* Convert the src expression if necessary. */
2804 if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
2806 gimple *convert_stmt;
2808 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
2809 convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
2810 gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
2813 /* Canonical form for 16 bit bswap is a rotate expression. Only 16bit values
2814 are considered, as rotation of 2N bit values by N bits is generally not
2815 equivalent to a bswap. Consider for instance 0x01020304 r>> 16 which
2816 gives 0x03040102 while a bswap for that value is 0x04030201. */
2817 if (bswap && n->range == 16)
2819 tree count = build_int_cst (NULL, BITS_PER_UNIT);
2820 src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
2821 bswap_stmt = gimple_build_assign (NULL, src);
2823 else
2824 bswap_stmt = gimple_build_call (fndecl, 1, tmp);
2826 tmp = tgt;
2828 /* Convert the result if necessary. */
2829 if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
2831 gimple *convert_stmt;
2833 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
2834 convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
2835 gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
2838 gimple_set_lhs (bswap_stmt, tmp);
2840 if (dump_file)
2842 fprintf (dump_file, "%d bit bswap implementation found at: ",
2843 (int) n->range);
2844 print_gimple_stmt (dump_file, cur_stmt, 0);
2847 gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
2848 gsi_remove (&gsi, true);
2849 return true;
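/* Illustrative before/after for bswap_replace above (hypothetical names,
   sketch only): once the pass proves that the rhs of CUR_STMT computes a
   32-bit byte swap of some value x, e.g.

     y = ((x >> 24) & 0xff) | ((x >> 8) & 0xff00)
         | ((x << 8) & 0xff0000) | ((x << 24) & 0xff000000);

   the statement computing y is replaced by a single call

     y = __builtin_bswap32 (x);

   A 16-bit swap is instead emitted as an 8-bit rotate (the canonical
   form noted above), and a detected native-endian read of a memory
   source becomes a single MEM_REF load.  */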
2852 /* Find manual byte swap implementations as well as loads in a given
2853 endianness. Byte swaps are turned into a bswap builtin invocation,
2854 while endian loads are converted to a bswap builtin invocation or a
2855 simple load according to the target endianness. */
2857 unsigned int
2858 pass_optimize_bswap::execute (function *fun)
2860 basic_block bb;
2861 bool bswap32_p, bswap64_p;
2862 bool changed = false;
2863 tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
2865 if (BITS_PER_UNIT != 8)
2866 return 0;
2868 bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2869 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
2870 bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2871 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
2872 || (bswap32_p && word_mode == SImode)));
2874 /* Determine the argument type of the builtins. The code later on
2875 assumes that the return and argument type are the same. */
2876 if (bswap32_p)
2878 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
2879 bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
2882 if (bswap64_p)
2884 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
2885 bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
2888 memset (&nop_stats, 0, sizeof (nop_stats));
2889 memset (&bswap_stats, 0, sizeof (bswap_stats));
2890 calculate_dominance_info (CDI_DOMINATORS);
2892 FOR_EACH_BB_FN (bb, fun)
2894 gimple_stmt_iterator gsi;
2896 /* We do a reverse scan for bswap patterns to make sure we get the
2897 widest match. As bswap pattern matching doesn't handle previously
2898 inserted smaller bswap replacements as sub-patterns, the wider
2899 variant wouldn't be detected. */
2900 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
2902 gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
2903 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
2904 enum tree_code code;
2905 struct symbolic_number n;
2906 bool bswap;
2908 /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
2909 might be moved to a different basic block by bswap_replace and gsi
2910 must not point to it if that's the case. Moving the gsi_prev
2911 there makes sure that gsi points to the statement previous to
2912 cur_stmt while still making sure that all statements are
2913 considered in this basic block. */
2914 gsi_prev (&gsi);
2916 if (!is_gimple_assign (cur_stmt))
2917 continue;
2919 code = gimple_assign_rhs_code (cur_stmt);
2920 switch (code)
2922 case LROTATE_EXPR:
2923 case RROTATE_EXPR:
2924 if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
2925 || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
2926 % BITS_PER_UNIT)
2927 continue;
2928 /* Fall through. */
2929 case BIT_IOR_EXPR:
2930 break;
2931 default:
2932 continue;
2935 ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
2937 if (!ins_stmt)
2938 continue;
2940 switch (n.range)
2942 case 16:
2943 /* Already in canonical form, nothing to do. */
2944 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
2945 continue;
2946 load_type = bswap_type = uint16_type_node;
2947 break;
2948 case 32:
2949 load_type = uint32_type_node;
2950 if (bswap32_p)
2952 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
2953 bswap_type = bswap32_type;
2955 break;
2956 case 64:
2957 load_type = uint64_type_node;
2958 if (bswap64_p)
2960 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
2961 bswap_type = bswap64_type;
2963 break;
2964 default:
2965 continue;
2968 if (bswap && !fndecl && n.range != 16)
2969 continue;
2971 if (bswap_replace (cur_stmt, ins_stmt, fndecl, bswap_type, load_type,
2972 &n, bswap))
2973 changed = true;
2977 statistics_counter_event (fun, "16-bit nop implementations found",
2978 nop_stats.found_16bit);
2979 statistics_counter_event (fun, "32-bit nop implementations found",
2980 nop_stats.found_32bit);
2981 statistics_counter_event (fun, "64-bit nop implementations found",
2982 nop_stats.found_64bit);
2983 statistics_counter_event (fun, "16-bit bswap implementations found",
2984 bswap_stats.found_16bit);
2985 statistics_counter_event (fun, "32-bit bswap implementations found",
2986 bswap_stats.found_32bit);
2987 statistics_counter_event (fun, "64-bit bswap implementations found",
2988 bswap_stats.found_64bit);
2990 return (changed ? TODO_update_ssa : 0);
2993 } // anon namespace
2995 gimple_opt_pass *
2996 make_pass_optimize_bswap (gcc::context *ctxt)
2998 return new pass_optimize_bswap (ctxt);
3001 /* Return true if STMT is a type conversion operation that can be stripped
3002 when used in a widening multiply operation. */
3003 static bool
3004 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
3006 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
3008 if (TREE_CODE (result_type) == INTEGER_TYPE)
3010 tree op_type;
3011 tree inner_op_type;
3013 if (!CONVERT_EXPR_CODE_P (rhs_code))
3014 return false;
3016 op_type = TREE_TYPE (gimple_assign_lhs (stmt));
3018 /* If the type of OP has the same precision as the result, then
3019 we can strip this conversion. The multiply operation will be
3020 selected to create the correct extension as a by-product. */
3021 if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
3022 return true;
3024 /* We can also strip a conversion if it preserves the signed-ness of
3025 the operation and doesn't narrow the range. */
3026 inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
3028 /* If the inner-most type is unsigned, then we can strip any
3029 intermediate widening operation. If it's signed, then the
3030 intermediate widening operation must also be signed. */
3031 if ((TYPE_UNSIGNED (inner_op_type)
3032 || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
3033 && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
3034 return true;
3036 return false;
3039 return rhs_code == FIXED_CONVERT_EXPR;
3042 /* Return true if RHS is a suitable operand for a widening multiplication,
3043 assuming a target type of TYPE.
3044 There are two cases:
3046 - RHS makes some value at least twice as wide. Store that value
3047 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
3049 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
3050 but leave *TYPE_OUT untouched. */
3052 static bool
3053 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
3054 tree *new_rhs_out)
3056 gimple *stmt;
3057 tree type1, rhs1;
3059 if (TREE_CODE (rhs) == SSA_NAME)
3061 stmt = SSA_NAME_DEF_STMT (rhs);
3062 if (is_gimple_assign (stmt))
3064 if (! widening_mult_conversion_strippable_p (type, stmt))
3065 rhs1 = rhs;
3066 else
3068 rhs1 = gimple_assign_rhs1 (stmt);
3070 if (TREE_CODE (rhs1) == INTEGER_CST)
3072 *new_rhs_out = rhs1;
3073 *type_out = NULL;
3074 return true;
3078 else
3079 rhs1 = rhs;
3081 type1 = TREE_TYPE (rhs1);
3083 if (TREE_CODE (type1) != TREE_CODE (type)
3084 || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
3085 return false;
3087 *new_rhs_out = rhs1;
3088 *type_out = type1;
3089 return true;
3092 if (TREE_CODE (rhs) == INTEGER_CST)
3094 *new_rhs_out = rhs;
3095 *type_out = NULL;
3096 return true;
3099 return false;
3102 /* Return true if STMT performs a widening multiplication, assuming the
3103 output type is TYPE. If so, store the unwidened types of the operands
3104 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
3105 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
3106 and *TYPE2_OUT would give the operands of the multiplication. */
3108 static bool
3109 is_widening_mult_p (gimple *stmt,
3110 tree *type1_out, tree *rhs1_out,
3111 tree *type2_out, tree *rhs2_out)
3113 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
3115 if (TREE_CODE (type) != INTEGER_TYPE
3116 && TREE_CODE (type) != FIXED_POINT_TYPE)
3117 return false;
3119 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
3120 rhs1_out))
3121 return false;
3123 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
3124 rhs2_out))
3125 return false;
3127 if (*type1_out == NULL)
3129 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
3130 return false;
3131 *type1_out = *type2_out;
3134 if (*type2_out == NULL)
3136 if (!int_fits_type_p (*rhs2_out, *type1_out))
3137 return false;
3138 *type2_out = *type1_out;
3141 /* Ensure that the larger of the two operands comes first. */
3142 if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
3144 std::swap (*type1_out, *type2_out);
3145 std::swap (*rhs1_out, *rhs2_out);
3148 return true;
3151 /* Check to see if the CALL statement is an invocation of copysign
3152 with 1. being the first argument. */
3153 static bool
3154 is_copysign_call_with_1 (gimple *call)
3156 gcall *c = dyn_cast <gcall *> (call);
3157 if (! c)
3158 return false;
3160 enum combined_fn code = gimple_call_combined_fn (c);
3162 if (code == CFN_LAST)
3163 return false;
3165 if (builtin_fn_p (code))
3167 switch (as_builtin_fn (code))
3169 CASE_FLT_FN (BUILT_IN_COPYSIGN):
3170 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
3171 return real_onep (gimple_call_arg (c, 0));
3172 default:
3173 return false;
3177 if (internal_fn_p (code))
3179 switch (as_internal_fn (code))
3181 case IFN_COPYSIGN:
3182 return real_onep (gimple_call_arg (c, 0));
3183 default:
3184 return false;
3188 return false;
3191 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
3192 This only happens when the xorsign optab is defined; if the
3193 pattern is not an xorsign pattern or if expansion fails, FALSE is
3194 returned, otherwise TRUE is returned. */
3195 static bool
3196 convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
3198 tree treeop0, treeop1, lhs, type;
3199 location_t loc = gimple_location (stmt);
3200 lhs = gimple_assign_lhs (stmt);
3201 treeop0 = gimple_assign_rhs1 (stmt);
3202 treeop1 = gimple_assign_rhs2 (stmt);
3203 type = TREE_TYPE (lhs);
3204 machine_mode mode = TYPE_MODE (type);
3206 if (HONOR_SNANS (type))
3207 return false;
3209 if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
3211 gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
3212 if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
3214 call0 = SSA_NAME_DEF_STMT (treeop1);
3215 if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
3216 return false;
3218 treeop1 = treeop0;
3220 if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
3221 return false;
3223 gcall *c = as_a<gcall*> (call0);
3224 treeop0 = gimple_call_arg (c, 1);
3226 gcall *call_stmt
3227 = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
3228 gimple_set_lhs (call_stmt, lhs);
3229 gimple_set_location (call_stmt, loc);
3230 gsi_replace (gsi, call_stmt, true);
3231 return true;
3234 return false;
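/* Illustrative example for convert_expand_mult_copysign above
   (hypothetical source): when the target provides the xorsign optab,

     z = x * __builtin_copysign (1.0, y);

   is replaced by an internal-function call

     z = .XORSIGN (x, y);

   which can typically be expanded as a sign-bit XOR instead of a
   multiply.  */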
3237 /* Process a single gimple statement STMT, which has a MULT_EXPR as
3238 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
3239 value is true iff we converted the statement. */
3241 static bool
3242 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
3244 tree lhs, rhs1, rhs2, type, type1, type2;
3245 enum insn_code handler;
3246 machine_mode to_mode, from_mode, actual_mode;
3247 optab op;
3248 int actual_precision;
3249 location_t loc = gimple_location (stmt);
3250 bool from_unsigned1, from_unsigned2;
3252 lhs = gimple_assign_lhs (stmt);
3253 type = TREE_TYPE (lhs);
3254 if (TREE_CODE (type) != INTEGER_TYPE)
3255 return false;
3257 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
3258 return false;
3260 to_mode = SCALAR_INT_TYPE_MODE (type);
3261 from_mode = SCALAR_INT_TYPE_MODE (type1);
3262 from_unsigned1 = TYPE_UNSIGNED (type1);
3263 from_unsigned2 = TYPE_UNSIGNED (type2);
3265 if (from_unsigned1 && from_unsigned2)
3266 op = umul_widen_optab;
3267 else if (!from_unsigned1 && !from_unsigned2)
3268 op = smul_widen_optab;
3269 else
3270 op = usmul_widen_optab;
3272 handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
3273 0, &actual_mode);
3275 if (handler == CODE_FOR_nothing)
3277 if (op != smul_widen_optab)
3279 /* We can use a signed multiply with unsigned types as long as
3280 there is a wider mode to use, or it is the smaller of the two
3281 types that is unsigned. Note that type1 >= type2, always. */
3282 if ((TYPE_UNSIGNED (type1)
3283 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
3284 || (TYPE_UNSIGNED (type2)
3285 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
3287 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
3288 || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
3289 return false;
3292 op = smul_widen_optab;
3293 handler = find_widening_optab_handler_and_mode (op, to_mode,
3294 from_mode, 0,
3295 &actual_mode);
3297 if (handler == CODE_FOR_nothing)
3298 return false;
3300 from_unsigned1 = from_unsigned2 = false;
3302 else
3303 return false;
3306 /* Ensure that the inputs to the handler are in the correct precision
3307 for the opcode. This will be the full mode size. */
3308 actual_precision = GET_MODE_PRECISION (actual_mode);
3309 if (2 * actual_precision > TYPE_PRECISION (type))
3310 return false;
3311 if (actual_precision != TYPE_PRECISION (type1)
3312 || from_unsigned1 != TYPE_UNSIGNED (type1))
3313 rhs1 = build_and_insert_cast (gsi, loc,
3314 build_nonstandard_integer_type
3315 (actual_precision, from_unsigned1), rhs1);
3316 if (actual_precision != TYPE_PRECISION (type2)
3317 || from_unsigned2 != TYPE_UNSIGNED (type2))
3318 rhs2 = build_and_insert_cast (gsi, loc,
3319 build_nonstandard_integer_type
3320 (actual_precision, from_unsigned2), rhs2);
3322 /* Handle constants. */
3323 if (TREE_CODE (rhs1) == INTEGER_CST)
3324 rhs1 = fold_convert (type1, rhs1);
3325 if (TREE_CODE (rhs2) == INTEGER_CST)
3326 rhs2 = fold_convert (type2, rhs2);
3328 gimple_assign_set_rhs1 (stmt, rhs1);
3329 gimple_assign_set_rhs2 (stmt, rhs2);
3330 gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
3331 update_stmt (stmt);
3332 widen_mul_stats.widen_mults_inserted++;
3333 return true;
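/* Illustrative example for convert_mult_to_widen above (hypothetical
   source, assuming the target has a 32x32->64 widening multiply):

     int a, b;
     long long r = (long long) a * (long long) b;

   The two conversions are stripped and the MULT_EXPR is rewritten as

     r = a w* b;   i.e. WIDEN_MULT_EXPR <a, b>

   so a single widening multiply instruction can be selected instead of
   two extensions plus a full 64-bit multiply.  */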
3336 /* Process a single gimple statement STMT, which is found at the
3337 iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
3338 rhs (given by CODE), and try to convert it into a
3339 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
3340 is true iff we converted the statement. */
3342 static bool
3343 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
3344 enum tree_code code)
3346 gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
3347 gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
3348 tree type, type1, type2, optype;
3349 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
3350 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
3351 optab this_optab;
3352 enum tree_code wmult_code;
3353 enum insn_code handler;
3354 scalar_mode to_mode, from_mode;
3355 machine_mode actual_mode;
3356 location_t loc = gimple_location (stmt);
3357 int actual_precision;
3358 bool from_unsigned1, from_unsigned2;
3360 lhs = gimple_assign_lhs (stmt);
3361 type = TREE_TYPE (lhs);
3362 if (TREE_CODE (type) != INTEGER_TYPE
3363 && TREE_CODE (type) != FIXED_POINT_TYPE)
3364 return false;
3366 if (code == MINUS_EXPR)
3367 wmult_code = WIDEN_MULT_MINUS_EXPR;
3368 else
3369 wmult_code = WIDEN_MULT_PLUS_EXPR;
3371 rhs1 = gimple_assign_rhs1 (stmt);
3372 rhs2 = gimple_assign_rhs2 (stmt);
3374 if (TREE_CODE (rhs1) == SSA_NAME)
3376 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
3377 if (is_gimple_assign (rhs1_stmt))
3378 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
3381 if (TREE_CODE (rhs2) == SSA_NAME)
3383 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
3384 if (is_gimple_assign (rhs2_stmt))
3385 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
3388 /* Allow for one conversion statement between the multiply
3389 and addition/subtraction statement. If there is more than
3390 one conversion then we assume they would invalidate this
3391 transformation. If that's not the case then they should have
3392 been folded before now. */
3393 if (CONVERT_EXPR_CODE_P (rhs1_code))
3395 conv1_stmt = rhs1_stmt;
3396 rhs1 = gimple_assign_rhs1 (rhs1_stmt);
3397 if (TREE_CODE (rhs1) == SSA_NAME)
3399 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
3400 if (is_gimple_assign (rhs1_stmt))
3401 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
3403 else
3404 return false;
3406 if (CONVERT_EXPR_CODE_P (rhs2_code))
3408 conv2_stmt = rhs2_stmt;
3409 rhs2 = gimple_assign_rhs1 (rhs2_stmt);
3410 if (TREE_CODE (rhs2) == SSA_NAME)
3412 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
3413 if (is_gimple_assign (rhs2_stmt))
3414 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
3416 else
3417 return false;
3420 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
3421 is_widening_mult_p, but we still need the returned rhs values.
3423 It might also appear that it would be sufficient to use the existing
3424 operands of the widening multiply, but that would limit the choice of
3425 multiply-and-accumulate instructions.
3427 If the widened-multiplication result has more than one use, it is
3428 probably wiser not to do the conversion. */
3429 if (code == PLUS_EXPR
3430 && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
3432 if (!has_single_use (rhs1)
3433 || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
3434 &type2, &mult_rhs2))
3435 return false;
3436 add_rhs = rhs2;
3437 conv_stmt = conv1_stmt;
3439 else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
3441 if (!has_single_use (rhs2)
3442 || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
3443 &type2, &mult_rhs2))
3444 return false;
3445 add_rhs = rhs1;
3446 conv_stmt = conv2_stmt;
3448 else
3449 return false;
3451 to_mode = SCALAR_TYPE_MODE (type);
3452 from_mode = SCALAR_TYPE_MODE (type1);
3453 from_unsigned1 = TYPE_UNSIGNED (type1);
3454 from_unsigned2 = TYPE_UNSIGNED (type2);
3455 optype = type1;
3457 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
3458 if (from_unsigned1 != from_unsigned2)
3460 if (!INTEGRAL_TYPE_P (type))
3461 return false;
3462 /* We can use a signed multiply with unsigned types as long as
3463 there is a wider mode to use, or it is the smaller of the two
3464 types that is unsigned. Note that type1 >= type2, always. */
3465 if ((from_unsigned1
3466 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
3467 || (from_unsigned2
3468 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
3470 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
3471 || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
3472 return false;
3475 from_unsigned1 = from_unsigned2 = false;
3476 optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
3477 false);
3480 /* If there was a conversion between the multiply and addition
3481 then we need to make sure it fits a multiply-and-accumulate.
3482 This should be a single mode change which does not change the
3483 value. */
3484 if (conv_stmt)
3486 /* We use the original, unmodified data types for this. */
3487 tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
3488 tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
3489 int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
3490 bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
3492 if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
3494 /* Conversion is a truncate. */
3495 if (TYPE_PRECISION (to_type) < data_size)
3496 return false;
3498 else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
3500 /* Conversion is an extend. Check it's the right sort. */
3501 if (TYPE_UNSIGNED (from_type) != is_unsigned
3502 && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
3503 return false;
3505 /* else convert is a no-op for our purposes. */
3508 /* Verify that the machine can perform a widening multiply
3509 accumulate in this mode/signedness combination, otherwise
3510 this transformation is likely to pessimize code. */
3511 this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
3512 handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
3513 from_mode, 0, &actual_mode);
3515 if (handler == CODE_FOR_nothing)
3516 return false;
3518 /* Ensure that the inputs to the handler are in the correct precision
3519 for the opcode. This will be the full mode size. */
3520 actual_precision = GET_MODE_PRECISION (actual_mode);
3521 if (actual_precision != TYPE_PRECISION (type1)
3522 || from_unsigned1 != TYPE_UNSIGNED (type1))
3523 mult_rhs1 = build_and_insert_cast (gsi, loc,
3524 build_nonstandard_integer_type
3525 (actual_precision, from_unsigned1),
3526 mult_rhs1);
3527 if (actual_precision != TYPE_PRECISION (type2)
3528 || from_unsigned2 != TYPE_UNSIGNED (type2))
3529 mult_rhs2 = build_and_insert_cast (gsi, loc,
3530 build_nonstandard_integer_type
3531 (actual_precision, from_unsigned2),
3532 mult_rhs2);
3534 if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
3535 add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
3537 /* Handle constants. */
3538 if (TREE_CODE (mult_rhs1) == INTEGER_CST)
3539 mult_rhs1 = fold_convert (type1, mult_rhs1);
3540 if (TREE_CODE (mult_rhs2) == INTEGER_CST)
3541 mult_rhs2 = fold_convert (type2, mult_rhs2);
3543 gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
3544 add_rhs);
3545 update_stmt (gsi_stmt (*gsi));
3546 widen_mul_stats.maccs_inserted++;
3547 return true;
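/* Illustrative example for convert_plusminus_to_widen above
   (hypothetical source, assuming a widening multiply-accumulate pattern
   exists for the mode):

     int a, b;
     long long acc;
     acc = acc + (long long) a * (long long) b;

   becomes roughly

     acc = WIDEN_MULT_PLUS_EXPR <a, b, acc>;

   and the MINUS_EXPR variant similarly becomes
   WIDEN_MULT_MINUS_EXPR.  */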
3550 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3551 with uses in additions and subtractions to form fused multiply-add
3552 operations. Returns true if successful and MUL_STMT should be removed. */
3554 static bool
3555 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2)
3557 tree mul_result = gimple_get_lhs (mul_stmt);
3558 tree type = TREE_TYPE (mul_result);
3559 gimple *use_stmt, *neguse_stmt;
3560 gassign *fma_stmt;
3561 use_operand_p use_p;
3562 imm_use_iterator imm_iter;
3564 if (FLOAT_TYPE_P (type)
3565 && flag_fp_contract_mode == FP_CONTRACT_OFF)
3566 return false;
3568 /* We don't want to do bitfield reduction ops. */
3569 if (INTEGRAL_TYPE_P (type)
3570 && !type_has_mode_precision_p (type))
3571 return false;
3573 /* If the target doesn't support it, don't generate it. We assume that
3574 if fma isn't available then fms, fnma or fnms are not either. */
3575 if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
3576 return false;
3578 /* If the multiplication has zero uses, it is kept around probably because
3579 of -fnon-call-exceptions. Don't optimize it away in that case;
3580 that is DCE's job. */
3581 if (has_zero_uses (mul_result))
3582 return false;
3584 /* Make sure that the multiplication statement becomes dead after
3585 the transformation, i.e. that all uses are transformed to FMAs.
3586 This means we assume that an FMA operation has the same cost
3587 as an addition. */
3588 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3590 enum tree_code use_code;
3591 tree result = mul_result;
3592 bool negate_p = false;
3594 use_stmt = USE_STMT (use_p);
3596 if (is_gimple_debug (use_stmt))
3597 continue;
3599 /* For now restrict this operation to single basic blocks. In theory
3600 we would want to support sinking the multiplication in
3601 m = a*b;
3602 if ()
3603 ma = m + c;
3604 else
3605 d = m;
3606 to form a fma in the then block and sink the multiplication to the
3607 else block. */
3608 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3609 return false;
3611 if (!is_gimple_assign (use_stmt))
3612 return false;
3614 use_code = gimple_assign_rhs_code (use_stmt);
3616 /* A negate on the multiplication leads to FNMA. */
3617 if (use_code == NEGATE_EXPR)
3619 ssa_op_iter iter;
3620 use_operand_p usep;
3622 result = gimple_assign_lhs (use_stmt);
3624 /* Make sure the negate statement becomes dead with this
3625 single transformation. */
3626 if (!single_imm_use (gimple_assign_lhs (use_stmt),
3627 &use_p, &neguse_stmt))
3628 return false;
3630 /* Make sure the multiplication isn't also used on that stmt. */
3631 FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3632 if (USE_FROM_PTR (usep) == mul_result)
3633 return false;
3635 /* Re-validate. */
3636 use_stmt = neguse_stmt;
3637 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3638 return false;
3639 if (!is_gimple_assign (use_stmt))
3640 return false;
3642 use_code = gimple_assign_rhs_code (use_stmt);
3643 negate_p = true;
3646 switch (use_code)
3648 case MINUS_EXPR:
3649 if (gimple_assign_rhs2 (use_stmt) == result)
3650 negate_p = !negate_p;
3651 break;
3652 case PLUS_EXPR:
3653 break;
3654 default:
3655 /* FMA can only be formed from PLUS and MINUS. */
3656 return false;
3659 /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
3660 by a MULT_EXPR that we'll visit later, we might be able to
3661 get a more profitable match with fnma.
3662 OTOH, if we don't, a negate / fma pair has likely lower latency
3663 than a mult / subtract pair. */
3664 if (use_code == MINUS_EXPR && !negate_p
3665 && gimple_assign_rhs1 (use_stmt) == result
3666 && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing
3667 && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
3669 tree rhs2 = gimple_assign_rhs2 (use_stmt);
3671 if (TREE_CODE (rhs2) == SSA_NAME)
3673 gimple *stmt2 = SSA_NAME_DEF_STMT (rhs2);
3674 if (has_single_use (rhs2)
3675 && is_gimple_assign (stmt2)
3676 && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3677 return false;
3681 /* We can't handle a * b + a * b. */
3682 if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
3683 return false;
3685 /* While it is possible to validate whether or not the exact form
3686 that we've recognized is available in the backend, the assumption
3687 is that the transformation is never a loss. For instance, suppose
3688 the target only has the plain FMA pattern available. Consider
3689 a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which
3690 is still two operations. Consider -(a*b)-c -> fma(-a,b,-c): we
3691 still have 3 operations, but in the FMA form the two NEGs are
3692 independent and could be run in parallel. */
3695 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3697 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3698 enum tree_code use_code;
3699 tree addop, mulop1 = op1, result = mul_result;
3700 bool negate_p = false;
3702 if (is_gimple_debug (use_stmt))
3703 continue;
3705 use_code = gimple_assign_rhs_code (use_stmt);
3706 if (use_code == NEGATE_EXPR)
3708 result = gimple_assign_lhs (use_stmt);
3709 single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3710 gsi_remove (&gsi, true);
3711 release_defs (use_stmt);
3713 use_stmt = neguse_stmt;
3714 gsi = gsi_for_stmt (use_stmt);
3715 use_code = gimple_assign_rhs_code (use_stmt);
3716 negate_p = true;
3719 if (gimple_assign_rhs1 (use_stmt) == result)
3721 addop = gimple_assign_rhs2 (use_stmt);
3722 /* a * b - c -> a * b + (-c) */
3723 if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
3724 addop = force_gimple_operand_gsi (&gsi,
3725 build1 (NEGATE_EXPR,
3726 type, addop),
3727 true, NULL_TREE, true,
3728 GSI_SAME_STMT);
3730 else
3732 addop = gimple_assign_rhs1 (use_stmt);
3733 /* a - b * c -> (-b) * c + a */
3734 if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
3735 negate_p = !negate_p;
3738 if (negate_p)
3739 mulop1 = force_gimple_operand_gsi (&gsi,
3740 build1 (NEGATE_EXPR,
3741 type, mulop1),
3742 true, NULL_TREE, true,
3743 GSI_SAME_STMT);
3745 fma_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt),
3746 FMA_EXPR, mulop1, op2, addop);
3747 gsi_replace (&gsi, fma_stmt, true);
3748 widen_mul_stats.fmas_inserted++;
3751 return true;
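/* Illustrative example for convert_mult_to_fma above (hypothetical
   GIMPLE sketch, SSA numbering invented, assuming FP contraction is
   allowed and an fma optab exists): with

     _3 = a_1 * b_2;
     x_4 = _3 + c_5;
     y_6 = _3 - d_7;

   both uses of the multiplication are rewritten, e.g.

     x_4 = FMA_EXPR <a_1, b_2, c_5>;
     _8 = -d_7;
     y_6 = FMA_EXPR <a_1, b_2, _8>;

   and the now-dead multiplication is removed by the caller.  */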
3755 /* Helper function of match_uaddsub_overflow. Return 1
3756 if USE_STMT is an unsigned overflow check ovf != 0 for
3757 STMT, -1 if USE_STMT is an unsigned overflow check ovf == 0,
3758 and 0 otherwise. */
3760 static int
3761 uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt)
3763 enum tree_code ccode = ERROR_MARK;
3764 tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3765 if (gimple_code (use_stmt) == GIMPLE_COND)
3767 ccode = gimple_cond_code (use_stmt);
3768 crhs1 = gimple_cond_lhs (use_stmt);
3769 crhs2 = gimple_cond_rhs (use_stmt);
3771 else if (is_gimple_assign (use_stmt))
3773 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
3775 ccode = gimple_assign_rhs_code (use_stmt);
3776 crhs1 = gimple_assign_rhs1 (use_stmt);
3777 crhs2 = gimple_assign_rhs2 (use_stmt);
3779 else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR)
3781 tree cond = gimple_assign_rhs1 (use_stmt);
3782 if (COMPARISON_CLASS_P (cond))
3784 ccode = TREE_CODE (cond);
3785 crhs1 = TREE_OPERAND (cond, 0);
3786 crhs2 = TREE_OPERAND (cond, 1);
3788 else
3789 return 0;
3791 else
3792 return 0;
3794 else
3795 return 0;
3797 if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3798 return 0;
3800 enum tree_code code = gimple_assign_rhs_code (stmt);
3801 tree lhs = gimple_assign_lhs (stmt);
3802 tree rhs1 = gimple_assign_rhs1 (stmt);
3803 tree rhs2 = gimple_assign_rhs2 (stmt);
3805 switch (ccode)
3807 case GT_EXPR:
3808 case LE_EXPR:
3809 /* r = a - b; r > a or r <= a
3810 r = a + b; a > r or a <= r or b > r or b <= r. */
3811 if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3812 || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3813 && crhs2 == lhs))
3814 return ccode == GT_EXPR ? 1 : -1;
3815 break;
3816 case LT_EXPR:
3817 case GE_EXPR:
3818 /* r = a - b; a < r or a >= r
3819 r = a + b; r < a or r >= a or r < b or r >= b. */
3820 if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3821 || (code == PLUS_EXPR && crhs1 == lhs
3822 && (crhs2 == rhs1 || crhs2 == rhs2)))
3823 return ccode == LT_EXPR ? 1 : -1;
3824 break;
3825 default:
3826 break;
3828 return 0;
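/* As an illustrative example (a sketch, not from any testcase), the
   classic C idiom for detecting unsigned wrap-around

     unsigned r = a - b;
     if (r > a)
       overflow ();

   corresponds to the GT_EXPR case above: code == MINUS_EXPR,
   crhs1 == r and crhs2 == a, so the function returns 1.  The inverted
   test "if (r <= a)" returns -1, and unrelated comparisons return 0.  */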
3831 /* Recognize for unsigned x
3832 x = y - z;
3833 if (x > y)
3834 where there are other uses of x and replace it with
3835 _7 = SUB_OVERFLOW (y, z);
3836 x = REALPART_EXPR <_7>;
3837 _8 = IMAGPART_EXPR <_7>;
3838 if (_8)
3839 and similarly for addition. */
3841 static bool
3842 match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
3843 enum tree_code code)
3845 tree lhs = gimple_assign_lhs (stmt);
3846 tree type = TREE_TYPE (lhs);
3847 use_operand_p use_p;
3848 imm_use_iterator iter;
3849 bool use_seen = false;
3850 bool ovf_use_seen = false;
3851 gimple *use_stmt;
3853 gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR);
3854 if (!INTEGRAL_TYPE_P (type)
3855 || !TYPE_UNSIGNED (type)
3856 || has_zero_uses (lhs)
3857 || has_single_use (lhs)
3858 || optab_handler (code == PLUS_EXPR ? uaddv4_optab : usubv4_optab,
3859 TYPE_MODE (type)) == CODE_FOR_nothing)
3860 return false;
3862 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
3864 use_stmt = USE_STMT (use_p);
3865 if (is_gimple_debug (use_stmt))
3866 continue;
3868 if (uaddsub_overflow_check_p (stmt, use_stmt))
3869 ovf_use_seen = true;
3870 else
3871 use_seen = true;
3872 if (ovf_use_seen && use_seen)
3873 break;
3876 if (!ovf_use_seen || !use_seen)
3877 return false;
3879 tree ctype = build_complex_type (type);
3880 tree rhs1 = gimple_assign_rhs1 (stmt);
3881 tree rhs2 = gimple_assign_rhs2 (stmt);
3882 gcall *g = gimple_build_call_internal (code == PLUS_EXPR
3883 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
3884 2, rhs1, rhs2);
3885 tree ctmp = make_ssa_name (ctype);
3886 gimple_call_set_lhs (g, ctmp);
3887 gsi_insert_before (gsi, g, GSI_SAME_STMT);
3888 gassign *g2 = gimple_build_assign (lhs, REALPART_EXPR,
3889 build1 (REALPART_EXPR, type, ctmp));
3890 gsi_replace (gsi, g2, true);
3891 tree ovf = make_ssa_name (type);
3892 g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
3893 build1 (IMAGPART_EXPR, type, ctmp));
3894 gsi_insert_after (gsi, g2, GSI_NEW_STMT);
3896 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3898 if (is_gimple_debug (use_stmt))
3899 continue;
3901 int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt);
3902 if (ovf_use == 0)
3903 continue;
3904 if (gimple_code (use_stmt) == GIMPLE_COND)
3906 gcond *cond_stmt = as_a <gcond *> (use_stmt);
3907 gimple_cond_set_lhs (cond_stmt, ovf);
3908 gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
3909 gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
3911 else
3913 gcc_checking_assert (is_gimple_assign (use_stmt));
3914 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
3916 gimple_assign_set_rhs1 (use_stmt, ovf);
3917 gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
3918 gimple_assign_set_rhs_code (use_stmt,
3919 ovf_use == 1 ? NE_EXPR : EQ_EXPR);
3921 else
3923 gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
3924 == COND_EXPR);
3925 tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
3926 boolean_type_node, ovf,
3927 build_int_cst (type, 0));
3928 gimple_assign_set_rhs1 (use_stmt, cond);
3931 update_stmt (use_stmt);
3933 return true;
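/* A user-level example that can reach the transform above (a sketch;
   it assumes a target providing the uaddv4/usubv4 patterns, e.g. one
   with flag-setting add/sub instructions):

     unsigned sub (unsigned y, unsigned z, unsigned *res)
     {
       unsigned x = y - z;
       *res = x;               /* ordinary use of x  */
       return x > y;           /* recognized overflow check  */
     }

   Because x has both an overflow-check use and an ordinary use, the
   subtraction is replaced by IFN_SUB_OVERFLOW and the comparison by a
   test of the IMAGPART_EXPR of its result.  */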
3936 /* Return true if the target has support for divmod. */
3938 static bool
3939 target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
3941 /* If target supports hardware divmod insn, use it for divmod. */
3942 if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
3943 return true;
3945 /* Check if libfunc for divmod is available. */
3946 rtx libfunc = optab_libfunc (divmod_optab, mode);
3947 if (libfunc != NULL_RTX)
3949 /* If optab_handler exists for div_optab, perhaps in a wider mode,
3950 we don't want to use the libfunc even if it exists for the given mode. */
3951 machine_mode div_mode;
3952 FOR_EACH_MODE_FROM (div_mode, mode)
3953 if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
3954 return false;
3956 return targetm.expand_divmod_libfunc != NULL;
3959 return false;
3962 /* Check if stmt is a candidate for the divmod transform. */
3964 static bool
3965 divmod_candidate_p (gassign *stmt)
3967 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
3968 machine_mode mode = TYPE_MODE (type);
3969 optab divmod_optab, div_optab;
3971 if (TYPE_UNSIGNED (type))
3973 divmod_optab = udivmod_optab;
3974 div_optab = udiv_optab;
3976 else
3978 divmod_optab = sdivmod_optab;
3979 div_optab = sdiv_optab;
3982 tree op1 = gimple_assign_rhs1 (stmt);
3983 tree op2 = gimple_assign_rhs2 (stmt);
3985 /* Disable the transform if either operand is a constant, since
3986 division-by-constant may have specialized expansion. */
3987 if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2))
3988 return false;
3990 /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
3991 expand using the [su]divv optabs. */
3992 if (TYPE_OVERFLOW_TRAPS (type))
3993 return false;
3995 if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
3996 return false;
3998 return true;
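/* For example (an illustrative sketch), a quotient/remainder pair on
   non-constant operands

     q = n / d;
     r = n % d;

   is a candidate here (provided the target passes the
   target_supports_divmod_p check), whereas "n / 16" together with
   "n % 16" is not, because division by a constant usually has a
   cheaper specialized expansion than a divmod call.  */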
4001 /* This function looks for:
4002 t1 = a TRUNC_DIV_EXPR b;
4003 t2 = a TRUNC_MOD_EXPR b;
4004 and transforms it to the following sequence:
4005 complex_tmp = DIVMOD (a, b);
4006 t1 = REALPART_EXPR(complex_tmp);
4007 t2 = IMAGPART_EXPR(complex_tmp);
4008 For conditions enabling the transform see divmod_candidate_p().
4010 The pass has three parts:
4011 1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
4012 other trunc_div_expr and trunc_mod_expr stmts.
4013 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
4014 to stmts vector.
4015 3) Insert DIVMOD call just before top_stmt and update entries in
4016 stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
4017 IMAGPART_EXPR for mod). */
4019 static bool
4020 convert_to_divmod (gassign *stmt)
4022 if (stmt_can_throw_internal (stmt)
4023 || !divmod_candidate_p (stmt))
4024 return false;
4026 tree op1 = gimple_assign_rhs1 (stmt);
4027 tree op2 = gimple_assign_rhs2 (stmt);
4029 imm_use_iterator use_iter;
4030 gimple *use_stmt;
4031 auto_vec<gimple *> stmts;
4033 gimple *top_stmt = stmt;
4034 basic_block top_bb = gimple_bb (stmt);
4036 /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
4037 at least stmt and possibly other trunc_div/trunc_mod stmts
4038 having the same operands as stmt. */
4040 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
4042 if (is_gimple_assign (use_stmt)
4043 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4044 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4045 && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
4046 && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
4048 if (stmt_can_throw_internal (use_stmt))
4049 continue;
4051 basic_block bb = gimple_bb (use_stmt);
4053 if (bb == top_bb)
4055 if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
4056 top_stmt = use_stmt;
4058 else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
4060 top_bb = bb;
4061 top_stmt = use_stmt;
4066 tree top_op1 = gimple_assign_rhs1 (top_stmt);
4067 tree top_op2 = gimple_assign_rhs2 (top_stmt);
4069 stmts.safe_push (top_stmt);
4070 bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
4072 /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
4073 to the stmts vector. The 2nd loop will always add stmt to the stmts
4074 vector, since gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
4075 2nd loop ends up adding at least a single trunc_mod_expr stmt. */
4077 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
4079 if (is_gimple_assign (use_stmt)
4080 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4081 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4082 && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
4083 && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
4085 if (use_stmt == top_stmt
4086 || stmt_can_throw_internal (use_stmt)
4087 || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
4088 continue;
4090 stmts.safe_push (use_stmt);
4091 if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
4092 div_seen = true;
4096 if (!div_seen)
4097 return false;
4099 /* Part 3: Create a call to the internal fn DIVMOD:
4100 divmod_tmp = DIVMOD (op1, op2). */
4102 gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
4103 tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
4104 call_stmt, "divmod_tmp");
4105 gimple_call_set_lhs (call_stmt, res);
4106 /* We rejected throwing statements above. */
4107 gimple_call_set_nothrow (call_stmt, true);
4109 /* Insert the call before top_stmt. */
4110 gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
4111 gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
4113 widen_mul_stats.divmod_calls_inserted++;
4115 /* Update all statements in stmts vector:
4116 lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
4117 lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */
4119 for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
4121 tree new_rhs;
4123 switch (gimple_assign_rhs_code (use_stmt))
4125 case TRUNC_DIV_EXPR:
4126 new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
4127 break;
4129 case TRUNC_MOD_EXPR:
4130 new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
4131 break;
4133 default:
4134 gcc_unreachable ();
4137 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
4138 gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
4139 update_stmt (use_stmt);
4142 return true;
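/* A complete user-level example of what the transform above targets
   (a sketch):

     void qr (unsigned n, unsigned d, unsigned *q, unsigned *r)
     {
       *q = n / d;
       *r = n % d;
     }

   When divmod_candidate_p accepts the statements, a single
   DIVMOD (n, d) internal call is inserted before the dominating
   statement and both assignments are rewritten to read the
   REALPART_EXPR and IMAGPART_EXPR of its complex result, so only one
   division (or one divmod libcall) is issued at expansion time.  */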
4145 /* Find integer multiplications where the operands are extended from
4146 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
4147 where appropriate. */
4149 namespace {
4151 const pass_data pass_data_optimize_widening_mul =
4153 GIMPLE_PASS, /* type */
4154 "widening_mul", /* name */
4155 OPTGROUP_NONE, /* optinfo_flags */
4156 TV_NONE, /* tv_id */
4157 PROP_ssa, /* properties_required */
4158 0, /* properties_provided */
4159 0, /* properties_destroyed */
4160 0, /* todo_flags_start */
4161 TODO_update_ssa, /* todo_flags_finish */
4164 class pass_optimize_widening_mul : public gimple_opt_pass
4166 public:
4167 pass_optimize_widening_mul (gcc::context *ctxt)
4168 : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
4171 /* opt_pass methods: */
4172 virtual bool gate (function *)
4174 return flag_expensive_optimizations && optimize;
4177 virtual unsigned int execute (function *);
4179 }; // class pass_optimize_widening_mul
4181 unsigned int
4182 pass_optimize_widening_mul::execute (function *fun)
4184 basic_block bb;
4185 bool cfg_changed = false;
4187 memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
4188 calculate_dominance_info (CDI_DOMINATORS);
4189 renumber_gimple_stmt_uids ();
4191 FOR_EACH_BB_FN (bb, fun)
4193 gimple_stmt_iterator gsi;
4195 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
4197 gimple *stmt = gsi_stmt (gsi);
4198 enum tree_code code;
4200 if (is_gimple_assign (stmt))
4202 code = gimple_assign_rhs_code (stmt);
4203 switch (code)
4205 case MULT_EXPR:
4206 if (!convert_mult_to_widen (stmt, &gsi)
4207 && !convert_expand_mult_copysign (stmt, &gsi)
4208 && convert_mult_to_fma (stmt,
4209 gimple_assign_rhs1 (stmt),
4210 gimple_assign_rhs2 (stmt)))
4212 gsi_remove (&gsi, true);
4213 release_defs (stmt);
4214 continue;
4216 break;
4218 case PLUS_EXPR:
4219 case MINUS_EXPR:
4220 if (!convert_plusminus_to_widen (&gsi, stmt, code))
4221 match_uaddsub_overflow (&gsi, stmt, code);
4222 break;
4224 case TRUNC_MOD_EXPR:
4225 convert_to_divmod (as_a<gassign *> (stmt));
4226 break;
4228 default:;
4231 else if (is_gimple_call (stmt)
4232 && gimple_call_lhs (stmt))
4234 tree fndecl = gimple_call_fndecl (stmt);
4235 if (fndecl
4236 && gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
4238 switch (DECL_FUNCTION_CODE (fndecl))
4240 case BUILT_IN_POWF:
4241 case BUILT_IN_POW:
4242 case BUILT_IN_POWL:
4243 if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
4244 && real_equal
4245 (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
4246 &dconst2)
4247 && convert_mult_to_fma (stmt,
4248 gimple_call_arg (stmt, 0),
4249 gimple_call_arg (stmt, 0)))
4251 unlink_stmt_vdef (stmt);
4252 if (gsi_remove (&gsi, true)
4253 && gimple_purge_dead_eh_edges (bb))
4254 cfg_changed = true;
4255 release_defs (stmt);
4256 continue;
4258 break;
4260 default:;
4264 gsi_next (&gsi);
4268 statistics_counter_event (fun, "widening multiplications inserted",
4269 widen_mul_stats.widen_mults_inserted);
4270 statistics_counter_event (fun, "widening maccs inserted",
4271 widen_mul_stats.maccs_inserted);
4272 statistics_counter_event (fun, "fused multiply-adds inserted",
4273 widen_mul_stats.fmas_inserted);
4274 statistics_counter_event (fun, "divmod calls inserted",
4275 widen_mul_stats.divmod_calls_inserted);
4277 return cfg_changed ? TODO_cleanup_cfg : 0;
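/* Note on the builtin handling in execute above (an illustrative
   sketch):

     double f (double x, double y)
     {
       return __builtin_pow (x, 2.0) + y;
     }

   The pow call with a constant exponent of 2.0 is handed to
   convert_mult_to_fma as if it were x * x, so on a target with an fma
   pattern (and with FP contraction allowed) the whole expression can
   become a single FMA of x, x and y; otherwise the call is left
   untouched.  */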
4280 } // anon namespace
4282 gimple_opt_pass *
4283 make_pass_optimize_widening_mul (gcc::context *ctxt)
4285 return new pass_optimize_widening_mul (ctxt);