gcc-4_9-branch/gcc/tree-ssa-math-opts.c

   1 /* Global, SSA-based optimizations using mathematical identities.
   2    Copyright (C) 2005-2014 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* Currently, the only mini-pass in this file tries to CSE reciprocal
  21    operations.  These are common in sequences such as this one:
  22
  23         modulus = sqrt(x*x + y*y + z*z);
  24         x = x / modulus;
  25         y = y / modulus;
  26         z = z / modulus;
  27
  28    that can be optimized to
  29
  30         modulus = sqrt(x*x + y*y + z*z);
  31         rmodulus = 1.0 / modulus;
  32         x = x * rmodulus;
  33         y = y * rmodulus;
  34         z = z * rmodulus;
  35
  36    We do this for loop invariant divisors, and with this pass whenever
  37    we notice that a division has the same divisor multiple times.
  38
  39    Of course, like in PRE, we don't insert a division if a dominator
  40    already has one.  However, this cannot be done as an extension of
  41    PRE for several reasons.
  42
  43    First of all, with some experiments it was found out that the
  44    transformation is not always useful if there are only two divisions
   45    by the same divisor.  This is probably because modern processors
  46    can pipeline the divisions; on older, in-order processors it should
  47    still be effective to optimize two divisions by the same number.
  48    We make this a param, and it shall be called N in the remainder of
  49    this comment.
  50
  51    Second, if trapping math is active, we have less freedom on where
  52    to insert divisions: we can only do so in basic blocks that already
  53    contain one.  (If divisions don't trap, instead, we can insert
  54    divisions elsewhere, which will be in blocks that are common dominators
  55    of those that have the division).
  56
  57    We really don't want to compute the reciprocal unless a division will
  58    be found.  To do this, we won't insert the division in a basic block
  59    that has less than N divisions *post-dominating* it.
  60
  61    The algorithm constructs a subset of the dominator tree, holding the
  62    blocks containing the divisions and the common dominators to them,
   63    and walks it twice.  The first walk is in post-order, and it annotates
  64    each block with the number of divisions that post-dominate it: this
  65    gives information on where divisions can be inserted profitably.
  66    The second walk is in pre-order, and it inserts divisions as explained
  67    above, and replaces divisions by multiplications.
  68
  69    In the best case, the cost of the pass is O(n_statements).  In the
  70    worst-case, the cost is due to creating the dominator tree subset,
  71    with a cost of O(n_basic_blocks ^ 2); however this can only happen
  72    for n_statements / n_basic_blocks statements.  So, the amortized cost
  73    of creating the dominator tree subset is O(n_basic_blocks) and the
  74    worst-case cost of the pass is O(n_statements * n_basic_blocks).
  75
  76    More practically, the cost will be small because there are few
  77    divisions, and they tend to be in the same basic block, so insert_bb
  78    is called very few times.
  79
  80    If we did this using domwalk.c, an efficient implementation would have
  81    to work on all the variables in a single pass, because we could not
  82    work on just a subset of the dominator tree, as we do now, and the
  83    cost would also be something like O(n_statements * n_basic_blocks).
  84    The data structures would be more complex in order to work on all the
  85    variables in a single pass.  */
  86
  87 #include "config.h"
  88 #include "system.h"
  89 #include "coretypes.h"
  90 #include "tm.h"
  91 #include "flags.h"
  92 #include "tree.h"
  93 #include "basic-block.h"
  94 #include "tree-ssa-alias.h"
  95 #include "internal-fn.h"
  96 #include "gimple-fold.h"
  97 #include "gimple-expr.h"
  98 #include "is-a.h"
  99 #include "gimple.h"
 100 #include "gimple-iterator.h"
 101 #include "gimplify-me.h"
 102 #include "stor-layout.h"
 103 #include "gimple-ssa.h"
 104 #include "tree-cfg.h"
 105 #include "tree-phinodes.h"
 106 #include "ssa-iterators.h"
 107 #include "stringpool.h"
 108 #include "tree-ssanames.h"
 109 #include "expr.h"
 110 #include "tree-dfa.h"
 111 #include "tree-ssa.h"
 112 #include "tree-pass.h"
 113 #include "alloc-pool.h"
 114 #include "target.h"
 115 #include "gimple-pretty-print.h"
 116
 117 /* FIXME: RTL headers have to be included here for optabs.  */
 118 #include "rtl.h"                /* Because optabs.h wants enum rtx_code.  */
 119 #include "expr.h"               /* Because optabs.h wants sepops.  */
 120 #include "optabs.h"
 121
 122 /* This structure represents one basic block that either computes a
 123    division, or is a common dominator for basic block that compute a
 124    division.  */
 125 struct occurrence {
 126   /* The basic block represented by this structure.  */
 127   basic_block bb;
 128
 129   /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
 130      inserted in BB.  */
 131   tree recip_def;
 132
 133   /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
 134      was inserted in BB.  */
 135   gimple recip_def_stmt;
 136
 137   /* Pointer to a list of "struct occurrence"s for blocks dominated
 138      by BB.  */
 139   struct occurrence *children;
 140
 141   /* Pointer to the next "struct occurrence"s in the list of blocks
 142      sharing a common dominator.  */
 143   struct occurrence *next;
 144
 145   /* The number of divisions that are in BB before compute_merit.  The
 146      number of divisions that are in BB or post-dominate it after
 147      compute_merit.  */
 148   int num_divisions;
 149
 150   /* True if the basic block has a division, false if it is a common
 151      dominator for basic blocks that do.  If it is false and trapping
 152      math is active, BB is not a candidate for inserting a reciprocal.  */
 153   bool bb_has_division;
 154 };
 155
 156 static struct
 157 {
 158   /* Number of 1.0/X ops inserted.  */
 159   int rdivs_inserted;
 160
 161   /* Number of 1.0/FUNC ops inserted.  */
 162   int rfuncs_inserted;
 163 } reciprocal_stats;
 164
 165 static struct
 166 {
 167   /* Number of cexpi calls inserted.  */
 168   int inserted;
 169 } sincos_stats;
 170
 171 static struct
 172 {
 173   /* Number of hand-written 16-bit bswaps found.  */
 174   int found_16bit;
 175
 176   /* Number of hand-written 32-bit bswaps found.  */
 177   int found_32bit;
 178
 179   /* Number of hand-written 64-bit bswaps found.  */
 180   int found_64bit;
 181 } bswap_stats;
 182
 183 static struct
 184 {
 185   /* Number of widening multiplication ops inserted.  */
 186   int widen_mults_inserted;
 187
 188   /* Number of integer multiply-and-accumulate ops inserted.  */
 189   int maccs_inserted;
 190
 191   /* Number of fp fused multiply-add ops inserted.  */
 192   int fmas_inserted;
 193 } widen_mul_stats;
 194
 195 /* The instance of "struct occurrence" representing the highest
 196    interesting block in the dominator tree.  */
 197 static struct occurrence *occ_head;
 198
 199 /* Allocation pool for getting instances of "struct occurrence".  */
 200 static alloc_pool occ_pool;
 201
 202
 203
 204 /* Allocate and return a new struct occurrence for basic block BB, and
 205    whose children list is headed by CHILDREN.  */
 206 static struct occurrence *
 207 occ_new (basic_block bb, struct occurrence *children)
 208 {
 209   struct occurrence *occ;
 210
 211   bb->aux = occ = (struct occurrence *) pool_alloc (occ_pool);
 212   memset (occ, 0, sizeof (struct occurrence));
 213
 214   occ->bb = bb;
 215   occ->children = children;
 216   return occ;
 217 }
 218
 219
 220 /* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
 221    list of "struct occurrence"s, one per basic block, having IDOM as
 222    their common dominator.
 223
 224    We try to insert NEW_OCC as deep as possible in the tree, and we also
 225    insert any other block that is a common dominator for BB and one
 226    block already in the tree.  */
 227
 228 static void
 229 insert_bb (struct occurrence *new_occ, basic_block idom,
 230            struct occurrence **p_head)
 231 {
 232   struct occurrence *occ, **p_occ;
 233
 234   for (p_occ = p_head; (occ = *p_occ) != NULL; )
 235     {
 236       basic_block bb = new_occ->bb, occ_bb = occ->bb;
 237       basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
 238       if (dom == bb)
 239         {
 240           /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
 241              from its list.  */
 242           *p_occ = occ->next;
 243           occ->next = new_occ->children;
 244           new_occ->children = occ;
 245
 246           /* Try the next block (it may as well be dominated by BB).  */
 247         }
 248
 249       else if (dom == occ_bb)
 250         {
 251           /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
 252           insert_bb (new_occ, dom, &occ->children);
 253           return;
 254         }
 255
 256       else if (dom != idom)
 257         {
 258           gcc_assert (!dom->aux);
 259
 260           /* There is a dominator between IDOM and BB, add it and make
 261              two children out of NEW_OCC and OCC.  First, remove OCC from
 262              its list.  */
 263           *p_occ = occ->next;
 264           new_occ->next = occ;
 265           occ->next = NULL;
 266
 267           /* None of the previous blocks has DOM as a dominator: if we tail
 268              recursed, we would reexamine them uselessly. Just switch BB with
 269              DOM, and go on looking for blocks dominated by DOM.  */
 270           new_occ = occ_new (dom, new_occ);
 271         }
 272
 273       else
 274         {
 275           /* Nothing special, go on with the next element.  */
 276           p_occ = &occ->next;
 277         }
 278     }
 279
 280   /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
 281   new_occ->next = *p_head;
 282   *p_head = new_occ;
 283 }
 284
 285 /* Register that we found a division in BB.  */
 286
 287 static inline void
 288 register_division_in (basic_block bb)
 289 {
 290   struct occurrence *occ;
 291
 292   occ = (struct occurrence *) bb->aux;
 293   if (!occ)
 294     {
 295       occ = occ_new (bb, NULL);
 296       insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
 297     }
 298
 299   occ->bb_has_division = true;
 300   occ->num_divisions++;
 301 }
 302
 303
 304 /* Compute the number of divisions that postdominate each block in OCC and
 305    its children.  */
 306
 307 static void
 308 compute_merit (struct occurrence *occ)
 309 {
 310   struct occurrence *occ_child;
 311   basic_block dom = occ->bb;
 312
 313   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
 314     {
 315       basic_block bb;
 316       if (occ_child->children)
 317         compute_merit (occ_child);
 318
 319       if (flag_exceptions)
 320         bb = single_noncomplex_succ (dom);
 321       else
 322         bb = dom;
 323
 324       if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
 325         occ->num_divisions += occ_child->num_divisions;
 326     }
 327 }
 328
 329
 330 /* Return whether USE_STMT is a floating-point division by DEF.  */
 331 static inline bool
 332 is_division_by (gimple use_stmt, tree def)
 333 {
 334   return is_gimple_assign (use_stmt)
 335          && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
 336          && gimple_assign_rhs2 (use_stmt) == def
 337          /* Do not recognize x / x as valid division, as we are getting
 338             confused later by replacing all immediate uses x in such
 339             a stmt.  */
 340          && gimple_assign_rhs1 (use_stmt) != def;
 341 }
 342
 343 /* Walk the subset of the dominator tree rooted at OCC, setting the
 344    RECIP_DEF field to a definition of 1.0 / DEF that can be used in
 345    the given basic block.  The field may be left NULL, of course,
 346    if it is not possible or profitable to do the optimization.
 347
 348    DEF_BSI is an iterator pointing at the statement defining DEF.
 349    If RECIP_DEF is set, a dominator already has a computation that can
 350    be used.  */
 351
 352 static void
 353 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
 354                     tree def, tree recip_def, int threshold)
 355 {
 356   tree type;
 357   gimple new_stmt;
 358   gimple_stmt_iterator gsi;
 359   struct occurrence *occ_child;
 360
 361   if (!recip_def
 362       && (occ->bb_has_division || !flag_trapping_math)
 363       && occ->num_divisions >= threshold)
 364     {
 365       /* Make a variable with the replacement and substitute it.  */
 366       type = TREE_TYPE (def);
 367       recip_def = create_tmp_reg (type, "reciptmp");
 368       new_stmt = gimple_build_assign_with_ops (RDIV_EXPR, recip_def,
 369                                                build_one_cst (type), def);
 370
 371       if (occ->bb_has_division)
 372         {
 373           /* Case 1: insert before an existing division.  */
 374           gsi = gsi_after_labels (occ->bb);
 375           while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
 376             gsi_next (&gsi);
 377
 378           gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
 379         }
 380       else if (def_gsi && occ->bb == def_gsi->bb)
 381         {
 382           /* Case 2: insert right after the definition.  Note that this will
 383              never happen if the definition statement can throw, because in
 384              that case the sole successor of the statement's basic block will
 385              dominate all the uses as well.  */
 386           gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
 387         }
 388       else
 389         {
 390           /* Case 3: insert in a basic block not containing defs/uses.  */
 391           gsi = gsi_after_labels (occ->bb);
 392           gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
 393         }
 394
 395       reciprocal_stats.rdivs_inserted++;
 396
 397       occ->recip_def_stmt = new_stmt;
 398     }
 399
 400   occ->recip_def = recip_def;
 401   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
 402     insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold);
 403 }
 404
 405
 406 /* Replace the division at USE_P with a multiplication by the reciprocal, if
 407    possible.  */
 408
 409 static inline void
 410 replace_reciprocal (use_operand_p use_p)
 411 {
 412   gimple use_stmt = USE_STMT (use_p);
 413   basic_block bb = gimple_bb (use_stmt);
 414   struct occurrence *occ = (struct occurrence *) bb->aux;
 415
 416   if (optimize_bb_for_speed_p (bb)
 417       && occ->recip_def && use_stmt != occ->recip_def_stmt)
 418     {
 419       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
 420       gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
 421       SET_USE (use_p, occ->recip_def);
 422       fold_stmt_inplace (&gsi);
 423       update_stmt (use_stmt);
 424     }
 425 }
 426
 427
 428 /* Free OCC and return one more "struct occurrence" to be freed.  */
 429
 430 static struct occurrence *
 431 free_bb (struct occurrence *occ)
 432 {
 433   struct occurrence *child, *next;
 434
 435   /* First get the two pointers hanging off OCC.  */
 436   next = occ->next;
 437   child = occ->children;
 438   occ->bb->aux = NULL;
 439   pool_free (occ_pool, occ);
 440
 441   /* Now ensure that we don't recurse unless it is necessary.  */
 442   if (!child)
 443     return next;
 444   else
 445     {
 446       while (next)
 447         next = free_bb (next);
 448
 449       return child;
 450     }
 451 }
 452
 453
 454 /* Look for floating-point divisions among DEF's uses, and try to
 455    replace them by multiplications with the reciprocal.  Add
 456    as many statements computing the reciprocal as needed.
 457
 458    DEF must be a GIMPLE register of a floating-point type.  */
 459
 460 static void
 461 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
 462 {
 463   use_operand_p use_p;
 464   imm_use_iterator use_iter;
 465   struct occurrence *occ;
 466   int count = 0, threshold;
 467
 468   gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));
 469
 470   FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
 471     {
 472       gimple use_stmt = USE_STMT (use_p);
 473       if (is_division_by (use_stmt, def))
 474         {
 475           register_division_in (gimple_bb (use_stmt));
 476           count++;
 477         }
 478     }
 479
 480   /* Do the expensive part only if we can hope to optimize something.  */
 481   threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
 482   if (count >= threshold)
 483     {
 484       gimple use_stmt;
 485       for (occ = occ_head; occ; occ = occ->next)
 486         {
 487           compute_merit (occ);
 488           insert_reciprocals (def_gsi, occ, def, NULL, threshold);
 489         }
 490
 491       FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
 492         {
 493           if (is_division_by (use_stmt, def))
 494             {
 495               FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
 496                 replace_reciprocal (use_p);
 497             }
 498         }
 499     }
 500
 501   for (occ = occ_head; occ; )
 502     occ = free_bb (occ);
 503
 504   occ_head = NULL;
 505 }
 506
 507 static bool
 508 gate_cse_reciprocals (void)
 509 {
 510   return optimize && flag_reciprocal_math;
 511 }
 512
 513 /* Go through all the floating-point SSA_NAMEs, and call
 514    execute_cse_reciprocals_1 on each of them.  */
 515 static unsigned int
 516 execute_cse_reciprocals (void)
 517 {
 518   basic_block bb;
 519   tree arg;
 520
 521   occ_pool = create_alloc_pool ("dominators for recip",
 522                                 sizeof (struct occurrence),
 523                                 n_basic_blocks_for_fn (cfun) / 3 + 1);
 524
 525   memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
 526   calculate_dominance_info (CDI_DOMINATORS);
 527   calculate_dominance_info (CDI_POST_DOMINATORS);
 528
 529 #ifdef ENABLE_CHECKING
 530   FOR_EACH_BB_FN (bb, cfun)
 531     gcc_assert (!bb->aux);
 532 #endif
 533
 534   for (arg = DECL_ARGUMENTS (cfun->decl); arg; arg = DECL_CHAIN (arg))
 535     if (FLOAT_TYPE_P (TREE_TYPE (arg))
 536         && is_gimple_reg (arg))
 537       {
 538         tree name = ssa_default_def (cfun, arg);
 539         if (name)
 540           execute_cse_reciprocals_1 (NULL, name);
 541       }
 542
 543   FOR_EACH_BB_FN (bb, cfun)
 544     {
 545       gimple_stmt_iterator gsi;
 546       gimple phi;
 547       tree def;
 548
 549       for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 550         {
 551           phi = gsi_stmt (gsi);
 552           def = PHI_RESULT (phi);
 553           if (! virtual_operand_p (def)
 554               && FLOAT_TYPE_P (TREE_TYPE (def)))
 555             execute_cse_reciprocals_1 (NULL, def);
 556         }
 557
 558       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 559         {
 560           gimple stmt = gsi_stmt (gsi);
 561
 562           if (gimple_has_lhs (stmt)
 563               && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
 564               && FLOAT_TYPE_P (TREE_TYPE (def))
 565               && TREE_CODE (def) == SSA_NAME)
 566             execute_cse_reciprocals_1 (&gsi, def);
 567         }
 568
 569       if (optimize_bb_for_size_p (bb))
 570         continue;
 571
 572       /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
 573       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 574         {
 575           gimple stmt = gsi_stmt (gsi);
 576           tree fndecl;
 577
 578           if (is_gimple_assign (stmt)
 579               && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
 580             {
 581               tree arg1 = gimple_assign_rhs2 (stmt);
 582               gimple stmt1;
 583
 584               if (TREE_CODE (arg1) != SSA_NAME)
 585                 continue;
 586
 587               stmt1 = SSA_NAME_DEF_STMT (arg1);
 588
 589               if (is_gimple_call (stmt1)
 590                   && gimple_call_lhs (stmt1)
 591                   && (fndecl = gimple_call_fndecl (stmt1))
 592                   && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
 593                       || DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD))
 594                 {
 595                   enum built_in_function code;
 596                   bool md_code, fail;
 597                   imm_use_iterator ui;
 598                   use_operand_p use_p;
 599
 600                   code = DECL_FUNCTION_CODE (fndecl);
 601                   md_code = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD;
 602
 603                   fndecl = targetm.builtin_reciprocal (code, md_code, false);
 604                   if (!fndecl)
 605                     continue;
 606
 607                   /* Check that all uses of the SSA name are divisions,
 608                      otherwise replacing the defining statement will do
 609                      the wrong thing.  */
 610                   fail = false;
 611                   FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
 612                     {
 613                       gimple stmt2 = USE_STMT (use_p);
 614                       if (is_gimple_debug (stmt2))
 615                         continue;
 616                       if (!is_gimple_assign (stmt2)
 617                           || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
 618                           || gimple_assign_rhs1 (stmt2) == arg1
 619                           || gimple_assign_rhs2 (stmt2) != arg1)
 620                         {
 621                           fail = true;
 622                           break;
 623                         }
 624                     }
 625                   if (fail)
 626                     continue;
 627
 628                   gimple_replace_ssa_lhs (stmt1, arg1);
 629                   gimple_call_set_fndecl (stmt1, fndecl);
 630                   update_stmt (stmt1);
 631                   reciprocal_stats.rfuncs_inserted++;
 632
 633                   FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
 634                     {
 635                       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
 636                       gimple_assign_set_rhs_code (stmt, MULT_EXPR);
 637                       fold_stmt_inplace (&gsi);
 638                       update_stmt (stmt);
 639                     }
 640                 }
 641             }
 642         }
 643     }
 644
 645   statistics_counter_event (cfun, "reciprocal divs inserted",
 646                             reciprocal_stats.rdivs_inserted);
 647   statistics_counter_event (cfun, "reciprocal functions inserted",
 648                             reciprocal_stats.rfuncs_inserted);
 649
 650   free_dominance_info (CDI_DOMINATORS);
 651   free_dominance_info (CDI_POST_DOMINATORS);
 652   free_alloc_pool (occ_pool);
 653   return 0;
 654 }
 655
 656 namespace {
 657
 658 const pass_data pass_data_cse_reciprocals =
 659 {
 660   GIMPLE_PASS, /* type */
 661   "recip", /* name */
 662   OPTGROUP_NONE, /* optinfo_flags */
 663   true, /* has_gate */
 664   true, /* has_execute */
 665   TV_NONE, /* tv_id */
 666   PROP_ssa, /* properties_required */
 667   0, /* properties_provided */
 668   0, /* properties_destroyed */
 669   0, /* todo_flags_start */
 670   ( TODO_update_ssa | TODO_verify_ssa
 671     | TODO_verify_stmts ), /* todo_flags_finish */
 672 };
 673
 674 class pass_cse_reciprocals : public gimple_opt_pass
 675 {
 676 public:
 677   pass_cse_reciprocals (gcc::context *ctxt)
 678     : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
 679   {}
 680
 681   /* opt_pass methods: */
 682   bool gate () { return gate_cse_reciprocals (); }
 683   unsigned int execute () { return execute_cse_reciprocals (); }
 684
 685 }; // class pass_cse_reciprocals
 686
 687 } // anon namespace
 688
 689 gimple_opt_pass *
 690 make_pass_cse_reciprocals (gcc::context *ctxt)
 691 {
 692   return new pass_cse_reciprocals (ctxt);
 693 }
 694
 695 /* Records an occurrence at statement USE_STMT in the vector of trees
 696    STMTS if it is dominated by *TOP_BB or dominates it or this basic block
 697    is not yet initialized.  Returns true if the occurrence was pushed on
 698    the vector.  Adjusts *TOP_BB to be the basic block dominating all
 699    statements in the vector.  */
 700
 701 static bool
 702 maybe_record_sincos (vec<gimple> *stmts,
 703                      basic_block *top_bb, gimple use_stmt)
 704 {
 705   basic_block use_bb = gimple_bb (use_stmt);
 706   if (*top_bb
 707       && (*top_bb == use_bb
 708           || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
 709     stmts->safe_push (use_stmt);
 710   else if (!*top_bb
 711            || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
 712     {
 713       stmts->safe_push (use_stmt);
 714       *top_bb = use_bb;
 715     }
 716   else
 717     return false;
 718
 719   return true;
 720 }
 721
 722 /* Look for sin, cos and cexpi calls with the same argument NAME and
 723    create a single call to cexpi CSEing the result in this case.
 724    We first walk over all immediate uses of the argument collecting
 725    statements that we can CSE in a vector and in a second pass replace
 726    the statement rhs with a REALPART or IMAGPART expression on the
 727    result of the cexpi call we insert before the use statement that
 728    dominates all other candidates.  */
 729
 730 static bool
 731 execute_cse_sincos_1 (tree name)
 732 {
 733   gimple_stmt_iterator gsi;
 734   imm_use_iterator use_iter;
 735   tree fndecl, res, type;
 736   gimple def_stmt, use_stmt, stmt;
 737   int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
 738   vec<gimple> stmts = vNULL;
 739   basic_block top_bb = NULL;
 740   int i;
 741   bool cfg_changed = false;
 742
 743   type = TREE_TYPE (name);
 744   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
 745     {
 746       if (gimple_code (use_stmt) != GIMPLE_CALL
 747           || !gimple_call_lhs (use_stmt)
 748           || !(fndecl = gimple_call_fndecl (use_stmt))
 749           || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
 750         continue;
 751
 752       switch (DECL_FUNCTION_CODE (fndecl))
 753         {
 754         CASE_FLT_FN (BUILT_IN_COS):
 755           seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
 756           break;
 757
 758         CASE_FLT_FN (BUILT_IN_SIN):
 759           seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
 760           break;
 761
 762         CASE_FLT_FN (BUILT_IN_CEXPI):
 763           seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
 764           break;
 765
 766         default:;
 767         }
 768     }
 769
 770   if (seen_cos + seen_sin + seen_cexpi <= 1)
 771     {
 772       stmts.release ();
 773       return false;
 774     }
 775
 776   /* Simply insert cexpi at the beginning of top_bb but not earlier than
 777      the name def statement.  */
 778   fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
 779   if (!fndecl)
 780     return false;
 781   stmt = gimple_build_call (fndecl, 1, name);
 782   res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
 783   gimple_call_set_lhs (stmt, res);
 784
 785   def_stmt = SSA_NAME_DEF_STMT (name);
 786   if (!SSA_NAME_IS_DEFAULT_DEF (name)
 787       && gimple_code (def_stmt) != GIMPLE_PHI
 788       && gimple_bb (def_stmt) == top_bb)
 789     {
 790       gsi = gsi_for_stmt (def_stmt);
 791       gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
 792     }
 793   else
 794     {
 795       gsi = gsi_after_labels (top_bb);
 796       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
 797     }
 798   sincos_stats.inserted++;
 799
 800   /* And adjust the recorded old call sites.  */
 801   for (i = 0; stmts.iterate (i, &use_stmt); ++i)
 802     {
 803       tree rhs = NULL;
 804       fndecl = gimple_call_fndecl (use_stmt);
 805
 806       switch (DECL_FUNCTION_CODE (fndecl))
 807         {
 808         CASE_FLT_FN (BUILT_IN_COS):
 809           rhs = fold_build1 (REALPART_EXPR, type, res);
 810           break;
 811
 812         CASE_FLT_FN (BUILT_IN_SIN):
 813           rhs = fold_build1 (IMAGPART_EXPR, type, res);
 814           break;
 815
 816         CASE_FLT_FN (BUILT_IN_CEXPI):
 817           rhs = res;
 818           break;
 819
 820         default:;
 821           gcc_unreachable ();
 822         }
 823
 824         /* Replace call with a copy.  */
 825         stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
 826
 827         gsi = gsi_for_stmt (use_stmt);
 828         gsi_replace (&gsi, stmt, true);
 829         if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
 830           cfg_changed = true;
 831     }
 832
 833   stmts.release ();
 834
 835   return cfg_changed;
 836 }
 837
 838 /* To evaluate powi(x,n), the floating point value x raised to the
 839    constant integer exponent n, we use a hybrid algorithm that
 840    combines the "window method" with look-up tables.  For an
 841    introduction to exponentiation algorithms and "addition chains",
 842    see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
 843    "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
 844    3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
 845    Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */
 846
 847 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
 848    multiplications to inline before calling the system library's pow
 849    function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
 850    so this default never requires calling pow, powf or powl.  */
 851
 852 #ifndef POWI_MAX_MULTS
 853 #define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
 854 #endif
 855
 856 /* The size of the "optimal power tree" lookup table.  All
 857    exponents less than this value are simply looked up in the
 858    powi_table below.  This threshold is also used to size the
 859    cache of pseudo registers that hold intermediate results.  */
 860 #define POWI_TABLE_SIZE 256
 861
 862 /* The size, in bits of the window, used in the "window method"
 863    exponentiation algorithm.  This is equivalent to a radix of
 864    (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
 865 #define POWI_WINDOW_SIZE 3
 866
 867 /* The following table is an efficient representation of an
 868    "optimal power tree".  For each value, i, the corresponding
  869    value, j, in the table states that an optimal evaluation
 870    sequence for calculating pow(x,i) can be found by evaluating
 871    pow(x,j)*pow(x,i-j).  An optimal power tree for the first
 872    100 integers is given in Knuth's "Seminumerical algorithms".  */
 873
 874 static const unsigned char powi_table[POWI_TABLE_SIZE] =
 875   {
 876       0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
 877       4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
 878       8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
 879      12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
 880      16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
 881      20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
 882      24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
 883      28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
 884      32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
 885      36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
 886      40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
 887      44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
 888      48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
 889      52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
 890      56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
 891      60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
 892      64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
 893      68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
 894      72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
 895      76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
 896      80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
 897      84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
 898      88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
 899      92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
 900      96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
 901     100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
 902     104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
 903     108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
 904     112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
 905     116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
 906     120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
 907     124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
 908   };
 909
 910
 911 /* Return the number of multiplications required to calculate
 912    powi(x,n) where n is less than POWI_TABLE_SIZE.  This is a
 913    subroutine of powi_cost.  CACHE is an array indicating
 914    which exponents have already been calculated.  */
 915
 916 static int
 917 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
 918 {
 919   /* If we've already calculated this exponent, then this evaluation
 920      doesn't require any additional multiplications.  */
 921   if (cache[n])
 922     return 0;
 923
 924   cache[n] = true;
 925   return powi_lookup_cost (n - powi_table[n], cache)
 926          + powi_lookup_cost (powi_table[n], cache) + 1;
 927 }
 928
 929 /* Return the number of multiplications required to calculate
 930    powi(x,n) for an arbitrary x, given the exponent N.  This
 931    function needs to be kept in sync with powi_as_mults below.  */
 932
 933 static int
 934 powi_cost (HOST_WIDE_INT n)
 935 {
 936   bool cache[POWI_TABLE_SIZE];
 937   unsigned HOST_WIDE_INT digit;
 938   unsigned HOST_WIDE_INT val;
 939   int result;
 940
 941   if (n == 0)
 942     return 0;
 943
 944   /* Ignore the reciprocal when calculating the cost.  */
 945   val = (n < 0) ? -n : n;
 946
 947   /* Initialize the exponent cache.  */
 948   memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
 949   cache[1] = true;
 950
 951   result = 0;
 952
 953   while (val >= POWI_TABLE_SIZE)
 954     {
 955       if (val & 1)
 956         {
 957           digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
 958           result += powi_lookup_cost (digit, cache)
 959                     + POWI_WINDOW_SIZE + 1;
 960           val >>= POWI_WINDOW_SIZE;
 961         }
 962       else
 963         {
 964           val >>= 1;
 965           result++;
 966         }
 967     }
 968
 969   return result + powi_lookup_cost (val, cache);
 970 }
 971
 972 /* Recursive subroutine of powi_as_mults.  This function takes the
 973    array, CACHE, of already calculated exponents and an exponent N and
 974    returns a tree that corresponds to CACHE[1]**N, with type TYPE.  */
 975
 976 static tree
 977 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
 978                  HOST_WIDE_INT n, tree *cache)
 979 {
 980   tree op0, op1, ssa_target;
 981   unsigned HOST_WIDE_INT digit;
 982   gimple mult_stmt;
 983
 984   if (n < POWI_TABLE_SIZE && cache[n])
 985     return cache[n];
 986
 987   ssa_target = make_temp_ssa_name (type, NULL, "powmult");
 988
 989   if (n < POWI_TABLE_SIZE)
 990     {
 991       cache[n] = ssa_target;
 992       op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
 993       op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
 994     }
 995   else if (n & 1)
 996     {
 997       digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
 998       op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
 999       op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1000     }
1001   else
1002     {
1003       op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1004       op1 = op0;
1005     }
1006
1007   mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target, op0, op1);
1008   gimple_set_location (mult_stmt, loc);
1009   gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1010
1011   return ssa_target;
1012 }
1013
1014 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1015    This function needs to be kept in sync with powi_cost above.  */
1016
1017 static tree
1018 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1019                tree arg0, HOST_WIDE_INT n)
1020 {
1021   tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1022   gimple div_stmt;
1023   tree target;
1024
1025   if (n == 0)
1026     return build_real (type, dconst1);
1027
1028   memset (cache, 0,  sizeof (cache));
1029   cache[1] = arg0;
1030
1031   result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
1032   if (n >= 0)
1033     return result;
1034
1035   /* If the original exponent was negative, reciprocate the result.  */
1036   target = make_temp_ssa_name (type, NULL, "powmult");
1037   div_stmt = gimple_build_assign_with_ops (RDIV_EXPR, target,
1038                                            build_real (type, dconst1),
1039                                            result);
1040   gimple_set_location (div_stmt, loc);
1041   gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1042
1043   return target;
1044 }
1045
1046 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1047    location info LOC.  If the arguments are appropriate, create an
1048    equivalent sequence of statements prior to GSI using an optimal
1049    number of multiplications, and return an expession holding the
1050    result.  */
1051
1052 static tree
1053 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1054                             tree arg0, HOST_WIDE_INT n)
1055 {
1056   /* Avoid largest negative number.  */
1057   if (n != -n
1058       && ((n >= -1 && n <= 2)
1059           || (optimize_function_for_speed_p (cfun)
1060               && powi_cost (n) <= POWI_MAX_MULTS)))
1061     return powi_as_mults (gsi, loc, arg0, n);
1062
1063   return NULL_TREE;
1064 }
1065
1066 /* Build a gimple call statement that calls FN with argument ARG.
1067    Set the lhs of the call statement to a fresh SSA name.  Insert the
1068    statement prior to GSI's current position, and return the fresh
1069    SSA name.  */
1070
1071 static tree
1072 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1073                        tree fn, tree arg)
1074 {
1075   gimple call_stmt;
1076   tree ssa_target;
1077
1078   call_stmt = gimple_build_call (fn, 1, arg);
1079   ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1080   gimple_set_lhs (call_stmt, ssa_target);
1081   gimple_set_location (call_stmt, loc);
1082   gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1083
1084   return ssa_target;
1085 }
1086
1087 /* Build a gimple binary operation with the given CODE and arguments
1088    ARG0, ARG1, assigning the result to a new SSA name for variable
1089    TARGET.  Insert the statement prior to GSI's current position, and
1090    return the fresh SSA name.*/
1091
1092 static tree
1093 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1094                         const char *name, enum tree_code code,
1095                         tree arg0, tree arg1)
1096 {
1097   tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1098   gimple stmt = gimple_build_assign_with_ops (code, result, arg0, arg1);
1099   gimple_set_location (stmt, loc);
1100   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1101   return result;
1102 }
1103
1104 /* Build a gimple reference operation with the given CODE and argument
1105    ARG, assigning the result to a new SSA name of TYPE with NAME.
1106    Insert the statement prior to GSI's current position, and return
1107    the fresh SSA name.  */
1108
1109 static inline tree
1110 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1111                       const char *name, enum tree_code code, tree arg0)
1112 {
1113   tree result = make_temp_ssa_name (type, NULL, name);
1114   gimple stmt = gimple_build_assign (result, build1 (code, type, arg0));
1115   gimple_set_location (stmt, loc);
1116   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1117   return result;
1118 }
1119
1120 /* Build a gimple assignment to cast VAL to TYPE.  Insert the statement
1121    prior to GSI's current position, and return the fresh SSA name.  */
1122
1123 static tree
1124 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1125                        tree type, tree val)
1126 {
1127   tree result = make_ssa_name (type, NULL);
1128   gimple stmt = gimple_build_assign_with_ops (NOP_EXPR, result, val, NULL_TREE);
1129   gimple_set_location (stmt, loc);
1130   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1131   return result;
1132 }
1133
1134 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1135    with location info LOC.  If possible, create an equivalent and
1136    less expensive sequence of statements prior to GSI, and return an
1137    expession holding the result.  */
1138
1139 static tree
1140 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
1141                            tree arg0, tree arg1)
1142 {
1143   REAL_VALUE_TYPE c, cint, dconst1_4, dconst3_4, dconst1_3, dconst1_6;
1144   REAL_VALUE_TYPE c2, dconst3;
1145   HOST_WIDE_INT n;
1146   tree type, sqrtfn, cbrtfn, sqrt_arg0, sqrt_sqrt, result, cbrt_x, powi_cbrt_x;
1147   enum machine_mode mode;
1148   bool hw_sqrt_exists, c_is_int, c2_is_int;
1149
1150   /* If the exponent isn't a constant, there's nothing of interest
1151      to be done.  */
1152   if (TREE_CODE (arg1) != REAL_CST)
1153     return NULL_TREE;
1154
1155   /* If the exponent is equivalent to an integer, expand to an optimal
1156      multiplication sequence when profitable.  */
1157   c = TREE_REAL_CST (arg1);
1158   n = real_to_integer (&c);
1159   real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
1160   c_is_int = real_identical (&c, &cint);
1161
1162   if (c_is_int
1163       && ((n >= -1 && n <= 2)
1164           || (flag_unsafe_math_optimizations
1165               && optimize_insn_for_speed_p ()
1166               && powi_cost (n) <= POWI_MAX_MULTS)))
1167     return gimple_expand_builtin_powi (gsi, loc, arg0, n);
1168
1169   /* Attempt various optimizations using sqrt and cbrt.  */
1170   type = TREE_TYPE (arg0);
1171   mode = TYPE_MODE (type);
1172   sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1173
1174   /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
1175      unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
1176      sqrt(-0) = -0.  */
1177   if (sqrtfn
1178       && REAL_VALUES_EQUAL (c, dconsthalf)
1179       && !HONOR_SIGNED_ZEROS (mode))
1180     return build_and_insert_call (gsi, loc, sqrtfn, arg0);
1181
1182   /* Optimize pow(x,0.25) = sqrt(sqrt(x)).  Assume on most machines that
1183      a builtin sqrt instruction is smaller than a call to pow with 0.25,
1184      so do this optimization even if -Os.  Don't do this optimization
1185      if we don't have a hardware sqrt insn.  */
1186   dconst1_4 = dconst1;
1187   SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
1188   hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
1189
1190   if (flag_unsafe_math_optimizations
1191       && sqrtfn
1192       && REAL_VALUES_EQUAL (c, dconst1_4)
1193       && hw_sqrt_exists)
1194     {
1195       /* sqrt(x)  */
1196       sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
1197
1198       /* sqrt(sqrt(x))  */
1199       return build_and_insert_call (gsi, loc, sqrtfn, sqrt_arg0);
1200     }
1201
1202   /* Optimize pow(x,0.75) = sqrt(x) * sqrt(sqrt(x)) unless we are
1203      optimizing for space.  Don't do this optimization if we don't have
1204      a hardware sqrt insn.  */
1205   real_from_integer (&dconst3_4, VOIDmode, 3, 0, 0);
1206   SET_REAL_EXP (&dconst3_4, REAL_EXP (&dconst3_4) - 2);
1207
1208   if (flag_unsafe_math_optimizations
1209       && sqrtfn
1210       && optimize_function_for_speed_p (cfun)
1211       && REAL_VALUES_EQUAL (c, dconst3_4)
1212       && hw_sqrt_exists)
1213     {
1214       /* sqrt(x)  */
1215       sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
1216
1217       /* sqrt(sqrt(x))  */
1218       sqrt_sqrt = build_and_insert_call (gsi, loc, sqrtfn, sqrt_arg0);
1219
1220       /* sqrt(x) * sqrt(sqrt(x))  */
1221       return build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1222                                      sqrt_arg0, sqrt_sqrt);
1223     }
1224
1225   /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
1226      optimizations since 1./3. is not exactly representable.  If x
1227      is negative and finite, the correct value of pow(x,1./3.) is
1228      a NaN with the "invalid" exception raised, because the value
1229      of 1./3. actually has an even denominator.  The correct value
1230      of cbrt(x) is a negative real value.  */
1231   cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
1232   dconst1_3 = real_value_truncate (mode, dconst_third ());
1233
1234   if (flag_unsafe_math_optimizations
1235       && cbrtfn
1236       && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
1237       && REAL_VALUES_EQUAL (c, dconst1_3))
1238     return build_and_insert_call (gsi, loc, cbrtfn, arg0);
1239
1240   /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
1241      if we don't have a hardware sqrt insn.  */
1242   dconst1_6 = dconst1_3;
1243   SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
1244
1245   if (flag_unsafe_math_optimizations
1246       && sqrtfn
1247       && cbrtfn
1248       && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
1249       && optimize_function_for_speed_p (cfun)
1250       && hw_sqrt_exists
1251       && REAL_VALUES_EQUAL (c, dconst1_6))
1252     {
1253       /* sqrt(x)  */
1254       sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
1255
1256       /* cbrt(sqrt(x))  */
1257       return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
1258     }
1259
1260   /* Optimize pow(x,c), where n = 2c for some nonzero integer n
1261      and c not an integer, into
1262
1263        sqrt(x) * powi(x, n/2),                n > 0;
1264        1.0 / (sqrt(x) * powi(x, abs(n/2))),   n < 0.
1265
1266      Do not calculate the powi factor when n/2 = 0.  */
1267   real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
1268   n = real_to_integer (&c2);
1269   real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
1270   c2_is_int = real_identical (&c2, &cint);
1271
1272   if (flag_unsafe_math_optimizations
1273       && sqrtfn
1274       && c2_is_int
1275       && !c_is_int
1276       && optimize_function_for_speed_p (cfun))
1277     {
1278       tree powi_x_ndiv2 = NULL_TREE;
1279
1280       /* Attempt to fold powi(arg0, abs(n/2)) into multiplies.  If not
1281          possible or profitable, give up.  Skip the degenerate case when
1282          n is 1 or -1, where the result is always 1.  */
1283       if (absu_hwi (n) != 1)
1284         {
1285           powi_x_ndiv2 = gimple_expand_builtin_powi (gsi, loc, arg0,
1286                                                      abs_hwi (n / 2));
1287           if (!powi_x_ndiv2)
1288             return NULL_TREE;
1289         }
1290
1291       /* Calculate sqrt(x).  When n is not 1 or -1, multiply it by the
1292          result of the optimal multiply sequence just calculated.  */
1293       sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
1294
1295       if (absu_hwi (n) == 1)
1296         result = sqrt_arg0;
1297       else
1298         result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1299                                          sqrt_arg0, powi_x_ndiv2);
1300
1301       /* If n is negative, reciprocate the result.  */
1302       if (n < 0)
1303         result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1304                                          build_real (type, dconst1), result);
1305       return result;
1306     }
1307
1308   /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
1309
1310      powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
1311      1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.
1312
1313      Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
1314      different from pow(x, 1./3.) due to rounding and behavior with
1315      negative x, we need to constrain this transformation to unsafe
1316      math and positive x or finite math.  */
1317   real_from_integer (&dconst3, VOIDmode, 3, 0, 0);
1318   real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
1319   real_round (&c2, mode, &c2);
1320   n = real_to_integer (&c2);
1321   real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
1322   real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
1323   real_convert (&c2, mode, &c2);
1324
1325   if (flag_unsafe_math_optimizations
1326       && cbrtfn
1327       && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
1328       && real_identical (&c2, &c)
1329       && !c2_is_int
1330       && optimize_function_for_speed_p (cfun)
1331       && powi_cost (n / 3) <= POWI_MAX_MULTS)
1332     {
1333       tree powi_x_ndiv3 = NULL_TREE;
1334
1335       /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
1336          possible or profitable, give up.  Skip the degenerate case when
1337          abs(n) < 3, where the result is always 1.  */
1338       if (absu_hwi (n) >= 3)
1339         {
1340           powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
1341                                                      abs_hwi (n / 3));
1342           if (!powi_x_ndiv3)
1343             return NULL_TREE;
1344         }
1345
1346       /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
1347          as that creates an unnecessary variable.  Instead, just produce
1348          either cbrt(x) or cbrt(x) * cbrt(x).  */
1349       cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
1350
1351       if (absu_hwi (n) % 3 == 1)
1352         powi_cbrt_x = cbrt_x;
1353       else
1354         powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1355                                               cbrt_x, cbrt_x);
1356
1357       /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
1358       if (absu_hwi (n) < 3)
1359         result = powi_cbrt_x;
1360       else
1361         result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1362                                          powi_x_ndiv3, powi_cbrt_x);
1363
1364       /* If n is negative, reciprocate the result.  */
1365       if (n < 0)
1366         result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1367                                          build_real (type, dconst1), result);
1368
1369       return result;
1370     }
1371
1372   /* No optimizations succeeded.  */
1373   return NULL_TREE;
1374 }
1375
1376 /* ARG is the argument to a cabs builtin call in GSI with location info
1377    LOC.  Create a sequence of statements prior to GSI that calculates
1378    sqrt(R*R + I*I), where R and I are the real and imaginary components
1379    of ARG, respectively.  Return an expression holding the result.  */
1380
1381 static tree
1382 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
1383 {
1384   tree real_part, imag_part, addend1, addend2, sum, result;
1385   tree type = TREE_TYPE (TREE_TYPE (arg));
1386   tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1387   enum machine_mode mode = TYPE_MODE (type);
1388
1389   if (!flag_unsafe_math_optimizations
1390       || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
1391       || !sqrtfn
1392       || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
1393     return NULL_TREE;
1394
1395   real_part = build_and_insert_ref (gsi, loc, type, "cabs",
1396                                     REALPART_EXPR, arg);
1397   addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1398                                     real_part, real_part);
1399   imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
1400                                     IMAGPART_EXPR, arg);
1401   addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1402                                     imag_part, imag_part);
1403   sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
1404   result = build_and_insert_call (gsi, loc, sqrtfn, sum);
1405
1406   return result;
1407 }
1408
1409 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
1410    on the SSA_NAME argument of each of them.  Also expand powi(x,n) into
1411    an optimal number of multiplies, when n is a constant.  */
1412
1413 static unsigned int
1414 execute_cse_sincos (void)
1415 {
1416   basic_block bb;
1417   bool cfg_changed = false;
1418
1419   calculate_dominance_info (CDI_DOMINATORS);
1420   memset (&sincos_stats, 0, sizeof (sincos_stats));
1421
1422   FOR_EACH_BB_FN (bb, cfun)
1423     {
1424       gimple_stmt_iterator gsi;
1425       bool cleanup_eh = false;
1426
1427       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1428         {
1429           gimple stmt = gsi_stmt (gsi);
1430           tree fndecl;
1431
1432           /* Only the last stmt in a bb could throw, no need to call
1433              gimple_purge_dead_eh_edges if we change something in the middle
1434              of a basic block.  */
1435           cleanup_eh = false;
1436
1437           if (is_gimple_call (stmt)
1438               && gimple_call_lhs (stmt)
1439               && (fndecl = gimple_call_fndecl (stmt))
1440               && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
1441             {
1442               tree arg, arg0, arg1, result;
1443               HOST_WIDE_INT n;
1444               location_t loc;
1445
1446               switch (DECL_FUNCTION_CODE (fndecl))
1447                 {
1448                 CASE_FLT_FN (BUILT_IN_COS):
1449                 CASE_FLT_FN (BUILT_IN_SIN):
1450                 CASE_FLT_FN (BUILT_IN_CEXPI):
1451                   /* Make sure we have either sincos or cexp.  */
1452                   if (!targetm.libc_has_function (function_c99_math_complex)
1453                       && !targetm.libc_has_function (function_sincos))
1454                     break;
1455
1456                   arg = gimple_call_arg (stmt, 0);
1457                   if (TREE_CODE (arg) == SSA_NAME)
1458                     cfg_changed |= execute_cse_sincos_1 (arg);
1459                   break;
1460
1461                 CASE_FLT_FN (BUILT_IN_POW):
1462                   arg0 = gimple_call_arg (stmt, 0);
1463                   arg1 = gimple_call_arg (stmt, 1);
1464
1465                   loc = gimple_location (stmt);
1466                   result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
1467
1468                   if (result)
1469                     {
1470                       tree lhs = gimple_get_lhs (stmt);
1471                       gimple new_stmt = gimple_build_assign (lhs, result);
1472                       gimple_set_location (new_stmt, loc);
1473                       unlink_stmt_vdef (stmt);
1474                       gsi_replace (&gsi, new_stmt, true);
1475                       cleanup_eh = true;
1476                       if (gimple_vdef (stmt))
1477                         release_ssa_name (gimple_vdef (stmt));
1478                     }
1479                   break;
1480
1481                 CASE_FLT_FN (BUILT_IN_POWI):
1482                   arg0 = gimple_call_arg (stmt, 0);
1483                   arg1 = gimple_call_arg (stmt, 1);
1484                   loc = gimple_location (stmt);
1485
1486                   if (real_minus_onep (arg0))
1487                     {
1488                       tree t0, t1, cond, one, minus_one;
1489                       gimple stmt;
1490
1491                       t0 = TREE_TYPE (arg0);
1492                       t1 = TREE_TYPE (arg1);
1493                       one = build_real (t0, dconst1);
1494                       minus_one = build_real (t0, dconstm1);
1495
1496                       cond = make_temp_ssa_name (t1, NULL, "powi_cond");
1497                       stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, cond,
1498                                                            arg1,
1499                                                            build_int_cst (t1,
1500                                                                           1));
1501                       gimple_set_location (stmt, loc);
1502                       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1503
1504                       result = make_temp_ssa_name (t0, NULL, "powi");
1505                       stmt = gimple_build_assign_with_ops (COND_EXPR, result,
1506                                                            cond,
1507                                                            minus_one, one);
1508                       gimple_set_location (stmt, loc);
1509                       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1510                     }
1511                   else
1512                     {
1513                       if (!tree_fits_shwi_p (arg1))
1514                         break;
1515
1516                       n = tree_to_shwi (arg1);
1517                       result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
1518                     }
1519
1520                   if (result)
1521                     {
1522                       tree lhs = gimple_get_lhs (stmt);
1523                       gimple new_stmt = gimple_build_assign (lhs, result);
1524                       gimple_set_location (new_stmt, loc);
1525                       unlink_stmt_vdef (stmt);
1526                       gsi_replace (&gsi, new_stmt, true);
1527                       cleanup_eh = true;
1528                       if (gimple_vdef (stmt))
1529                         release_ssa_name (gimple_vdef (stmt));
1530                     }
1531                   break;
1532
1533                 CASE_FLT_FN (BUILT_IN_CABS):
1534                   arg0 = gimple_call_arg (stmt, 0);
1535                   loc = gimple_location (stmt);
1536                   result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
1537
1538                   if (result)
1539                     {
1540                       tree lhs = gimple_get_lhs (stmt);
1541                       gimple new_stmt = gimple_build_assign (lhs, result);
1542                       gimple_set_location (new_stmt, loc);
1543                       unlink_stmt_vdef (stmt);
1544                       gsi_replace (&gsi, new_stmt, true);
1545                       cleanup_eh = true;
1546                       if (gimple_vdef (stmt))
1547                         release_ssa_name (gimple_vdef (stmt));
1548                     }
1549                   break;
1550
1551                 default:;
1552                 }
1553             }
1554         }
1555       if (cleanup_eh)
1556         cfg_changed |= gimple_purge_dead_eh_edges (bb);
1557     }
1558
1559   statistics_counter_event (cfun, "sincos statements inserted",
1560                             sincos_stats.inserted);
1561
1562   free_dominance_info (CDI_DOMINATORS);
1563   return cfg_changed ? TODO_cleanup_cfg : 0;
1564 }
1565
1566 static bool
1567 gate_cse_sincos (void)
1568 {
1569   /* We no longer require either sincos or cexp, since powi expansion
1570      piggybacks on this pass.  */
1571   return optimize;
1572 }
1573
1574 namespace {
1575
1576 const pass_data pass_data_cse_sincos =
1577 {
1578   GIMPLE_PASS, /* type */
1579   "sincos", /* name */
1580   OPTGROUP_NONE, /* optinfo_flags */
1581   true, /* has_gate */
1582   true, /* has_execute */
1583   TV_NONE, /* tv_id */
1584   PROP_ssa, /* properties_required */
1585   0, /* properties_provided */
1586   0, /* properties_destroyed */
1587   0, /* todo_flags_start */
1588   ( TODO_update_ssa | TODO_verify_ssa
1589     | TODO_verify_stmts ), /* todo_flags_finish */
1590 };
1591
1592 class pass_cse_sincos : public gimple_opt_pass
1593 {
1594 public:
1595   pass_cse_sincos (gcc::context *ctxt)
1596     : gimple_opt_pass (pass_data_cse_sincos, ctxt)
1597   {}
1598
1599   /* opt_pass methods: */
1600   bool gate () { return gate_cse_sincos (); }
1601   unsigned int execute () { return execute_cse_sincos (); }
1602
1603 }; // class pass_cse_sincos
1604
1605 } // anon namespace
1606
1607 gimple_opt_pass *
1608 make_pass_cse_sincos (gcc::context *ctxt)
1609 {
1610   return new pass_cse_sincos (ctxt);
1611 }
1612
1613 /* A symbolic number is used to detect byte permutation and selection
1614    patterns.  Therefore the field N contains an artificial number
1615    consisting of byte size markers:
1616
1617    0    - byte has the value 0
1618    1..size - byte contains the content of the byte
1619    number indexed with that value minus one  */
1620
1621 struct symbolic_number {
1622   unsigned HOST_WIDEST_INT n;
1623   tree type;
1624 };
1625
1626 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
1627    number N.  Return false if the requested operation is not permitted
1628    on a symbolic number.  */
1629
1630 static inline bool
1631 do_shift_rotate (enum tree_code code,
1632                  struct symbolic_number *n,
1633                  int count)
1634 {
1635   int bitsize = TYPE_PRECISION (n->type);
1636
1637   if (count % 8 != 0)
1638     return false;
1639
1640   /* Zero out the extra bits of N in order to avoid them being shifted
1641      into the significant bits.  */
1642   if (bitsize < 8 * (int)sizeof (HOST_WIDEST_INT))
1643     n->n &= ((unsigned HOST_WIDEST_INT)1 << bitsize) - 1;
1644
1645   switch (code)
1646     {
1647     case LSHIFT_EXPR:
1648       n->n <<= count;
1649       break;
1650     case RSHIFT_EXPR:
1651       /* Arithmetic shift of signed type: result is dependent on the value.  */
1652       if (!TYPE_UNSIGNED (n->type)
1653           && (n->n & ((unsigned HOST_WIDEST_INT) 0xff << (bitsize - 8))))
1654         return false;
1655       n->n >>= count;
1656       break;
1657     case LROTATE_EXPR:
1658       n->n = (n->n << count) | (n->n >> (bitsize - count));
1659       break;
1660     case RROTATE_EXPR:
1661       n->n = (n->n >> count) | (n->n << (bitsize - count));
1662       break;
1663     default:
1664       return false;
1665     }
1666   /* Zero unused bits for size.  */
1667   if (bitsize < 8 * (int)sizeof (HOST_WIDEST_INT))
1668     n->n &= ((unsigned HOST_WIDEST_INT)1 << bitsize) - 1;
1669   return true;
1670 }
1671
1672 /* Perform sanity checking for the symbolic number N and the gimple
1673    statement STMT.  */
1674
1675 static inline bool
1676 verify_symbolic_number_p (struct symbolic_number *n, gimple stmt)
1677 {
1678   tree lhs_type;
1679
1680   lhs_type = gimple_expr_type (stmt);
1681
1682   if (TREE_CODE (lhs_type) != INTEGER_TYPE)
1683     return false;
1684
1685   if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
1686     return false;
1687
1688   return true;
1689 }
1690
1691 /* find_bswap_1 invokes itself recursively with N and tries to perform
1692    the operation given by the rhs of STMT on the result.  If the
1693    operation could successfully be executed the function returns the
1694    tree expression of the source operand and NULL otherwise.  */
1695
1696 static tree
1697 find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
1698 {
1699   enum tree_code code;
1700   tree rhs1, rhs2 = NULL;
1701   gimple rhs1_stmt, rhs2_stmt;
1702   tree source_expr1;
1703   enum gimple_rhs_class rhs_class;
1704
1705   if (!limit || !is_gimple_assign (stmt))
1706     return NULL_TREE;
1707
1708   rhs1 = gimple_assign_rhs1 (stmt);
1709
1710   if (TREE_CODE (rhs1) != SSA_NAME)
1711     return NULL_TREE;
1712
1713   code = gimple_assign_rhs_code (stmt);
1714   rhs_class = gimple_assign_rhs_class (stmt);
1715   rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
1716
1717   if (rhs_class == GIMPLE_BINARY_RHS)
1718     rhs2 = gimple_assign_rhs2 (stmt);
1719
1720   /* Handle unary rhs and binary rhs with integer constants as second
1721      operand.  */
1722
1723   if (rhs_class == GIMPLE_UNARY_RHS
1724       || (rhs_class == GIMPLE_BINARY_RHS
1725           && TREE_CODE (rhs2) == INTEGER_CST))
1726     {
1727       if (code != BIT_AND_EXPR
1728           && code != LSHIFT_EXPR
1729           && code != RSHIFT_EXPR
1730           && code != LROTATE_EXPR
1731           && code != RROTATE_EXPR
1732           && code != NOP_EXPR
1733           && code != CONVERT_EXPR)
1734         return NULL_TREE;
1735
1736       source_expr1 = find_bswap_1 (rhs1_stmt, n, limit - 1);
1737
1738       /* If find_bswap_1 returned NULL STMT is a leaf node and we have
1739          to initialize the symbolic number.  */
1740       if (!source_expr1)
1741         {
1742           int size;
1743
1744           /* Set up the symbolic number N by setting each byte to a
1745              value between 1 and the byte size of rhs1.  The highest
1746              order byte is set to n->size and the lowest order
1747              byte to 1.  */
1748           n->type = TREE_TYPE (rhs1);
1749           size = TYPE_PRECISION (n->type);
1750           if (size % BITS_PER_UNIT != 0)
1751             return NULL_TREE;
1752           if (size > HOST_BITS_PER_WIDEST_INT)
1753             return NULL_TREE;
1754           size /= BITS_PER_UNIT;
1755           n->n = (sizeof (HOST_WIDEST_INT) < 8 ? 0 :
1756                   (unsigned HOST_WIDEST_INT)0x08070605 << 32 | 0x04030201);
1757
1758           if (size < (int)sizeof (HOST_WIDEST_INT))
1759             n->n &= ((unsigned HOST_WIDEST_INT)1 <<
1760                      (size * BITS_PER_UNIT)) - 1;
1761
1762           source_expr1 = rhs1;
1763         }
1764
1765       switch (code)
1766         {
1767         case BIT_AND_EXPR:
1768           {
1769             int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
1770             unsigned HOST_WIDEST_INT val = widest_int_cst_value (rhs2);
1771             unsigned HOST_WIDEST_INT tmp = val;
1772
1773             /* Only constants masking full bytes are allowed.  */
1774             for (i = 0; i < size; i++, tmp >>= BITS_PER_UNIT)
1775               if ((tmp & 0xff) != 0 && (tmp & 0xff) != 0xff)
1776                 return NULL_TREE;
1777
1778             n->n &= val;
1779           }
1780           break;
1781         case LSHIFT_EXPR:
1782         case RSHIFT_EXPR:
1783         case LROTATE_EXPR:
1784         case RROTATE_EXPR:
1785           if (!do_shift_rotate (code, n, (int)TREE_INT_CST_LOW (rhs2)))
1786             return NULL_TREE;
1787           break;
1788         CASE_CONVERT:
1789           {
1790             int type_size, old_type_size;
1791             tree type;
1792
1793             type = gimple_expr_type (stmt);
1794             type_size = TYPE_PRECISION (type);
1795             if (type_size % BITS_PER_UNIT != 0)
1796               return NULL_TREE;
1797             if (type_size > (int) HOST_BITS_PER_WIDEST_INT)
1798               return NULL_TREE;
1799
1800             /* Sign extension: result is dependent on the value.  */
1801             old_type_size = TYPE_PRECISION (n->type);
1802             if (!TYPE_UNSIGNED (n->type)
1803                 && type_size > old_type_size
1804                 && n->n &
1805                    ((unsigned HOST_WIDEST_INT) 0xff << (old_type_size - 8)))
1806               return NULL_TREE;
1807
1808             if (type_size / BITS_PER_UNIT < (int)(sizeof (HOST_WIDEST_INT)))
1809               {
1810                 /* If STMT casts to a smaller type mask out the bits not
1811                    belonging to the target type.  */
1812                 n->n &= ((unsigned HOST_WIDEST_INT)1 << type_size) - 1;
1813               }
1814             n->type = type;
1815           }
1816           break;
1817         default:
1818           return NULL_TREE;
1819         };
1820       return verify_symbolic_number_p (n, stmt) ? source_expr1 : NULL;
1821     }
1822
1823   /* Handle binary rhs.  */
1824
1825   if (rhs_class == GIMPLE_BINARY_RHS)
1826     {
1827       int i, size;
1828       struct symbolic_number n1, n2;
1829       unsigned HOST_WIDEST_INT mask;
1830       tree source_expr2;
1831
1832       if (code != BIT_IOR_EXPR)
1833         return NULL_TREE;
1834
1835       if (TREE_CODE (rhs2) != SSA_NAME)
1836         return NULL_TREE;
1837
1838       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
1839
1840       switch (code)
1841         {
1842         case BIT_IOR_EXPR:
1843           source_expr1 = find_bswap_1 (rhs1_stmt, &n1, limit - 1);
1844
1845           if (!source_expr1)
1846             return NULL_TREE;
1847
1848           source_expr2 = find_bswap_1 (rhs2_stmt, &n2, limit - 1);
1849
1850           if (source_expr1 != source_expr2
1851               || TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
1852             return NULL_TREE;
1853
1854           n->type = n1.type;
1855           size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
1856           for (i = 0, mask = 0xff; i < size; i++, mask <<= BITS_PER_UNIT)
1857             {
1858               unsigned HOST_WIDEST_INT masked1, masked2;
1859
1860               masked1 = n1.n & mask;
1861               masked2 = n2.n & mask;
1862               if (masked1 && masked2 && masked1 != masked2)
1863                 return NULL_TREE;
1864             }
1865           n->n = n1.n | n2.n;
1866
1867           if (!verify_symbolic_number_p (n, stmt))
1868             return NULL_TREE;
1869
1870           break;
1871         default:
1872           return NULL_TREE;
1873         }
1874       return source_expr1;
1875     }
1876   return NULL_TREE;
1877 }
1878
1879 /* Check if STMT completes a bswap implementation consisting of ORs,
1880    SHIFTs and ANDs.  Return the source tree expression on which the
1881    byte swap is performed and NULL if no bswap was found.  */
1882
1883 static tree
1884 find_bswap (gimple stmt)
1885 {
1886 /* The number which the find_bswap result should match in order to
1887    have a full byte swap.  The number is shifted to the left according
1888    to the size of the symbolic number before using it.  */
1889   unsigned HOST_WIDEST_INT cmp =
1890     sizeof (HOST_WIDEST_INT) < 8 ? 0 :
1891     (unsigned HOST_WIDEST_INT)0x01020304 << 32 | 0x05060708;
1892
1893   struct symbolic_number n;
1894   tree source_expr;
1895   int limit, bitsize;
1896
1897   /* The last parameter determines the depth search limit.  It usually
1898      correlates directly to the number of bytes to be touched.  We
1899      increase that number by three  here in order to also
1900      cover signed -> unsigned converions of the src operand as can be seen
1901      in libgcc, and for initial shift/and operation of the src operand.  */
1902   limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
1903   limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
1904   source_expr =  find_bswap_1 (stmt, &n, limit);
1905
1906   if (!source_expr)
1907     return NULL_TREE;
1908
1909   /* Zero out the extra bits of N and CMP.  */
1910   bitsize = TYPE_PRECISION (n.type);
1911   if (bitsize < 8 * (int)sizeof (HOST_WIDEST_INT))
1912     {
1913       unsigned HOST_WIDEST_INT mask =
1914         ((unsigned HOST_WIDEST_INT)1 << bitsize) - 1;
1915
1916       n.n &= mask;
1917       cmp >>= sizeof (HOST_WIDEST_INT) * BITS_PER_UNIT - bitsize;
1918     }
1919
1920   /* A complete byte swap should make the symbolic number to start
1921      with the largest digit in the highest order byte.  */
1922   if (cmp != n.n)
1923     return NULL_TREE;
1924
1925   return source_expr;
1926 }
1927
1928 /* Find manual byte swap implementations and turn them into a bswap
1929    builtin invokation.  */
1930
1931 static unsigned int
1932 execute_optimize_bswap (void)
1933 {
1934   basic_block bb;
1935   bool bswap16_p, bswap32_p, bswap64_p;
1936   bool changed = false;
1937   tree bswap16_type = NULL_TREE, bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1938
1939   if (BITS_PER_UNIT != 8 || CHAR_BIT != 8)
1940     return 0;
1941
1942   if (sizeof (HOST_WIDEST_INT) < 8)
1943     return 0;
1944
1945   bswap16_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP16)
1946                && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing);
1947   bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1948                && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
1949   bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1950                && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1951                    || (bswap32_p && word_mode == SImode)));
1952
1953   if (!bswap16_p && !bswap32_p && !bswap64_p)
1954     return 0;
1955
1956   /* Determine the argument type of the builtins.  The code later on
1957      assumes that the return and argument type are the same.  */
1958   if (bswap16_p)
1959     {
1960       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP16);
1961       bswap16_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1962     }
1963
1964   if (bswap32_p)
1965     {
1966       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1967       bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1968     }
1969
1970   if (bswap64_p)
1971     {
1972       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1973       bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1974     }
1975
1976   memset (&bswap_stats, 0, sizeof (bswap_stats));
1977
1978   FOR_EACH_BB_FN (bb, cfun)
1979     {
1980       gimple_stmt_iterator gsi;
1981
1982       /* We do a reverse scan for bswap patterns to make sure we get the
1983          widest match. As bswap pattern matching doesn't handle
1984          previously inserted smaller bswap replacements as sub-
1985          patterns, the wider variant wouldn't be detected.  */
1986       for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
1987         {
1988           gimple stmt = gsi_stmt (gsi);
1989           tree bswap_src, bswap_type;
1990           tree bswap_tmp;
1991           tree fndecl = NULL_TREE;
1992           int type_size;
1993           gimple call;
1994
1995           if (!is_gimple_assign (stmt)
1996               || gimple_assign_rhs_code (stmt) != BIT_IOR_EXPR)
1997             continue;
1998
1999           type_size = TYPE_PRECISION (gimple_expr_type (stmt));
2000
2001           switch (type_size)
2002             {
2003             case 16:
2004               if (bswap16_p)
2005                 {
2006                   fndecl = builtin_decl_explicit (BUILT_IN_BSWAP16);
2007                   bswap_type = bswap16_type;
2008                 }
2009               break;
2010             case 32:
2011               if (bswap32_p)
2012                 {
2013                   fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
2014                   bswap_type = bswap32_type;
2015                 }
2016               break;
2017             case 64:
2018               if (bswap64_p)
2019                 {
2020                   fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
2021                   bswap_type = bswap64_type;
2022                 }
2023               break;
2024             default:
2025               continue;
2026             }
2027
2028           if (!fndecl)
2029             continue;
2030
2031           bswap_src = find_bswap (stmt);
2032
2033           if (!bswap_src)
2034             continue;
2035
2036           changed = true;
2037           if (type_size == 16)
2038             bswap_stats.found_16bit++;
2039           else if (type_size == 32)
2040             bswap_stats.found_32bit++;
2041           else
2042             bswap_stats.found_64bit++;
2043
2044           bswap_tmp = bswap_src;
2045
2046           /* Convert the src expression if necessary.  */
2047           if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
2048             {
2049               gimple convert_stmt;
2050               bswap_tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
2051               convert_stmt = gimple_build_assign_with_ops
2052                                 (NOP_EXPR, bswap_tmp, bswap_src, NULL);
2053               gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
2054             }
2055
2056           call = gimple_build_call (fndecl, 1, bswap_tmp);
2057
2058           bswap_tmp = gimple_assign_lhs (stmt);
2059
2060           /* Convert the result if necessary.  */
2061           if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
2062             {
2063               gimple convert_stmt;
2064               bswap_tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
2065               convert_stmt = gimple_build_assign_with_ops
2066                         (NOP_EXPR, gimple_assign_lhs (stmt), bswap_tmp, NULL);
2067               gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
2068             }
2069
2070           gimple_call_set_lhs (call, bswap_tmp);
2071
2072           if (dump_file)
2073             {
2074               fprintf (dump_file, "%d bit bswap implementation found at: ",
2075                        (int)type_size);
2076               print_gimple_stmt (dump_file, stmt, 0, 0);
2077             }
2078
2079           gsi_insert_after (&gsi, call, GSI_SAME_STMT);
2080           gsi_remove (&gsi, true);
2081         }
2082     }
2083
2084   statistics_counter_event (cfun, "16-bit bswap implementations found",
2085                             bswap_stats.found_16bit);
2086   statistics_counter_event (cfun, "32-bit bswap implementations found",
2087                             bswap_stats.found_32bit);
2088   statistics_counter_event (cfun, "64-bit bswap implementations found",
2089                             bswap_stats.found_64bit);
2090
2091   return (changed ? TODO_update_ssa | TODO_verify_ssa
2092           | TODO_verify_stmts : 0);
2093 }
2094
2095 static bool
2096 gate_optimize_bswap (void)
2097 {
2098   return flag_expensive_optimizations && optimize;
2099 }
2100
2101 namespace {
2102
2103 const pass_data pass_data_optimize_bswap =
2104 {
2105   GIMPLE_PASS, /* type */
2106   "bswap", /* name */
2107   OPTGROUP_NONE, /* optinfo_flags */
2108   true, /* has_gate */
2109   true, /* has_execute */
2110   TV_NONE, /* tv_id */
2111   PROP_ssa, /* properties_required */
2112   0, /* properties_provided */
2113   0, /* properties_destroyed */
2114   0, /* todo_flags_start */
2115   0, /* todo_flags_finish */
2116 };
2117
2118 class pass_optimize_bswap : public gimple_opt_pass
2119 {
2120 public:
2121   pass_optimize_bswap (gcc::context *ctxt)
2122     : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
2123   {}
2124
2125   /* opt_pass methods: */
2126   bool gate () { return gate_optimize_bswap (); }
2127   unsigned int execute () { return execute_optimize_bswap (); }
2128
2129 }; // class pass_optimize_bswap
2130
2131 } // anon namespace
2132
2133 gimple_opt_pass *
2134 make_pass_optimize_bswap (gcc::context *ctxt)
2135 {
2136   return new pass_optimize_bswap (ctxt);
2137 }
2138
2139 /* Return true if stmt is a type conversion operation that can be stripped
2140    when used in a widening multiply operation.  */
2141 static bool
2142 widening_mult_conversion_strippable_p (tree result_type, gimple stmt)
2143 {
2144   enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2145
2146   if (TREE_CODE (result_type) == INTEGER_TYPE)
2147     {
2148       tree op_type;
2149       tree inner_op_type;
2150
2151       if (!CONVERT_EXPR_CODE_P (rhs_code))
2152         return false;
2153
2154       op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2155
2156       /* If the type of OP has the same precision as the result, then
2157          we can strip this conversion.  The multiply operation will be
2158          selected to create the correct extension as a by-product.  */
2159       if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2160         return true;
2161
2162       /* We can also strip a conversion if it preserves the signed-ness of
2163          the operation and doesn't narrow the range.  */
2164       inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2165
2166       /* If the inner-most type is unsigned, then we can strip any
2167          intermediate widening operation.  If it's signed, then the
2168          intermediate widening operation must also be signed.  */
2169       if ((TYPE_UNSIGNED (inner_op_type)
2170            || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2171           && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2172         return true;
2173
2174       return false;
2175     }
2176
2177   return rhs_code == FIXED_CONVERT_EXPR;
2178 }
2179
2180 /* Return true if RHS is a suitable operand for a widening multiplication,
2181    assuming a target type of TYPE.
2182    There are two cases:
2183
2184      - RHS makes some value at least twice as wide.  Store that value
2185        in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2186
2187      - RHS is an integer constant.  Store that value in *NEW_RHS_OUT if so,
2188        but leave *TYPE_OUT untouched.  */
2189
2190 static bool
2191 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2192                         tree *new_rhs_out)
2193 {
2194   gimple stmt;
2195   tree type1, rhs1;
2196
2197   if (TREE_CODE (rhs) == SSA_NAME)
2198     {
2199       stmt = SSA_NAME_DEF_STMT (rhs);
2200       if (is_gimple_assign (stmt))
2201         {
2202           if (! widening_mult_conversion_strippable_p (type, stmt))
2203             rhs1 = rhs;
2204           else
2205             {
2206               rhs1 = gimple_assign_rhs1 (stmt);
2207
2208               if (TREE_CODE (rhs1) == INTEGER_CST)
2209                 {
2210                   *new_rhs_out = rhs1;
2211                   *type_out = NULL;
2212                   return true;
2213                 }
2214             }
2215         }
2216       else
2217         rhs1 = rhs;
2218
2219       type1 = TREE_TYPE (rhs1);
2220
2221       if (TREE_CODE (type1) != TREE_CODE (type)
2222           || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2223         return false;
2224
2225       *new_rhs_out = rhs1;
2226       *type_out = type1;
2227       return true;
2228     }
2229
2230   if (TREE_CODE (rhs) == INTEGER_CST)
2231     {
2232       *new_rhs_out = rhs;
2233       *type_out = NULL;
2234       return true;
2235     }
2236
2237   return false;
2238 }
2239
2240 /* Return true if STMT performs a widening multiplication, assuming the
2241    output type is TYPE.  If so, store the unwidened types of the operands
2242    in *TYPE1_OUT and *TYPE2_OUT respectively.  Also fill *RHS1_OUT and
2243    *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2244    and *TYPE2_OUT would give the operands of the multiplication.  */
2245
2246 static bool
2247 is_widening_mult_p (gimple stmt,
2248                     tree *type1_out, tree *rhs1_out,
2249                     tree *type2_out, tree *rhs2_out)
2250 {
2251   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2252
2253   if (TREE_CODE (type) != INTEGER_TYPE
2254       && TREE_CODE (type) != FIXED_POINT_TYPE)
2255     return false;
2256
2257   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2258                                rhs1_out))
2259     return false;
2260
2261   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2262                                rhs2_out))
2263     return false;
2264
2265   if (*type1_out == NULL)
2266     {
2267       if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2268         return false;
2269       *type1_out = *type2_out;
2270     }
2271
2272   if (*type2_out == NULL)
2273     {
2274       if (!int_fits_type_p (*rhs2_out, *type1_out))
2275         return false;
2276       *type2_out = *type1_out;
2277     }
2278
2279   /* Ensure that the larger of the two operands comes first. */
2280   if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2281     {
2282       tree tmp;
2283       tmp = *type1_out;
2284       *type1_out = *type2_out;
2285       *type2_out = tmp;
2286       tmp = *rhs1_out;
2287       *rhs1_out = *rhs2_out;
2288       *rhs2_out = tmp;
2289     }
2290
2291   return true;
2292 }
2293
2294 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2295    its rhs, and try to convert it into a WIDEN_MULT_EXPR.  The return
2296    value is true iff we converted the statement.  */
2297
2298 static bool
2299 convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi)
2300 {
2301   tree lhs, rhs1, rhs2, type, type1, type2;
2302   enum insn_code handler;
2303   enum machine_mode to_mode, from_mode, actual_mode;
2304   optab op;
2305   int actual_precision;
2306   location_t loc = gimple_location (stmt);
2307   bool from_unsigned1, from_unsigned2;
2308
2309   lhs = gimple_assign_lhs (stmt);
2310   type = TREE_TYPE (lhs);
2311   if (TREE_CODE (type) != INTEGER_TYPE)
2312     return false;
2313
2314   if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2315     return false;
2316
2317   to_mode = TYPE_MODE (type);
2318   from_mode = TYPE_MODE (type1);
2319   from_unsigned1 = TYPE_UNSIGNED (type1);
2320   from_unsigned2 = TYPE_UNSIGNED (type2);
2321
2322   if (from_unsigned1 && from_unsigned2)
2323     op = umul_widen_optab;
2324   else if (!from_unsigned1 && !from_unsigned2)
2325     op = smul_widen_optab;
2326   else
2327     op = usmul_widen_optab;
2328
2329   handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2330                                                   0, &actual_mode);
2331
2332   if (handler == CODE_FOR_nothing)
2333     {
2334       if (op != smul_widen_optab)
2335         {
2336           /* We can use a signed multiply with unsigned types as long as
2337              there is a wider mode to use, or it is the smaller of the two
2338              types that is unsigned.  Note that type1 >= type2, always.  */
2339           if ((TYPE_UNSIGNED (type1)
2340                && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2341               || (TYPE_UNSIGNED (type2)
2342                   && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2343             {
2344               from_mode = GET_MODE_WIDER_MODE (from_mode);
2345               if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2346                 return false;
2347             }
2348
2349           op = smul_widen_optab;
2350           handler = find_widening_optab_handler_and_mode (op, to_mode,
2351                                                           from_mode, 0,
2352                                                           &actual_mode);
2353
2354           if (handler == CODE_FOR_nothing)
2355             return false;
2356
2357           from_unsigned1 = from_unsigned2 = false;
2358         }
2359       else
2360         return false;
2361     }
2362
2363   /* Ensure that the inputs to the handler are in the correct precison
2364      for the opcode.  This will be the full mode size.  */
2365   actual_precision = GET_MODE_PRECISION (actual_mode);
2366   if (2 * actual_precision > TYPE_PRECISION (type))
2367     return false;
2368   if (actual_precision != TYPE_PRECISION (type1)
2369       || from_unsigned1 != TYPE_UNSIGNED (type1))
2370     rhs1 = build_and_insert_cast (gsi, loc,
2371                                   build_nonstandard_integer_type
2372                                     (actual_precision, from_unsigned1), rhs1);
2373   if (actual_precision != TYPE_PRECISION (type2)
2374       || from_unsigned2 != TYPE_UNSIGNED (type2))
2375     rhs2 = build_and_insert_cast (gsi, loc,
2376                                   build_nonstandard_integer_type
2377                                     (actual_precision, from_unsigned2), rhs2);
2378
2379   /* Handle constants.  */
2380   if (TREE_CODE (rhs1) == INTEGER_CST)
2381     rhs1 = fold_convert (type1, rhs1);
2382   if (TREE_CODE (rhs2) == INTEGER_CST)
2383     rhs2 = fold_convert (type2, rhs2);
2384
2385   gimple_assign_set_rhs1 (stmt, rhs1);
2386   gimple_assign_set_rhs2 (stmt, rhs2);
2387   gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2388   update_stmt (stmt);
2389   widen_mul_stats.widen_mults_inserted++;
2390   return true;
2391 }
2392
2393 /* Process a single gimple statement STMT, which is found at the
2394    iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
2395    rhs (given by CODE), and try to convert it into a
2396    WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
2397    is true iff we converted the statement.  */
2398
2399 static bool
2400 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
2401                             enum tree_code code)
2402 {
2403   gimple rhs1_stmt = NULL, rhs2_stmt = NULL;
2404   gimple conv1_stmt = NULL, conv2_stmt = NULL, conv_stmt;
2405   tree type, type1, type2, optype;
2406   tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2407   enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2408   optab this_optab;
2409   enum tree_code wmult_code;
2410   enum insn_code handler;
2411   enum machine_mode to_mode, from_mode, actual_mode;
2412   location_t loc = gimple_location (stmt);
2413   int actual_precision;
2414   bool from_unsigned1, from_unsigned2;
2415
2416   lhs = gimple_assign_lhs (stmt);
2417   type = TREE_TYPE (lhs);
2418   if (TREE_CODE (type) != INTEGER_TYPE
2419       && TREE_CODE (type) != FIXED_POINT_TYPE)
2420     return false;
2421
2422   if (code == MINUS_EXPR)
2423     wmult_code = WIDEN_MULT_MINUS_EXPR;
2424   else
2425     wmult_code = WIDEN_MULT_PLUS_EXPR;
2426
2427   rhs1 = gimple_assign_rhs1 (stmt);
2428   rhs2 = gimple_assign_rhs2 (stmt);
2429
2430   if (TREE_CODE (rhs1) == SSA_NAME)
2431     {
2432       rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2433       if (is_gimple_assign (rhs1_stmt))
2434         rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2435     }
2436
2437   if (TREE_CODE (rhs2) == SSA_NAME)
2438     {
2439       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2440       if (is_gimple_assign (rhs2_stmt))
2441         rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2442     }
2443
2444   /* Allow for one conversion statement between the multiply
2445      and addition/subtraction statement.  If there are more than
2446      one conversions then we assume they would invalidate this
2447      transformation.  If that's not the case then they should have
2448      been folded before now.  */
2449   if (CONVERT_EXPR_CODE_P (rhs1_code))
2450     {
2451       conv1_stmt = rhs1_stmt;
2452       rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2453       if (TREE_CODE (rhs1) == SSA_NAME)
2454         {
2455           rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2456           if (is_gimple_assign (rhs1_stmt))
2457             rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2458         }
2459       else
2460         return false;
2461     }
2462   if (CONVERT_EXPR_CODE_P (rhs2_code))
2463     {
2464       conv2_stmt = rhs2_stmt;
2465       rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2466       if (TREE_CODE (rhs2) == SSA_NAME)
2467         {
2468           rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2469           if (is_gimple_assign (rhs2_stmt))
2470             rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2471         }
2472       else
2473         return false;
2474     }
2475
2476   /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2477      is_widening_mult_p, but we still need the rhs returns.
2478
2479      It might also appear that it would be sufficient to use the existing
2480      operands of the widening multiply, but that would limit the choice of
2481      multiply-and-accumulate instructions.
2482
2483      If the widened-multiplication result has more than one uses, it is
2484      probably wiser not to do the conversion.  */
2485   if (code == PLUS_EXPR
2486       && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2487     {
2488       if (!has_single_use (rhs1)
2489           || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2490                                   &type2, &mult_rhs2))
2491         return false;
2492       add_rhs = rhs2;
2493       conv_stmt = conv1_stmt;
2494     }
2495   else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2496     {
2497       if (!has_single_use (rhs2)
2498           || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2499                                   &type2, &mult_rhs2))
2500         return false;
2501       add_rhs = rhs1;
2502       conv_stmt = conv2_stmt;
2503     }
2504   else
2505     return false;
2506
2507   to_mode = TYPE_MODE (type);
2508   from_mode = TYPE_MODE (type1);
2509   from_unsigned1 = TYPE_UNSIGNED (type1);
2510   from_unsigned2 = TYPE_UNSIGNED (type2);
2511   optype = type1;
2512
2513   /* There's no such thing as a mixed sign madd yet, so use a wider mode.  */
2514   if (from_unsigned1 != from_unsigned2)
2515     {
2516       if (!INTEGRAL_TYPE_P (type))
2517         return false;
2518       /* We can use a signed multiply with unsigned types as long as
2519          there is a wider mode to use, or it is the smaller of the two
2520          types that is unsigned.  Note that type1 >= type2, always.  */
2521       if ((from_unsigned1
2522            && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2523           || (from_unsigned2
2524               && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2525         {
2526           from_mode = GET_MODE_WIDER_MODE (from_mode);
2527           if (GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2528             return false;
2529         }
2530
2531       from_unsigned1 = from_unsigned2 = false;
2532       optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2533                                                false);
2534     }
2535
2536   /* If there was a conversion between the multiply and addition
2537      then we need to make sure it fits a multiply-and-accumulate.
2538      There should be a single mode change which does not change the
2539      value.  */
2540   if (conv_stmt)
2541     {
2542       /* We use the original, unmodified data types for this.  */
2543       tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
2544       tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
2545       int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
2546       bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
2547
2548       if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
2549         {
2550           /* Conversion is a truncate.  */
2551           if (TYPE_PRECISION (to_type) < data_size)
2552             return false;
2553         }
2554       else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
2555         {
2556           /* Conversion is an extend.  Check it's the right sort.  */
2557           if (TYPE_UNSIGNED (from_type) != is_unsigned
2558               && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
2559             return false;
2560         }
2561       /* else convert is a no-op for our purposes.  */
2562     }
2563
2564   /* Verify that the machine can perform a widening multiply
2565      accumulate in this mode/signedness combination, otherwise
2566      this transformation is likely to pessimize code.  */
2567   this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
2568   handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
2569                                                   from_mode, 0, &actual_mode);
2570
2571   if (handler == CODE_FOR_nothing)
2572     return false;
2573
2574   /* Ensure that the inputs to the handler are in the correct precision
2575      for the opcode.  This will be the full mode size.  */
2576   actual_precision = GET_MODE_PRECISION (actual_mode);
2577   if (actual_precision != TYPE_PRECISION (type1)
2578       || from_unsigned1 != TYPE_UNSIGNED (type1))
2579     mult_rhs1 = build_and_insert_cast (gsi, loc,
2580                                        build_nonstandard_integer_type
2581                                          (actual_precision, from_unsigned1),
2582                                        mult_rhs1);
2583   if (actual_precision != TYPE_PRECISION (type2)
2584       || from_unsigned2 != TYPE_UNSIGNED (type2))
2585     mult_rhs2 = build_and_insert_cast (gsi, loc,
2586                                        build_nonstandard_integer_type
2587                                          (actual_precision, from_unsigned2),
2588                                        mult_rhs2);
2589
2590   if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
2591     add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
2592
2593   /* Handle constants.  */
2594   if (TREE_CODE (mult_rhs1) == INTEGER_CST)
2595     mult_rhs1 = fold_convert (type1, mult_rhs1);
2596   if (TREE_CODE (mult_rhs2) == INTEGER_CST)
2597     mult_rhs2 = fold_convert (type2, mult_rhs2);
2598
2599   gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code, mult_rhs1, mult_rhs2,
2600                                     add_rhs);
2601   update_stmt (gsi_stmt (*gsi));
2602   widen_mul_stats.maccs_inserted++;
2603   return true;
2604 }
2605
2606 /* Combine the multiplication at MUL_STMT with operands OP1 and OP2
2607    with uses in additions and subtractions to form fused multiply-add
2608    operations.  Returns true if successful and MUL_STMT should be removed.

        MUL_STMT only supplies the result (via gimple_get_lhs) and the basic
        block; the two factors are passed separately as OP1 and OP2.  The
        caller in this file uses that to pass a pow call with both factors
        equal.

        The work is done in two passes over the immediate uses of the
        result.  The first pass only inspects: it returns false, before
        anything has been modified, as soon as one non-debug use cannot be
        turned into an FMA.  The second pass rewrites every non-debug use
        into an FMA_EXPR.  MUL_STMT itself is left in place; removing it is
        the caller's job when true is returned.  */
2609
2610 static bool
2611 convert_mult_to_fma (gimple mul_stmt, tree op1, tree op2)
2612 {
2613   tree mul_result = gimple_get_lhs (mul_stmt);
2614   tree type = TREE_TYPE (mul_result);
2615   gimple use_stmt, neguse_stmt, fma_stmt;
2616   use_operand_p use_p;
2617   imm_use_iterator imm_iter;
2618
       /* Floating-point contraction has been turned off (FP_CONTRACT_OFF),
          so floating-point multiplications must not be fused.  */
2619   if (FLOAT_TYPE_P (type)
2620       && flag_fp_contract_mode == FP_CONTRACT_OFF)
2621     return false;
2622
2623   /* We don't want to do bitfield reduction ops.  */
2624   if (INTEGRAL_TYPE_P (type)
2625       && (TYPE_PRECISION (type)
2626           != GET_MODE_PRECISION (TYPE_MODE (type))))
2627     return false;
2628
2629   /* If the target doesn't support it, don't generate it.  We assume that
2630      if fma isn't available then fms, fnma or fnms are not either.  */
2631   if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
2632     return false;
2633
2634   /* If the multiplication has zero uses, it is kept around probably because
2635      of -fnon-call-exceptions.  Don't optimize it away in that case,
2636      that is DCE's job.  */
2637   if (has_zero_uses (mul_result))
2638     return false;
2639
2640   /* Make sure that the multiplication statement becomes dead after
2641      the transformation, thus that all uses are transformed to FMAs.
2642      This means we assume that an FMA operation has the same cost
2643      as an addition.  This loop only checks; nothing is modified.  */
2644   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
2645     {
2646       enum tree_code use_code;
           /* RESULT is the SSA name to look for in the candidate addition
              or subtraction: the product itself, or the negated product
              once a NEGATE_EXPR has been looked through.  NEGATE_P tracks
              whether the product enters that operation negated.  */
2647       tree result = mul_result;
2648       bool negate_p = false;
2649
2650       use_stmt = USE_STMT (use_p);
2651
           /* Debug statements do not count as uses that must be fused.  */
2652       if (is_gimple_debug (use_stmt))
2653         continue;
2654
2655       /* For now restrict these operations to single basic blocks.  In theory
2656          we would want to support sinking the multiplication in
2657          m = a*b;
2658          if ()
2659            ma = m + c;
2660          else
2661            d = m;
2662          to form a fma in the then block and sink the multiplication to the
2663          else block.  */
2664       if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
2665         return false;
2666
2667       if (!is_gimple_assign (use_stmt))
2668         return false;
2669
2670       use_code = gimple_assign_rhs_code (use_stmt);
2671
2672       /* A negate on the multiplication leads to FNMA.  */
2673       if (use_code == NEGATE_EXPR)
2674         {
2675           ssa_op_iter iter;
2676           use_operand_p usep;
2677
2678           result = gimple_assign_lhs (use_stmt);
2679
2680           /* Make sure the negate statement becomes dead with this
2681              single transformation.  */
2682           if (!single_imm_use (gimple_assign_lhs (use_stmt),
2683                                &use_p, &neguse_stmt))
2684             return false;
2685
2686           /* Make sure the multiplication isn't also used on that stmt.  */
2687           FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
2688             if (USE_FROM_PTR (usep) == mul_result)
2689               return false;
2690
2691           /* Re-validate: the user of the negate has to pass the same
                  block and assignment checks as a direct use.  */
2692           use_stmt = neguse_stmt;
2693           if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
2694             return false;
2695           if (!is_gimple_assign (use_stmt))
2696             return false;
2697
2698           use_code = gimple_assign_rhs_code (use_stmt);
2699           negate_p = true;
2700         }
2701
2702       switch (use_code)
2703         {
2704         case MINUS_EXPR:
               /* x - RESULT: the product is the subtrahend, which flips
                  its sign.  */
2705           if (gimple_assign_rhs2 (use_stmt) == result)
2706             negate_p = !negate_p;
2707           break;
2708         case PLUS_EXPR:
2709           break;
2710         default:
2711           /* FMA can only be formed from PLUS and MINUS.  */
2712           return false;
2713         }
2714
2715       /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
2716          by a MULT_EXPR that we'll visit later, we might be able to
2717          get a more profitable match with fnma.
2718          OTOH, if we don't, a negate / fma pair has likely lower latency
2719          than a mult / subtract pair.
              This check only fires when the target has an fnma pattern but
              no fms pattern.  */
2720       if (use_code == MINUS_EXPR && !negate_p
2721           && gimple_assign_rhs1 (use_stmt) == result
2722           && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing
2723           && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
2724         {
2725           tree rhs2 = gimple_assign_rhs2 (use_stmt);
2726
2727           if (TREE_CODE (rhs2) == SSA_NAME)
2728             {
2729               gimple stmt2 = SSA_NAME_DEF_STMT (rhs2);
2730               if (has_single_use (rhs2)
2731                   && is_gimple_assign (stmt2)
2732                   && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
2733               return false;
2734             }
2735         }
2736
2737       /* We can't handle a * b + a * b.  */
2738       if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
2739         return false;
2740
2741       /* While it is possible to validate whether or not the exact form
2742          that we've recognized is available in the backend, the assumption
2743          is that the transformation is never a loss.  For instance, suppose
2744          the target only has the plain FMA pattern available.  Consider
2745          a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which
2746          is still two operations.  Consider -(a*b)-c -> fma(-a,b,-c): we
2747          still have 3 operations, but in the FMA form the two NEGs are
2748          independent and could be run in parallel.  */
2749     }
2750
       /* Every non-debug use passed the checks above, so each of them is
          now rewritten.  This loop modifies and deletes use statements,
          hence the FOR_EACH_IMM_USE_STMT form of the iteration instead of
          the FAST one used for the read-only walk.  */
2751   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
2752     {
2753       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
2754       enum tree_code use_code;
           /* MULOP1 starts out as OP1 and is replaced by its negation below
              when the product has to be negated.  RESULT and NEGATE_P play
              the same role as in the checking loop.  */
2755       tree addop, mulop1 = op1, result = mul_result;
2756       bool negate_p = false;
2757
2758       if (is_gimple_debug (use_stmt))
2759         continue;
2760
2761       use_code = gimple_assign_rhs_code (use_stmt);
2762       if (use_code == NEGATE_EXPR)
2763         {
               /* Look through the negate: delete it and rewrite its single
                  user instead.  That there is exactly one user was
                  established by the checking loop, which is why the return
                  value of single_imm_use is not tested here.  */
2764           result = gimple_assign_lhs (use_stmt);
2765           single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
2766           gsi_remove (&gsi, true);
2767           release_defs (use_stmt);
2768
2769           use_stmt = neguse_stmt;
2770           gsi = gsi_for_stmt (use_stmt);
2771           use_code = gimple_assign_rhs_code (use_stmt);
2772           negate_p = true;
2773         }
2774
2775       if (gimple_assign_rhs1 (use_stmt) == result)
2776         {
               /* The product is the first operand, so the second operand
                  is the addend.  */
2777           addop = gimple_assign_rhs2 (use_stmt);
2778           /* a * b - c -> a * b + (-c)  */
2779           if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
2780             addop = force_gimple_operand_gsi (&gsi,
2781                                               build1 (NEGATE_EXPR,
2782                                                       type, addop),
2783                                               true, NULL_TREE, true,
2784                                               GSI_SAME_STMT);
2785         }
2786       else
2787         {
               /* The product is the second operand, so the first operand
                  is the addend.  */
2788           addop = gimple_assign_rhs1 (use_stmt);
2789           /* a - b * c -> (-b) * c + a */
2790           if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
2791             negate_p = !negate_p;
2792         }
2793
           /* A negated product is expressed by negating its first factor.  */
2794       if (negate_p)
2795         mulop1 = force_gimple_operand_gsi (&gsi,
2796                                            build1 (NEGATE_EXPR,
2797                                                    type, mulop1),
2798                                            true, NULL_TREE, true,
2799                                            GSI_SAME_STMT);
2800
           /* Build lhs = FMA_EXPR <mulop1, op2, addop>, reusing the lhs of
              the addition or subtraction, and put it in that statement's
              place.  */
2801       fma_stmt = gimple_build_assign_with_ops (FMA_EXPR,
2802                                                gimple_assign_lhs (use_stmt),
2803                                                mulop1, op2,
2804                                                addop);
2805       gsi_replace (&gsi, fma_stmt, true);
2806       widen_mul_stats.fmas_inserted++;
2807     }
2808
2809   return true;
2810 }
2811
2812 /* Main worker of the widening_mul pass.  Walk every statement of every
2813    basic block of the current function (cfun) and try the following
2814    rewrites:

          - MULT_EXPR assignment: first convert_mult_to_widen, which replaces
            the MULT_EXPR with a WIDEN_MULT_EXPR where the operands are
            extended from smaller types.  Only if that fails,
            convert_mult_to_fma is tried on the two operands; when it
            succeeds the multiplication is deleted here.
          - PLUS_EXPR / MINUS_EXPR assignment: convert_plusminus_to_widen.
          - Call with a lhs to the pow, powf or powl builtin whose second
            argument is the real constant 2: handled like arg0 * arg0 by
            passing the first argument twice to convert_mult_to_fma; on
            success the call is deleted.

        The numbers of inserted widening multiplies, widening
        multiply-accumulates and FMAs are reported through
        statistics_counter_event.  Returns TODO_cleanup_cfg if deleting a
        pow call led to dead EH edges being purged, and 0 otherwise.  */
2815
2816 static unsigned int
2817 execute_optimize_widening_mul (void)
2818 {
2819   basic_block bb;
2820   bool cfg_changed = false;
2821
       /* Start every run with zeroed counters.  */
2822   memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
2823
2824   FOR_EACH_BB_FN (bb, cfun)
2825     {
2826       gimple_stmt_iterator gsi;
2827
           /* The loop header has no increment: GSI is advanced by the
              gsi_next at the bottom.  The paths that delete the current
              statement use `continue' to skip that gsi_next, since
              gsi_remove already leaves GSI on the following statement.  */
2828       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
2829         {
2830           gimple stmt = gsi_stmt (gsi);
2831           enum tree_code code;
2832
2833           if (is_gimple_assign (stmt))
2834             {
2835               code = gimple_assign_rhs_code (stmt);
2836               switch (code)
2837                 {
2838                 case MULT_EXPR:
                       /* Prefer a widening multiply; fall back to FMA
                          formation only when that did not apply.  A true
                          result from convert_mult_to_fma means all uses
                          were rewritten and the multiplication must be
                          removed.  */
2839                   if (!convert_mult_to_widen (stmt, &gsi)
2840                       && convert_mult_to_fma (stmt,
2841                                               gimple_assign_rhs1 (stmt),
2842                                               gimple_assign_rhs2 (stmt)))
2843                     {
2844                       gsi_remove (&gsi, true);
2845                       release_defs (stmt);
2846                       continue;
2847                     }
2848                   break;
2849
2850                 case PLUS_EXPR:
2851                 case MINUS_EXPR:
                       /* The return value is not needed: the statement is
                          rewritten in place through GSI, if at all.  */
2852                   convert_plusminus_to_widen (&gsi, stmt, code);
2853                   break;
2854
2855                 default:;
2856                 }
2857             }
2858           else if (is_gimple_call (stmt)
2859                    && gimple_call_lhs (stmt))
2860             {
2861               tree fndecl = gimple_call_fndecl (stmt);
2862               if (fndecl
2863                   && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
2864                 {
2865                   switch (DECL_FUNCTION_CODE (fndecl))
2866                     {
2867                       case BUILT_IN_POWF:
2868                       case BUILT_IN_POW:
2869                       case BUILT_IN_POWL:
                             /* pow (x, 2.0) is treated as x * x: both
                                multiplication operands are argument 0.  */
2870                         if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
2871                             && REAL_VALUES_EQUAL
2872                                  (TREE_REAL_CST (gimple_call_arg (stmt, 1)),
2873                                   dconst2)
2874                             && convert_mult_to_fma (stmt,
2875                                                     gimple_call_arg (stmt, 0),
2876                                                     gimple_call_arg (stmt, 0)))
2877                           {
                                 /* Unlike the MULT_EXPR case the deleted
                                    statement is a call: unlink its virtual
                                    definition first, and purge EH edges
                                    that became dead if gsi_remove reports
                                    that EH cleanup is needed.  */
2878                             unlink_stmt_vdef (stmt);
2879                             if (gsi_remove (&gsi, true)
2880                                 && gimple_purge_dead_eh_edges (bb))
2881                               cfg_changed = true;
2882                             release_defs (stmt);
2883                             continue;
2884                           }
2885                           break;
2886
2887                       default:;
2888                     }
2889                 }
2890             }
2891           gsi_next (&gsi);
2892         }
2893     }
2894
2895   statistics_counter_event (cfun, "widening multiplications inserted",
2896                             widen_mul_stats.widen_mults_inserted);
2897   statistics_counter_event (cfun, "widening maccs inserted",
2898                             widen_mul_stats.maccs_inserted);
2899   statistics_counter_event (cfun, "fused multiply-adds inserted",
2900                             widen_mul_stats.fmas_inserted);
2901
       /* Request a CFG cleanup only if EH edges were actually purged.  */
2902   return cfg_changed ? TODO_cleanup_cfg : 0;
2903 }
2904
     /* Gate of the widening_mul pass.  Returns true, i.e. the pass runs,
        only when both flag_expensive_optimizations and optimize are
        nonzero.  */
2905 static bool
2906 gate_optimize_widening_mul (void)
2907 {
2908   return flag_expensive_optimizations && optimize;
2909 }
2910
     /* Pass descriptor and pass class for widening_mul.  Both are kept in
        an anonymous namespace; the factory function defined after this
        namespace is how the pass object gets created.  */
2911 namespace {
2912
     /* Static description of the pass: a GIMPLE pass named "widening_mul"
        that has both a gate and an execute function, requires PROP_ssa,
        and asks for SSA and statement verification plus an SSA update
        when it finishes.  */
2913 const pass_data pass_data_optimize_widening_mul =
2914 {
2915   GIMPLE_PASS, /* type */
2916   "widening_mul", /* name */
2917   OPTGROUP_NONE, /* optinfo_flags */
2918   true, /* has_gate */
2919   true, /* has_execute */
2920   TV_NONE, /* tv_id */
2921   PROP_ssa, /* properties_required */
2922   0, /* properties_provided */
2923   0, /* properties_destroyed */
2924   0, /* todo_flags_start */
2925   ( TODO_verify_ssa | TODO_verify_stmts
2926     | TODO_update_ssa ), /* todo_flags_finish */
2927 };
2928
     /* Pass object for widening_mul.  It is constructed from the
        descriptor above and a context; gate () and execute () simply
        forward to gate_optimize_widening_mul and
        execute_optimize_widening_mul.  */
2929 class pass_optimize_widening_mul : public gimple_opt_pass
2930 {
2931 public:
2932   pass_optimize_widening_mul (gcc::context *ctxt)
2933     : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
2934   {}
2935
2936   /* opt_pass methods: */
2937   bool gate () { return gate_optimize_widening_mul (); }
2938   unsigned int execute () { return execute_optimize_widening_mul (); }
2939
2940 }; // class pass_optimize_widening_mul
2941
2942 } // anon namespace
2943
     /* Factory for the widening_mul pass: returns a pass_optimize_widening_mul
        object allocated with new and constructed for the context CTXT.  */
2944 gimple_opt_pass *
2945 make_pass_optimize_widening_mul (gcc::context *ctxt)
2946 {
2947   return new pass_optimize_widening_mul (ctxt);
2948 }