[official-gcc.git] / gcc / tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2015 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "tm_p.h"
33 #include "ssa.h"
34 #include "optabs-tree.h"
35 #include "gimple-pretty-print.h"
36 #include "diagnostic-core.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "cfganal.h"
40 #include "gimple-fold.h"
41 #include "tree-eh.h"
42 #include "gimple-iterator.h"
43 #include "gimplify-me.h"
44 #include "tree-cfg.h"
45 #include "tree-ssa-loop.h"
46 #include "flags.h"
47 #include "tree-ssa.h"
48 #include "langhooks.h"
49 #include "cfgloop.h"
50 #include "params.h"
51 #include "builtins.h"
52 #include "gimplify.h"
54 /* This is a simple global reassociation pass. It is, in part, based
55 on the LLVM pass of the same name (They do some things more/less
56 than we do, in different orders, etc).
58 It consists of five steps:
60 1. Breaking up subtract operations into addition + negate, where
61 it would promote the reassociation of adds.
63 2. Left linearization of the expression trees, so that (A+B)+(C+D)
64 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
65 During linearization, we place the operands of the binary
66 expressions into a vector of operand_entry_*
68 3. Optimization of the operand lists, eliminating things like a +
69 -a, a & a, etc.
71 3a. Combine repeated factors with the same occurrence counts
72 into a __builtin_powi call that will later be optimized into
73 an optimal number of multiplies.
75 4. Rewrite the expression trees we linearized and optimized so
76 they are in proper rank order.
78 5. Repropagate negates, as nothing else will clean it up ATM.
80 A bit of theory on #4, since nobody seems to write anything down
81 about why it makes sense to do it the way they do it:
83 We could do this much nicer theoretically, but don't (for reasons
84 explained after how to do it theoretically nice :P).
86 In order to promote the most redundancy elimination, you want
87 binary expressions whose operands are the same rank (or
88 preferably, the same value) exposed to the redundancy eliminator,
89 for possible elimination.
91 So the way to do this if we really cared, is to build the new op
92 tree from the leaves to the roots, merging as you go, and putting the
93 new op on the end of the worklist, until you are left with one
94 thing on the worklist.
96 IE if you have to rewrite the following set of operands (listed with
97 rank in parentheses), with opcode PLUS_EXPR:
99 a (1), b (1), c (1), d (2), e (2)
102 We start with our merge worklist empty, and the ops list with all of
103 those on it.
105 You want to first merge all leaves of the same rank, as much as
106 possible.
108 So first build a binary op of
110 mergetmp = a + b, and put "mergetmp" on the merge worklist.
112 Because there is no three operand form of PLUS_EXPR, c is not going to
113 be exposed to redundancy elimination as a rank 1 operand.
115 So you might as well throw it on the merge worklist (you could also
116 consider it to now be a rank two operand, and merge it with d and e,
117 but in this case, you then have evicted e from a binary op. So at
118 least in this situation, you can't win.)
120 Then build a binary op of d + e
121 mergetmp2 = d + e
123 and put mergetmp2 on the merge worklist.
125 so merge worklist = {mergetmp, c, mergetmp2}
127 Continue building binary ops of these operations until you have only
128 one operation left on the worklist.
130 So we have
132 build binary op
133 mergetmp3 = mergetmp + c
135 worklist = {mergetmp2, mergetmp3}
137 mergetmp4 = mergetmp2 + mergetmp3
139 worklist = {mergetmp4}
141 because we have one operation left, we can now just set the original
142 statement equal to the result of that operation.
144 This will at least expose a + b and d + e to redundancy elimination
145 as binary operations.
147 For extra points, you can reuse the old statements to build the
148 mergetmps, since you shouldn't run out.
150 So why don't we do this?
152 Because it's expensive, and rarely will help. Most trees we are
153 reassociating have 3 or fewer ops. If they have 2 ops, they already
154 will be written into a nice single binary op. If you have 3 ops, a
155 single simple check suffices to tell you whether the first two are of the
156 same rank. If so, you know to order it
158 mergetmp = op1 + op2
159 newstmt = mergetmp + op3
161 instead of
162 mergetmp = op2 + op3
163 newstmt = mergetmp + op1
165 If all three are of the same rank, you can't expose them all in a
166 single binary operator anyway, so the above is *still* the best you
167 can do.
169 Thus, this is what we do. When we have three ops left, we check to see
170 what order to put them in, and call it a day. As a nod to vector sum
171 reduction, we check if any of the ops are really a phi node that is a
172 destructive update for the associating op, and keep the destructive
173 update together for vector sum reduction recognition. */
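/* A rough source-level sketch of the overall effect (illustrative
   only; the pass works on GIMPLE, and the exact result depends on
   ranks and on later cleanup passes):

     t1 = a - b;
     t2 = t1 + c;
     t3 = t2 - a;

   Step 1 turns the subtracts into additions of negates, steps 2 and 4
   linearize and re-order the chain by rank, step 3 then sees the
   a + -a pair and cancels it, and step 5 repropagates the remaining
   negate, leaving roughly t3 = c - b.  */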
176 /* Statistics */
177 static struct
179 int linearized;
180 int constants_eliminated;
181 int ops_eliminated;
182 int rewritten;
183 int pows_encountered;
184 int pows_created;
185 } reassociate_stats;
187 /* Operator, rank pair. */
188 struct operand_entry
190 unsigned int rank;
191 int id;
192 tree op;
193 unsigned int count;
196 static object_allocator<operand_entry> operand_entry_pool
197 ("operand entry pool");
199 /* This is used to assign a unique ID to each struct operand_entry
200 so that qsort results are identical on different hosts. */
201 static int next_operand_entry_id;
203 /* Starting rank number for a given basic block, so that we can rank
204 operations using unmovable instructions in that BB based on the bb
205 depth. */
206 static long *bb_rank;
208 /* Operand->rank hashtable. */
209 static hash_map<tree, long> *operand_rank;
211 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
212 all basic blocks, the CFG should be adjusted - the basic block is
213 split right after that SSA_NAME's definition statement and before
214 its only use, which must be a bit ior. */
215 static vec<tree> reassoc_branch_fixups;
217 /* Forward decls. */
218 static long get_rank (tree);
219 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
221 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
222 possibly added by gsi_remove. */
224 bool
225 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
227 gimple *stmt = gsi_stmt (*gsi);
229 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
230 return gsi_remove (gsi, true);
232 gimple_stmt_iterator prev = *gsi;
233 gsi_prev (&prev);
234 unsigned uid = gimple_uid (stmt);
235 basic_block bb = gimple_bb (stmt);
236 bool ret = gsi_remove (gsi, true);
237 if (!gsi_end_p (prev))
238 gsi_next (&prev);
239 else
240 prev = gsi_start_bb (bb);
241 gimple *end_stmt = gsi_stmt (*gsi);
242 while ((stmt = gsi_stmt (prev)) != end_stmt)
244 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
245 gimple_set_uid (stmt, uid);
246 gsi_next (&prev);
248 return ret;
251 /* Bias amount for loop-carried phis. We want this to be larger than
252 the depth of any reassociation tree we can see, but not larger than
253 the rank difference between two blocks. */
254 #define PHI_LOOP_BIAS (1 << 15)
256 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
257 an innermost loop, and the phi has only a single use which is inside
258 the loop, then the rank is the block rank of the loop latch plus an
259 extra bias for the loop-carried dependence. This causes expressions
260 calculated into an accumulator variable to be independent for each
261 iteration of the loop. If STMT is some other phi, the rank is the
262 block rank of its containing block. */
263 static long
264 phi_rank (gimple *stmt)
266 basic_block bb = gimple_bb (stmt);
267 struct loop *father = bb->loop_father;
268 tree res;
269 unsigned i;
270 use_operand_p use;
271 gimple *use_stmt;
273 /* We only care about real loops (those with a latch). */
274 if (!father->latch)
275 return bb_rank[bb->index];
277 /* Interesting phis must be in headers of innermost loops. */
278 if (bb != father->header
279 || father->inner)
280 return bb_rank[bb->index];
282 /* Ignore virtual SSA_NAMEs. */
283 res = gimple_phi_result (stmt);
284 if (virtual_operand_p (res))
285 return bb_rank[bb->index];
287 /* The phi definition must have a single use, and that use must be
288 within the loop. Otherwise this isn't an accumulator pattern. */
289 if (!single_imm_use (res, &use, &use_stmt)
290 || gimple_bb (use_stmt)->loop_father != father)
291 return bb_rank[bb->index];
293 /* Look for phi arguments from within the loop. If found, bias this phi. */
294 for (i = 0; i < gimple_phi_num_args (stmt); i++)
296 tree arg = gimple_phi_arg_def (stmt, i);
297 if (TREE_CODE (arg) == SSA_NAME
298 && !SSA_NAME_IS_DEFAULT_DEF (arg))
300 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
301 if (gimple_bb (def_stmt)->loop_father == father)
302 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
306 /* Must be an uninteresting phi. */
307 return bb_rank[bb->index];
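/* A minimal sketch of the accumulator pattern this bias is aimed at
   (illustrative only; the names are made up):

     double acc = 0.0;
     for (int i = 0; i < n; i++)
       acc = acc + a[i] * b[i];

   When the header phi for ACC in this innermost loop has its single
   use in the addition inside the loop and one of its arguments is
   defined inside the loop, the checks above give it the latch's block
   rank plus PHI_LOOP_BIAS instead of its own block rank.  */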
310 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
311 loop-carried dependence of an innermost loop, return TRUE; else
312 return FALSE. */
313 static bool
314 loop_carried_phi (tree exp)
316 gimple *phi_stmt;
317 long block_rank;
319 if (TREE_CODE (exp) != SSA_NAME
320 || SSA_NAME_IS_DEFAULT_DEF (exp))
321 return false;
323 phi_stmt = SSA_NAME_DEF_STMT (exp);
325 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
326 return false;
328 /* Non-loop-carried phis have block rank. Loop-carried phis have
329 an additional bias added in. If this phi doesn't have block rank,
330 it's biased and should not be propagated. */
331 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
333 if (phi_rank (phi_stmt) != block_rank)
334 return true;
336 return false;
339 /* Return the maximum of RANK and the rank that should be propagated
340 from expression OP. For most operands, this is just the rank of OP.
341 For loop-carried phis, the value is zero to avoid undoing the bias
342 in favor of the phi. */
343 static long
344 propagate_rank (long rank, tree op)
346 long op_rank;
348 if (loop_carried_phi (op))
349 return rank;
351 op_rank = get_rank (op);
353 return MAX (rank, op_rank);
356 /* Look up the operand rank structure for expression E. */
358 static inline long
359 find_operand_rank (tree e)
361 long *slot = operand_rank->get (e);
362 return slot ? *slot : -1;
365 /* Insert {E,RANK} into the operand rank hashtable. */
367 static inline void
368 insert_operand_rank (tree e, long rank)
370 gcc_assert (rank > 0);
371 gcc_assert (!operand_rank->put (e, rank));
374 /* Given an expression E, return the rank of the expression. */
376 static long
377 get_rank (tree e)
379 /* SSA_NAME's have the rank of the expression they are the result of.
381 For globals and uninitialized values, the rank is 0.
382 For function arguments, use the pre-setup rank.
383 For PHI nodes, stores, asm statements, etc, we use the rank of
384 the BB.
385 For simple operations, the rank is the maximum rank of any of
386 its operands, or the bb_rank, whichever is less.
387 I make no claims that this is optimal; however, it gives good
388 results. */
390 /* We make an exception to the normal ranking system to break
391 dependences of accumulator variables in loops. Suppose we
392 have a simple one-block loop containing:
394 x_1 = phi(x_0, x_2)
395 b = a + x_1
396 c = b + d
397 x_2 = c + e
399 As shown, each iteration of the calculation into x is fully
400 dependent upon the iteration before it. We would prefer to
401 see this in the form:
403 x_1 = phi(x_0, x_2)
404 b = a + d
405 c = b + e
406 x_2 = c + x_1
408 If the loop is unrolled, the calculations of b and c from
409 different iterations can be interleaved.
411 To obtain this result during reassociation, we bias the rank
412 of the phi definition x_1 upward, when it is recognized as an
413 accumulator pattern. The artificial rank causes it to be
414 added last, providing the desired independence. */
416 if (TREE_CODE (e) == SSA_NAME)
418 ssa_op_iter iter;
419 gimple *stmt;
420 long rank;
421 tree op;
423 if (SSA_NAME_IS_DEFAULT_DEF (e))
424 return find_operand_rank (e);
426 stmt = SSA_NAME_DEF_STMT (e);
427 if (gimple_code (stmt) == GIMPLE_PHI)
428 return phi_rank (stmt);
430 if (!is_gimple_assign (stmt))
431 return bb_rank[gimple_bb (stmt)->index];
433 /* If we already have a rank for this expression, use that. */
434 rank = find_operand_rank (e);
435 if (rank != -1)
436 return rank;
438 /* Otherwise, find the maximum rank for the operands. As an
439 exception, remove the bias from loop-carried phis when propagating
440 the rank so that dependent operations are not also biased. */
441 /* Simply walk over all SSA uses - this takes advantage of the
442 fact that non-SSA operands are is_gimple_min_invariant and
443 thus have rank 0. */
444 rank = 0;
445 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
446 rank = propagate_rank (rank, op);
448 if (dump_file && (dump_flags & TDF_DETAILS))
450 fprintf (dump_file, "Rank for ");
451 print_generic_expr (dump_file, e, 0);
452 fprintf (dump_file, " is %ld\n", (rank + 1));
455 /* Note the rank in the hashtable so we don't recompute it. */
456 insert_operand_rank (e, (rank + 1));
457 return (rank + 1);
460 /* Constants, globals, etc., are rank 0 */
461 return 0;
465 /* We want integer ones to end up last no matter what, since they are
466 the ones we can do the most with. */
467 #define INTEGER_CONST_TYPE 1 << 3
468 #define FLOAT_CONST_TYPE 1 << 2
469 #define OTHER_CONST_TYPE 1 << 1
471 /* Classify an invariant tree into integer, float, or other, so that
472 we can sort them to be near other constants of the same type. */
473 static inline int
474 constant_type (tree t)
476 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
477 return INTEGER_CONST_TYPE;
478 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
479 return FLOAT_CONST_TYPE;
480 else
481 return OTHER_CONST_TYPE;
484 /* qsort comparison function to sort operand entries PA and PB by rank
485 so that the sorted array is ordered by rank in decreasing order. */
486 static int
487 sort_by_operand_rank (const void *pa, const void *pb)
489 const operand_entry *oea = *(const operand_entry *const *)pa;
490 const operand_entry *oeb = *(const operand_entry *const *)pb;
492 /* It's nicer for optimize_expression if constants that are likely
493 to fold when added/multiplied/whatever are put next to each
494 other. Since all constants have rank 0, order them by type. */
495 if (oeb->rank == 0 && oea->rank == 0)
497 if (constant_type (oeb->op) != constant_type (oea->op))
498 return constant_type (oeb->op) - constant_type (oea->op);
499 else
500 /* To make sorting result stable, we use unique IDs to determine
501 order. */
502 return oeb->id - oea->id;
505 /* Lastly, make sure the versions that are the same go next to each
506 other. */
507 if ((oeb->rank - oea->rank == 0)
508 && TREE_CODE (oea->op) == SSA_NAME
509 && TREE_CODE (oeb->op) == SSA_NAME)
511 /* As SSA_NAME_VERSION is assigned pretty randomly (because we reuse
512 versions of removed SSA_NAMEs), prefer, if possible, to sort
513 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
514 See PR60418. */
515 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
516 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
517 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
519 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
520 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
521 basic_block bba = gimple_bb (stmta);
522 basic_block bbb = gimple_bb (stmtb);
523 if (bbb != bba)
525 if (bb_rank[bbb->index] != bb_rank[bba->index])
526 return bb_rank[bbb->index] - bb_rank[bba->index];
528 else
530 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
531 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
532 if (da != db)
533 return da ? 1 : -1;
537 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
538 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
539 else
540 return oeb->id - oea->id;
543 if (oeb->rank != oea->rank)
544 return oeb->rank - oea->rank;
545 else
546 return oeb->id - oea->id;
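/* A small sketch of the resulting order (illustrative, assuming a
   PLUS_EXPR chain over an integer type): for operands
   a_5 (rank 3), b_6 (rank 3), 4 and 8, the two rank-0 constants sort
   after the SSA names and next to each other, which later lets
   optimize_ops_list fold them into the single constant 12, while a_5
   and b_6 are ordered deterministically by the SSA-based tie-breakers
   above.  */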
549 /* Add an operand entry to *OPS for the tree operand OP. */
551 static void
552 add_to_ops_vec (vec<operand_entry *> *ops, tree op)
554 operand_entry *oe = operand_entry_pool.allocate ();
556 oe->op = op;
557 oe->rank = get_rank (op);
558 oe->id = next_operand_entry_id++;
559 oe->count = 1;
560 ops->safe_push (oe);
563 /* Add an operand entry to *OPS for the tree operand OP with repeat
564 count REPEAT. */
566 static void
567 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
568 HOST_WIDE_INT repeat)
570 operand_entry *oe = operand_entry_pool.allocate ();
572 oe->op = op;
573 oe->rank = get_rank (op);
574 oe->id = next_operand_entry_id++;
575 oe->count = repeat;
576 ops->safe_push (oe);
578 reassociate_stats.pows_encountered++;
581 /* Return true if STMT is a reassociable operation containing a binary
582 operation with tree code CODE, and is inside LOOP. */
584 static bool
585 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
587 basic_block bb = gimple_bb (stmt);
589 if (gimple_bb (stmt) == NULL)
590 return false;
592 if (!flow_bb_inside_loop_p (loop, bb))
593 return false;
595 if (is_gimple_assign (stmt)
596 && gimple_assign_rhs_code (stmt) == code
597 && has_single_use (gimple_assign_lhs (stmt)))
598 return true;
600 return false;
604 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
605 operand of that operation. Otherwise, return NULL. */
607 static tree
608 get_unary_op (tree name, enum tree_code opcode)
610 gimple *stmt = SSA_NAME_DEF_STMT (name);
612 if (!is_gimple_assign (stmt))
613 return NULL_TREE;
615 if (gimple_assign_rhs_code (stmt) == opcode)
616 return gimple_assign_rhs1 (stmt);
617 return NULL_TREE;
620 /* If CURR and LAST are a pair of ops that OPCODE allows us to
621 eliminate through equivalences, do so, remove them from OPS, and
622 return true. Otherwise, return false. */
624 static bool
625 eliminate_duplicate_pair (enum tree_code opcode,
626 vec<operand_entry *> *ops,
627 bool *all_done,
628 unsigned int i,
629 operand_entry *curr,
630 operand_entry *last)
633 /* If we have two of the same op, and the opcode is &, |, min, or max,
634 we can eliminate one of them.
635 If we have two of the same op, and the opcode is ^, we can
636 eliminate both of them. */
638 if (last && last->op == curr->op)
640 switch (opcode)
642 case MAX_EXPR:
643 case MIN_EXPR:
644 case BIT_IOR_EXPR:
645 case BIT_AND_EXPR:
646 if (dump_file && (dump_flags & TDF_DETAILS))
648 fprintf (dump_file, "Equivalence: ");
649 print_generic_expr (dump_file, curr->op, 0);
650 fprintf (dump_file, " [&|minmax] ");
651 print_generic_expr (dump_file, last->op, 0);
652 fprintf (dump_file, " -> ");
653 print_generic_stmt (dump_file, last->op, 0);
656 ops->ordered_remove (i);
657 reassociate_stats.ops_eliminated ++;
659 return true;
661 case BIT_XOR_EXPR:
662 if (dump_file && (dump_flags & TDF_DETAILS))
664 fprintf (dump_file, "Equivalence: ");
665 print_generic_expr (dump_file, curr->op, 0);
666 fprintf (dump_file, " ^ ");
667 print_generic_expr (dump_file, last->op, 0);
668 fprintf (dump_file, " -> nothing\n");
671 reassociate_stats.ops_eliminated += 2;
673 if (ops->length () == 2)
675 ops->create (0);
676 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
677 *all_done = true;
679 else
681 ops->ordered_remove (i-1);
682 ops->ordered_remove (i-1);
685 return true;
687 default:
688 break;
691 return false;
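/* Source-level sketches of the equivalences handled above
   (illustrative only):

     x & x -> x        x | x -> x
     MIN (x, x) -> x   MAX (x, x) -> x
     x ^ x -> 0

   In the XOR case both entries are removed; if they were the only two
   operands left, the whole list is replaced by a zero constant.  */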
694 static vec<tree> plus_negates;
696 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
697 expression, look in OPS for a corresponding positive operation to cancel
698 it out. If we find one, remove the other from OPS, replace
699 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
700 return false. */
702 static bool
703 eliminate_plus_minus_pair (enum tree_code opcode,
704 vec<operand_entry *> *ops,
705 unsigned int currindex,
706 operand_entry *curr)
708 tree negateop;
709 tree notop;
710 unsigned int i;
711 operand_entry *oe;
713 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
714 return false;
716 negateop = get_unary_op (curr->op, NEGATE_EXPR);
717 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
718 if (negateop == NULL_TREE && notop == NULL_TREE)
719 return false;
721 /* Any non-negated version will have a rank that is one less than
722 the current rank. So once we hit those ranks, if we don't find
723 one, we can stop. */
725 for (i = currindex + 1;
726 ops->iterate (i, &oe)
727 && oe->rank >= curr->rank - 1 ;
728 i++)
730 if (oe->op == negateop)
733 if (dump_file && (dump_flags & TDF_DETAILS))
735 fprintf (dump_file, "Equivalence: ");
736 print_generic_expr (dump_file, negateop, 0);
737 fprintf (dump_file, " + -");
738 print_generic_expr (dump_file, oe->op, 0);
739 fprintf (dump_file, " -> 0\n");
742 ops->ordered_remove (i);
743 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
744 ops->ordered_remove (currindex);
745 reassociate_stats.ops_eliminated ++;
747 return true;
749 else if (oe->op == notop)
751 tree op_type = TREE_TYPE (oe->op);
753 if (dump_file && (dump_flags & TDF_DETAILS))
755 fprintf (dump_file, "Equivalence: ");
756 print_generic_expr (dump_file, notop, 0);
757 fprintf (dump_file, " + ~");
758 print_generic_expr (dump_file, oe->op, 0);
759 fprintf (dump_file, " -> -1\n");
762 ops->ordered_remove (i);
763 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
764 ops->ordered_remove (currindex);
765 reassociate_stats.ops_eliminated ++;
767 return true;
771 /* CURR->OP is a negate expr in a plus expr: save it for later
772 inspection in repropagate_negates(). */
773 if (negateop != NULL_TREE)
774 plus_negates.safe_push (curr->op);
776 return false;
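/* Illustrative examples of the cancellations above:

     a + (-a) + b -> 0 + b
     a + (~a) + b -> -1 + b

   The second identity follows from ~a == -a - 1 on two's complement
   integers.  A negate that does not find a matching positive operand
   is queued in PLUS_NEGATES for later use by repropagate_negates.  */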
779 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
780 bitwise not expression, look in OPS for a corresponding operand to
781 cancel it out. If we find one, remove the other from OPS, replace
782 OPS[CURRINDEX] with 0, and return true. Otherwise, return
783 false. */
785 static bool
786 eliminate_not_pairs (enum tree_code opcode,
787 vec<operand_entry *> *ops,
788 unsigned int currindex,
789 operand_entry *curr)
791 tree notop;
792 unsigned int i;
793 operand_entry *oe;
795 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
796 || TREE_CODE (curr->op) != SSA_NAME)
797 return false;
799 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
800 if (notop == NULL_TREE)
801 return false;
803 /* Any non-not version will have a rank that is one less than
804 the current rank. So once we hit those ranks, if we don't find
805 one, we can stop. */
807 for (i = currindex + 1;
808 ops->iterate (i, &oe)
809 && oe->rank >= curr->rank - 1;
810 i++)
812 if (oe->op == notop)
814 if (dump_file && (dump_flags & TDF_DETAILS))
816 fprintf (dump_file, "Equivalence: ");
817 print_generic_expr (dump_file, notop, 0);
818 if (opcode == BIT_AND_EXPR)
819 fprintf (dump_file, " & ~");
820 else if (opcode == BIT_IOR_EXPR)
821 fprintf (dump_file, " | ~");
822 print_generic_expr (dump_file, oe->op, 0);
823 if (opcode == BIT_AND_EXPR)
824 fprintf (dump_file, " -> 0\n");
825 else if (opcode == BIT_IOR_EXPR)
826 fprintf (dump_file, " -> -1\n");
829 if (opcode == BIT_AND_EXPR)
830 oe->op = build_zero_cst (TREE_TYPE (oe->op));
831 else if (opcode == BIT_IOR_EXPR)
832 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
834 reassociate_stats.ops_eliminated += ops->length () - 1;
835 ops->truncate (0);
836 ops->quick_push (oe);
837 return true;
841 return false;
844 /* Use a constant value that may be present in OPS to try to eliminate
845 operands. Note that this function is only really used when we've
846 eliminated ops for other reasons, or merged constants. Across
847 single statements, fold already does all of this, plus more. There
848 is little point in duplicating logic, so I've only included the
849 identities that I could ever construct testcases to trigger. */
851 static void
852 eliminate_using_constants (enum tree_code opcode,
853 vec<operand_entry *> *ops)
855 operand_entry *oelast = ops->last ();
856 tree type = TREE_TYPE (oelast->op);
858 if (oelast->rank == 0
859 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
861 switch (opcode)
863 case BIT_AND_EXPR:
864 if (integer_zerop (oelast->op))
866 if (ops->length () != 1)
868 if (dump_file && (dump_flags & TDF_DETAILS))
869 fprintf (dump_file, "Found & 0, removing all other ops\n");
871 reassociate_stats.ops_eliminated += ops->length () - 1;
873 ops->truncate (0);
874 ops->quick_push (oelast);
875 return;
878 else if (integer_all_onesp (oelast->op))
880 if (ops->length () != 1)
882 if (dump_file && (dump_flags & TDF_DETAILS))
883 fprintf (dump_file, "Found & -1, removing\n");
884 ops->pop ();
885 reassociate_stats.ops_eliminated++;
888 break;
889 case BIT_IOR_EXPR:
890 if (integer_all_onesp (oelast->op))
892 if (ops->length () != 1)
894 if (dump_file && (dump_flags & TDF_DETAILS))
895 fprintf (dump_file, "Found | -1, removing all other ops\n");
897 reassociate_stats.ops_eliminated += ops->length () - 1;
899 ops->truncate (0);
900 ops->quick_push (oelast);
901 return;
904 else if (integer_zerop (oelast->op))
906 if (ops->length () != 1)
908 if (dump_file && (dump_flags & TDF_DETAILS))
909 fprintf (dump_file, "Found | 0, removing\n");
910 ops->pop ();
911 reassociate_stats.ops_eliminated++;
914 break;
915 case MULT_EXPR:
916 if (integer_zerop (oelast->op)
917 || (FLOAT_TYPE_P (type)
918 && !HONOR_NANS (type)
919 && !HONOR_SIGNED_ZEROS (type)
920 && real_zerop (oelast->op)))
922 if (ops->length () != 1)
924 if (dump_file && (dump_flags & TDF_DETAILS))
925 fprintf (dump_file, "Found * 0, removing all other ops\n");
927 reassociate_stats.ops_eliminated += ops->length () - 1;
928 ops->truncate (1);
929 ops->quick_push (oelast);
930 return;
933 else if (integer_onep (oelast->op)
934 || (FLOAT_TYPE_P (type)
935 && !HONOR_SNANS (type)
936 && real_onep (oelast->op)))
938 if (ops->length () != 1)
940 if (dump_file && (dump_flags & TDF_DETAILS))
941 fprintf (dump_file, "Found * 1, removing\n");
942 ops->pop ();
943 reassociate_stats.ops_eliminated++;
944 return;
947 break;
948 case BIT_XOR_EXPR:
949 case PLUS_EXPR:
950 case MINUS_EXPR:
951 if (integer_zerop (oelast->op)
952 || (FLOAT_TYPE_P (type)
953 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
954 && fold_real_zero_addition_p (type, oelast->op,
955 opcode == MINUS_EXPR)))
957 if (ops->length () != 1)
959 if (dump_file && (dump_flags & TDF_DETAILS))
960 fprintf (dump_file, "Found [|^+] 0, removing\n");
961 ops->pop ();
962 reassociate_stats.ops_eliminated++;
963 return;
966 break;
967 default:
968 break;
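/* The identities handled above, as source-level sketches (illustrative
   only; the floating-point cases are additionally guarded by the
   NaN / signed-zero / signaling-NaN checks):

     x & 0 -> 0      x & -1 -> x
     x | -1 -> -1    x | 0 -> x
     x * 0 -> 0      x * 1 -> x
     x + 0 -> x      x ^ 0 -> x  */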
974 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
975 bool, bool);
977 /* Structure for tracking and counting operands. */
978 struct oecount {
979 int cnt;
980 int id;
981 enum tree_code oecode;
982 tree op;
986 /* The heap for the oecount hashtable and the sorted list of operands. */
987 static vec<oecount> cvec;
990 /* Oecount hashtable helpers. */
992 struct oecount_hasher : int_hash <int, 0, 1>
994 static inline hashval_t hash (int);
995 static inline bool equal (int, int);
998 /* Hash function for oecount. */
1000 inline hashval_t
1001 oecount_hasher::hash (int p)
1003 const oecount *c = &cvec[p - 42];
1004 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1007 /* Comparison function for oecount. */
1009 inline bool
1010 oecount_hasher::equal (int p1, int p2)
1012 const oecount *c1 = &cvec[p1 - 42];
1013 const oecount *c2 = &cvec[p2 - 42];
1014 return (c1->oecode == c2->oecode
1015 && c1->op == c2->op);
1018 /* Comparison function for qsort sorting oecount elements by count. */
1020 static int
1021 oecount_cmp (const void *p1, const void *p2)
1023 const oecount *c1 = (const oecount *)p1;
1024 const oecount *c2 = (const oecount *)p2;
1025 if (c1->cnt != c2->cnt)
1026 return c1->cnt - c2->cnt;
1027 else
1028 /* If counts are identical, use unique IDs to stabilize qsort. */
1029 return c1->id - c2->id;
1032 /* Return TRUE iff STMT represents a builtin call that raises OP
1033 to some exponent. */
1035 static bool
1036 stmt_is_power_of_op (gimple *stmt, tree op)
1038 tree fndecl;
1040 if (!is_gimple_call (stmt))
1041 return false;
1043 fndecl = gimple_call_fndecl (stmt);
1045 if (!fndecl
1046 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1047 return false;
1049 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1051 CASE_FLT_FN (BUILT_IN_POW):
1052 CASE_FLT_FN (BUILT_IN_POWI):
1053 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1055 default:
1056 return false;
1060 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1061 in place and return the result. Assumes that stmt_is_power_of_op
1062 was previously called for STMT and returned TRUE. */
1064 static HOST_WIDE_INT
1065 decrement_power (gimple *stmt)
1067 REAL_VALUE_TYPE c, cint;
1068 HOST_WIDE_INT power;
1069 tree arg1;
1071 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1073 CASE_FLT_FN (BUILT_IN_POW):
1074 arg1 = gimple_call_arg (stmt, 1);
1075 c = TREE_REAL_CST (arg1);
1076 power = real_to_integer (&c) - 1;
1077 real_from_integer (&cint, VOIDmode, power, SIGNED);
1078 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1079 return power;
1081 CASE_FLT_FN (BUILT_IN_POWI):
1082 arg1 = gimple_call_arg (stmt, 1);
1083 power = TREE_INT_CST_LOW (arg1) - 1;
1084 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1085 return power;
1087 default:
1088 gcc_unreachable ();
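/* A small illustration (not taken from this file): for a call
   t = __builtin_pow (x, 3.0) this rewrites the argument to 2.0 in
   place and returns 2, while for t = __builtin_powi (x, 2) it
   rewrites the exponent to 1 and returns 1, in which case the caller
   (zero_one_operation) simply propagates X instead.  */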
1092 /* Find the single immediate use of STMT's LHS, and replace it
1093 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1094 replace *DEF with OP as well. */
1096 static void
1097 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1099 tree lhs;
1100 gimple *use_stmt;
1101 use_operand_p use;
1102 gimple_stmt_iterator gsi;
1104 if (is_gimple_call (stmt))
1105 lhs = gimple_call_lhs (stmt);
1106 else
1107 lhs = gimple_assign_lhs (stmt);
1109 gcc_assert (has_single_use (lhs));
1110 single_imm_use (lhs, &use, &use_stmt);
1111 if (lhs == *def)
1112 *def = op;
1113 SET_USE (use, op);
1114 if (TREE_CODE (op) != SSA_NAME)
1115 update_stmt (use_stmt);
1116 gsi = gsi_for_stmt (stmt);
1117 unlink_stmt_vdef (stmt);
1118 reassoc_remove_stmt (&gsi);
1119 release_defs (stmt);
1122 /* Walks the linear chain with result *DEF searching for an operation
1123 with operand OP and code OPCODE, removing that from the chain. *DEF
1124 is updated if there is only one operand but no operation left. */
1126 static void
1127 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1129 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1133 tree name;
1135 if (opcode == MULT_EXPR
1136 && stmt_is_power_of_op (stmt, op))
1138 if (decrement_power (stmt) == 1)
1139 propagate_op_to_single_use (op, stmt, def);
1140 return;
1143 name = gimple_assign_rhs1 (stmt);
1145 /* If this is the operation we look for and one of the operands
1146 is ours simply propagate the other operand into the stmts
1147 single use. */
1148 if (gimple_assign_rhs_code (stmt) == opcode
1149 && (name == op
1150 || gimple_assign_rhs2 (stmt) == op))
1152 if (name == op)
1153 name = gimple_assign_rhs2 (stmt);
1154 propagate_op_to_single_use (name, stmt, def);
1155 return;
1158 /* We might have a multiply of two __builtin_pow* calls, and
1159 the operand might be hiding in the rightmost one. */
1160 if (opcode == MULT_EXPR
1161 && gimple_assign_rhs_code (stmt) == opcode
1162 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1163 && has_single_use (gimple_assign_rhs2 (stmt)))
1165 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1166 if (stmt_is_power_of_op (stmt2, op))
1168 if (decrement_power (stmt2) == 1)
1169 propagate_op_to_single_use (op, stmt2, def);
1170 return;
1174 /* Continue walking the chain. */
1175 gcc_assert (name != op
1176 && TREE_CODE (name) == SSA_NAME);
1177 stmt = SSA_NAME_DEF_STMT (name);
1179 while (1);
1182 /* Returns true if statement S1 dominates statement S2. Like
1183 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1185 static bool
1186 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1188 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1190 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1191 SSA_NAME. Assume it lives at the beginning of the function and
1192 thus dominates everything. */
1193 if (!bb1 || s1 == s2)
1194 return true;
1196 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1197 if (!bb2)
1198 return false;
1200 if (bb1 == bb2)
1202 /* PHIs in the same basic block are assumed to be
1203 executed all in parallel, if only one stmt is a PHI,
1204 it dominates the other stmt in the same basic block. */
1205 if (gimple_code (s1) == GIMPLE_PHI)
1206 return true;
1208 if (gimple_code (s2) == GIMPLE_PHI)
1209 return false;
1211 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1213 if (gimple_uid (s1) < gimple_uid (s2))
1214 return true;
1216 if (gimple_uid (s1) > gimple_uid (s2))
1217 return false;
1219 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1220 unsigned int uid = gimple_uid (s1);
1221 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1223 gimple *s = gsi_stmt (gsi);
1224 if (gimple_uid (s) != uid)
1225 break;
1226 if (s == s2)
1227 return true;
1230 return false;
1233 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1236 /* Insert STMT after INSERT_POINT. */
1238 static void
1239 insert_stmt_after (gimple *stmt, gimple *insert_point)
1241 gimple_stmt_iterator gsi;
1242 basic_block bb;
1244 if (gimple_code (insert_point) == GIMPLE_PHI)
1245 bb = gimple_bb (insert_point);
1246 else if (!stmt_ends_bb_p (insert_point))
1248 gsi = gsi_for_stmt (insert_point);
1249 gimple_set_uid (stmt, gimple_uid (insert_point));
1250 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1251 return;
1253 else
1254 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1255 thus if it must end a basic block, it should be a call that can
1256 throw, or some assignment that can throw. If it throws, the LHS
1257 of it will not be initialized though, so only valid places using
1258 the SSA_NAME should be dominated by the fallthru edge. */
1259 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1260 gsi = gsi_after_labels (bb);
1261 if (gsi_end_p (gsi))
1263 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1264 gimple_set_uid (stmt,
1265 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1267 else
1268 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1269 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1272 /* Builds one statement performing OP1 OPCODE OP2, creating a new
1273 SSA name of type TYPE for the result. Places the statement after
1274 the definition of either OP1 or OP2. Returns the new statement. */
1276 static gimple *
1277 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1279 gimple *op1def = NULL, *op2def = NULL;
1280 gimple_stmt_iterator gsi;
1281 tree op;
1282 gassign *sum;
1284 /* Create the addition statement. */
1285 op = make_ssa_name (type);
1286 sum = gimple_build_assign (op, opcode, op1, op2);
1288 /* Find an insertion place and insert. */
1289 if (TREE_CODE (op1) == SSA_NAME)
1290 op1def = SSA_NAME_DEF_STMT (op1);
1291 if (TREE_CODE (op2) == SSA_NAME)
1292 op2def = SSA_NAME_DEF_STMT (op2);
1293 if ((!op1def || gimple_nop_p (op1def))
1294 && (!op2def || gimple_nop_p (op2def)))
1296 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1297 if (gsi_end_p (gsi))
1299 gimple_stmt_iterator gsi2
1300 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1301 gimple_set_uid (sum,
1302 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1304 else
1305 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1306 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1308 else
1310 gimple *insert_point;
1311 if ((!op1def || gimple_nop_p (op1def))
1312 || (op2def && !gimple_nop_p (op2def)
1313 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1314 insert_point = op2def;
1315 else
1316 insert_point = op1def;
1317 insert_stmt_after (sum, insert_point);
1319 update_stmt (sum);
1321 return sum;
1324 /* Perform un-distribution of divisions and multiplications.
1325 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1326 to (A + B) / X for real X.
1328 The algorithm is organized as follows.
1330 - First we walk the addition chain *OPS looking for summands that
1331 are defined by a multiplication or a real division. This results
1332 in the candidates bitmap with relevant indices into *OPS.
1334 - Second we build the chains of multiplications or divisions for
1335 these candidates, counting the number of occurrences of (operand, code)
1336 pairs in all of the candidates chains.
1338 - Third we sort the (operand, code) pairs by number of occurrence and
1339 process them starting with the pair with the most uses.
1341 * For each such pair we walk the candidates again to build a
1342 second candidate bitmap noting all multiplication/division chains
1343 that have at least one occurrence of (operand, code).
1345 * We build an alternate addition chain only covering these
1346 candidates with one (operand, code) operation removed from their
1347 multiplication/division chain.
1349 * The first candidate gets replaced by the alternate addition chain
1350 multiplied/divided by the operand.
1352 * All candidate chains get disabled for further processing and
1353 processing of (operand, code) pairs continues.
1355 The alternate addition chains built are re-processed by the main
1356 reassociation algorithm which allows optimizing a * x * y + b * y * x
1357 to (a + b ) * x * y in one invocation of the reassociation pass. */
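/* A compact illustration of the steps above (illustrative only): for
   the addition chain a*x + b*x + c the two multiplications become
   candidates, the (operand, code) counting finds (x, *) with count 2,
   one factor of x is stripped from each candidate and an alternate
   chain a + b is built, and the first candidate is replaced by
   (a + b) * x, leaving (a + b) * x + 0 + c for the main reassociation
   to finish.  */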
1359 static bool
1360 undistribute_ops_list (enum tree_code opcode,
1361 vec<operand_entry *> *ops, struct loop *loop)
1363 unsigned int length = ops->length ();
1364 operand_entry *oe1;
1365 unsigned i, j;
1366 sbitmap candidates, candidates2;
1367 unsigned nr_candidates, nr_candidates2;
1368 sbitmap_iterator sbi0;
1369 vec<operand_entry *> *subops;
1370 bool changed = false;
1371 int next_oecount_id = 0;
1373 if (length <= 1
1374 || opcode != PLUS_EXPR)
1375 return false;
1377 /* Build a list of candidates to process. */
1378 candidates = sbitmap_alloc (length);
1379 bitmap_clear (candidates);
1380 nr_candidates = 0;
1381 FOR_EACH_VEC_ELT (*ops, i, oe1)
1383 enum tree_code dcode;
1384 gimple *oe1def;
1386 if (TREE_CODE (oe1->op) != SSA_NAME)
1387 continue;
1388 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1389 if (!is_gimple_assign (oe1def))
1390 continue;
1391 dcode = gimple_assign_rhs_code (oe1def);
1392 if ((dcode != MULT_EXPR
1393 && dcode != RDIV_EXPR)
1394 || !is_reassociable_op (oe1def, dcode, loop))
1395 continue;
1397 bitmap_set_bit (candidates, i);
1398 nr_candidates++;
1401 if (nr_candidates < 2)
1403 sbitmap_free (candidates);
1404 return false;
1407 if (dump_file && (dump_flags & TDF_DETAILS))
1409 fprintf (dump_file, "searching for un-distribute opportunities ");
1410 print_generic_expr (dump_file,
1411 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1412 fprintf (dump_file, " %d\n", nr_candidates);
1415 /* Build linearized sub-operand lists and the counting table. */
1416 cvec.create (0);
1418 hash_table<oecount_hasher> ctable (15);
1420 /* ??? Macro arguments cannot have multi-argument template types in
1421 them. This typedef is needed to work around that limitation. */
1422 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1423 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1424 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1426 gimple *oedef;
1427 enum tree_code oecode;
1428 unsigned j;
1430 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1431 oecode = gimple_assign_rhs_code (oedef);
1432 linearize_expr_tree (&subops[i], oedef,
1433 associative_tree_code (oecode), false);
1435 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1437 oecount c;
1438 int *slot;
1439 int idx;
1440 c.oecode = oecode;
1441 c.cnt = 1;
1442 c.id = next_oecount_id++;
1443 c.op = oe1->op;
1444 cvec.safe_push (c);
1445 idx = cvec.length () + 41;
1446 slot = ctable.find_slot (idx, INSERT);
1447 if (!*slot)
1449 *slot = idx;
1451 else
1453 cvec.pop ();
1454 cvec[*slot - 42].cnt++;
1459 /* Sort the counting table. */
1460 cvec.qsort (oecount_cmp);
1462 if (dump_file && (dump_flags & TDF_DETAILS))
1464 oecount *c;
1465 fprintf (dump_file, "Candidates:\n");
1466 FOR_EACH_VEC_ELT (cvec, j, c)
1468 fprintf (dump_file, " %u %s: ", c->cnt,
1469 c->oecode == MULT_EXPR
1470 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1471 print_generic_expr (dump_file, c->op, 0);
1472 fprintf (dump_file, "\n");
1476 /* Process the (operand, code) pairs in order of most occurrence. */
1477 candidates2 = sbitmap_alloc (length);
1478 while (!cvec.is_empty ())
1480 oecount *c = &cvec.last ();
1481 if (c->cnt < 2)
1482 break;
1484 /* Now collect the operands in the outer chain that contain
1485 the common operand in their inner chain. */
1486 bitmap_clear (candidates2);
1487 nr_candidates2 = 0;
1488 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1490 gimple *oedef;
1491 enum tree_code oecode;
1492 unsigned j;
1493 tree op = (*ops)[i]->op;
1495 /* If we undistributed in this chain already, this may be
1496 a constant. */
1497 if (TREE_CODE (op) != SSA_NAME)
1498 continue;
1500 oedef = SSA_NAME_DEF_STMT (op);
1501 oecode = gimple_assign_rhs_code (oedef);
1502 if (oecode != c->oecode)
1503 continue;
1505 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1507 if (oe1->op == c->op)
1509 bitmap_set_bit (candidates2, i);
1510 ++nr_candidates2;
1511 break;
1516 if (nr_candidates2 >= 2)
1518 operand_entry *oe1, *oe2;
1519 gimple *prod;
1520 int first = bitmap_first_set_bit (candidates2);
1522 /* Build the new addition chain. */
1523 oe1 = (*ops)[first];
1524 if (dump_file && (dump_flags & TDF_DETAILS))
1526 fprintf (dump_file, "Building (");
1527 print_generic_expr (dump_file, oe1->op, 0);
1529 zero_one_operation (&oe1->op, c->oecode, c->op);
1530 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1532 gimple *sum;
1533 oe2 = (*ops)[i];
1534 if (dump_file && (dump_flags & TDF_DETAILS))
1536 fprintf (dump_file, " + ");
1537 print_generic_expr (dump_file, oe2->op, 0);
1539 zero_one_operation (&oe2->op, c->oecode, c->op);
1540 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1541 oe1->op, oe2->op, opcode);
1542 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1543 oe2->rank = 0;
1544 oe1->op = gimple_get_lhs (sum);
1547 /* Apply the multiplication/division. */
1548 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1549 oe1->op, c->op, c->oecode);
1550 if (dump_file && (dump_flags & TDF_DETAILS))
1552 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1553 print_generic_expr (dump_file, c->op, 0);
1554 fprintf (dump_file, "\n");
1557 /* Record it in the addition chain and disable further
1558 undistribution with this op. */
1559 oe1->op = gimple_assign_lhs (prod);
1560 oe1->rank = get_rank (oe1->op);
1561 subops[first].release ();
1563 changed = true;
1566 cvec.pop ();
1569 for (i = 0; i < ops->length (); ++i)
1570 subops[i].release ();
1571 free (subops);
1572 cvec.release ();
1573 sbitmap_free (candidates);
1574 sbitmap_free (candidates2);
1576 return changed;
1579 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1580 expression, examine the other OPS to see if any of them are comparisons
1581 of the same values, which we may be able to combine or eliminate.
1582 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1584 static bool
1585 eliminate_redundant_comparison (enum tree_code opcode,
1586 vec<operand_entry *> *ops,
1587 unsigned int currindex,
1588 operand_entry *curr)
1590 tree op1, op2;
1591 enum tree_code lcode, rcode;
1592 gimple *def1, *def2;
1593 int i;
1594 operand_entry *oe;
1596 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1597 return false;
1599 /* Check that CURR is a comparison. */
1600 if (TREE_CODE (curr->op) != SSA_NAME)
1601 return false;
1602 def1 = SSA_NAME_DEF_STMT (curr->op);
1603 if (!is_gimple_assign (def1))
1604 return false;
1605 lcode = gimple_assign_rhs_code (def1);
1606 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1607 return false;
1608 op1 = gimple_assign_rhs1 (def1);
1609 op2 = gimple_assign_rhs2 (def1);
1611 /* Now look for a similar comparison in the remaining OPS. */
1612 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1614 tree t;
1616 if (TREE_CODE (oe->op) != SSA_NAME)
1617 continue;
1618 def2 = SSA_NAME_DEF_STMT (oe->op);
1619 if (!is_gimple_assign (def2))
1620 continue;
1621 rcode = gimple_assign_rhs_code (def2);
1622 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1623 continue;
1625 /* If we got here, we have a match. See if we can combine the
1626 two comparisons. */
1627 if (opcode == BIT_IOR_EXPR)
1628 t = maybe_fold_or_comparisons (lcode, op1, op2,
1629 rcode, gimple_assign_rhs1 (def2),
1630 gimple_assign_rhs2 (def2));
1631 else
1632 t = maybe_fold_and_comparisons (lcode, op1, op2,
1633 rcode, gimple_assign_rhs1 (def2),
1634 gimple_assign_rhs2 (def2));
1635 if (!t)
1636 continue;
1638 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1639 always give us a boolean_type_node value back. If the original
1640 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1641 we need to convert. */
1642 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1643 t = fold_convert (TREE_TYPE (curr->op), t);
1645 if (TREE_CODE (t) != INTEGER_CST
1646 && !operand_equal_p (t, curr->op, 0))
1648 enum tree_code subcode;
1649 tree newop1, newop2;
1650 if (!COMPARISON_CLASS_P (t))
1651 continue;
1652 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1653 STRIP_USELESS_TYPE_CONVERSION (newop1);
1654 STRIP_USELESS_TYPE_CONVERSION (newop2);
1655 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1656 continue;
1659 if (dump_file && (dump_flags & TDF_DETAILS))
1661 fprintf (dump_file, "Equivalence: ");
1662 print_generic_expr (dump_file, curr->op, 0);
1663 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1664 print_generic_expr (dump_file, oe->op, 0);
1665 fprintf (dump_file, " -> ");
1666 print_generic_expr (dump_file, t, 0);
1667 fprintf (dump_file, "\n");
1670 /* Now we can delete oe, as it has been subsumed by the new combined
1671 expression t. */
1672 ops->ordered_remove (i);
1673 reassociate_stats.ops_eliminated ++;
1675 /* If t is the same as curr->op, we're done. Otherwise we must
1676 replace curr->op with t. Special case is if we got a constant
1677 back, in which case we add it to the end instead of in place of
1678 the current entry. */
1679 if (TREE_CODE (t) == INTEGER_CST)
1681 ops->ordered_remove (currindex);
1682 add_to_ops_vec (ops, t);
1684 else if (!operand_equal_p (t, curr->op, 0))
1686 gimple *sum;
1687 enum tree_code subcode;
1688 tree newop1;
1689 tree newop2;
1690 gcc_assert (COMPARISON_CLASS_P (t));
1691 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1692 STRIP_USELESS_TYPE_CONVERSION (newop1);
1693 STRIP_USELESS_TYPE_CONVERSION (newop2);
1694 gcc_checking_assert (is_gimple_val (newop1)
1695 && is_gimple_val (newop2));
1696 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1697 curr->op = gimple_get_lhs (sum);
1699 return true;
1702 return false;
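/* Source-level sketches of what this catches for integer A and B
   (illustrative only):

     (a < b) | (a == b) -> a <= b
     (a < b) & (a > b)  -> 0

   A constant result is appended to the operand list; otherwise a new
   comparison statement is built via build_and_add_sum and replaces
   CURR.  */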
1705 /* Perform various identities and other optimizations on the list of
1706 operand entries, stored in OPS. The tree code for the binary
1707 operation between all the operands is OPCODE. */
1709 static void
1710 optimize_ops_list (enum tree_code opcode,
1711 vec<operand_entry *> *ops)
1713 unsigned int length = ops->length ();
1714 unsigned int i;
1715 operand_entry *oe;
1716 operand_entry *oelast = NULL;
1717 bool iterate = false;
1719 if (length == 1)
1720 return;
1722 oelast = ops->last ();
1724 /* If the last two are constants, pop the constants off, merge them
1725 and try the next two. */
1726 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1728 operand_entry *oelm1 = (*ops)[length - 2];
1730 if (oelm1->rank == 0
1731 && is_gimple_min_invariant (oelm1->op)
1732 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1733 TREE_TYPE (oelast->op)))
1735 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1736 oelm1->op, oelast->op);
1738 if (folded && is_gimple_min_invariant (folded))
1740 if (dump_file && (dump_flags & TDF_DETAILS))
1741 fprintf (dump_file, "Merging constants\n");
1743 ops->pop ();
1744 ops->pop ();
1746 add_to_ops_vec (ops, folded);
1747 reassociate_stats.constants_eliminated++;
1749 optimize_ops_list (opcode, ops);
1750 return;
1755 eliminate_using_constants (opcode, ops);
1756 oelast = NULL;
1758 for (i = 0; ops->iterate (i, &oe);)
1760 bool done = false;
1762 if (eliminate_not_pairs (opcode, ops, i, oe))
1763 return;
1764 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1765 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1766 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1768 if (done)
1769 return;
1770 iterate = true;
1771 oelast = NULL;
1772 continue;
1774 oelast = oe;
1775 i++;
1778 length = ops->length ();
1779 oelast = ops->last ();
1781 if (iterate)
1782 optimize_ops_list (opcode, ops);
1785 /* The following functions are subroutines to optimize_range_tests and allow
1786 it to try to change a logical combination of comparisons into a range
1787 test.
1789 For example, both
1790 X == 2 || X == 5 || X == 3 || X == 4
1791 and
1792 X >= 2 && X <= 5
1793 are converted to
1794 (unsigned) (X - 2) <= 3
1796 For more information see comments above fold_test_range in fold-const.c,
1797 this implementation is for GIMPLE. */
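/* The arithmetic behind that canonical form, as a sketch: for integer
   X, X >= 2 && X <= 5 is equivalent to X - 2 >= 0 && X - 2 <= 3, and
   performing the subtraction in an unsigned type collapses the two
   comparisons into the single test (unsigned) (X - 2) <= 3, because
   values of X below 2 wrap around to large unsigned numbers.  */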
1799 struct range_entry
1801 tree exp;
1802 tree low;
1803 tree high;
1804 bool in_p;
1805 bool strict_overflow_p;
1806 unsigned int idx, next;
1809 /* This is similar to make_range in fold-const.c, but on top of
1810 GIMPLE instead of trees. If EXP is non-NULL, it should be
1811 an SSA_NAME and STMT argument is ignored, otherwise STMT
1812 argument should be a GIMPLE_COND. */
1814 static void
1815 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
1817 int in_p;
1818 tree low, high;
1819 bool is_bool, strict_overflow_p;
1821 r->exp = NULL_TREE;
1822 r->in_p = false;
1823 r->strict_overflow_p = false;
1824 r->low = NULL_TREE;
1825 r->high = NULL_TREE;
1826 if (exp != NULL_TREE
1827 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1828 return;
1830 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1831 and see if we can refine the range. Some of the cases below may not
1832 happen, but it doesn't seem worth worrying about this. We "continue"
1833 the outer loop when we've changed something; otherwise we "break"
1834 the switch, which will "break" the while. */
1835 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1836 high = low;
1837 in_p = 0;
1838 strict_overflow_p = false;
1839 is_bool = false;
1840 if (exp == NULL_TREE)
1841 is_bool = true;
1842 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1844 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1845 is_bool = true;
1846 else
1847 return;
1849 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1850 is_bool = true;
1852 while (1)
1854 enum tree_code code;
1855 tree arg0, arg1, exp_type;
1856 tree nexp;
1857 location_t loc;
1859 if (exp != NULL_TREE)
1861 if (TREE_CODE (exp) != SSA_NAME
1862 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1863 break;
1865 stmt = SSA_NAME_DEF_STMT (exp);
1866 if (!is_gimple_assign (stmt))
1867 break;
1869 code = gimple_assign_rhs_code (stmt);
1870 arg0 = gimple_assign_rhs1 (stmt);
1871 arg1 = gimple_assign_rhs2 (stmt);
1872 exp_type = TREE_TYPE (exp);
1874 else
1876 code = gimple_cond_code (stmt);
1877 arg0 = gimple_cond_lhs (stmt);
1878 arg1 = gimple_cond_rhs (stmt);
1879 exp_type = boolean_type_node;
1882 if (TREE_CODE (arg0) != SSA_NAME)
1883 break;
1884 loc = gimple_location (stmt);
1885 switch (code)
1887 case BIT_NOT_EXPR:
1888 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1889 /* Ensure the range is either +[-,0], +[0,0],
1890 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1891 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1892 or similar expression of unconditional true or
1893 false, it should not be negated. */
1894 && ((high && integer_zerop (high))
1895 || (low && integer_onep (low))))
1897 in_p = !in_p;
1898 exp = arg0;
1899 continue;
1901 break;
1902 case SSA_NAME:
1903 exp = arg0;
1904 continue;
1905 CASE_CONVERT:
1906 if (is_bool)
1907 goto do_default;
1908 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1910 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1911 is_bool = true;
1912 else
1913 return;
1915 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1916 is_bool = true;
1917 goto do_default;
1918 case EQ_EXPR:
1919 case NE_EXPR:
1920 case LT_EXPR:
1921 case LE_EXPR:
1922 case GE_EXPR:
1923 case GT_EXPR:
1924 is_bool = true;
1925 /* FALLTHRU */
1926 default:
1927 if (!is_bool)
1928 return;
1929 do_default:
1930 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1931 &low, &high, &in_p,
1932 &strict_overflow_p);
1933 if (nexp != NULL_TREE)
1935 exp = nexp;
1936 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1937 continue;
1939 break;
1941 break;
1943 if (is_bool)
1945 r->exp = exp;
1946 r->in_p = in_p;
1947 r->low = low;
1948 r->high = high;
1949 r->strict_overflow_p = strict_overflow_p;
1953 /* Comparison function for qsort. Sort entries
1954 without SSA_NAME exp first, then with SSA_NAMEs sorted
1955 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1956 by increasing ->low and if ->low is the same, by increasing
1957 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1958 maximum. */
1960 static int
1961 range_entry_cmp (const void *a, const void *b)
1963 const struct range_entry *p = (const struct range_entry *) a;
1964 const struct range_entry *q = (const struct range_entry *) b;
1966 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1968 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1970 /* Group range_entries for the same SSA_NAME together. */
1971 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1972 return -1;
1973 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1974 return 1;
1975 /* If ->low is different, NULL low goes first, then by
1976 ascending low. */
1977 if (p->low != NULL_TREE)
1979 if (q->low != NULL_TREE)
1981 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1982 p->low, q->low);
1983 if (tem && integer_onep (tem))
1984 return -1;
1985 tem = fold_binary (GT_EXPR, boolean_type_node,
1986 p->low, q->low);
1987 if (tem && integer_onep (tem))
1988 return 1;
1990 else
1991 return 1;
1993 else if (q->low != NULL_TREE)
1994 return -1;
1995 /* If ->high is different, NULL high goes last, before that by
1996 ascending high. */
1997 if (p->high != NULL_TREE)
1999 if (q->high != NULL_TREE)
2001 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2002 p->high, q->high);
2003 if (tem && integer_onep (tem))
2004 return -1;
2005 tem = fold_binary (GT_EXPR, boolean_type_node,
2006 p->high, q->high);
2007 if (tem && integer_onep (tem))
2008 return 1;
2010 else
2011 return -1;
2013 else if (q->high != NULL_TREE)
2014 return 1;
2015 /* If both ranges are the same, sort below by ascending idx. */
2017 else
2018 return 1;
2020 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2021 return -1;
2023 if (p->idx < q->idx)
2024 return -1;
2025 else
2027 gcc_checking_assert (p->idx > q->idx);
2028 return 1;
2032 /* Helper routine of optimize_range_tests.
2033 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2034 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2035 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2036 is NULL, OTHERRANGEP should not be NULL; in that case OTHERRANGEP
2037 points to an array of COUNT pointers to other ranges. Return
2038 true if the range merge has been successful.
2039 If OPCODE is ERROR_MARK, this is called from within
2040 maybe_optimize_range_tests and is performing inter-bb range optimization.
2041 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2042 oe->rank. */
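/* Note that on success only RANGE keeps a meaningful test afterwards; the
   entries merged into it get their operand replaced either by the neutral
   value of the bitwise operation (0 for |, 1 for &, in the ERROR_MARK case)
   or by error_mark_node, so the callers can fold or drop those tests.  */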
2044 static bool
2045 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2046 struct range_entry **otherrangep,
2047 unsigned int count, enum tree_code opcode,
2048 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2049 bool in_p, tree low, tree high, bool strict_overflow_p)
2051 operand_entry *oe = (*ops)[range->idx];
2052 tree op = oe->op;
2053 gimple *stmt = op ? SSA_NAME_DEF_STMT (op) :
2054 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2055 location_t loc = gimple_location (stmt);
2056 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2057 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2058 in_p, low, high);
2059 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2060 gimple_stmt_iterator gsi;
2061 unsigned int i;
2063 if (tem == NULL_TREE)
2064 return false;
2066 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2067 warning_at (loc, OPT_Wstrict_overflow,
2068 "assuming signed overflow does not occur "
2069 "when simplifying range test");
2071 if (dump_file && (dump_flags & TDF_DETAILS))
2073 struct range_entry *r;
2074 fprintf (dump_file, "Optimizing range tests ");
2075 print_generic_expr (dump_file, range->exp, 0);
2076 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2077 print_generic_expr (dump_file, range->low, 0);
2078 fprintf (dump_file, ", ");
2079 print_generic_expr (dump_file, range->high, 0);
2080 fprintf (dump_file, "]");
2081 for (i = 0; i < count; i++)
2083 if (otherrange)
2084 r = otherrange + i;
2085 else
2086 r = otherrangep[i];
2087 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2088 print_generic_expr (dump_file, r->low, 0);
2089 fprintf (dump_file, ", ");
2090 print_generic_expr (dump_file, r->high, 0);
2091 fprintf (dump_file, "]");
2093 fprintf (dump_file, "\n into ");
2094 print_generic_expr (dump_file, tem, 0);
2095 fprintf (dump_file, "\n");
2098 if (opcode == BIT_IOR_EXPR
2099 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2100 tem = invert_truthvalue_loc (loc, tem);
2102 tem = fold_convert_loc (loc, optype, tem);
2103 gsi = gsi_for_stmt (stmt);
2104 unsigned int uid = gimple_uid (stmt);
2105 /* In rare cases range->exp can be equal to lhs of stmt.
2106 In that case we have to insert after the stmt rather than before
2107 it. If stmt is a PHI, insert it at the start of the basic block. */
2108 if (op != range->exp)
2110 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2111 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2112 GSI_SAME_STMT);
2113 gsi_prev (&gsi);
2115 else if (gimple_code (stmt) != GIMPLE_PHI)
2117 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2118 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2119 GSI_CONTINUE_LINKING);
2121 else
2123 gsi = gsi_after_labels (gimple_bb (stmt));
2124 if (!gsi_end_p (gsi))
2125 uid = gimple_uid (gsi_stmt (gsi));
2126 else
2128 gsi = gsi_start_bb (gimple_bb (stmt));
2129 uid = 1;
2130 while (!gsi_end_p (gsi))
2132 uid = gimple_uid (gsi_stmt (gsi));
2133 gsi_next (&gsi);
2136 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2137 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2138 GSI_SAME_STMT);
2139 if (gsi_end_p (gsi))
2140 gsi = gsi_last_bb (gimple_bb (stmt));
2141 else
2142 gsi_prev (&gsi);
2144 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2145 if (gimple_uid (gsi_stmt (gsi)))
2146 break;
2147 else
2148 gimple_set_uid (gsi_stmt (gsi), uid);
2150 oe->op = tem;
2151 range->exp = exp;
2152 range->low = low;
2153 range->high = high;
2154 range->in_p = in_p;
2155 range->strict_overflow_p = false;
2157 for (i = 0; i < count; i++)
2159 if (otherrange)
2160 range = otherrange + i;
2161 else
2162 range = otherrangep[i];
2163 oe = (*ops)[range->idx];
2164 /* Now change all the other range test immediate uses, so that
2165 those tests will be optimized away. */
2166 if (opcode == ERROR_MARK)
2168 if (oe->op)
2169 oe->op = build_int_cst (TREE_TYPE (oe->op),
2170 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2171 else
2172 oe->op = (oe->rank == BIT_IOR_EXPR
2173 ? boolean_false_node : boolean_true_node);
2175 else
2176 oe->op = error_mark_node;
2177 range->exp = NULL_TREE;
2179 return true;
2182 /* Optimize X == CST1 || X == CST2
2183 if popcount (CST1 ^ CST2) == 1 into
2184 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2185 Similarly for ranges. E.g.
2186 X != 2 && X != 3 && X != 10 && X != 11
2187 will be transformed by the previous optimization into
2188 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2189 and this loop can transform that into
2190 !(((X & ~8) - 2U) <= 1U). */
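/* A worked instance of the first form above, with hypothetical constants
   (not taken from any testcase): for CST1 == 43 and CST2 == 47 we have
   43 ^ 47 == 4, a power of two, so
     X == 43 || X == 47
   can be rewritten as
     (X & ~4) == 43
   because 43 & ~4 == 47 & ~4 == 43.  */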
2192 static bool
2193 optimize_range_tests_xor (enum tree_code opcode, tree type,
2194 tree lowi, tree lowj, tree highi, tree highj,
2195 vec<operand_entry *> *ops,
2196 struct range_entry *rangei,
2197 struct range_entry *rangej)
2199 tree lowxor, highxor, tem, exp;
2200 /* Check lowi ^ lowj == highi ^ highj and
2201 popcount (lowi ^ lowj) == 1. */
2202 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2203 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2204 return false;
2205 if (!integer_pow2p (lowxor))
2206 return false;
2207 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2208 if (!tree_int_cst_equal (lowxor, highxor))
2209 return false;
2211 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2212 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2213 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2214 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2215 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2216 NULL, rangei->in_p, lowj, highj,
2217 rangei->strict_overflow_p
2218 || rangej->strict_overflow_p))
2219 return true;
2220 return false;
2223 /* Optimize X == CST1 || X == CST2
2224 if popcount (CST2 - CST1) == 1 into
2225 ((X - CST1) & ~(CST2 - CST1)) == 0.
2226 Similarly for ranges. E.g.
2227 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2228 || X == 75 || X == 45
2229 will be transformed by the previous optimization into
2230 (X - 43U) <= 3U || (X - 75U) <= 3U
2231 and this loop can transform that into
2232 ((X - 43U) & ~(75U - 43U)) <= 3U. */
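/* A worked instance of the first form above, again with hypothetical
   constants: for CST1 == 10 and CST2 == 14 we have 14 - 10 == 4, a power
   of two, so
     X == 10 || X == 14
   can be rewritten as
     ((X - 10U) & ~4U) == 0
   which holds exactly when X - 10U is 0 or 4, i.e. when X is 10 or 14.  */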
2233 static bool
2234 optimize_range_tests_diff (enum tree_code opcode, tree type,
2235 tree lowi, tree lowj, tree highi, tree highj,
2236 vec<operand_entry *> *ops,
2237 struct range_entry *rangei,
2238 struct range_entry *rangej)
2240 tree tem1, tem2, mask;
2241 /* Check highi - lowi == highj - lowj. */
2242 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2243 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2244 return false;
2245 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2246 if (!tree_int_cst_equal (tem1, tem2))
2247 return false;
2248 /* Check popcount (lowj - lowi) == 1. */
2249 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2250 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2251 return false;
2252 if (!integer_pow2p (tem1))
2253 return false;
2255 type = unsigned_type_for (type);
2256 tem1 = fold_convert (type, tem1);
2257 tem2 = fold_convert (type, tem2);
2258 lowi = fold_convert (type, lowi);
2259 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2260 tem1 = fold_binary (MINUS_EXPR, type,
2261 fold_convert (type, rangei->exp), lowi);
2262 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2263 lowj = build_int_cst (type, 0);
2264 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2265 NULL, rangei->in_p, lowj, tem2,
2266 rangei->strict_overflow_p
2267 || rangej->strict_overflow_p))
2268 return true;
2269 return false;
2272 /* This function does some checks common to optimize_range_tests_xor and
2273 optimize_range_tests_diff.
2274 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2275 Else it calls optimize_range_tests_diff. */
2277 static bool
2278 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2279 bool optimize_xor, vec<operand_entry *> *ops,
2280 struct range_entry *ranges)
2282 int i, j;
2283 bool any_changes = false;
2284 for (i = first; i < length; i++)
2286 tree lowi, highi, lowj, highj, type, tem;
2288 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2289 continue;
2290 type = TREE_TYPE (ranges[i].exp);
2291 if (!INTEGRAL_TYPE_P (type))
2292 continue;
2293 lowi = ranges[i].low;
2294 if (lowi == NULL_TREE)
2295 lowi = TYPE_MIN_VALUE (type);
2296 highi = ranges[i].high;
2297 if (highi == NULL_TREE)
2298 continue;
2299 for (j = i + 1; j < length && j < i + 64; j++)
2301 bool changes;
2302 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2303 continue;
2304 lowj = ranges[j].low;
2305 if (lowj == NULL_TREE)
2306 continue;
2307 highj = ranges[j].high;
2308 if (highj == NULL_TREE)
2309 highj = TYPE_MAX_VALUE (type);
2310 /* Check lowj > highi. */
2311 tem = fold_binary (GT_EXPR, boolean_type_node,
2312 lowj, highi);
2313 if (tem == NULL_TREE || !integer_onep (tem))
2314 continue;
2315 if (optimize_xor)
2316 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2317 highi, highj, ops,
2318 ranges + i, ranges + j);
2319 else
2320 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2321 highi, highj, ops,
2322 ranges + i, ranges + j);
2323 if (changes)
2325 any_changes = true;
2326 break;
2330 return any_changes;
2333 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2334 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2335 EXP on success, NULL otherwise. */
2337 static tree
2338 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2339 wide_int *mask, tree *totallowp)
2341 tree tem = int_const_binop (MINUS_EXPR, high, low);
2342 if (tem == NULL_TREE
2343 || TREE_CODE (tem) != INTEGER_CST
2344 || TREE_OVERFLOW (tem)
2345 || tree_int_cst_sgn (tem) == -1
2346 || compare_tree_int (tem, prec) != -1)
2347 return NULL_TREE;
2349 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2350 *mask = wi::shifted_mask (0, max, false, prec);
2351 if (TREE_CODE (exp) == BIT_AND_EXPR
2352 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2354 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2355 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2356 if (wi::popcount (msk) == 1
2357 && wi::ltu_p (msk, prec - max))
2359 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2360 max += msk.to_uhwi ();
2361 exp = TREE_OPERAND (exp, 0);
2362 if (integer_zerop (low)
2363 && TREE_CODE (exp) == PLUS_EXPR
2364 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2366 tree ret = TREE_OPERAND (exp, 0);
2367 STRIP_NOPS (ret);
2368 widest_int bias
2369 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2370 TYPE_PRECISION (TREE_TYPE (low))));
2371 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2372 if (totallowp)
2374 *totallowp = tbias;
2375 return ret;
2377 else if (!tree_int_cst_lt (totallow, tbias))
2378 return NULL_TREE;
2379 bias = wi::to_widest (tbias);
2380 bias -= wi::to_widest (totallow);
2381 if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
2383 *mask = wi::lshift (*mask, bias);
2384 return ret;
2389 if (totallowp)
2390 return exp;
2391 if (!tree_int_cst_lt (totallow, low))
2392 return exp;
2393 tem = int_const_binop (MINUS_EXPR, low, totallow);
2394 if (tem == NULL_TREE
2395 || TREE_CODE (tem) != INTEGER_CST
2396 || TREE_OVERFLOW (tem)
2397 || compare_tree_int (tem, prec - max) == 1)
2398 return NULL_TREE;
2400 *mask = wi::lshift (*mask, wi::to_widest (tem));
2401 return exp;
2404 /* Attempt to optimize small range tests using bit test.
2405 E.g.
2406 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2407 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2408 has been optimized by the earlier optimizations into:
2409 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2410 As the whole 43 through 82 range spans fewer than 64 values,
2411 for 64-bit word targets optimize that into:
2412 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
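/* In the example above, bit K of the 0x8F0000004FULL mask is set iff
   43 + K is one of the tested values: the low nibble 0xF covers 43..46,
   bit 6 (0x40) covers 49, bits 32..35 (0xF00000000) cover 75..78 and
   bit 39 (0x8000000000) covers 82.  */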
2414 static bool
2415 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2416 vec<operand_entry *> *ops,
2417 struct range_entry *ranges)
2419 int i, j;
2420 bool any_changes = false;
2421 int prec = GET_MODE_BITSIZE (word_mode);
2422 auto_vec<struct range_entry *, 64> candidates;
2424 for (i = first; i < length - 2; i++)
2426 tree lowi, highi, lowj, highj, type;
2428 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2429 continue;
2430 type = TREE_TYPE (ranges[i].exp);
2431 if (!INTEGRAL_TYPE_P (type))
2432 continue;
2433 lowi = ranges[i].low;
2434 if (lowi == NULL_TREE)
2435 lowi = TYPE_MIN_VALUE (type);
2436 highi = ranges[i].high;
2437 if (highi == NULL_TREE)
2438 continue;
2439 wide_int mask;
2440 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2441 highi, &mask, &lowi);
2442 if (exp == NULL_TREE)
2443 continue;
2444 bool strict_overflow_p = ranges[i].strict_overflow_p;
2445 candidates.truncate (0);
2446 int end = MIN (i + 64, length);
2447 for (j = i + 1; j < end; j++)
2449 tree exp2;
2450 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2451 continue;
2452 if (ranges[j].exp == exp)
2454 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2456 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2457 if (exp2 == exp)
2459 else if (TREE_CODE (exp2) == PLUS_EXPR)
2461 exp2 = TREE_OPERAND (exp2, 0);
2462 STRIP_NOPS (exp2);
2463 if (exp2 != exp)
2464 continue;
2466 else
2467 continue;
2469 else
2470 continue;
2471 lowj = ranges[j].low;
2472 if (lowj == NULL_TREE)
2473 continue;
2474 highj = ranges[j].high;
2475 if (highj == NULL_TREE)
2476 highj = TYPE_MAX_VALUE (type);
2477 wide_int mask2;
2478 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2479 highj, &mask2, NULL);
2480 if (exp2 != exp)
2481 continue;
2482 mask |= mask2;
2483 strict_overflow_p |= ranges[j].strict_overflow_p;
2484 candidates.safe_push (&ranges[j]);
2487 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2488 if (candidates.length () >= 2)
2490 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2491 wi::to_widest (lowi)
2492 + prec - 1 - wi::clz (mask));
2493 operand_entry *oe = (*ops)[ranges[i].idx];
2494 tree op = oe->op;
2495 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2496 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2497 location_t loc = gimple_location (stmt);
2498 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2500 /* See if it isn't cheaper to pretend the minimum value of the
2501 range is 0, if the maximum value is small enough.
2502 We can then avoid the subtraction of the minimum value, but the
2503 mask constant could perhaps be more expensive. */
2504 if (compare_tree_int (lowi, 0) > 0
2505 && compare_tree_int (high, prec) < 0)
2507 int cost_diff;
2508 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2509 rtx reg = gen_raw_REG (word_mode, 10000);
2510 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2511 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2512 GEN_INT (-m)), speed_p);
2513 rtx r = immed_wide_int_const (mask, word_mode);
2514 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2515 word_mode, speed_p);
2516 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2517 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2518 word_mode, speed_p);
2519 if (cost_diff > 0)
2521 mask = wi::lshift (mask, m);
2522 lowi = build_zero_cst (TREE_TYPE (lowi));
2526 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2527 false, lowi, high);
2528 if (tem == NULL_TREE || is_gimple_val (tem))
2529 continue;
2530 tree etype = unsigned_type_for (TREE_TYPE (exp));
2531 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2532 fold_convert_loc (loc, etype, exp),
2533 fold_convert_loc (loc, etype, lowi));
2534 exp = fold_convert_loc (loc, integer_type_node, exp);
2535 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2536 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2537 build_int_cst (word_type, 1), exp);
2538 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2539 wide_int_to_tree (word_type, mask));
2540 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2541 build_zero_cst (word_type));
2542 if (is_gimple_val (exp))
2543 continue;
2545 /* The shift might have undefined behavior if TEM is true,
2546 but reassociate_bb isn't prepared to have basic blocks
2547 split when it is running. So, temporarily emit code
2548 with BIT_IOR_EXPR instead of &&, and fix it up in
2549 branch_fixup. */
2550 gimple_seq seq;
2551 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2552 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2553 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2554 gimple_seq seq2;
2555 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2556 gimple_seq_add_seq_without_update (&seq, seq2);
2557 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2558 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2559 gimple *g = gimple_build_assign (make_ssa_name (optype),
2560 BIT_IOR_EXPR, tem, exp);
2561 gimple_set_location (g, loc);
2562 gimple_seq_add_stmt_without_update (&seq, g);
2563 exp = gimple_assign_lhs (g);
2564 tree val = build_zero_cst (optype);
2565 if (update_range_test (&ranges[i], NULL, candidates.address (),
2566 candidates.length (), opcode, ops, exp,
2567 seq, false, val, val, strict_overflow_p))
2569 any_changes = true;
2570 reassoc_branch_fixups.safe_push (tem);
2572 else
2573 gimple_seq_discard (seq);
2576 return any_changes;
2579 /* Optimize range tests, similarly to how fold_range_test optimizes
2580 them on trees. The tree code for the binary
2581 operation between all the operands is OPCODE.
2582 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2583 maybe_optimize_range_tests for inter-bb range optimization.
2584 In that case if oe->op is NULL, oe->id is the index of the bb whose
2585 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2586 the actual opcode.
2588 static bool
2589 optimize_range_tests (enum tree_code opcode,
2590 vec<operand_entry *> *ops)
2592 unsigned int length = ops->length (), i, j, first;
2593 operand_entry *oe;
2594 struct range_entry *ranges;
2595 bool any_changes = false;
2597 if (length == 1)
2598 return false;
2600 ranges = XNEWVEC (struct range_entry, length);
2601 for (i = 0; i < length; i++)
2603 oe = (*ops)[i];
2604 ranges[i].idx = i;
2605 init_range_entry (ranges + i, oe->op,
2606 oe->op ? NULL :
2607 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2608 /* For | invert it now, we will invert it again before emitting
2609 the optimized expression. */
2610 if (opcode == BIT_IOR_EXPR
2611 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2612 ranges[i].in_p = !ranges[i].in_p;
2615 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2616 for (i = 0; i < length; i++)
2617 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2618 break;
2620 /* Try to merge ranges. */
2621 for (first = i; i < length; i++)
2623 tree low = ranges[i].low;
2624 tree high = ranges[i].high;
2625 int in_p = ranges[i].in_p;
2626 bool strict_overflow_p = ranges[i].strict_overflow_p;
2627 int update_fail_count = 0;
2629 for (j = i + 1; j < length; j++)
2631 if (ranges[i].exp != ranges[j].exp)
2632 break;
2633 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2634 ranges[j].in_p, ranges[j].low, ranges[j].high))
2635 break;
2636 strict_overflow_p |= ranges[j].strict_overflow_p;
2639 if (j == i + 1)
2640 continue;
2642 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2643 opcode, ops, ranges[i].exp, NULL, in_p,
2644 low, high, strict_overflow_p))
2646 i = j - 1;
2647 any_changes = true;
2649 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2650 while update_range_test would fail. */
2651 else if (update_fail_count == 64)
2652 i = j - 1;
2653 else
2654 ++update_fail_count;
2657 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2658 ops, ranges);
2660 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2661 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2662 ops, ranges);
2663 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2664 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2665 ops, ranges);
2667 if (any_changes && opcode != ERROR_MARK)
2669 j = 0;
2670 FOR_EACH_VEC_ELT (*ops, i, oe)
2672 if (oe->op == error_mark_node)
2673 continue;
2674 else if (i != j)
2675 (*ops)[j] = oe;
2676 j++;
2678 ops->truncate (j);
2681 XDELETEVEC (ranges);
2682 return any_changes;
2685 /* Return true if STMT is a cast like:
2686 <bb N>:
2688 _123 = (int) _234;
2690 <bb M>:
2691 # _345 = PHI <_123(N), 1(...), 1(...)>
2692 where _234 has bool type, _123 has a single use and
2693 bb N has a single successor M. This is commonly used in
2694 the last block of a range test. */
2696 static bool
2697 final_range_test_p (gimple *stmt)
2699 basic_block bb, rhs_bb;
2700 edge e;
2701 tree lhs, rhs;
2702 use_operand_p use_p;
2703 gimple *use_stmt;
2705 if (!gimple_assign_cast_p (stmt))
2706 return false;
2707 bb = gimple_bb (stmt);
2708 if (!single_succ_p (bb))
2709 return false;
2710 e = single_succ_edge (bb);
2711 if (e->flags & EDGE_COMPLEX)
2712 return false;
2714 lhs = gimple_assign_lhs (stmt);
2715 rhs = gimple_assign_rhs1 (stmt);
2716 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2717 || TREE_CODE (rhs) != SSA_NAME
2718 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2719 return false;
2721 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2722 if (!single_imm_use (lhs, &use_p, &use_stmt))
2723 return false;
2725 if (gimple_code (use_stmt) != GIMPLE_PHI
2726 || gimple_bb (use_stmt) != e->dest)
2727 return false;
2729 /* And that the rhs is defined in the same loop. */
2730 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2731 if (rhs_bb == NULL
2732 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2733 return false;
2735 return true;
2738 /* Return true if BB is a suitable basic block for inter-bb range test
2739 optimization. If BACKWARD is true, BB should be the only predecessor
2740 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2741 or is compared against to find the common basic block to which all
2742 conditions branch if true resp. false. If BACKWARD is false, TEST_BB
2743 should be the only predecessor of BB. */
2745 static bool
2746 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2747 bool backward)
2749 edge_iterator ei, ei2;
2750 edge e, e2;
2751 gimple *stmt;
2752 gphi_iterator gsi;
2753 bool other_edge_seen = false;
2754 bool is_cond;
2756 if (test_bb == bb)
2757 return false;
2758 /* Check last stmt first. */
2759 stmt = last_stmt (bb);
2760 if (stmt == NULL
2761 || (gimple_code (stmt) != GIMPLE_COND
2762 && (backward || !final_range_test_p (stmt)))
2763 || gimple_visited_p (stmt)
2764 || stmt_could_throw_p (stmt)
2765 || *other_bb == bb)
2766 return false;
2767 is_cond = gimple_code (stmt) == GIMPLE_COND;
2768 if (is_cond)
2770 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2771 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2772 to *OTHER_BB (if not set yet, try to find it out). */
2773 if (EDGE_COUNT (bb->succs) != 2)
2774 return false;
2775 FOR_EACH_EDGE (e, ei, bb->succs)
2777 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2778 return false;
2779 if (e->dest == test_bb)
2781 if (backward)
2782 continue;
2783 else
2784 return false;
2786 if (e->dest == bb)
2787 return false;
2788 if (*other_bb == NULL)
2790 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2791 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2792 return false;
2793 else if (e->dest == e2->dest)
2794 *other_bb = e->dest;
2795 if (*other_bb == NULL)
2796 return false;
2798 if (e->dest == *other_bb)
2799 other_edge_seen = true;
2800 else if (backward)
2801 return false;
2803 if (*other_bb == NULL || !other_edge_seen)
2804 return false;
2806 else if (single_succ (bb) != *other_bb)
2807 return false;
2809 /* Now check all PHIs of *OTHER_BB. */
2810 e = find_edge (bb, *other_bb);
2811 e2 = find_edge (test_bb, *other_bb);
2812 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2814 gphi *phi = gsi.phi ();
2815 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2816 corresponding to BB and TEST_BB predecessor must be the same. */
2817 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2818 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2820 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2821 one of the PHIs should have the lhs of the last stmt in
2822 that block as PHI arg and that PHI should have 0 or 1
2823 corresponding to it in all other range test basic blocks
2824 considered. */
2825 if (!is_cond)
2827 if (gimple_phi_arg_def (phi, e->dest_idx)
2828 == gimple_assign_lhs (stmt)
2829 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2830 || integer_onep (gimple_phi_arg_def (phi,
2831 e2->dest_idx))))
2832 continue;
2834 else
2836 gimple *test_last = last_stmt (test_bb);
2837 if (gimple_code (test_last) != GIMPLE_COND
2838 && gimple_phi_arg_def (phi, e2->dest_idx)
2839 == gimple_assign_lhs (test_last)
2840 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2841 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2842 continue;
2845 return false;
2848 return true;
2851 /* Return true if BB doesn't have side-effects that would disallow
2852 range test optimization, all SSA_NAMEs set in the bb are consumed
2853 in the bb and there are no PHIs. */
2855 static bool
2856 no_side_effect_bb (basic_block bb)
2858 gimple_stmt_iterator gsi;
2859 gimple *last;
2861 if (!gimple_seq_empty_p (phi_nodes (bb)))
2862 return false;
2863 last = last_stmt (bb);
2864 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2866 gimple *stmt = gsi_stmt (gsi);
2867 tree lhs;
2868 imm_use_iterator imm_iter;
2869 use_operand_p use_p;
2871 if (is_gimple_debug (stmt))
2872 continue;
2873 if (gimple_has_side_effects (stmt))
2874 return false;
2875 if (stmt == last)
2876 return true;
2877 if (!is_gimple_assign (stmt))
2878 return false;
2879 lhs = gimple_assign_lhs (stmt);
2880 if (TREE_CODE (lhs) != SSA_NAME)
2881 return false;
2882 if (gimple_assign_rhs_could_trap_p (stmt))
2883 return false;
2884 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2886 gimple *use_stmt = USE_STMT (use_p);
2887 if (is_gimple_debug (use_stmt))
2888 continue;
2889 if (gimple_bb (use_stmt) != bb)
2890 return false;
2893 return false;
2896 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2897 return true and fill in *OPS recursively. */
2899 static bool
2900 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
2901 struct loop *loop)
2903 gimple *stmt = SSA_NAME_DEF_STMT (var);
2904 tree rhs[2];
2905 int i;
2907 if (!is_reassociable_op (stmt, code, loop))
2908 return false;
2910 rhs[0] = gimple_assign_rhs1 (stmt);
2911 rhs[1] = gimple_assign_rhs2 (stmt);
2912 gimple_set_visited (stmt, true);
2913 for (i = 0; i < 2; i++)
2914 if (TREE_CODE (rhs[i]) == SSA_NAME
2915 && !get_ops (rhs[i], code, ops, loop)
2916 && has_single_use (rhs[i]))
2918 operand_entry *oe = operand_entry_pool.allocate ();
2920 oe->op = rhs[i];
2921 oe->rank = code;
2922 oe->id = 0;
2923 oe->count = 1;
2924 ops->safe_push (oe);
2926 return true;
2929 /* Find the ops that were added by get_ops starting from VAR, see if
2930 they were changed during update_range_test and if so, create new
2931 stmts. */
2933 static tree
2934 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
2935 unsigned int *pidx, struct loop *loop)
2937 gimple *stmt = SSA_NAME_DEF_STMT (var);
2938 tree rhs[4];
2939 int i;
2941 if (!is_reassociable_op (stmt, code, loop))
2942 return NULL;
2944 rhs[0] = gimple_assign_rhs1 (stmt);
2945 rhs[1] = gimple_assign_rhs2 (stmt);
2946 rhs[2] = rhs[0];
2947 rhs[3] = rhs[1];
2948 for (i = 0; i < 2; i++)
2949 if (TREE_CODE (rhs[i]) == SSA_NAME)
2951 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2952 if (rhs[2 + i] == NULL_TREE)
2954 if (has_single_use (rhs[i]))
2955 rhs[2 + i] = ops[(*pidx)++]->op;
2956 else
2957 rhs[2 + i] = rhs[i];
2960 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2961 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2963 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2964 var = make_ssa_name (TREE_TYPE (var));
2965 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
2966 rhs[2], rhs[3]);
2967 gimple_set_uid (g, gimple_uid (stmt));
2968 gimple_set_visited (g, true);
2969 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2971 return var;
2974 /* Structure to track the initial value passed to get_ops and
2975 the range in the ops vector for each basic block. */
2977 struct inter_bb_range_test_entry
2979 tree op;
2980 unsigned int first_idx, last_idx;
2983 /* Inter-bb range test optimization. */
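/* Purely for illustration (a hypothetical source fragment, not from any
   testcase): in
     if (x == 2 || x == 5 || x == 7)
       foo ();
   the short-circuited || is expanded into a chain of basic blocks, each
   ending in a GIMPLE_COND branching to a common destination.  The code
   below gathers those per-bb tests into one operand vector so that
   optimize_range_tests can merge them as if they were a single
   expression.  */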
2985 static void
2986 maybe_optimize_range_tests (gimple *stmt)
2988 basic_block first_bb = gimple_bb (stmt);
2989 basic_block last_bb = first_bb;
2990 basic_block other_bb = NULL;
2991 basic_block bb;
2992 edge_iterator ei;
2993 edge e;
2994 auto_vec<operand_entry *> ops;
2995 auto_vec<inter_bb_range_test_entry> bbinfo;
2996 bool any_changes = false;
2998 /* Consider only basic blocks that end with GIMPLE_COND or
2999 a cast statement satisfying final_range_test_p. All
3000 but the last bb in the first_bb .. last_bb range
3001 should end with GIMPLE_COND. */
3002 if (gimple_code (stmt) == GIMPLE_COND)
3004 if (EDGE_COUNT (first_bb->succs) != 2)
3005 return;
3007 else if (final_range_test_p (stmt))
3008 other_bb = single_succ (first_bb);
3009 else
3010 return;
3012 if (stmt_could_throw_p (stmt))
3013 return;
3015 /* As relative ordering of post-dominator sons isn't fixed,
3016 maybe_optimize_range_tests can be called first on any
3017 bb in the range we want to optimize. So, start searching
3018 backwards, to see if first_bb can be set to a predecessor. */
3019 while (single_pred_p (first_bb))
3021 basic_block pred_bb = single_pred (first_bb);
3022 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3023 break;
3024 if (!no_side_effect_bb (first_bb))
3025 break;
3026 first_bb = pred_bb;
3028 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3029 Before starting forward search in last_bb successors, find
3030 out the other_bb. */
3031 if (first_bb == last_bb)
3033 other_bb = NULL;
3034 /* As non-GIMPLE_COND last stmt always terminates the range,
3035 if forward search didn't discover anything, just give up. */
3036 if (gimple_code (stmt) != GIMPLE_COND)
3037 return;
3038 /* Look at both successors. Either it ends with a GIMPLE_COND
3039 and satisfies suitable_cond_bb, or ends with a cast and
3040 other_bb is that cast's successor. */
3041 FOR_EACH_EDGE (e, ei, first_bb->succs)
3042 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3043 || e->dest == first_bb)
3044 return;
3045 else if (single_pred_p (e->dest))
3047 stmt = last_stmt (e->dest);
3048 if (stmt
3049 && gimple_code (stmt) == GIMPLE_COND
3050 && EDGE_COUNT (e->dest->succs) == 2)
3052 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3053 break;
3054 else
3055 other_bb = NULL;
3057 else if (stmt
3058 && final_range_test_p (stmt)
3059 && find_edge (first_bb, single_succ (e->dest)))
3061 other_bb = single_succ (e->dest);
3062 if (other_bb == first_bb)
3063 other_bb = NULL;
3066 if (other_bb == NULL)
3067 return;
3069 /* Now do the forward search, moving last_bb to successor bbs
3070 that aren't other_bb. */
3071 while (EDGE_COUNT (last_bb->succs) == 2)
3073 FOR_EACH_EDGE (e, ei, last_bb->succs)
3074 if (e->dest != other_bb)
3075 break;
3076 if (e == NULL)
3077 break;
3078 if (!single_pred_p (e->dest))
3079 break;
3080 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3081 break;
3082 if (!no_side_effect_bb (e->dest))
3083 break;
3084 last_bb = e->dest;
3086 if (first_bb == last_bb)
3087 return;
3088 /* Here basic blocks first_bb through last_bb's predecessor
3089 end with GIMPLE_COND, all of them have one of their edges to
3090 other_bb and the other to another block in the range,
3091 all blocks except first_bb don't have side-effects and
3092 last_bb ends with either GIMPLE_COND, or cast satisfying
3093 final_range_test_p. */
3094 for (bb = last_bb; ; bb = single_pred (bb))
3096 enum tree_code code;
3097 tree lhs, rhs;
3098 inter_bb_range_test_entry bb_ent;
3100 bb_ent.op = NULL_TREE;
3101 bb_ent.first_idx = ops.length ();
3102 bb_ent.last_idx = bb_ent.first_idx;
3103 e = find_edge (bb, other_bb);
3104 stmt = last_stmt (bb);
3105 gimple_set_visited (stmt, true);
3106 if (gimple_code (stmt) != GIMPLE_COND)
3108 use_operand_p use_p;
3109 gimple *phi;
3110 edge e2;
3111 unsigned int d;
3113 lhs = gimple_assign_lhs (stmt);
3114 rhs = gimple_assign_rhs1 (stmt);
3115 gcc_assert (bb == last_bb);
3117 /* stmt is
3118 _123 = (int) _234;
3120 followed by:
3121 <bb M>:
3122 # _345 = PHI <_123(N), 1(...), 1(...)>
3124 or 0 instead of 1. If it is 0, the _234
3125 range test is anded together with all the
3126 other range tests, if it is 1, it is ored with
3127 them. */
3128 single_imm_use (lhs, &use_p, &phi);
3129 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3130 e2 = find_edge (first_bb, other_bb);
3131 d = e2->dest_idx;
3132 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3133 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3134 code = BIT_AND_EXPR;
3135 else
3137 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3138 code = BIT_IOR_EXPR;
3141 /* If _234 SSA_NAME_DEF_STMT is
3142 _234 = _567 | _789;
3143 (or &, corresponding to 1/0 in the phi arguments,
3144 push into ops the individual range test arguments
3145 of the bitwise or resp. and, recursively. */
3146 if (!get_ops (rhs, code, &ops,
3147 loop_containing_stmt (stmt))
3148 && has_single_use (rhs))
3150 /* Otherwise, push the _234 range test itself. */
3151 operand_entry *oe = operand_entry_pool.allocate ();
3153 oe->op = rhs;
3154 oe->rank = code;
3155 oe->id = 0;
3156 oe->count = 1;
3157 ops.safe_push (oe);
3158 bb_ent.last_idx++;
3160 else
3161 bb_ent.last_idx = ops.length ();
3162 bb_ent.op = rhs;
3163 bbinfo.safe_push (bb_ent);
3164 continue;
3166 /* Otherwise stmt is GIMPLE_COND. */
3167 code = gimple_cond_code (stmt);
3168 lhs = gimple_cond_lhs (stmt);
3169 rhs = gimple_cond_rhs (stmt);
3170 if (TREE_CODE (lhs) == SSA_NAME
3171 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3172 && ((code != EQ_EXPR && code != NE_EXPR)
3173 || rhs != boolean_false_node
3174 /* Either push into ops the individual bitwise
3175 or resp. and operands, depending on which
3176 edge is other_bb. */
3177 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3178 ^ (code == EQ_EXPR))
3179 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3180 loop_containing_stmt (stmt))))
3182 /* Or push the GIMPLE_COND stmt itself. */
3183 operand_entry *oe = operand_entry_pool.allocate ();
3185 oe->op = NULL;
3186 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3187 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3188 /* oe->op = NULL signals that there is no SSA_NAME
3189 for the range test, and oe->id instead is the
3190 basic block number at whose end the GIMPLE_COND
3191 is. */
3192 oe->id = bb->index;
3193 oe->count = 1;
3194 ops.safe_push (oe);
3195 bb_ent.op = NULL;
3196 bb_ent.last_idx++;
3198 else if (ops.length () > bb_ent.first_idx)
3200 bb_ent.op = lhs;
3201 bb_ent.last_idx = ops.length ();
3203 bbinfo.safe_push (bb_ent);
3204 if (bb == first_bb)
3205 break;
3207 if (ops.length () > 1)
3208 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3209 if (any_changes)
3211 unsigned int idx;
3212 /* update_ops relies on the has_single_use predicates returning the
3213 same values as they did during get_ops earlier. Additionally it
3214 never removes statements, only adds new ones and it should walk
3215 from the single imm use and check the predicate already before
3216 making those changes.
3217 On the other hand, the handling of GIMPLE_COND directly can turn
3218 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3219 it needs to be done in a separate loop afterwards. */
3220 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3222 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3223 && bbinfo[idx].op != NULL_TREE)
3225 tree new_op;
3227 stmt = last_stmt (bb);
3228 new_op = update_ops (bbinfo[idx].op,
3229 (enum tree_code)
3230 ops[bbinfo[idx].first_idx]->rank,
3231 ops, &bbinfo[idx].first_idx,
3232 loop_containing_stmt (stmt));
3233 if (new_op == NULL_TREE)
3235 gcc_assert (bb == last_bb);
3236 new_op = ops[bbinfo[idx].first_idx++]->op;
3238 if (bbinfo[idx].op != new_op)
3240 imm_use_iterator iter;
3241 use_operand_p use_p;
3242 gimple *use_stmt, *cast_stmt = NULL;
3244 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3245 if (is_gimple_debug (use_stmt))
3246 continue;
3247 else if (gimple_code (use_stmt) == GIMPLE_COND
3248 || gimple_code (use_stmt) == GIMPLE_PHI)
3249 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3250 SET_USE (use_p, new_op);
3251 else if (gimple_assign_cast_p (use_stmt))
3252 cast_stmt = use_stmt;
3253 else
3254 gcc_unreachable ();
3255 if (cast_stmt)
3257 gcc_assert (bb == last_bb);
3258 tree lhs = gimple_assign_lhs (cast_stmt);
3259 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3260 enum tree_code rhs_code
3261 = gimple_assign_rhs_code (cast_stmt);
3262 gassign *g;
3263 if (is_gimple_min_invariant (new_op))
3265 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3266 g = gimple_build_assign (new_lhs, new_op);
3268 else
3269 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3270 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3271 gimple_set_uid (g, gimple_uid (cast_stmt));
3272 gimple_set_visited (g, true);
3273 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3274 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3275 if (is_gimple_debug (use_stmt))
3276 continue;
3277 else if (gimple_code (use_stmt) == GIMPLE_COND
3278 || gimple_code (use_stmt) == GIMPLE_PHI)
3279 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3280 SET_USE (use_p, new_lhs);
3281 else
3282 gcc_unreachable ();
3286 if (bb == first_bb)
3287 break;
3289 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3291 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3292 && bbinfo[idx].op == NULL_TREE
3293 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3295 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3296 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3297 gimple_cond_make_false (cond_stmt);
3298 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3299 gimple_cond_make_true (cond_stmt);
3300 else
3302 gimple_cond_set_code (cond_stmt, NE_EXPR);
3303 gimple_cond_set_lhs (cond_stmt,
3304 ops[bbinfo[idx].first_idx]->op);
3305 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3307 update_stmt (cond_stmt);
3309 if (bb == first_bb)
3310 break;
3315 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3316 of STMT in its operands. This is also known as a "destructive
3317 update" operation. */
3319 static bool
3320 is_phi_for_stmt (gimple *stmt, tree operand)
3322 gimple *def_stmt;
3323 gphi *def_phi;
3324 tree lhs;
3325 use_operand_p arg_p;
3326 ssa_op_iter i;
3328 if (TREE_CODE (operand) != SSA_NAME)
3329 return false;
3331 lhs = gimple_assign_lhs (stmt);
3333 def_stmt = SSA_NAME_DEF_STMT (operand);
3334 def_phi = dyn_cast <gphi *> (def_stmt);
3335 if (!def_phi)
3336 return false;
3338 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3339 if (lhs == USE_FROM_PTR (arg_p))
3340 return true;
3341 return false;
3344 /* Remove def stmt of VAR if VAR has zero uses and recurse
3345 on rhs1 operand if so. */
3347 static void
3348 remove_visited_stmt_chain (tree var)
3350 gimple *stmt;
3351 gimple_stmt_iterator gsi;
3353 while (1)
3355 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3356 return;
3357 stmt = SSA_NAME_DEF_STMT (var);
3358 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3360 var = gimple_assign_rhs1 (stmt);
3361 gsi = gsi_for_stmt (stmt);
3362 reassoc_remove_stmt (&gsi);
3363 release_defs (stmt);
3365 else
3366 return;
3370 /* This function checks three consecutive operands in
3371 the passed operands vector OPS starting from OPINDEX and
3372 swaps two operands if it is profitable for the binary operation
3373 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3375 We pair ops with the same rank if possible.
3377 The alternative we try is to see if STMT is a destructive
3378 update style statement, which is like:
3379 b = phi (a, ...)
3380 a = c + b;
3381 In that case, we want to use the destructive update form to
3382 expose the possible vectorizer sum reduction opportunity.
3383 In that case, the third operand will be the phi node. This
3384 check is not performed if STMT is null.
3386 We could, of course, try to be better as noted above, and do a
3387 lot of work to try to find these opportunities in >3 operand
3388 cases, but it is unlikely to be worth it. */
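/* For example (hypothetical ranks): if OPS[OPINDEX..OPINDEX+2] have ranks
   (5, 5, 3), the first and third entries are swapped so that the pair
   actually consumed by the binary statement, OPS[OPINDEX + 1] and
   OPS[OPINDEX + 2], ends up holding the two equal-rank operands.  */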
3390 static void
3391 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
3392 unsigned int opindex, gimple *stmt)
3394 operand_entry *oe1, *oe2, *oe3;
3396 oe1 = ops[opindex];
3397 oe2 = ops[opindex + 1];
3398 oe3 = ops[opindex + 2];
3400 if ((oe1->rank == oe2->rank
3401 && oe2->rank != oe3->rank)
3402 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3403 && !is_phi_for_stmt (stmt, oe1->op)
3404 && !is_phi_for_stmt (stmt, oe2->op)))
3406 operand_entry temp = *oe3;
3407 oe3->op = oe1->op;
3408 oe3->rank = oe1->rank;
3409 oe1->op = temp.op;
3410 oe1->rank = temp.rank;
3412 else if ((oe1->rank == oe3->rank
3413 && oe2->rank != oe3->rank)
3414 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3415 && !is_phi_for_stmt (stmt, oe1->op)
3416 && !is_phi_for_stmt (stmt, oe3->op)))
3418 operand_entry temp = *oe2;
3419 oe2->op = oe1->op;
3420 oe2->rank = oe1->rank;
3421 oe1->op = temp.op;
3422 oe1->rank = temp.rank;
3426 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3427 two definitions, otherwise return STMT. */
3429 static inline gimple *
3430 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
3432 if (TREE_CODE (rhs1) == SSA_NAME
3433 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3434 stmt = SSA_NAME_DEF_STMT (rhs1);
3435 if (TREE_CODE (rhs2) == SSA_NAME
3436 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3437 stmt = SSA_NAME_DEF_STMT (rhs2);
3438 return stmt;
3441 /* Recursively rewrite our linearized statements so that the operators
3442 match those in OPS[OPINDEX], putting the computation in rank
3443 order. Return new lhs. */
3445 static tree
3446 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
3447 vec<operand_entry *> ops, bool changed)
3449 tree rhs1 = gimple_assign_rhs1 (stmt);
3450 tree rhs2 = gimple_assign_rhs2 (stmt);
3451 tree lhs = gimple_assign_lhs (stmt);
3452 operand_entry *oe;
3454 /* The final recursion case for this function is that you have
3455 exactly two operations left.
3456 If we had exactly one op in the entire list to start with, we
3457 would have never called this function, and the tail recursion
3458 rewrites them one at a time. */
3459 if (opindex + 2 == ops.length ())
3461 operand_entry *oe1, *oe2;
3463 oe1 = ops[opindex];
3464 oe2 = ops[opindex + 1];
3466 if (rhs1 != oe1->op || rhs2 != oe2->op)
3468 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3469 unsigned int uid = gimple_uid (stmt);
3471 if (dump_file && (dump_flags & TDF_DETAILS))
3473 fprintf (dump_file, "Transforming ");
3474 print_gimple_stmt (dump_file, stmt, 0, 0);
3477 /* Even when changed is false, reassociation could have e.g. removed
3478 some redundant operations, so unless we are just swapping the
3479 arguments or unless there is no change at all (then we just
3480 return lhs), force creation of a new SSA_NAME. */
3481 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
3483 gimple *insert_point
3484 = find_insert_point (stmt, oe1->op, oe2->op);
3485 lhs = make_ssa_name (TREE_TYPE (lhs));
3486 stmt
3487 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3488 oe1->op, oe2->op);
3489 gimple_set_uid (stmt, uid);
3490 gimple_set_visited (stmt, true);
3491 if (insert_point == gsi_stmt (gsi))
3492 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3493 else
3494 insert_stmt_after (stmt, insert_point);
3496 else
3498 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3499 == stmt);
3500 gimple_assign_set_rhs1 (stmt, oe1->op);
3501 gimple_assign_set_rhs2 (stmt, oe2->op);
3502 update_stmt (stmt);
3505 if (rhs1 != oe1->op && rhs1 != oe2->op)
3506 remove_visited_stmt_chain (rhs1);
3508 if (dump_file && (dump_flags & TDF_DETAILS))
3510 fprintf (dump_file, " into ");
3511 print_gimple_stmt (dump_file, stmt, 0, 0);
3514 return lhs;
3517 /* If we hit here, we should have 3 or more ops left. */
3518 gcc_assert (opindex + 2 < ops.length ());
3520 /* Rewrite the next operator. */
3521 oe = ops[opindex];
3523 /* Recurse on the LHS of the binary operator, which is guaranteed to
3524 be the non-leaf side. */
3525 tree new_rhs1
3526 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3527 changed || oe->op != rhs2);
3529 if (oe->op != rhs2 || new_rhs1 != rhs1)
3531 if (dump_file && (dump_flags & TDF_DETAILS))
3533 fprintf (dump_file, "Transforming ");
3534 print_gimple_stmt (dump_file, stmt, 0, 0);
3537 /* If changed is false, this is either opindex == 0
3538 or all outer rhs2's were equal to corresponding oe->op,
3539 and powi_result is NULL.
3540 That means lhs is equivalent before and after reassociation.
3541 Otherwise ensure the old lhs SSA_NAME is not reused and
3542 create a new stmt as well, so that any debug stmts will be
3543 properly adjusted. */
3544 if (changed)
3546 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3547 unsigned int uid = gimple_uid (stmt);
3548 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3550 lhs = make_ssa_name (TREE_TYPE (lhs));
3551 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3552 new_rhs1, oe->op);
3553 gimple_set_uid (stmt, uid);
3554 gimple_set_visited (stmt, true);
3555 if (insert_point == gsi_stmt (gsi))
3556 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3557 else
3558 insert_stmt_after (stmt, insert_point);
3560 else
3562 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3563 == stmt);
3564 gimple_assign_set_rhs1 (stmt, new_rhs1);
3565 gimple_assign_set_rhs2 (stmt, oe->op);
3566 update_stmt (stmt);
3569 if (dump_file && (dump_flags & TDF_DETAILS))
3571 fprintf (dump_file, " into ");
3572 print_gimple_stmt (dump_file, stmt, 0, 0);
3575 return lhs;
3578 /* Find out how many cycles we need to compute a statements chain.
3579 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
3580 maximum number of independent statements we may execute per cycle. */
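/* For instance (hypothetical numbers): with OPS_NUM == 10 and
   CPU_WIDTH == 2 the first phase takes 10 / (2 * 2) == 2 cycles and
   leaves 10 - 2 * 2 == 6 operands, and halving those takes another
   floor_log2 (6) + 1 == 3 cycles, so 5 cycles in total.  */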
3582 static int
3583 get_required_cycles (int ops_num, int cpu_width)
3585 int res;
3586 int elog;
3587 unsigned int rest;
3589 /* While we have more than 2 * cpu_width operands
3590 we may reduce the number of operands by cpu_width
3591 per cycle. */
3592 res = ops_num / (2 * cpu_width);
3594 /* The remaining operand count may be halved each cycle
3595 until we have only one operand. */
3596 rest = (unsigned)(ops_num - res * cpu_width);
3597 elog = exact_log2 (rest);
3598 if (elog >= 0)
3599 res += elog;
3600 else
3601 res += floor_log2 (rest) + 1;
3603 return res;
3606 /* Returns an optimal number of registers to use for computation of
3607 given statements. */
3609 static int
3610 get_reassociation_width (int ops_num, enum tree_code opc,
3611 machine_mode mode)
3613 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3614 int width;
3615 int width_min;
3616 int cycles_best;
3618 if (param_width > 0)
3619 width = param_width;
3620 else
3621 width = targetm.sched.reassociation_width (opc, mode);
3623 if (width == 1)
3624 return width;
3626 /* Get the minimal time required for sequence computation. */
3627 cycles_best = get_required_cycles (ops_num, width);
3629 /* Check if we may use less width and still compute the sequence in
3630 the same time. This will allow us to reduce register usage.
3631 get_required_cycles is monotonically increasing with lower width
3632 so we can perform a binary search for the minimal width that still
3633 results in the optimal cycle count. */
3634 width_min = 1;
3635 while (width > width_min)
3637 int width_mid = (width + width_min) / 2;
3639 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3640 width = width_mid;
3641 else if (width_min < width_mid)
3642 width_min = width_mid;
3643 else
3644 break;
3647 return width;
3650 /* Recursively rewrite our linearized statements so that the operators
3651 match those in OPS[OPINDEX], putting the computation in rank
3652 order and trying to allow operations to be executed in
3653 parallel. */
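/* As an illustration (hypothetical operands): with WIDTH == 2 and four
   operands a, b, c and d this emits roughly
     _1 = c + d;
     _2 = a + b;
     lhs = _1 + _2;
   so the two partial sums are independent and may issue in the same
   cycle, instead of the serial chain ((a + b) + c) + d.  */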
3655 static void
3656 rewrite_expr_tree_parallel (gassign *stmt, int width,
3657 vec<operand_entry *> ops)
3659 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3660 int op_num = ops.length ();
3661 int stmt_num = op_num - 1;
3662 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
3663 int op_index = op_num - 1;
3664 int stmt_index = 0;
3665 int ready_stmts_end = 0;
3666 int i = 0;
3667 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3669 /* We start expression rewriting from the top statements.
3670 So, in this loop we create a full list of statements
3671 we will work with. */
3672 stmts[stmt_num - 1] = stmt;
3673 for (i = stmt_num - 2; i >= 0; i--)
3674 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3676 for (i = 0; i < stmt_num; i++)
3678 tree op1, op2;
3680 /* Determine whether we should use results of
3681 already handled statements or not. */
3682 if (ready_stmts_end == 0
3683 && (i - stmt_index >= width || op_index < 1))
3684 ready_stmts_end = i;
3686 /* Now we choose operands for the next statement. A nonzero
3687 value of ready_stmts_end here means that we should use
3688 the results of already generated statements as new operands. */
3689 if (ready_stmts_end > 0)
3691 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3692 if (ready_stmts_end > stmt_index)
3693 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3694 else if (op_index >= 0)
3695 op2 = ops[op_index--]->op;
3696 else
3698 gcc_assert (stmt_index < i);
3699 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3702 if (stmt_index >= ready_stmts_end)
3703 ready_stmts_end = 0;
3705 else
3707 if (op_index > 1)
3708 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3709 op2 = ops[op_index--]->op;
3710 op1 = ops[op_index--]->op;
3713 /* If we emit the last statement then we should put
3714 operands into the last statement. It will also
3715 break the loop. */
3716 if (op_index < 0 && stmt_index == i)
3717 i = stmt_num - 1;
3719 if (dump_file && (dump_flags & TDF_DETAILS))
3721 fprintf (dump_file, "Transforming ");
3722 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3725 /* We keep original statement only for the last one. All
3726 others are recreated. */
3727 if (i == stmt_num - 1)
3729 gimple_assign_set_rhs1 (stmts[i], op1);
3730 gimple_assign_set_rhs2 (stmts[i], op2);
3731 update_stmt (stmts[i]);
3733 else
3734 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3736 if (dump_file && (dump_flags & TDF_DETAILS))
3738 fprintf (dump_file, " into ");
3739 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3743 remove_visited_stmt_chain (last_rhs1);
3746 /* Transform STMT, which is really (A + B) + (C + D) into the left
3747 linear form, ((A+B)+C)+D.
3748 Recurse on D if necessary. */
3750 static void
3751 linearize_expr (gimple *stmt)
3753 gimple_stmt_iterator gsi;
3754 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3755 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3756 gimple *oldbinrhs = binrhs;
3757 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3758 gimple *newbinrhs = NULL;
3759 struct loop *loop = loop_containing_stmt (stmt);
3760 tree lhs = gimple_assign_lhs (stmt);
3762 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3763 && is_reassociable_op (binrhs, rhscode, loop));
3765 gsi = gsi_for_stmt (stmt);
3767 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3768 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
3769 gimple_assign_rhs_code (binrhs),
3770 gimple_assign_lhs (binlhs),
3771 gimple_assign_rhs2 (binrhs));
3772 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3773 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3774 gimple_set_uid (binrhs, gimple_uid (stmt));
3776 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3777 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3779 if (dump_file && (dump_flags & TDF_DETAILS))
3781 fprintf (dump_file, "Linearized: ");
3782 print_gimple_stmt (dump_file, stmt, 0, 0);
3785 reassociate_stats.linearized++;
3786 update_stmt (stmt);
3788 gsi = gsi_for_stmt (oldbinrhs);
3789 reassoc_remove_stmt (&gsi);
3790 release_defs (oldbinrhs);
3792 gimple_set_visited (stmt, true);
3793 gimple_set_visited (binlhs, true);
3794 gimple_set_visited (binrhs, true);
3796 /* Tail recurse on the new rhs if it still needs reassociation. */
3797 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3798 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3799 want to change the algorithm while converting to tuples. */
3800 linearize_expr (stmt);
3803 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3804 it. Otherwise, return NULL. */
3806 static gimple *
3807 get_single_immediate_use (tree lhs)
3809 use_operand_p immuse;
3810 gimple *immusestmt;
3812 if (TREE_CODE (lhs) == SSA_NAME
3813 && single_imm_use (lhs, &immuse, &immusestmt)
3814 && is_gimple_assign (immusestmt))
3815 return immusestmt;
3817 return NULL;
3820 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3821 representing the negated value. Insertions of any necessary
3822 instructions go before GSI.
3823 This function is recursive in that, if you hand it "a_5" as the
3824 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3825 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3827 static tree
3828 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3830 gimple *negatedefstmt = NULL;
3831 tree resultofnegate;
3832 gimple_stmt_iterator gsi;
3833 unsigned int uid;
3835 /* If we are trying to negate a name, defined by an add, negate the
3836 add operands instead. */
3837 if (TREE_CODE (tonegate) == SSA_NAME)
3838 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3839 if (TREE_CODE (tonegate) == SSA_NAME
3840 && is_gimple_assign (negatedefstmt)
3841 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3842 && has_single_use (gimple_assign_lhs (negatedefstmt))
3843 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3845 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3846 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3847 tree lhs = gimple_assign_lhs (negatedefstmt);
3848 gimple *g;
3850 gsi = gsi_for_stmt (negatedefstmt);
3851 rhs1 = negate_value (rhs1, &gsi);
3853 gsi = gsi_for_stmt (negatedefstmt);
3854 rhs2 = negate_value (rhs2, &gsi);
3856 gsi = gsi_for_stmt (negatedefstmt);
3857 lhs = make_ssa_name (TREE_TYPE (lhs));
3858 gimple_set_visited (negatedefstmt, true);
3859 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
3860 gimple_set_uid (g, gimple_uid (negatedefstmt));
3861 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3862 return lhs;
3865 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3866 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3867 NULL_TREE, true, GSI_SAME_STMT);
3868 gsi = *gsip;
3869 uid = gimple_uid (gsi_stmt (gsi));
3870 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3872 gimple *stmt = gsi_stmt (gsi);
3873 if (gimple_uid (stmt) != 0)
3874 break;
3875 gimple_set_uid (stmt, uid);
3877 return resultofnegate;
3880 /* Return true if we should break up the subtract in STMT into an add
3881 with negate. This is true when the subtract operands are really
3882 adds, or the subtract itself is used in an add expression. In
3883 either case, breaking up the subtract into an add with negate
3884 exposes the adds to reassociation. */
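/* For example (hypothetical source): in
     x = a - b;
     y = x + c;
   the subtract feeds an add, so rewriting it as x = a + -b exposes
   a, -b and c to reassociation as one PLUS_EXPR chain.  */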
3886 static bool
3887 should_break_up_subtract (gimple *stmt)
3889 tree lhs = gimple_assign_lhs (stmt);
3890 tree binlhs = gimple_assign_rhs1 (stmt);
3891 tree binrhs = gimple_assign_rhs2 (stmt);
3892 gimple *immusestmt;
3893 struct loop *loop = loop_containing_stmt (stmt);
3895 if (TREE_CODE (binlhs) == SSA_NAME
3896 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3897 return true;
3899 if (TREE_CODE (binrhs) == SSA_NAME
3900 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3901 return true;
3903 if (TREE_CODE (lhs) == SSA_NAME
3904 && (immusestmt = get_single_immediate_use (lhs))
3905 && is_gimple_assign (immusestmt)
3906 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3907 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3908 return true;
3909 return false;
3912 /* Transform STMT from A - B into A + -B. */
3914 static void
3915 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
3917 tree rhs1 = gimple_assign_rhs1 (stmt);
3918 tree rhs2 = gimple_assign_rhs2 (stmt);
3920 if (dump_file && (dump_flags & TDF_DETAILS))
3922 fprintf (dump_file, "Breaking up subtract ");
3923 print_gimple_stmt (dump_file, stmt, 0, 0);
3926 rhs2 = negate_value (rhs2, gsip);
3927 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3928 update_stmt (stmt);
3931 /* Determine whether STMT is a builtin call that raises an SSA name
3932 to an integer power and has only one use. If so, and this is early
3933 reassociation and unsafe math optimizations are permitted, place
3934 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3935 If any of these conditions does not hold, return FALSE. */
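/* For example (values are illustrative only): given

	t = __builtin_pow (x, 4.0);

   under -funsafe-math-optimizations with math-errno disabled, on the
   first pass instance and provided T has a single use, *BASE is set to
   X and *EXPONENT to 4.  A call like __builtin_pow (x, 2.5) is rejected
   because 2.5 does not round-trip through an integer, and exponents
   below two are rejected as not worth expanding.  */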
3937 static bool
3938 acceptable_pow_call (gimple *stmt, tree *base, HOST_WIDE_INT *exponent)
3940 tree fndecl, arg1;
3941 REAL_VALUE_TYPE c, cint;
3943 if (!first_pass_instance
3944 || !flag_unsafe_math_optimizations
3945 || !is_gimple_call (stmt)
3946 || !has_single_use (gimple_call_lhs (stmt)))
3947 return false;
3949 fndecl = gimple_call_fndecl (stmt);
3951 if (!fndecl
3952 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3953 return false;
3955 switch (DECL_FUNCTION_CODE (fndecl))
3957 CASE_FLT_FN (BUILT_IN_POW):
3958 if (flag_errno_math)
3959 return false;
3961 *base = gimple_call_arg (stmt, 0);
3962 arg1 = gimple_call_arg (stmt, 1);
3964 if (TREE_CODE (arg1) != REAL_CST)
3965 return false;
3967 c = TREE_REAL_CST (arg1);
3969 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3970 return false;
3972 *exponent = real_to_integer (&c);
3973 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
3974 if (!real_identical (&c, &cint))
3975 return false;
3977 break;
3979 CASE_FLT_FN (BUILT_IN_POWI):
3980 *base = gimple_call_arg (stmt, 0);
3981 arg1 = gimple_call_arg (stmt, 1);
3983 if (!tree_fits_shwi_p (arg1))
3984 return false;
3986 *exponent = tree_to_shwi (arg1);
3987 break;
3989 default:
3990 return false;
3993 /* Expanding negative exponents is generally unproductive, so we don't
3994 complicate matters with those. Exponents of zero and one should
3995 have been handled by expression folding. */
3996 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3997 return false;
3999 return true;
4002 /* Recursively linearize a binary expression that is the RHS of STMT.
4003 Place the operands of the expression tree in the vector named OPS. */
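/* Note that in a MULT_EXPR chain, operands defined by single-use
   __builtin_pow/powi calls with an acceptable integral exponent (see
   acceptable_pow_call) are recorded through add_repeat_to_ops_vec as
   repeated-factor entries rather than as plain operands, so that
   attempt_builtin_powi can combine them later.  */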
4005 static void
4006 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
4007 bool is_associative, bool set_visited)
4009 tree binlhs = gimple_assign_rhs1 (stmt);
4010 tree binrhs = gimple_assign_rhs2 (stmt);
4011 gimple *binlhsdef = NULL, *binrhsdef = NULL;
4012 bool binlhsisreassoc = false;
4013 bool binrhsisreassoc = false;
4014 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4015 struct loop *loop = loop_containing_stmt (stmt);
4016 tree base = NULL_TREE;
4017 HOST_WIDE_INT exponent = 0;
4019 if (set_visited)
4020 gimple_set_visited (stmt, true);
4022 if (TREE_CODE (binlhs) == SSA_NAME)
4024 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4025 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4026 && !stmt_could_throw_p (binlhsdef));
4029 if (TREE_CODE (binrhs) == SSA_NAME)
4031 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4032 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4033 && !stmt_could_throw_p (binrhsdef));
4036 /* If the LHS is not reassociable, but the RHS is, we need to swap
4037 them. If neither is reassociable, there is nothing we can do, so
4038 just put them in the ops vector. If the LHS is reassociable,
4039 linearize it. If both are reassociable, then linearize the RHS
4040 and the LHS. */
4042 if (!binlhsisreassoc)
4044 /* If this is a non-associative operation, such as division, give up. */
4045 if (!is_associative)
4047 add_to_ops_vec (ops, binrhs);
4048 return;
4051 if (!binrhsisreassoc)
4053 if (rhscode == MULT_EXPR
4054 && TREE_CODE (binrhs) == SSA_NAME
4055 && acceptable_pow_call (binrhsdef, &base, &exponent))
4057 add_repeat_to_ops_vec (ops, base, exponent);
4058 gimple_set_visited (binrhsdef, true);
4060 else
4061 add_to_ops_vec (ops, binrhs);
4063 if (rhscode == MULT_EXPR
4064 && TREE_CODE (binlhs) == SSA_NAME
4065 && acceptable_pow_call (binlhsdef, &base, &exponent))
4067 add_repeat_to_ops_vec (ops, base, exponent);
4068 gimple_set_visited (binlhsdef, true);
4070 else
4071 add_to_ops_vec (ops, binlhs);
4073 return;
4076 if (dump_file && (dump_flags & TDF_DETAILS))
4078 fprintf (dump_file, "swapping operands of ");
4079 print_gimple_stmt (dump_file, stmt, 0, 0);
4082 swap_ssa_operands (stmt,
4083 gimple_assign_rhs1_ptr (stmt),
4084 gimple_assign_rhs2_ptr (stmt));
4085 update_stmt (stmt);
4087 if (dump_file && (dump_flags & TDF_DETAILS))
4089 fprintf (dump_file, " is now ");
4090 print_gimple_stmt (dump_file, stmt, 0, 0);
4093 /* We want to make it so the lhs is always the reassociative op,
4094 so swap. */
4095 std::swap (binlhs, binrhs);
4097 else if (binrhsisreassoc)
4099 linearize_expr (stmt);
4100 binlhs = gimple_assign_rhs1 (stmt);
4101 binrhs = gimple_assign_rhs2 (stmt);
4104 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4105 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4106 rhscode, loop));
4107 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4108 is_associative, set_visited);
4110 if (rhscode == MULT_EXPR
4111 && TREE_CODE (binrhs) == SSA_NAME
4112 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
4114 add_repeat_to_ops_vec (ops, base, exponent);
4115 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
4117 else
4118 add_to_ops_vec (ops, binrhs);
4121 /* Repropagate the negates back into subtracts, since no other pass
4122 currently does it. */
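/* E.g., where break_up_subtract has earlier produced

	t = -b;
	x = a + t;

   the PLUS_EXPR case below turns the addition back into x = a - b,
   leaving the explicit negate unused.  */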
4124 static void
4125 repropagate_negates (void)
4127 unsigned int i = 0;
4128 tree negate;
4130 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4132 gimple *user = get_single_immediate_use (negate);
4134 if (!user || !is_gimple_assign (user))
4135 continue;
4137 /* The negate operand can be either operand of a PLUS_EXPR
4138 (it can be the LHS if the RHS is a constant for example).
4140 Force the negate operand to the RHS of the PLUS_EXPR, then
4141 transform the PLUS_EXPR into a MINUS_EXPR. */
4142 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4144 /* If the negated operand appears on the LHS of the
4145 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4146 to force the negated operand to the RHS of the PLUS_EXPR. */
4147 if (gimple_assign_rhs1 (user) == negate)
4149 swap_ssa_operands (user,
4150 gimple_assign_rhs1_ptr (user),
4151 gimple_assign_rhs2_ptr (user));
4154 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4155 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4156 if (gimple_assign_rhs2 (user) == negate)
4158 tree rhs1 = gimple_assign_rhs1 (user);
4159 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
4160 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4161 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4162 update_stmt (user);
4165 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4167 if (gimple_assign_rhs1 (user) == negate)
4169 /* We have
4170 x = -a
4171 y = x - b
4172 which we transform into
4173 x = a + b
4174 y = -x .
4175 This pushes down the negate which we possibly can merge
4176 into some other operation, hence insert it into the
4177 plus_negates vector. */
4178 gimple *feed = SSA_NAME_DEF_STMT (negate);
4179 tree a = gimple_assign_rhs1 (feed);
4180 tree b = gimple_assign_rhs2 (user);
4181 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4182 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4183 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4184 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
4185 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4186 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4187 user = gsi_stmt (gsi2);
4188 update_stmt (user);
4189 reassoc_remove_stmt (&gsi);
4190 release_defs (feed);
4191 plus_negates.safe_push (gimple_assign_lhs (user));
4193 else
4195 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4196 rid of one operation. */
4197 gimple *feed = SSA_NAME_DEF_STMT (negate);
4198 tree a = gimple_assign_rhs1 (feed);
4199 tree rhs1 = gimple_assign_rhs1 (user);
4200 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4201 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4202 update_stmt (gsi_stmt (gsi));
4208 /* Returns true if OP is of a type for which we can do reassociation.
4209 That is for wrapping integral types, for non-saturating fixed-point
4210 types, and for floating-point types when associative-math is enabled. */
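/* E.g., unsigned arithmetic always qualifies; plain signed arithmetic
   does not, because its overflow is undefined, unless -fwrapv is in
   effect; and floating-point arithmetic qualifies only under
   -fassociative-math.  */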
4212 static bool
4213 can_reassociate_p (tree op)
4215 tree type = TREE_TYPE (op);
4216 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4217 || NON_SAT_FIXED_POINT_TYPE_P (type)
4218 || (flag_associative_math && FLOAT_TYPE_P (type)))
4219 return true;
4220 return false;
4223 /* Break up subtract operations in block BB.
4225 We do this top down because we don't know whether the subtract is
4226 part of a possible chain of reassociation except at the top.
4228 IE given
4229 d = f + g
4230 c = a + e
4231 b = c - d
4232 q = b - r
4233 k = t - q
4235 we want to break up k = t - q, but we won't until we've transformed q
4236 = b - r, which won't be broken up until we transform b = c - d.
4238 En passant, clear the GIMPLE visited flag on every statement
4239 and set UIDs within each basic block. */
4241 static void
4242 break_up_subtract_bb (basic_block bb)
4244 gimple_stmt_iterator gsi;
4245 basic_block son;
4246 unsigned int uid = 1;
4248 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4250 gimple *stmt = gsi_stmt (gsi);
4251 gimple_set_visited (stmt, false);
4252 gimple_set_uid (stmt, uid++);
4254 if (!is_gimple_assign (stmt)
4255 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4256 continue;
4258 /* Look for simple gimple subtract operations. */
4259 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4261 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4262 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4263 continue;
4265 /* Check for a subtract used only in an addition. If this
4266 is the case, transform it into add of a negate for better
4267 reassociation. IE transform C = A-B into C = A + -B if C
4268 is only used in an addition. */
4269 if (should_break_up_subtract (stmt))
4270 break_up_subtract (stmt, &gsi);
4272 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4273 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4274 plus_negates.safe_push (gimple_assign_lhs (stmt));
4276 for (son = first_dom_son (CDI_DOMINATORS, bb);
4277 son;
4278 son = next_dom_son (CDI_DOMINATORS, son))
4279 break_up_subtract_bb (son);
4282 /* Used for repeated factor analysis. */
4283 struct repeat_factor
4285 /* An SSA name that occurs in a multiply chain. */
4286 tree factor;
4288 /* Cached rank of the factor. */
4289 unsigned rank;
4291 /* Number of occurrences of the factor in the chain. */
4292 HOST_WIDE_INT count;
4294 /* An SSA name representing the product of this factor and
4295 all factors appearing later in the repeated factor vector. */
4296 tree repr;
4300 static vec<repeat_factor> repeat_factor_vec;
4302 /* Used for sorting the repeat factor vector. Sort primarily by
4303 ascending occurrence count, secondarily by descending rank. */
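/* For instance, factors with (count, rank) pairs (1, 7), (3, 2) and
   (3, 5) sort as (1, 7), (3, 5), (3, 2): lower counts first, and among
   equal counts the higher-ranked factor first.  */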
4305 static int
4306 compare_repeat_factors (const void *x1, const void *x2)
4308 const repeat_factor *rf1 = (const repeat_factor *) x1;
4309 const repeat_factor *rf2 = (const repeat_factor *) x2;
4311 if (rf1->count != rf2->count)
4312 return rf1->count - rf2->count;
4314 return rf2->rank - rf1->rank;
4317 /* Look for repeated operands in OPS in the multiply tree rooted at
4318 STMT. Replace them with an optimal sequence of multiplies and powi
4319 builtin calls, and remove the used operands from OPS. Return an
4320 SSA name representing the value of the replacement sequence. */
4322 static tree
4323 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
4325 unsigned i, j, vec_len;
4326 int ii;
4327 operand_entry *oe;
4328 repeat_factor *rf1, *rf2;
4329 repeat_factor rfnew;
4330 tree result = NULL_TREE;
4331 tree target_ssa, iter_result;
4332 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4333 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4334 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4335 gimple *mul_stmt, *pow_stmt;
4337 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4338 target. */
4339 if (!powi_fndecl)
4340 return NULL_TREE;
4342 /* Allocate the repeated factor vector. */
4343 repeat_factor_vec.create (10);
4345 /* Scan the OPS vector for all SSA names in the product and build
4346 up a vector of occurrence counts for each factor. */
4347 FOR_EACH_VEC_ELT (*ops, i, oe)
4349 if (TREE_CODE (oe->op) == SSA_NAME)
4351 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4353 if (rf1->factor == oe->op)
4355 rf1->count += oe->count;
4356 break;
4360 if (j >= repeat_factor_vec.length ())
4362 rfnew.factor = oe->op;
4363 rfnew.rank = oe->rank;
4364 rfnew.count = oe->count;
4365 rfnew.repr = NULL_TREE;
4366 repeat_factor_vec.safe_push (rfnew);
4371 /* Sort the repeated factor vector by (a) increasing occurrence count,
4372 and (b) decreasing rank. */
4373 repeat_factor_vec.qsort (compare_repeat_factors);
4375 /* It is generally best to combine as many base factors as possible
4376 into a product before applying __builtin_powi to the result.
4377 However, the sort order chosen for the repeated factor vector
4378 allows us to cache partial results for the product of the base
4379 factors for subsequent use. When we already have a cached partial
4380 result from a previous iteration, it is best to make use of it
4381 before looking for another __builtin_pow opportunity.
4383 As an example, consider x * x * y * y * y * z * z * z * z.
4384 We want to first compose the product x * y * z, raise it to the
4385 second power, then multiply this by y * z, and finally multiply
4386 by z. This can be done in 5 multiplies provided we cache y * z
4387 for use in both expressions:
4389 t1 = y * z
4390 t2 = t1 * x
4391 t3 = t2 * t2
4392 t4 = t1 * t3
4393 result = t4 * z
4395 If we instead ignored the cached y * z and first multiplied by
4396 the __builtin_pow opportunity z * z, we would get the inferior:
4398 t1 = y * z
4399 t2 = t1 * x
4400 t3 = t2 * t2
4401 t4 = z * z
4402 t5 = t3 * t4
4403 result = t5 * y */
4405 vec_len = repeat_factor_vec.length ();
4407 /* Repeatedly look for opportunities to create a builtin_powi call. */
4408 while (true)
4410 HOST_WIDE_INT power;
4412 /* First look for the largest cached product of factors from
4413 preceding iterations. If found, create a builtin_powi for
4414 it if the minimum occurrence count for its factors is at
4415 least 2, or just use this cached product as our next
4416 multiplicand if the minimum occurrence count is 1. */
4417 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4419 if (rf1->repr && rf1->count > 0)
4420 break;
4423 if (j < vec_len)
4425 power = rf1->count;
4427 if (power == 1)
4429 iter_result = rf1->repr;
4431 if (dump_file && (dump_flags & TDF_DETAILS))
4433 unsigned elt;
4434 repeat_factor *rf;
4435 fputs ("Multiplying by cached product ", dump_file);
4436 for (elt = j; elt < vec_len; elt++)
4438 rf = &repeat_factor_vec[elt];
4439 print_generic_expr (dump_file, rf->factor, 0);
4440 if (elt < vec_len - 1)
4441 fputs (" * ", dump_file);
4443 fputs ("\n", dump_file);
4446 else
4448 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4449 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4450 build_int_cst (integer_type_node,
4451 power));
4452 gimple_call_set_lhs (pow_stmt, iter_result);
4453 gimple_set_location (pow_stmt, gimple_location (stmt));
4454 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4456 if (dump_file && (dump_flags & TDF_DETAILS))
4458 unsigned elt;
4459 repeat_factor *rf;
4460 fputs ("Building __builtin_pow call for cached product (",
4461 dump_file);
4462 for (elt = j; elt < vec_len; elt++)
4464 rf = &repeat_factor_vec[elt];
4465 print_generic_expr (dump_file, rf->factor, 0);
4466 if (elt < vec_len - 1)
4467 fputs (" * ", dump_file);
4469 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
4470 power);
4474 else
4476 /* Otherwise, find the first factor in the repeated factor
4477 vector whose occurrence count is at least 2. If no such
4478 factor exists, there are no builtin_powi opportunities
4479 remaining. */
4480 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4482 if (rf1->count >= 2)
4483 break;
4486 if (j >= vec_len)
4487 break;
4489 power = rf1->count;
4491 if (dump_file && (dump_flags & TDF_DETAILS))
4493 unsigned elt;
4494 repeat_factor *rf;
4495 fputs ("Building __builtin_pow call for (", dump_file);
4496 for (elt = j; elt < vec_len; elt++)
4498 rf = &repeat_factor_vec[elt];
4499 print_generic_expr (dump_file, rf->factor, 0);
4500 if (elt < vec_len - 1)
4501 fputs (" * ", dump_file);
4503 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
4506 reassociate_stats.pows_created++;
4508 /* Visit each element of the vector in reverse order (so that
4509 high-occurrence elements are visited first, and within the
4510 same occurrence count, lower-ranked elements are visited
4511 first). Form a linear product of all elements in this order
4512 whose occurrence count is at least that of element J.
4513 Record the SSA name representing the product of each element
4514 with all subsequent elements in the vector. */
4515 if (j == vec_len - 1)
4516 rf1->repr = rf1->factor;
4517 else
4519 for (ii = vec_len - 2; ii >= (int)j; ii--)
4521 tree op1, op2;
4523 rf1 = &repeat_factor_vec[ii];
4524 rf2 = &repeat_factor_vec[ii + 1];
4526 /* Init the last factor's representative to be itself. */
4527 if (!rf2->repr)
4528 rf2->repr = rf2->factor;
4530 op1 = rf1->factor;
4531 op2 = rf2->repr;
4533 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4534 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
4535 op1, op2);
4536 gimple_set_location (mul_stmt, gimple_location (stmt));
4537 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4538 rf1->repr = target_ssa;
4540 /* Don't reprocess the multiply we just introduced. */
4541 gimple_set_visited (mul_stmt, true);
4545 /* Form a call to __builtin_powi for the maximum product
4546 just formed, raised to the power obtained earlier. */
4547 rf1 = &repeat_factor_vec[j];
4548 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4549 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4550 build_int_cst (integer_type_node,
4551 power));
4552 gimple_call_set_lhs (pow_stmt, iter_result);
4553 gimple_set_location (pow_stmt, gimple_location (stmt));
4554 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4557 /* If we previously formed at least one other builtin_powi call,
4558 form the product of this one and those others. */
4559 if (result)
4561 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4562 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
4563 result, iter_result);
4564 gimple_set_location (mul_stmt, gimple_location (stmt));
4565 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4566 gimple_set_visited (mul_stmt, true);
4567 result = new_result;
4569 else
4570 result = iter_result;
4572 /* Decrement the occurrence count of each element in the product
4573 by the count found above, and remove this many copies of each
4574 factor from OPS. */
4575 for (i = j; i < vec_len; i++)
4577 unsigned k = power;
4578 unsigned n;
4580 rf1 = &repeat_factor_vec[i];
4581 rf1->count -= power;
4583 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4585 if (oe->op == rf1->factor)
4587 if (oe->count <= k)
4589 ops->ordered_remove (n);
4590 k -= oe->count;
4592 if (k == 0)
4593 break;
4595 else
4597 oe->count -= k;
4598 break;
4605 /* At this point all elements in the repeated factor vector have a
4606 remaining occurrence count of 0 or 1, and those with a count of 1
4607 don't have cached representatives. Re-sort the ops vector and
4608 clean up. */
4609 ops->qsort (sort_by_operand_rank);
4610 repeat_factor_vec.release ();
4612 /* Return the final product computed herein. Note that there may
4613 still be some elements with single occurrence count left in OPS;
4614 those will be handled by the normal reassociation logic. */
4615 return result;
4618 /* Attempt to optimize
4619 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
4620 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
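/* E.g., 2.0 * copysign (3.0, y) becomes copysign (6.0, y); with -2.0
   instead, the same call is built and a NEGATE_EXPR of its result is
   emitted as well.  */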
4622 static void
4623 attempt_builtin_copysign (vec<operand_entry *> *ops)
4625 operand_entry *oe;
4626 unsigned int i;
4627 unsigned int length = ops->length ();
4628 tree cst = ops->last ()->op;
4630 if (length == 1 || TREE_CODE (cst) != REAL_CST)
4631 return;
4633 FOR_EACH_VEC_ELT (*ops, i, oe)
4635 if (TREE_CODE (oe->op) == SSA_NAME
4636 && has_single_use (oe->op))
4638 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
4639 if (gimple_call_builtin_p (def_stmt, BUILT_IN_NORMAL))
4641 tree fndecl = gimple_call_fndecl (def_stmt);
4642 tree arg0, arg1;
4643 switch (DECL_FUNCTION_CODE (fndecl))
4645 CASE_FLT_FN (BUILT_IN_COPYSIGN):
4646 arg0 = gimple_call_arg (def_stmt, 0);
4647 arg1 = gimple_call_arg (def_stmt, 1);
4648 /* The first argument of copysign must be a constant,
4649 otherwise there's nothing to do. */
4650 if (TREE_CODE (arg0) == REAL_CST)
4652 tree mul = const_binop (MULT_EXPR, TREE_TYPE (cst),
4653 cst, arg0);
4654 /* If we couldn't fold to a single constant, skip it.
4655 That happens e.g. for inexact multiplication when
4656 -frounding-math. */
4657 if (mul == NULL_TREE)
4658 break;
4659 /* Instead of adjusting the old DEF_STMT, let's build
4660 a new call to not leak the LHS and prevent keeping
4661 bogus debug statements. DCE will clean up the old
4662 call. */
4663 gcall *call = gimple_build_call (fndecl, 2, mul, arg1);
4664 tree lhs = make_ssa_name (TREE_TYPE (arg0));
4665 gimple_call_set_lhs (call, lhs);
4666 gimple_set_location (call, gimple_location (def_stmt));
4667 insert_stmt_after (call, def_stmt);
4668 /* We've used the constant, get rid of it. */
4669 ops->pop ();
4670 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
4671 /* Handle the CST1 < 0 case by negating the result. */
4672 if (cst1_neg)
4674 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
4675 gimple *negate_stmt
4676 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
4677 insert_stmt_after (negate_stmt, call);
4678 oe->op = negrhs;
4680 else
4681 oe->op = lhs;
4682 if (dump_file && (dump_flags & TDF_DETAILS))
4684 fprintf (dump_file, "Optimizing copysign: ");
4685 print_generic_expr (dump_file, cst, 0);
4686 fprintf (dump_file, " * ");
4687 print_generic_expr (dump_file,
4688 gimple_call_fn (def_stmt), 0);
4689 fprintf (dump_file, " (");
4690 print_generic_expr (dump_file, arg0, 0);
4691 fprintf (dump_file, ", ");
4692 print_generic_expr (dump_file, arg1, 0);
4693 fprintf (dump_file, ") into %s",
4694 cst1_neg ? "-" : "");
4695 print_generic_expr (dump_file,
4696 gimple_call_fn (def_stmt), 0);
4697 fprintf (dump_file, " (");
4698 print_generic_expr (dump_file, mul, 0);
4699 fprintf (dump_file, ", ");
4700 print_generic_expr (dump_file, arg1, 0);
4701 fprintf (dump_file, ")\n");
4703 return;
4705 break;
4706 default:
4707 break;
4714 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4716 static void
4717 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
4719 tree rhs1;
4721 if (dump_file && (dump_flags & TDF_DETAILS))
4723 fprintf (dump_file, "Transforming ");
4724 print_gimple_stmt (dump_file, stmt, 0, 0);
4727 rhs1 = gimple_assign_rhs1 (stmt);
4728 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4729 update_stmt (stmt);
4730 remove_visited_stmt_chain (rhs1);
4732 if (dump_file && (dump_flags & TDF_DETAILS))
4734 fprintf (dump_file, " into ");
4735 print_gimple_stmt (dump_file, stmt, 0, 0);
4739 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4741 static void
4742 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
4743 tree rhs1, tree rhs2)
4745 if (dump_file && (dump_flags & TDF_DETAILS))
4747 fprintf (dump_file, "Transforming ");
4748 print_gimple_stmt (dump_file, stmt, 0, 0);
4751 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4752 update_stmt (gsi_stmt (*gsi));
4753 remove_visited_stmt_chain (rhs1);
4755 if (dump_file && (dump_flags & TDF_DETAILS))
4757 fprintf (dump_file, " into ");
4758 print_gimple_stmt (dump_file, stmt, 0, 0);
4762 /* Reassociate expressions in basic block BB and its children in the
4763 post-dominator tree. */
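/* Roughly, for each reassociable binary statement the loop below
   linearizes the expression tree into an operand vector, sorts it by
   rank, runs optimize_ops_list (plus undistribute_ops_list, range-test,
   copysign and powi optimizations where applicable), and finally
   rewrites the statement, either serially via rewrite_expr_tree or,
   when a wider reassociation width is profitable, via
   rewrite_expr_tree_parallel.  */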
4765 static void
4766 reassociate_bb (basic_block bb)
4768 gimple_stmt_iterator gsi;
4769 basic_block son;
4770 gimple *stmt = last_stmt (bb);
4772 if (stmt && !gimple_visited_p (stmt))
4773 maybe_optimize_range_tests (stmt);
4775 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4777 stmt = gsi_stmt (gsi);
4779 if (is_gimple_assign (stmt)
4780 && !stmt_could_throw_p (stmt))
4782 tree lhs, rhs1, rhs2;
4783 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4785 /* If this is not a gimple binary expression, there is
4786 nothing for us to do with it. */
4787 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4788 continue;
4790 /* If this was part of an already processed statement,
4791 we don't need to touch it again. */
4792 if (gimple_visited_p (stmt))
4794 /* This statement might have become dead because of previous
4795 reassociations. */
4796 if (has_zero_uses (gimple_get_lhs (stmt)))
4798 reassoc_remove_stmt (&gsi);
4799 release_defs (stmt);
4800 /* We might end up removing the last stmt above, which
4801 leaves the iterator at the end of the sequence.
4802 Reset it to the last stmt in this case, which might
4803 still be the end of the sequence if we removed the
4804 last statement of the sequence, in which case we
4805 need to bail out. */
4806 if (gsi_end_p (gsi))
4808 gsi = gsi_last_bb (bb);
4809 if (gsi_end_p (gsi))
4810 break;
4813 continue;
4816 lhs = gimple_assign_lhs (stmt);
4817 rhs1 = gimple_assign_rhs1 (stmt);
4818 rhs2 = gimple_assign_rhs2 (stmt);
4820 /* For operations other than bit ops and min/max we can't
4821 reassociate all types. Verify that here. */
4822 if (rhs_code != BIT_IOR_EXPR
4823 && rhs_code != BIT_AND_EXPR
4824 && rhs_code != BIT_XOR_EXPR
4825 && rhs_code != MIN_EXPR
4826 && rhs_code != MAX_EXPR
4827 && (!can_reassociate_p (lhs)
4828 || !can_reassociate_p (rhs1)
4829 || !can_reassociate_p (rhs2)))
4830 continue;
4832 if (associative_tree_code (rhs_code))
4834 auto_vec<operand_entry *> ops;
4835 tree powi_result = NULL_TREE;
4837 /* There may be no immediate uses left by the time we
4838 get here because we may have eliminated them all. */
4839 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4840 continue;
4842 gimple_set_visited (stmt, true);
4843 linearize_expr_tree (&ops, stmt, true, true);
4844 ops.qsort (sort_by_operand_rank);
4845 optimize_ops_list (rhs_code, &ops);
4846 if (undistribute_ops_list (rhs_code, &ops,
4847 loop_containing_stmt (stmt)))
4849 ops.qsort (sort_by_operand_rank);
4850 optimize_ops_list (rhs_code, &ops);
4853 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4854 optimize_range_tests (rhs_code, &ops);
4856 if (rhs_code == MULT_EXPR)
4857 attempt_builtin_copysign (&ops);
4859 if (first_pass_instance
4860 && rhs_code == MULT_EXPR
4861 && flag_unsafe_math_optimizations)
4862 powi_result = attempt_builtin_powi (stmt, &ops);
4864 /* If the operand vector is now empty, all operands were
4865 consumed by the __builtin_powi optimization. */
4866 if (ops.length () == 0)
4867 transform_stmt_to_copy (&gsi, stmt, powi_result);
4868 else if (ops.length () == 1)
4870 tree last_op = ops.last ()->op;
4872 if (powi_result)
4873 transform_stmt_to_multiply (&gsi, stmt, last_op,
4874 powi_result);
4875 else
4876 transform_stmt_to_copy (&gsi, stmt, last_op);
4878 else
4880 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4881 int ops_num = ops.length ();
4882 int width = get_reassociation_width (ops_num, rhs_code, mode);
4883 tree new_lhs = lhs;
4885 if (dump_file && (dump_flags & TDF_DETAILS))
4886 fprintf (dump_file,
4887 "Width = %d was chosen for reassociation\n", width);
4889 if (width > 1
4890 && ops.length () > 3)
4891 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
4892 width, ops);
4893 else
4895 /* When there are three operands left, we want
4896 to make sure the ones that get the double
4897 binary op are chosen wisely. */
4898 int len = ops.length ();
4899 if (len >= 3)
4900 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4902 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4903 powi_result != NULL);
4906 /* If we combined some repeated factors into a
4907 __builtin_powi call, multiply that result by the
4908 reassociated operands. */
4909 if (powi_result)
4911 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4912 tree type = TREE_TYPE (lhs);
4913 tree target_ssa = make_temp_ssa_name (type, NULL,
4914 "reassocpow");
4915 gimple_set_lhs (lhs_stmt, target_ssa);
4916 update_stmt (lhs_stmt);
4917 if (lhs != new_lhs)
4918 target_ssa = new_lhs;
4919 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
4920 powi_result, target_ssa);
4921 gimple_set_location (mul_stmt, gimple_location (stmt));
4922 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4928 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4929 son;
4930 son = next_dom_son (CDI_POST_DOMINATORS, son))
4931 reassociate_bb (son);
4934 /* Add jumps around shifts for range tests turned into bit tests.
4935 For each SSA_NAME VAR we have code like:
4936 VAR = ...; // final stmt of range comparison
4937 // bit test here...;
4938 OTHERVAR = ...; // final stmt of the bit test sequence
4939 RES = VAR | OTHERVAR;
4940 Turn the above into:
4941 VAR = ...;
4942 if (VAR != 0)
4943 goto <l3>;
4944 else
4945 goto <l2>;
4946 <l2>:
4947 // bit test here...;
4948 OTHERVAR = ...;
4949 <l3>:
4950 # RES = PHI<1(l1), OTHERVAR(l2)>; */
4952 static void
4953 branch_fixup (void)
4955 tree var;
4956 unsigned int i;
4958 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
4960 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
4961 gimple *use_stmt;
4962 use_operand_p use;
4963 bool ok = single_imm_use (var, &use, &use_stmt);
4964 gcc_assert (ok
4965 && is_gimple_assign (use_stmt)
4966 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
4967 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
4969 basic_block cond_bb = gimple_bb (def_stmt);
4970 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
4971 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
4973 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
4974 gimple *g = gimple_build_cond (NE_EXPR, var,
4975 build_zero_cst (TREE_TYPE (var)),
4976 NULL_TREE, NULL_TREE);
4977 location_t loc = gimple_location (use_stmt);
4978 gimple_set_location (g, loc);
4979 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
4981 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
4982 etrue->probability = REG_BR_PROB_BASE / 2;
4983 etrue->count = cond_bb->count / 2;
4984 edge efalse = find_edge (cond_bb, then_bb);
4985 efalse->flags = EDGE_FALSE_VALUE;
4986 efalse->probability -= etrue->probability;
4987 efalse->count -= etrue->count;
4988 then_bb->count -= etrue->count;
4990 tree othervar = NULL_TREE;
4991 if (gimple_assign_rhs1 (use_stmt) == var)
4992 othervar = gimple_assign_rhs2 (use_stmt);
4993 else if (gimple_assign_rhs2 (use_stmt) == var)
4994 othervar = gimple_assign_rhs1 (use_stmt);
4995 else
4996 gcc_unreachable ();
4997 tree lhs = gimple_assign_lhs (use_stmt);
4998 gphi *phi = create_phi_node (lhs, merge_bb);
4999 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
5000 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
5001 gsi = gsi_for_stmt (use_stmt);
5002 gsi_remove (&gsi, true);
5004 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
5005 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
5007 reassoc_branch_fixups.release ();
5010 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
5011 void debug_ops_vector (vec<operand_entry *> ops);
5013 /* Dump the operand entry vector OPS to FILE. */
5015 void
5016 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
5018 operand_entry *oe;
5019 unsigned int i;
5021 FOR_EACH_VEC_ELT (ops, i, oe)
5023 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
5024 print_generic_expr (file, oe->op, 0);
5025 fprintf (file, "\n");
5029 /* Dump the operand entry vector OPS to STDERR. */
5031 DEBUG_FUNCTION void
5032 debug_ops_vector (vec<operand_entry *> ops)
5034 dump_ops_vector (stderr, ops);
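/* Drive the whole pass over the function: walk the dominator tree from
   the entry block breaking up subtracts, then walk the post-dominator
   tree from the exit block reassociating expressions.  */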
5037 static void
5038 do_reassoc (void)
5040 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5041 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
5044 /* Initialize the reassociation pass. */
5046 static void
5047 init_reassoc (void)
5049 int i;
5050 long rank = 2;
5051 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
5053 /* Find the loops, so that we can prevent moving calculations in
5054 them. */
5055 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
5057 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
5059 next_operand_entry_id = 0;
5061 /* Reverse RPO (Reverse Post Order) will give us something where
5062 deeper loops come later. */
5063 pre_and_rev_post_order_compute (NULL, bbs, false);
5064 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
5065 operand_rank = new hash_map<tree, long>;
5067 /* Give each default definition a distinct rank. This includes
5068 parameters and the static chain. Walk backwards over all
5069 SSA names so that we get proper rank ordering according
5070 to tree_swap_operands_p. */
5071 for (i = num_ssa_names - 1; i > 0; --i)
5073 tree name = ssa_name (i);
5074 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5075 insert_operand_rank (name, ++rank);
5078 /* Set up rank for each BB */
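/* Shifting the block rank into the high bits leaves headroom below
   each block's base rank for the per-expression increments that
   get_rank later applies on top of it.  */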
5079 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5080 bb_rank[bbs[i]] = ++rank << 16;
5082 free (bbs);
5083 calculate_dominance_info (CDI_POST_DOMINATORS);
5084 plus_negates = vNULL;
5087 /* Cleanup after the reassociation pass, and print stats if
5088 requested. */
5090 static void
5091 fini_reassoc (void)
5093 statistics_counter_event (cfun, "Linearized",
5094 reassociate_stats.linearized);
5095 statistics_counter_event (cfun, "Constants eliminated",
5096 reassociate_stats.constants_eliminated);
5097 statistics_counter_event (cfun, "Ops eliminated",
5098 reassociate_stats.ops_eliminated);
5099 statistics_counter_event (cfun, "Statements rewritten",
5100 reassociate_stats.rewritten);
5101 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5102 reassociate_stats.pows_encountered);
5103 statistics_counter_event (cfun, "Built-in powi calls created",
5104 reassociate_stats.pows_created);
5106 delete operand_rank;
5107 operand_entry_pool.release ();
5108 free (bb_rank);
5109 plus_negates.release ();
5110 free_dominance_info (CDI_POST_DOMINATORS);
5111 loop_optimizer_finalize ();
5114 /* Gate and execute functions for Reassociation. */
5116 static unsigned int
5117 execute_reassoc (void)
5119 init_reassoc ();
5121 do_reassoc ();
5122 repropagate_negates ();
5123 branch_fixup ();
5125 fini_reassoc ();
5126 return 0;
5129 namespace {
5131 const pass_data pass_data_reassoc =
5133 GIMPLE_PASS, /* type */
5134 "reassoc", /* name */
5135 OPTGROUP_NONE, /* optinfo_flags */
5136 TV_TREE_REASSOC, /* tv_id */
5137 ( PROP_cfg | PROP_ssa ), /* properties_required */
5138 0, /* properties_provided */
5139 0, /* properties_destroyed */
5140 0, /* todo_flags_start */
5141 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5144 class pass_reassoc : public gimple_opt_pass
5146 public:
5147 pass_reassoc (gcc::context *ctxt)
5148 : gimple_opt_pass (pass_data_reassoc, ctxt)
5151 /* opt_pass methods: */
5152 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5153 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5154 virtual unsigned int execute (function *) { return execute_reassoc (); }
5156 }; // class pass_reassoc
5158 } // anon namespace
5160 gimple_opt_pass *
5161 make_pass_reassoc (gcc::context *ctxt)
5163 return new pass_reassoc (ctxt);