gcc/tree-ssa-reassoc.c

   1 /* Reassociation for trees.
   2    Copyright (C) 2005-2014 Free Software Foundation, Inc.
   3    Contributed by Daniel Berlin <dan@dberlin.org>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 3, or (at your option)
  10 any later version.
  11
  12 GCC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "hash-table.h"
  25 #include "tm.h"
  26 #include "rtl.h"
  27 #include "tm_p.h"
  28 #include "tree.h"
  29 #include "stor-layout.h"
  30 #include "basic-block.h"
  31 #include "gimple-pretty-print.h"
  32 #include "tree-inline.h"
  33 #include "hash-map.h"
  34 #include "tree-ssa-alias.h"
  35 #include "internal-fn.h"
  36 #include "gimple-fold.h"
  37 #include "tree-eh.h"
  38 #include "gimple-expr.h"
  39 #include "is-a.h"
  40 #include "gimple.h"
  41 #include "gimple-iterator.h"
  42 #include "gimplify-me.h"
  43 #include "gimple-ssa.h"
  44 #include "tree-cfg.h"
  45 #include "tree-phinodes.h"
  46 #include "ssa-iterators.h"
  47 #include "stringpool.h"
  48 #include "tree-ssanames.h"
  49 #include "tree-ssa-loop-niter.h"
  50 #include "tree-ssa-loop.h"
  51 #include "expr.h"
  52 #include "tree-dfa.h"
  53 #include "tree-ssa.h"
  54 #include "tree-iterator.h"
  55 #include "tree-pass.h"
  56 #include "alloc-pool.h"
  57 #include "langhooks.h"
  58 #include "cfgloop.h"
  59 #include "flags.h"
  60 #include "target.h"
  61 #include "params.h"
  62 #include "diagnostic-core.h"
  63 #include "builtins.h"
  64
  65 /*  This is a simple global reassociation pass.  It is, in part, based
  66     on the LLVM pass of the same name (They do some things more/less
  67     than we do, in different orders, etc).
  68
  69     It consists of five steps:
  70
  71     1. Breaking up subtract operations into addition + negate, where
  72     it would promote the reassociation of adds.
  73
  74     2. Left linearization of the expression trees, so that (A+B)+(C+D)
  75     becomes (((A+B)+C)+D), which is easier for us to rewrite later.
  76     During linearization, we place the operands of the binary
  77     expressions into a vector of operand_entry_t
  78
  79     3. Optimization of the operand lists, eliminating things like a +
  80     -a, a & a, etc.
  81
  82     3a. Combine repeated factors with the same occurrence counts
  83     into a __builtin_powi call that will later be optimized into
  84     an optimal number of multiplies.
  85
  86     4. Rewrite the expression trees we linearized and optimized so
  87     they are in proper rank order.
  88
  89     5. Repropagate negates, as nothing else will clean it up ATM.
  90
  91     A bit of theory on #4, since nobody seems to write anything down
  92     about why it makes sense to do it the way they do it:
  93
  94     We could do this much nicer theoretically, but don't (for reasons
  95     explained after how to do it theoretically nice :P).
  96
  97     In order to promote the most redundancy elimination, you want
  98     binary expressions whose operands are the same rank (or
  99     preferably, the same value) exposed to the redundancy eliminator,
 100     for possible elimination.
 101
 102     So the way to do this if we really cared, is to build the new op
 103     tree from the leaves to the roots, merging as you go, and putting the
 104     new op on the end of the worklist, until you are left with one
 105     thing on the worklist.
 106
 107     IE if you have to rewrite the following set of operands (listed with
 108     rank in parentheses), with opcode PLUS_EXPR:
 109
 110     a (1),  b (1),  c (1),  d (2), e (2)
 111
 112
 113     We start with our merge worklist empty, and the ops list with all of
 114     those on it.
 115
 116     You want to first merge all leaves of the same rank, as much as
 117     possible.
 118
 119     So first build a binary op of
 120
 121     mergetmp = a + b, and put "mergetmp" on the merge worklist.
 122
 123     Because there is no three operand form of PLUS_EXPR, c is not going to
 124     be exposed to redundancy elimination as a rank 1 operand.
 125
 126     So you might as well throw it on the merge worklist (you could also
 127     consider it to now be a rank two operand, and merge it with d and e,
 128     but in this case, you then have evicted e from a binary op. So at
 129     least in this situation, you can't win.)
 130
 131     Then build a binary op of d + e
 132     mergetmp2 = d + e
 133
 134     and put mergetmp2 on the merge worklist.
 135
 136     so merge worklist = {mergetmp, c, mergetmp2}
 137
 138     Continue building binary ops of these operations until you have only
 139     one operation left on the worklist.
 140
 141     So we have
 142
 143     build binary op
 144     mergetmp3 = mergetmp + c
 145
 146     worklist = {mergetmp2, mergetmp3}
 147
 148     mergetmp4 = mergetmp2 + mergetmp3
 149
 150     worklist = {mergetmp4}
 151
 152     because we have one operation left, we can now just set the original
 153     statement equal to the result of that operation.
 154
 155     This will at least expose a + b  and d + e to redundancy elimination
 156     as binary operations.
 157
 158     For extra points, you can reuse the old statements to build the
 159     mergetmps, since you shouldn't run out.
 160
 161     So why don't we do this?
 162
 163     Because it's expensive, and rarely will help.  Most trees we are
 164     reassociating have 3 or less ops.  If they have 2 ops, they already
 165     will be written into a nice single binary op.  If you have 3 ops, a
 166     single simple check suffices to tell you whether the first two are of the
 167     same rank.  If so, you know to order it
 168
 169     mergetmp = op1 + op2
 170     newstmt = mergetmp + op3
 171
 172     instead of
 173     mergetmp = op2 + op3
 174     newstmt = mergetmp + op1
 175
 176     If all three are of the same rank, you can't expose them all in a
 177     single binary operator anyway, so the above is *still* the best you
 178     can do.
 179
 180     Thus, this is what we do.  When we have three ops left, we check to see
 181     what order to put them in, and call it a day.  As a nod to vector sum
 182     reduction, we check if any of the ops are really a phi node that is a
 183     destructive update for the associating op, and keep the destructive
 184     update together for vector sum reduction recognition.  */
 185
 186
 187 /* Statistics */
 188 static struct
 189 {
 190   int linearized;
 191   int constants_eliminated;
 192   int ops_eliminated;
 193   int rewritten;
 194   int pows_encountered;
 195   int pows_created;
 196 } reassociate_stats;
 197
 198 /* Operator, rank pair.  */
 199 typedef struct operand_entry
 200 {
 201   unsigned int rank;
 202   int id;
 203   tree op;
 204   unsigned int count;
 205 } *operand_entry_t;
 206
 207 static alloc_pool operand_entry_pool;
 208
 209 /* This is used to assign a unique ID to each struct operand_entry
 210    so that qsort results are identical on different hosts.  */
 211 static int next_operand_entry_id;
 212
 213 /* Starting rank number for a given basic block, so that we can rank
 214    operations using unmovable instructions in that BB based on the bb
 215    depth.  */
 216 static long *bb_rank;
 217
 218 /* Operand->rank hashtable.  */
 219 static hash_map<tree, long> *operand_rank;
 220
 221 /* Forward decls.  */
 222 static long get_rank (tree);
 223 static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
 224
 225 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
 226    possibly added by gsi_remove.  */
 227
 228 bool
 229 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
 230 {
 231   gimple stmt = gsi_stmt (*gsi);
 232
 233   if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
 234     return gsi_remove (gsi, true);
 235
 236   gimple_stmt_iterator prev = *gsi;
 237   gsi_prev (&prev);
 238   unsigned uid = gimple_uid (stmt);
 239   basic_block bb = gimple_bb (stmt);
 240   bool ret = gsi_remove (gsi, true);
 241   if (!gsi_end_p (prev))
 242     gsi_next (&prev);
 243   else
 244     prev = gsi_start_bb (bb);
 245   gimple end_stmt = gsi_stmt (*gsi);
 246   while ((stmt = gsi_stmt (prev)) != end_stmt)
 247     {
 248       gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
 249       gimple_set_uid (stmt, uid);
 250       gsi_next (&prev);
 251     }
 252   return ret;
 253 }
 254
 255 /* Bias amount for loop-carried phis.  We want this to be larger than
 256    the depth of any reassociation tree we can see, but not larger than
 257    the rank difference between two blocks.  */
 258 #define PHI_LOOP_BIAS (1 << 15)
 259
 260 /* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
 261    an innermost loop, and the phi has only a single use which is inside
 262    the loop, then the rank is the block rank of the loop latch plus an
 263    extra bias for the loop-carried dependence.  This causes expressions
 264    calculated into an accumulator variable to be independent for each
 265    iteration of the loop.  If STMT is some other phi, the rank is the
 266    block rank of its containing block.  */
 267 static long
 268 phi_rank (gimple stmt)
 269 {
 270   basic_block bb = gimple_bb (stmt);
 271   struct loop *father = bb->loop_father;
 272   tree res;
 273   unsigned i;
 274   use_operand_p use;
 275   gimple use_stmt;
 276
 277   /* We only care about real loops (those with a latch).  */
 278   if (!father->latch)
 279     return bb_rank[bb->index];
 280
 281   /* Interesting phis must be in headers of innermost loops.  */
 282   if (bb != father->header
 283       || father->inner)
 284     return bb_rank[bb->index];
 285
 286   /* Ignore virtual SSA_NAMEs.  */
 287   res = gimple_phi_result (stmt);
 288   if (virtual_operand_p (res))
 289     return bb_rank[bb->index];
 290
 291   /* The phi definition must have a single use, and that use must be
 292      within the loop.  Otherwise this isn't an accumulator pattern.  */
 293   if (!single_imm_use (res, &use, &use_stmt)
 294       || gimple_bb (use_stmt)->loop_father != father)
 295     return bb_rank[bb->index];
 296
 297   /* Look for phi arguments from within the loop.  If found, bias this phi.  */
 298   for (i = 0; i < gimple_phi_num_args (stmt); i++)
 299     {
 300       tree arg = gimple_phi_arg_def (stmt, i);
 301       if (TREE_CODE (arg) == SSA_NAME
 302           && !SSA_NAME_IS_DEFAULT_DEF (arg))
 303         {
 304           gimple def_stmt = SSA_NAME_DEF_STMT (arg);
 305           if (gimple_bb (def_stmt)->loop_father == father)
 306             return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
 307         }
 308     }
 309
 310   /* Must be an uninteresting phi.  */
 311   return bb_rank[bb->index];
 312 }
 313
 314 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
 315    loop-carried dependence of an innermost loop, return TRUE; else
 316    return FALSE.  */
 317 static bool
 318 loop_carried_phi (tree exp)
 319 {
 320   gimple phi_stmt;
 321   long block_rank;
 322
 323   if (TREE_CODE (exp) != SSA_NAME
 324       || SSA_NAME_IS_DEFAULT_DEF (exp))
 325     return false;
 326
 327   phi_stmt = SSA_NAME_DEF_STMT (exp);
 328
 329   if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
 330     return false;
 331
 332   /* Non-loop-carried phis have block rank.  Loop-carried phis have
 333      an additional bias added in.  If this phi doesn't have block rank,
 334      it's biased and should not be propagated.  */
 335   block_rank = bb_rank[gimple_bb (phi_stmt)->index];
 336
 337   if (phi_rank (phi_stmt) != block_rank)
 338     return true;
 339
 340   return false;
 341 }
 342
 343 /* Return the maximum of RANK and the rank that should be propagated
 344    from expression OP.  For most operands, this is just the rank of OP.
 345    For loop-carried phis, the value is zero to avoid undoing the bias
 346    in favor of the phi.  */
 347 static long
 348 propagate_rank (long rank, tree op)
 349 {
 350   long op_rank;
 351
 352   if (loop_carried_phi (op))
 353     return rank;
 354
 355   op_rank = get_rank (op);
 356
 357   return MAX (rank, op_rank);
 358 }
 359
 360 /* Look up the operand rank structure for expression E.  */
 361
 362 static inline long
 363 find_operand_rank (tree e)
 364 {
 365   long *slot = operand_rank->get (e);
 366   return slot ? *slot : -1;
 367 }
 368
 369 /* Insert {E,RANK} into the operand rank hashtable.  */
 370
 371 static inline void
 372 insert_operand_rank (tree e, long rank)
 373 {
 374   gcc_assert (rank > 0);
 375   gcc_assert (!operand_rank->put (e, rank));
 376 }
 377
 378 /* Given an expression E, return the rank of the expression.  */
 379
 380 static long
 381 get_rank (tree e)
 382 {
 383   /* Constants have rank 0.  */
 384   if (is_gimple_min_invariant (e))
 385     return 0;
 386
 387   /* SSA_NAME's have the rank of the expression they are the result
 388      of.
 389      For globals and uninitialized values, the rank is 0.
 390      For function arguments, use the pre-setup rank.
 391      For PHI nodes, stores, asm statements, etc, we use the rank of
 392      the BB.
 393      For simple operations, the rank is the maximum rank of any of
 394      its operands, or the bb_rank, whichever is less.
 395      I make no claims that this is optimal, however, it gives good
 396      results.  */
 397
 398   /* We make an exception to the normal ranking system to break
 399      dependences of accumulator variables in loops.  Suppose we
 400      have a simple one-block loop containing:
 401
 402        x_1 = phi(x_0, x_2)
 403        b = a + x_1
 404        c = b + d
 405        x_2 = c + e
 406
 407      As shown, each iteration of the calculation into x is fully
 408      dependent upon the iteration before it.  We would prefer to
 409      see this in the form:
 410
 411        x_1 = phi(x_0, x_2)
 412        b = a + d
 413        c = b + e
 414        x_2 = c + x_1
 415
 416      If the loop is unrolled, the calculations of b and c from
 417      different iterations can be interleaved.
 418
 419      To obtain this result during reassociation, we bias the rank
 420      of the phi definition x_1 upward, when it is recognized as an
 421      accumulator pattern.  The artificial rank causes it to be
 422      added last, providing the desired independence.  */
 423
 424   if (TREE_CODE (e) == SSA_NAME)
 425     {
 426       gimple stmt;
 427       long rank;
 428       int i, n;
 429       tree op;
 430
 431       if (SSA_NAME_IS_DEFAULT_DEF (e))
 432         return find_operand_rank (e);
 433
 434       stmt = SSA_NAME_DEF_STMT (e);
 435       if (gimple_code (stmt) == GIMPLE_PHI)
 436         return phi_rank (stmt);
 437
 438       if (!is_gimple_assign (stmt)
 439           || gimple_vdef (stmt))
 440         return bb_rank[gimple_bb (stmt)->index];
 441
 442       /* If we already have a rank for this expression, use that.  */
 443       rank = find_operand_rank (e);
 444       if (rank != -1)
 445         return rank;
 446
 447       /* Otherwise, find the maximum rank for the operands.  As an
 448          exception, remove the bias from loop-carried phis when propagating
 449          the rank so that dependent operations are not also biased.  */
 450       rank = 0;
 451       if (gimple_assign_single_p (stmt))
 452         {
 453           tree rhs = gimple_assign_rhs1 (stmt);
 454           n = TREE_OPERAND_LENGTH (rhs);
 455           if (n == 0)
 456             rank = propagate_rank (rank, rhs);
 457           else
 458             {
 459               for (i = 0; i < n; i++)
 460                 {
 461                   op = TREE_OPERAND (rhs, i);
 462
 463                   if (op != NULL_TREE)
 464                     rank = propagate_rank (rank, op);
 465                 }
 466             }
 467         }
 468       else
 469         {
 470           n = gimple_num_ops (stmt);
 471           for (i = 1; i < n; i++)
 472             {
 473               op = gimple_op (stmt, i);
 474               gcc_assert (op);
 475               rank = propagate_rank (rank, op);
 476             }
 477         }
 478
 479       if (dump_file && (dump_flags & TDF_DETAILS))
 480         {
 481           fprintf (dump_file, "Rank for ");
 482           print_generic_expr (dump_file, e, 0);
 483           fprintf (dump_file, " is %ld\n", (rank + 1));
 484         }
 485
 486       /* Note the rank in the hashtable so we don't recompute it.  */
 487       insert_operand_rank (e, (rank + 1));
 488       return (rank + 1);
 489     }
 490
 491   /* Globals, etc,  are rank 0 */
 492   return 0;
 493 }
 494
 495
 496 /* We want integer ones to end up last no matter what, since they are
 497    the ones we can do the most with.  */
 498 #define INTEGER_CONST_TYPE 1 << 3
 499 #define FLOAT_CONST_TYPE 1 << 2
 500 #define OTHER_CONST_TYPE 1 << 1
 501
 502 /* Classify an invariant tree into integer, float, or other, so that
 503    we can sort them to be near other constants of the same type.  */
 504 static inline int
 505 constant_type (tree t)
 506 {
 507   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
 508     return INTEGER_CONST_TYPE;
 509   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
 510     return FLOAT_CONST_TYPE;
 511   else
 512     return OTHER_CONST_TYPE;
 513 }
 514
 515 /* qsort comparison function to sort operand entries PA and PB by rank
 516    so that the sorted array is ordered by rank in decreasing order.  */
 517 static int
 518 sort_by_operand_rank (const void *pa, const void *pb)
 519 {
 520   const operand_entry_t oea = *(const operand_entry_t *)pa;
 521   const operand_entry_t oeb = *(const operand_entry_t *)pb;
 522
 523   /* It's nicer for optimize_expression if constants that are likely
 524      to fold when added/multiplied//whatever are put next to each
 525      other.  Since all constants have rank 0, order them by type.  */
 526   if (oeb->rank == 0 && oea->rank == 0)
 527     {
 528       if (constant_type (oeb->op) != constant_type (oea->op))
 529         return constant_type (oeb->op) - constant_type (oea->op);
 530       else
 531         /* To make sorting result stable, we use unique IDs to determine
 532            order.  */
 533         return oeb->id - oea->id;
 534     }
 535
 536   /* Lastly, make sure the versions that are the same go next to each
 537      other.  */
 538   if ((oeb->rank - oea->rank == 0)
 539       && TREE_CODE (oea->op) == SSA_NAME
 540       && TREE_CODE (oeb->op) == SSA_NAME)
 541     {
 542       /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
 543          versions of removed SSA_NAMEs, so if possible, prefer to sort
 544          based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
 545          See PR60418.  */
 546       if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
 547           && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
 548           && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
 549         {
 550           gimple stmta = SSA_NAME_DEF_STMT (oea->op);
 551           gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
 552           basic_block bba = gimple_bb (stmta);
 553           basic_block bbb = gimple_bb (stmtb);
 554           if (bbb != bba)
 555             {
 556               if (bb_rank[bbb->index] != bb_rank[bba->index])
 557                 return bb_rank[bbb->index] - bb_rank[bba->index];
 558             }
 559           else
 560             {
 561               bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
 562               bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
 563               if (da != db)
 564                 return da ? 1 : -1;
 565             }
 566         }
 567
 568       if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
 569         return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
 570       else
 571         return oeb->id - oea->id;
 572     }
 573
 574   if (oeb->rank != oea->rank)
 575     return oeb->rank - oea->rank;
 576   else
 577     return oeb->id - oea->id;
 578 }
 579
 580 /* Add an operand entry to *OPS for the tree operand OP.  */
 581
 582 static void
 583 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
 584 {
 585   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
 586
 587   oe->op = op;
 588   oe->rank = get_rank (op);
 589   oe->id = next_operand_entry_id++;
 590   oe->count = 1;
 591   ops->safe_push (oe);
 592 }
 593
 594 /* Add an operand entry to *OPS for the tree operand OP with repeat
 595    count REPEAT.  */
 596
 597 static void
 598 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
 599                        HOST_WIDE_INT repeat)
 600 {
 601   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
 602
 603   oe->op = op;
 604   oe->rank = get_rank (op);
 605   oe->id = next_operand_entry_id++;
 606   oe->count = repeat;
 607   ops->safe_push (oe);
 608
 609   reassociate_stats.pows_encountered++;
 610 }
 611
 612 /* Return true if STMT is reassociable operation containing a binary
 613    operation with tree code CODE, and is inside LOOP.  */
 614
 615 static bool
 616 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
 617 {
 618   basic_block bb = gimple_bb (stmt);
 619
 620   if (gimple_bb (stmt) == NULL)
 621     return false;
 622
 623   if (!flow_bb_inside_loop_p (loop, bb))
 624     return false;
 625
 626   if (is_gimple_assign (stmt)
 627       && gimple_assign_rhs_code (stmt) == code
 628       && has_single_use (gimple_assign_lhs (stmt)))
 629     return true;
 630
 631   return false;
 632 }
 633
 634
 635 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
 636    operand of the negate operation.  Otherwise, return NULL.  */
 637
 638 static tree
 639 get_unary_op (tree name, enum tree_code opcode)
 640 {
 641   gimple stmt = SSA_NAME_DEF_STMT (name);
 642
 643   if (!is_gimple_assign (stmt))
 644     return NULL_TREE;
 645
 646   if (gimple_assign_rhs_code (stmt) == opcode)
 647     return gimple_assign_rhs1 (stmt);
 648   return NULL_TREE;
 649 }
 650
 651 /* If CURR and LAST are a pair of ops that OPCODE allows us to
 652    eliminate through equivalences, do so, remove them from OPS, and
 653    return true.  Otherwise, return false.  */
 654
 655 static bool
 656 eliminate_duplicate_pair (enum tree_code opcode,
 657                           vec<operand_entry_t> *ops,
 658                           bool *all_done,
 659                           unsigned int i,
 660                           operand_entry_t curr,
 661                           operand_entry_t last)
 662 {
 663
 664   /* If we have two of the same op, and the opcode is & |, min, or max,
 665      we can eliminate one of them.
 666      If we have two of the same op, and the opcode is ^, we can
 667      eliminate both of them.  */
 668
 669   if (last && last->op == curr->op)
 670     {
 671       switch (opcode)
 672         {
 673         case MAX_EXPR:
 674         case MIN_EXPR:
 675         case BIT_IOR_EXPR:
 676         case BIT_AND_EXPR:
 677           if (dump_file && (dump_flags & TDF_DETAILS))
 678             {
 679               fprintf (dump_file, "Equivalence: ");
 680               print_generic_expr (dump_file, curr->op, 0);
 681               fprintf (dump_file, " [&|minmax] ");
 682               print_generic_expr (dump_file, last->op, 0);
 683               fprintf (dump_file, " -> ");
 684               print_generic_stmt (dump_file, last->op, 0);
 685             }
 686
 687           ops->ordered_remove (i);
 688           reassociate_stats.ops_eliminated ++;
 689
 690           return true;
 691
 692         case BIT_XOR_EXPR:
 693           if (dump_file && (dump_flags & TDF_DETAILS))
 694             {
 695               fprintf (dump_file, "Equivalence: ");
 696               print_generic_expr (dump_file, curr->op, 0);
 697               fprintf (dump_file, " ^ ");
 698               print_generic_expr (dump_file, last->op, 0);
 699               fprintf (dump_file, " -> nothing\n");
 700             }
 701
 702           reassociate_stats.ops_eliminated += 2;
 703
 704           if (ops->length () == 2)
 705             {
 706               ops->create (0);
 707               add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
 708               *all_done = true;
 709             }
 710           else
 711             {
 712               ops->ordered_remove (i-1);
 713               ops->ordered_remove (i-1);
 714             }
 715
 716           return true;
 717
 718         default:
 719           break;
 720         }
 721     }
 722   return false;
 723 }
 724
 725 static vec<tree> plus_negates;
 726
 727 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
 728    expression, look in OPS for a corresponding positive operation to cancel
 729    it out.  If we find one, remove the other from OPS, replace
 730    OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
 731    return false. */
 732
 733 static bool
 734 eliminate_plus_minus_pair (enum tree_code opcode,
 735                            vec<operand_entry_t> *ops,
 736                            unsigned int currindex,
 737                            operand_entry_t curr)
 738 {
 739   tree negateop;
 740   tree notop;
 741   unsigned int i;
 742   operand_entry_t oe;
 743
 744   if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
 745     return false;
 746
 747   negateop = get_unary_op (curr->op, NEGATE_EXPR);
 748   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 749   if (negateop == NULL_TREE && notop == NULL_TREE)
 750     return false;
 751
 752   /* Any non-negated version will have a rank that is one less than
 753      the current rank.  So once we hit those ranks, if we don't find
 754      one, we can stop.  */
 755
 756   for (i = currindex + 1;
 757        ops->iterate (i, &oe)
 758        && oe->rank >= curr->rank - 1 ;
 759        i++)
 760     {
 761       if (oe->op == negateop)
 762         {
 763
 764           if (dump_file && (dump_flags & TDF_DETAILS))
 765             {
 766               fprintf (dump_file, "Equivalence: ");
 767               print_generic_expr (dump_file, negateop, 0);
 768               fprintf (dump_file, " + -");
 769               print_generic_expr (dump_file, oe->op, 0);
 770               fprintf (dump_file, " -> 0\n");
 771             }
 772
 773           ops->ordered_remove (i);
 774           add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
 775           ops->ordered_remove (currindex);
 776           reassociate_stats.ops_eliminated ++;
 777
 778           return true;
 779         }
 780       else if (oe->op == notop)
 781         {
 782           tree op_type = TREE_TYPE (oe->op);
 783
 784           if (dump_file && (dump_flags & TDF_DETAILS))
 785             {
 786               fprintf (dump_file, "Equivalence: ");
 787               print_generic_expr (dump_file, notop, 0);
 788               fprintf (dump_file, " + ~");
 789               print_generic_expr (dump_file, oe->op, 0);
 790               fprintf (dump_file, " -> -1\n");
 791             }
 792
 793           ops->ordered_remove (i);
 794           add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
 795           ops->ordered_remove (currindex);
 796           reassociate_stats.ops_eliminated ++;
 797
 798           return true;
 799         }
 800     }
 801
 802   /* CURR->OP is a negate expr in a plus expr: save it for later
 803      inspection in repropagate_negates().  */
 804   if (negateop != NULL_TREE)
 805     plus_negates.safe_push (curr->op);
 806
 807   return false;
 808 }
 809
 810 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
 811    bitwise not expression, look in OPS for a corresponding operand to
 812    cancel it out.  If we find one, remove the other from OPS, replace
 813    OPS[CURRINDEX] with 0, and return true.  Otherwise, return
 814    false. */
 815
 816 static bool
 817 eliminate_not_pairs (enum tree_code opcode,
 818                      vec<operand_entry_t> *ops,
 819                      unsigned int currindex,
 820                      operand_entry_t curr)
 821 {
 822   tree notop;
 823   unsigned int i;
 824   operand_entry_t oe;
 825
 826   if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
 827       || TREE_CODE (curr->op) != SSA_NAME)
 828     return false;
 829
 830   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 831   if (notop == NULL_TREE)
 832     return false;
 833
 834   /* Any non-not version will have a rank that is one less than
 835      the current rank.  So once we hit those ranks, if we don't find
 836      one, we can stop.  */
 837
 838   for (i = currindex + 1;
 839        ops->iterate (i, &oe)
 840        && oe->rank >= curr->rank - 1;
 841        i++)
 842     {
 843       if (oe->op == notop)
 844         {
 845           if (dump_file && (dump_flags & TDF_DETAILS))
 846             {
 847               fprintf (dump_file, "Equivalence: ");
 848               print_generic_expr (dump_file, notop, 0);
 849               if (opcode == BIT_AND_EXPR)
 850                 fprintf (dump_file, " & ~");
 851               else if (opcode == BIT_IOR_EXPR)
 852                 fprintf (dump_file, " | ~");
 853               print_generic_expr (dump_file, oe->op, 0);
 854               if (opcode == BIT_AND_EXPR)
 855                 fprintf (dump_file, " -> 0\n");
 856               else if (opcode == BIT_IOR_EXPR)
 857                 fprintf (dump_file, " -> -1\n");
 858             }
 859
 860           if (opcode == BIT_AND_EXPR)
 861             oe->op = build_zero_cst (TREE_TYPE (oe->op));
 862           else if (opcode == BIT_IOR_EXPR)
 863             oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
 864
 865           reassociate_stats.ops_eliminated += ops->length () - 1;
 866           ops->truncate (0);
 867           ops->quick_push (oe);
 868           return true;
 869         }
 870     }
 871
 872   return false;
 873 }
 874
 875 /* Use constant value that may be present in OPS to try to eliminate
 876    operands.  Note that this function is only really used when we've
 877    eliminated ops for other reasons, or merged constants.  Across
 878    single statements, fold already does all of this, plus more.  There
 879    is little point in duplicating logic, so I've only included the
 880    identities that I could ever construct testcases to trigger.  */
 881
 882 static void
 883 eliminate_using_constants (enum tree_code opcode,
 884                            vec<operand_entry_t> *ops)
 885 {
 886   operand_entry_t oelast = ops->last ();
 887   tree type = TREE_TYPE (oelast->op);
 888
 889   if (oelast->rank == 0
 890       && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
 891     {
 892       switch (opcode)
 893         {
 894         case BIT_AND_EXPR:
 895           if (integer_zerop (oelast->op))
 896             {
 897               if (ops->length () != 1)
 898                 {
 899                   if (dump_file && (dump_flags & TDF_DETAILS))
 900                     fprintf (dump_file, "Found & 0, removing all other ops\n");
 901
 902                   reassociate_stats.ops_eliminated += ops->length () - 1;
 903
 904                   ops->truncate (0);
 905                   ops->quick_push (oelast);
 906                   return;
 907                 }
 908             }
 909           else if (integer_all_onesp (oelast->op))
 910             {
 911               if (ops->length () != 1)
 912                 {
 913                   if (dump_file && (dump_flags & TDF_DETAILS))
 914                     fprintf (dump_file, "Found & -1, removing\n");
 915                   ops->pop ();
 916                   reassociate_stats.ops_eliminated++;
 917                 }
 918             }
 919           break;
 920         case BIT_IOR_EXPR:
 921           if (integer_all_onesp (oelast->op))
 922             {
 923               if (ops->length () != 1)
 924                 {
 925                   if (dump_file && (dump_flags & TDF_DETAILS))
 926                     fprintf (dump_file, "Found | -1, removing all other ops\n");
 927
 928                   reassociate_stats.ops_eliminated += ops->length () - 1;
 929
 930                   ops->truncate (0);
 931                   ops->quick_push (oelast);
 932                   return;
 933                 }
 934             }
 935           else if (integer_zerop (oelast->op))
 936             {
 937               if (ops->length () != 1)
 938                 {
 939                   if (dump_file && (dump_flags & TDF_DETAILS))
 940                     fprintf (dump_file, "Found | 0, removing\n");
 941                   ops->pop ();
 942                   reassociate_stats.ops_eliminated++;
 943                 }
 944             }
 945           break;
 946         case MULT_EXPR:
 947           if (integer_zerop (oelast->op)
 948               || (FLOAT_TYPE_P (type)
 949                   && !HONOR_NANS (TYPE_MODE (type))
 950                   && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
 951                   && real_zerop (oelast->op)))
 952             {
 953               if (ops->length () != 1)
 954                 {
 955                   if (dump_file && (dump_flags & TDF_DETAILS))
 956                     fprintf (dump_file, "Found * 0, removing all other ops\n");
 957
 958                   reassociate_stats.ops_eliminated += ops->length () - 1;
 959                   ops->truncate (1);
 960                   ops->quick_push (oelast);
 961                   return;
 962                 }
 963             }
 964           else if (integer_onep (oelast->op)
 965                    || (FLOAT_TYPE_P (type)
 966                        && !HONOR_SNANS (TYPE_MODE (type))
 967                        && real_onep (oelast->op)))
 968             {
 969               if (ops->length () != 1)
 970                 {
 971                   if (dump_file && (dump_flags & TDF_DETAILS))
 972                     fprintf (dump_file, "Found * 1, removing\n");
 973                   ops->pop ();
 974                   reassociate_stats.ops_eliminated++;
 975                   return;
 976                 }
 977             }
 978           break;
 979         case BIT_XOR_EXPR:
 980         case PLUS_EXPR:
 981         case MINUS_EXPR:
 982           if (integer_zerop (oelast->op)
 983               || (FLOAT_TYPE_P (type)
 984                   && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
 985                   && fold_real_zero_addition_p (type, oelast->op,
 986                                                 opcode == MINUS_EXPR)))
 987             {
 988               if (ops->length () != 1)
 989                 {
 990                   if (dump_file && (dump_flags & TDF_DETAILS))
 991                     fprintf (dump_file, "Found [|^+] 0, removing\n");
 992                   ops->pop ();
 993                   reassociate_stats.ops_eliminated++;
 994                   return;
 995                 }
 996             }
 997           break;
 998         default:
 999           break;
1000         }
1001     }
1002 }
1003
1004
/* Forward declaration.  undistribute_ops_list below calls this to
   linearize the multiplication/division chain of each candidate
   operand before the definition has been seen.  */
static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
                                 bool, bool);
1007
1008 /* Structure for tracking and counting operands.  */
1009 typedef struct oecount_s {
1010   int cnt;
1011   int id;
1012   enum tree_code oecode;
1013   tree op;
1014 } oecount;
1015
1016
/* The heap for the oecount hashtable and the sorted list of operands.
   The hashtable stores only integers derived from indices into this
   vector (index plus 42), so the oecount_hasher callbacks look the
   actual entries up here.  */
static vec<oecount> cvec;
1019
1020
1021 /* Oecount hashtable helpers.  */
1022
1023 struct oecount_hasher
1024 {
1025   typedef int value_type;
1026   typedef int compare_type;
1027   typedef int store_values_directly;
1028   static inline hashval_t hash (const value_type &);
1029   static inline bool equal (const value_type &, const compare_type &);
1030   static bool is_deleted (int &v) { return v == 1; }
1031   static void mark_deleted (int &e) { e = 1; }
1032   static bool is_empty (int &v) { return v == 0; }
1033   static void mark_empty (int &e) { e = 0; }
1034   static void remove (int &) {}
1035 };
1036
1037 /* Hash function for oecount.  */
1038
1039 inline hashval_t
1040 oecount_hasher::hash (const value_type &p)
1041 {
1042   const oecount *c = &cvec[p - 42];
1043   return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1044 }
1045
1046 /* Comparison function for oecount.  */
1047
1048 inline bool
1049 oecount_hasher::equal (const value_type &p1, const compare_type &p2)
1050 {
1051   const oecount *c1 = &cvec[p1 - 42];
1052   const oecount *c2 = &cvec[p2 - 42];
1053   return (c1->oecode == c2->oecode
1054           && c1->op == c2->op);
1055 }
1056
1057 /* Comparison function for qsort sorting oecount elements by count.  */
1058
1059 static int
1060 oecount_cmp (const void *p1, const void *p2)
1061 {
1062   const oecount *c1 = (const oecount *)p1;
1063   const oecount *c2 = (const oecount *)p2;
1064   if (c1->cnt != c2->cnt)
1065     return c1->cnt - c2->cnt;
1066   else
1067     /* If counts are identical, use unique IDs to stabilize qsort.  */
1068     return c1->id - c2->id;
1069 }
1070
1071 /* Return TRUE iff STMT represents a builtin call that raises OP
1072    to some exponent.  */
1073
1074 static bool
1075 stmt_is_power_of_op (gimple stmt, tree op)
1076 {
1077   tree fndecl;
1078
1079   if (!is_gimple_call (stmt))
1080     return false;
1081
1082   fndecl = gimple_call_fndecl (stmt);
1083
1084   if (!fndecl
1085       || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1086     return false;
1087
1088   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1089     {
1090     CASE_FLT_FN (BUILT_IN_POW):
1091     CASE_FLT_FN (BUILT_IN_POWI):
1092       return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1093
1094     default:
1095       return false;
1096     }
1097 }
1098
1099 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1100    in place and return the result.  Assumes that stmt_is_power_of_op
1101    was previously called for STMT and returned TRUE.  */
1102
1103 static HOST_WIDE_INT
1104 decrement_power (gimple stmt)
1105 {
1106   REAL_VALUE_TYPE c, cint;
1107   HOST_WIDE_INT power;
1108   tree arg1;
1109
1110   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1111     {
1112     CASE_FLT_FN (BUILT_IN_POW):
1113       arg1 = gimple_call_arg (stmt, 1);
1114       c = TREE_REAL_CST (arg1);
1115       power = real_to_integer (&c) - 1;
1116       real_from_integer (&cint, VOIDmode, power, SIGNED);
1117       gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1118       return power;
1119
1120     CASE_FLT_FN (BUILT_IN_POWI):
1121       arg1 = gimple_call_arg (stmt, 1);
1122       power = TREE_INT_CST_LOW (arg1) - 1;
1123       gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1124       return power;
1125
1126     default:
1127       gcc_unreachable ();
1128     }
1129 }
1130
1131 /* Find the single immediate use of STMT's LHS, and replace it
1132    with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
1133    replace *DEF with OP as well.  */
1134
1135 static void
1136 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1137 {
1138   tree lhs;
1139   gimple use_stmt;
1140   use_operand_p use;
1141   gimple_stmt_iterator gsi;
1142
1143   if (is_gimple_call (stmt))
1144     lhs = gimple_call_lhs (stmt);
1145   else
1146     lhs = gimple_assign_lhs (stmt);
1147
1148   gcc_assert (has_single_use (lhs));
1149   single_imm_use (lhs, &use, &use_stmt);
1150   if (lhs == *def)
1151     *def = op;
1152   SET_USE (use, op);
1153   if (TREE_CODE (op) != SSA_NAME)
1154     update_stmt (use_stmt);
1155   gsi = gsi_for_stmt (stmt);
1156   unlink_stmt_vdef (stmt);
1157   reassoc_remove_stmt (&gsi);
1158   release_defs (stmt);
1159 }
1160
1161 /* Walks the linear chain with result *DEF searching for an operation
1162    with operand OP and code OPCODE removing that from the chain.  *DEF
1163    is updated if there is only one operand but no operation left.  */
1164
1165 static void
1166 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1167 {
1168   gimple stmt = SSA_NAME_DEF_STMT (*def);
1169
1170   do
1171     {
1172       tree name;
1173
1174       if (opcode == MULT_EXPR
1175           && stmt_is_power_of_op (stmt, op))
1176         {
1177           if (decrement_power (stmt) == 1)
1178             propagate_op_to_single_use (op, stmt, def);
1179           return;
1180         }
1181
1182       name = gimple_assign_rhs1 (stmt);
1183
1184       /* If this is the operation we look for and one of the operands
1185          is ours simply propagate the other operand into the stmts
1186          single use.  */
1187       if (gimple_assign_rhs_code (stmt) == opcode
1188           && (name == op
1189               || gimple_assign_rhs2 (stmt) == op))
1190         {
1191           if (name == op)
1192             name = gimple_assign_rhs2 (stmt);
1193           propagate_op_to_single_use (name, stmt, def);
1194           return;
1195         }
1196
1197       /* We might have a multiply of two __builtin_pow* calls, and
1198          the operand might be hiding in the rightmost one.  */
1199       if (opcode == MULT_EXPR
1200           && gimple_assign_rhs_code (stmt) == opcode
1201           && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1202           && has_single_use (gimple_assign_rhs2 (stmt)))
1203         {
1204           gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1205           if (stmt_is_power_of_op (stmt2, op))
1206             {
1207               if (decrement_power (stmt2) == 1)
1208                 propagate_op_to_single_use (op, stmt2, def);
1209               return;
1210             }
1211         }
1212
1213       /* Continue walking the chain.  */
1214       gcc_assert (name != op
1215                   && TREE_CODE (name) == SSA_NAME);
1216       stmt = SSA_NAME_DEF_STMT (name);
1217     }
1218   while (1);
1219 }
1220
1221 /* Returns true if statement S1 dominates statement S2.  Like
1222    stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */
1223
1224 static bool
1225 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1226 {
1227   basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1228
1229   /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1230      SSA_NAME.  Assume it lives at the beginning of function and
1231      thus dominates everything.  */
1232   if (!bb1 || s1 == s2)
1233     return true;
1234
1235   /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
1236   if (!bb2)
1237     return false;
1238
1239   if (bb1 == bb2)
1240     {
1241       /* PHIs in the same basic block are assumed to be
1242          executed all in parallel, if only one stmt is a PHI,
1243          it dominates the other stmt in the same basic block.  */
1244       if (gimple_code (s1) == GIMPLE_PHI)
1245         return true;
1246
1247       if (gimple_code (s2) == GIMPLE_PHI)
1248         return false;
1249
1250       gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1251
1252       if (gimple_uid (s1) < gimple_uid (s2))
1253         return true;
1254
1255       if (gimple_uid (s1) > gimple_uid (s2))
1256         return false;
1257
1258       gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1259       unsigned int uid = gimple_uid (s1);
1260       for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1261         {
1262           gimple s = gsi_stmt (gsi);
1263           if (gimple_uid (s) != uid)
1264             break;
1265           if (s == s2)
1266             return true;
1267         }
1268
1269       return false;
1270     }
1271
1272   return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1273 }
1274
1275 /* Insert STMT after INSERT_POINT.  */
1276
1277 static void
1278 insert_stmt_after (gimple stmt, gimple insert_point)
1279 {
1280   gimple_stmt_iterator gsi;
1281   basic_block bb;
1282
1283   if (gimple_code (insert_point) == GIMPLE_PHI)
1284     bb = gimple_bb (insert_point);
1285   else if (!stmt_ends_bb_p (insert_point))
1286     {
1287       gsi = gsi_for_stmt (insert_point);
1288       gimple_set_uid (stmt, gimple_uid (insert_point));
1289       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1290       return;
1291     }
1292   else
1293     /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1294        thus if it must end a basic block, it should be a call that can
1295        throw, or some assignment that can throw.  If it throws, the LHS
1296        of it will not be initialized though, so only valid places using
1297        the SSA_NAME should be dominated by the fallthru edge.  */
1298     bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1299   gsi = gsi_after_labels (bb);
1300   if (gsi_end_p (gsi))
1301     {
1302       gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1303       gimple_set_uid (stmt,
1304                       gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1305     }
1306   else
1307     gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1308   gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1309 }
1310
1311 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1312    the result.  Places the statement after the definition of either
1313    OP1 or OP2.  Returns the new statement.  */
1314
1315 static gimple
1316 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1317 {
1318   gimple op1def = NULL, op2def = NULL;
1319   gimple_stmt_iterator gsi;
1320   tree op;
1321   gimple sum;
1322
1323   /* Create the addition statement.  */
1324   op = make_ssa_name (type, NULL);
1325   sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1326
1327   /* Find an insertion place and insert.  */
1328   if (TREE_CODE (op1) == SSA_NAME)
1329     op1def = SSA_NAME_DEF_STMT (op1);
1330   if (TREE_CODE (op2) == SSA_NAME)
1331     op2def = SSA_NAME_DEF_STMT (op2);
1332   if ((!op1def || gimple_nop_p (op1def))
1333       && (!op2def || gimple_nop_p (op2def)))
1334     {
1335       gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1336       if (gsi_end_p (gsi))
1337         {
1338           gimple_stmt_iterator gsi2
1339             = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1340           gimple_set_uid (sum,
1341                           gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1342         }
1343       else
1344         gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1345       gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1346     }
1347   else
1348     {
1349       gimple insert_point;
1350       if ((!op1def || gimple_nop_p (op1def))
1351            || (op2def && !gimple_nop_p (op2def)
1352                && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1353         insert_point = op2def;
1354       else
1355         insert_point = op1def;
1356       insert_stmt_after (sum, insert_point);
1357     }
1358   update_stmt (sum);
1359
1360   return sum;
1361 }
1362
/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   OPCODE is the operation of the outer chain whose operands are in
   *OPS; only PLUS_EXPR chains with more than one operand are
   processed.  LOOP is passed on to is_reassociable_op when selecting
   candidates.  Returns true if *OPS was changed.

   The algorithm is organized as follows.

    - First we walk the addition chain *OPS looking for summands that
      are defined by a multiplication or a real division.  This results
      in the candidates bitmap with relevant indices into *OPS.

    - Second we build the chains of multiplications or divisions for
      these candidates, counting the number of occurrences of (operand, code)
      pairs in all of the candidates chains.

    - Third we sort the (operand, code) pairs by number of occurrence and
      process them starting with the pair with the most uses.

      * For each such pair we walk the candidates again to build a
        second candidate bitmap noting all multiplication/division chains
        that have at least one occurrence of (operand, code).

      * We build an alternate addition chain only covering these
        candidates with one (operand, code) operation removed from their
        multiplication/division chain.

      * The first candidate gets replaced by the alternate addition chain
        multiplied/divided by the operand.

      * All candidate chains get disabled for further processing and
        processing of (operand, code) pairs continues.

  The alternate addition chains built are re-processed by the main
  reassociation algorithm which allows optimizing a * x * y + b * y * x
  to (a + b ) * x * y in one invocation of the reassociation pass.  */

static bool
undistribute_ops_list (enum tree_code opcode,
                       vec<operand_entry_t> *ops, struct loop *loop)
{
  unsigned int length = ops->length ();
  operand_entry_t oe1;
  unsigned i, j;
  sbitmap candidates, candidates2;
  unsigned nr_candidates, nr_candidates2;
  sbitmap_iterator sbi0;
  vec<operand_entry_t> *subops;
  bool changed = false;
  int next_oecount_id = 0;

  if (length <= 1
      || opcode != PLUS_EXPR)
    return false;

  /* Build a list of candidates to process.  */
  candidates = sbitmap_alloc (length);
  bitmap_clear (candidates);
  nr_candidates = 0;
  FOR_EACH_VEC_ELT (*ops, i, oe1)
    {
      enum tree_code dcode;
      gimple oe1def;

      if (TREE_CODE (oe1->op) != SSA_NAME)
        continue;
      oe1def = SSA_NAME_DEF_STMT (oe1->op);
      if (!is_gimple_assign (oe1def))
        continue;
      dcode = gimple_assign_rhs_code (oe1def);
      if ((dcode != MULT_EXPR
           && dcode != RDIV_EXPR)
          || !is_reassociable_op (oe1def, dcode, loop))
        continue;

      bitmap_set_bit (candidates, i);
      nr_candidates++;
    }

  /* At least two candidate summands are needed to factor anything.  */
  if (nr_candidates < 2)
    {
      sbitmap_free (candidates);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "searching for un-distribute opportunities ");
      print_generic_expr (dump_file,
        (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
      fprintf (dump_file, " %d\n", nr_candidates);
    }

  /* Build linearized sub-operand lists and the counting table.  */
  cvec.create (0);

  hash_table<oecount_hasher> ctable (15);

  /* ??? Macro arguments cannot have multi-argument template types in
     them.  This typedef is needed to workaround that limitation.  */
  typedef vec<operand_entry_t> vec_operand_entry_t_heap;
  /* One (zero-initialized) sub-operand vector per entry of *OPS; only
     the slots of candidates get filled in.  */
  subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
    {
      gimple oedef;
      enum tree_code oecode;
      unsigned j;

      oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
      oecode = gimple_assign_rhs_code (oedef);
      linearize_expr_tree (&subops[i], oedef,
                           associative_tree_code (oecode), false);

      FOR_EACH_VEC_ELT (subops[i], j, oe1)
        {
          oecount c;
          int *slot;
          int idx;
          c.oecode = oecode;
          c.cnt = 1;
          c.id = next_oecount_id++;
          c.op = oe1->op;
          /* Push the entry tentatively so that the hasher can look it
             up in CVEC.  IDX is the index of the pushed entry plus 42,
             the encoding oecount_hasher expects.  */
          cvec.safe_push (c);
          idx = cvec.length () + 41;
          slot = ctable.find_slot (idx, INSERT);
          if (!*slot)
            {
              /* First occurrence of this (operand, code) pair.  */
              *slot = idx;
            }
          else
            {
              /* Pair already known: drop the tentative entry and bump
                 the count of the existing one.  */
              cvec.pop ();
              cvec[*slot - 42].cnt++;
            }
        }
    }

  /* Sort the counting table.  */
  cvec.qsort (oecount_cmp);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      oecount *c;
      fprintf (dump_file, "Candidates:\n");
      FOR_EACH_VEC_ELT (cvec, j, c)
        {
          fprintf (dump_file, "  %u %s: ", c->cnt,
                   c->oecode == MULT_EXPR
                   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
          print_generic_expr (dump_file, c->op, 0);
          fprintf (dump_file, "\n");
        }
    }

  /* Process the (operand, code) pairs in order of most occurrence.
     CVEC is sorted by ascending count, so the pairs are taken from
     its end.  */
  candidates2 = sbitmap_alloc (length);
  while (!cvec.is_empty ())
    {
      oecount *c = &cvec.last ();
      /* A pair seen fewer than twice cannot be factored out, and all
         remaining pairs have no larger count.  */
      if (c->cnt < 2)
        break;

      /* Now collect the operands in the outer chain that contain
         the common operand in their inner chain.  */
      bitmap_clear (candidates2);
      nr_candidates2 = 0;
      EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
        {
          gimple oedef;
          enum tree_code oecode;
          unsigned j;
          tree op = (*ops)[i]->op;

          /* If we undistributed in this chain already this may be
             a constant.  */
          if (TREE_CODE (op) != SSA_NAME)
            continue;

          oedef = SSA_NAME_DEF_STMT (op);
          oecode = gimple_assign_rhs_code (oedef);
          if (oecode != c->oecode)
            continue;

          FOR_EACH_VEC_ELT (subops[i], j, oe1)
            {
              if (oe1->op == c->op)
                {
                  bitmap_set_bit (candidates2, i);
                  ++nr_candidates2;
                  break;
                }
            }
        }

      if (nr_candidates2 >= 2)
        {
          operand_entry_t oe1, oe2;
          gimple prod;
          int first = bitmap_first_set_bit (candidates2);

          /* Build the new addition chain.  */
          oe1 = (*ops)[first];
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Building (");
              print_generic_expr (dump_file, oe1->op, 0);
            }
          /* Strip one occurrence of the common operand from the first
             candidate, then fold every further candidate into it.  */
          zero_one_operation (&oe1->op, c->oecode, c->op);
          EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
            {
              gimple sum;
              oe2 = (*ops)[i];
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, " + ");
                  print_generic_expr (dump_file, oe2->op, 0);
                }
              zero_one_operation (&oe2->op, c->oecode, c->op);
              sum = build_and_add_sum (TREE_TYPE (oe1->op),
                                       oe1->op, oe2->op, opcode);
              /* The summand now lives in SUM; leave a zero constant of
                 rank 0 in its place in the outer chain.  */
              oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
              oe2->rank = 0;
              oe1->op = gimple_get_lhs (sum);
            }

          /* Apply the multiplication/division.  */
          prod = build_and_add_sum (TREE_TYPE (oe1->op),
                                    oe1->op, c->op, c->oecode);
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
              print_generic_expr (dump_file, c->op, 0);
              fprintf (dump_file, "\n");
            }

          /* Record it in the addition chain and disable further
             undistribution with this op.  */
          oe1->op = gimple_assign_lhs (prod);
          oe1->rank = get_rank (oe1->op);
          subops[first].release ();

          changed = true;
        }

      cvec.pop ();
    }

  /* Release the per-operand vectors, the counting vector and both
     bitmaps.  */
  for (i = 0; i < ops->length (); ++i)
    subops[i].release ();
  free (subops);
  cvec.release ();
  sbitmap_free (candidates);
  sbitmap_free (candidates2);

  return changed;
}
1617
1618 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1619    expression, examine the other OPS to see if any of them are comparisons
1620    of the same values, which we may be able to combine or eliminate.
1621    For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
1622
1623 static bool
1624 eliminate_redundant_comparison (enum tree_code opcode,
1625                                 vec<operand_entry_t> *ops,
1626                                 unsigned int currindex,
1627                                 operand_entry_t curr)
1628 {
1629   tree op1, op2;
1630   enum tree_code lcode, rcode;
1631   gimple def1, def2;
1632   int i;
1633   operand_entry_t oe;
1634
1635   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1636     return false;
1637
1638   /* Check that CURR is a comparison.  */
1639   if (TREE_CODE (curr->op) != SSA_NAME)
1640     return false;
1641   def1 = SSA_NAME_DEF_STMT (curr->op);
1642   if (!is_gimple_assign (def1))
1643     return false;
1644   lcode = gimple_assign_rhs_code (def1);
1645   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1646     return false;
1647   op1 = gimple_assign_rhs1 (def1);
1648   op2 = gimple_assign_rhs2 (def1);
1649
1650   /* Now look for a similar comparison in the remaining OPS.  */
1651   for (i = currindex + 1; ops->iterate (i, &oe); i++)
1652     {
1653       tree t;
1654
1655       if (TREE_CODE (oe->op) != SSA_NAME)
1656         continue;
1657       def2 = SSA_NAME_DEF_STMT (oe->op);
1658       if (!is_gimple_assign (def2))
1659         continue;
1660       rcode = gimple_assign_rhs_code (def2);
1661       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1662         continue;
1663
1664       /* If we got here, we have a match.  See if we can combine the
1665          two comparisons.  */
1666       if (opcode == BIT_IOR_EXPR)
1667         t = maybe_fold_or_comparisons (lcode, op1, op2,
1668                                        rcode, gimple_assign_rhs1 (def2),
1669                                        gimple_assign_rhs2 (def2));
1670       else
1671         t = maybe_fold_and_comparisons (lcode, op1, op2,
1672                                         rcode, gimple_assign_rhs1 (def2),
1673                                         gimple_assign_rhs2 (def2));
1674       if (!t)
1675         continue;
1676
1677       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1678          always give us a boolean_type_node value back.  If the original
1679          BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1680          we need to convert.  */
1681       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1682         t = fold_convert (TREE_TYPE (curr->op), t);
1683
1684       if (TREE_CODE (t) != INTEGER_CST
1685           && !operand_equal_p (t, curr->op, 0))
1686         {
1687           enum tree_code subcode;
1688           tree newop1, newop2;
1689           if (!COMPARISON_CLASS_P (t))
1690             continue;
1691           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1692           STRIP_USELESS_TYPE_CONVERSION (newop1);
1693           STRIP_USELESS_TYPE_CONVERSION (newop2);
1694           if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1695             continue;
1696         }
1697
1698       if (dump_file && (dump_flags & TDF_DETAILS))
1699         {
1700           fprintf (dump_file, "Equivalence: ");
1701           print_generic_expr (dump_file, curr->op, 0);
1702           fprintf (dump_file, " %s ", op_symbol_code (opcode));
1703           print_generic_expr (dump_file, oe->op, 0);
1704           fprintf (dump_file, " -> ");
1705           print_generic_expr (dump_file, t, 0);
1706           fprintf (dump_file, "\n");
1707         }
1708
1709       /* Now we can delete oe, as it has been subsumed by the new combined
1710          expression t.  */
1711       ops->ordered_remove (i);
1712       reassociate_stats.ops_eliminated ++;
1713
1714       /* If t is the same as curr->op, we're done.  Otherwise we must
1715          replace curr->op with t.  Special case is if we got a constant
1716          back, in which case we add it to the end instead of in place of
1717          the current entry.  */
1718       if (TREE_CODE (t) == INTEGER_CST)
1719         {
1720           ops->ordered_remove (currindex);
1721           add_to_ops_vec (ops, t);
1722         }
1723       else if (!operand_equal_p (t, curr->op, 0))
1724         {
1725           gimple sum;
1726           enum tree_code subcode;
1727           tree newop1;
1728           tree newop2;
1729           gcc_assert (COMPARISON_CLASS_P (t));
1730           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1731           STRIP_USELESS_TYPE_CONVERSION (newop1);
1732           STRIP_USELESS_TYPE_CONVERSION (newop2);
1733           gcc_checking_assert (is_gimple_val (newop1)
1734                                && is_gimple_val (newop2));
1735           sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1736           curr->op = gimple_get_lhs (sum);
1737         }
1738       return true;
1739     }
1740
1741   return false;
1742 }
1743
1744 /* Perform various identities and other optimizations on the list of
1745    operand entries, stored in OPS.  The tree code for the binary
1746    operation between all the operands is OPCODE.  */
1747
1748 static void
1749 optimize_ops_list (enum tree_code opcode,
1750                    vec<operand_entry_t> *ops)
1751 {
1752   unsigned int length = ops->length ();
1753   unsigned int i;
1754   operand_entry_t oe;
1755   operand_entry_t oelast = NULL;
1756   bool iterate = false;
1757
1758   if (length == 1)
1759     return;
1760
1761   oelast = ops->last ();
1762
1763   /* If the last two are constants, pop the constants off, merge them
1764      and try the next two.  */
1765   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1766     {
1767       operand_entry_t oelm1 = (*ops)[length - 2];
1768
1769       if (oelm1->rank == 0
1770           && is_gimple_min_invariant (oelm1->op)
1771           && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1772                                        TREE_TYPE (oelast->op)))
1773         {
1774           tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1775                                      oelm1->op, oelast->op);
1776
1777           if (folded && is_gimple_min_invariant (folded))
1778             {
1779               if (dump_file && (dump_flags & TDF_DETAILS))
1780                 fprintf (dump_file, "Merging constants\n");
1781
1782               ops->pop ();
1783               ops->pop ();
1784
1785               add_to_ops_vec (ops, folded);
1786               reassociate_stats.constants_eliminated++;
1787
1788               optimize_ops_list (opcode, ops);
1789               return;
1790             }
1791         }
1792     }
1793
1794   eliminate_using_constants (opcode, ops);
1795   oelast = NULL;
1796
1797   for (i = 0; ops->iterate (i, &oe);)
1798     {
1799       bool done = false;
1800
1801       if (eliminate_not_pairs (opcode, ops, i, oe))
1802         return;
1803       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1804           || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1805           || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1806         {
1807           if (done)
1808             return;
1809           iterate = true;
1810           oelast = NULL;
1811           continue;
1812         }
1813       oelast = oe;
1814       i++;
1815     }
1816
1817   length = ops->length ();
1818   oelast = ops->last ();
1819
1820   if (iterate)
1821     optimize_ops_list (opcode, ops);
1822 }
1823
1824 /* The following functions are subroutines to optimize_range_tests and allow
1825    it to try to change a logical combination of comparisons into a range
1826    test.
1827
1828    For example, both
1829         X == 2 || X == 5 || X == 3 || X == 4
1830    and
1831         X >= 2 && X <= 5
1832    are converted to
1833         (unsigned) (X - 2) <= 3
1834
1835    For more information see comments above fold_range_test in fold-const.c,
1836    this implementation is for GIMPLE.  */
1837
1838 struct range_entry
1839 {
1840   tree exp;
1841   tree low;
1842   tree high;
1843   bool in_p;
1844   bool strict_overflow_p;
1845   unsigned int idx, next;
1846 };
1847
1848 /* This is similar to make_range in fold-const.c, but on top of
1849    GIMPLE instead of trees.  If EXP is non-NULL, it should be
1850    an SSA_NAME and STMT argument is ignored, otherwise STMT
1851    argument should be a GIMPLE_COND.  */
1852
1853 static void
1854 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1855 {
1856   int in_p;
1857   tree low, high;
1858   bool is_bool, strict_overflow_p;
1859
1860   r->exp = NULL_TREE;
1861   r->in_p = false;
1862   r->strict_overflow_p = false;
1863   r->low = NULL_TREE;
1864   r->high = NULL_TREE;
1865   if (exp != NULL_TREE
1866       && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1867     return;
1868
1869   /* Start with simply saying "EXP != 0" and then look at the code of EXP
1870      and see if we can refine the range.  Some of the cases below may not
1871      happen, but it doesn't seem worth worrying about this.  We "continue"
1872      the outer loop when we've changed something; otherwise we "break"
1873      the switch, which will "break" the while.  */
1874   low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1875   high = low;
1876   in_p = 0;
1877   strict_overflow_p = false;
1878   is_bool = false;
1879   if (exp == NULL_TREE)
1880     is_bool = true;
1881   else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1882     {
1883       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1884         is_bool = true;
1885       else
1886         return;
1887     }
1888   else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1889     is_bool = true;
1890
1891   while (1)
1892     {
1893       enum tree_code code;
1894       tree arg0, arg1, exp_type;
1895       tree nexp;
1896       location_t loc;
1897
1898       if (exp != NULL_TREE)
1899         {
1900           if (TREE_CODE (exp) != SSA_NAME
1901               || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1902             break;
1903
1904           stmt = SSA_NAME_DEF_STMT (exp);
1905           if (!is_gimple_assign (stmt))
1906             break;
1907
1908           code = gimple_assign_rhs_code (stmt);
1909           arg0 = gimple_assign_rhs1 (stmt);
1910           arg1 = gimple_assign_rhs2 (stmt);
1911           exp_type = TREE_TYPE (exp);
1912         }
1913       else
1914         {
1915           code = gimple_cond_code (stmt);
1916           arg0 = gimple_cond_lhs (stmt);
1917           arg1 = gimple_cond_rhs (stmt);
1918           exp_type = boolean_type_node;
1919         }
1920
1921       if (TREE_CODE (arg0) != SSA_NAME)
1922         break;
1923       loc = gimple_location (stmt);
1924       switch (code)
1925         {
1926         case BIT_NOT_EXPR:
1927           if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1928               /* Ensure the range is either +[-,0], +[0,0],
1929                  -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1930                  -[1,1].  If it is e.g. +[-,-] or -[-,-]
1931                  or similar expression of unconditional true or
1932                  false, it should not be negated.  */
1933               && ((high && integer_zerop (high))
1934                   || (low && integer_onep (low))))
1935             {
1936               in_p = !in_p;
1937               exp = arg0;
1938               continue;
1939             }
1940           break;
1941         case SSA_NAME:
1942           exp = arg0;
1943           continue;
1944         CASE_CONVERT:
1945           if (is_bool)
1946             goto do_default;
1947           if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1948             {
1949               if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1950                 is_bool = true;
1951               else
1952                 return;
1953             }
1954           else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1955             is_bool = true;
1956           goto do_default;
1957         case EQ_EXPR:
1958         case NE_EXPR:
1959         case LT_EXPR:
1960         case LE_EXPR:
1961         case GE_EXPR:
1962         case GT_EXPR:
1963           is_bool = true;
1964           /* FALLTHRU */
1965         default:
1966           if (!is_bool)
1967             return;
1968         do_default:
1969           nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1970                                   &low, &high, &in_p,
1971                                   &strict_overflow_p);
1972           if (nexp != NULL_TREE)
1973             {
1974               exp = nexp;
1975               gcc_assert (TREE_CODE (exp) == SSA_NAME);
1976               continue;
1977             }
1978           break;
1979         }
1980       break;
1981     }
1982   if (is_bool)
1983     {
1984       r->exp = exp;
1985       r->in_p = in_p;
1986       r->low = low;
1987       r->high = high;
1988       r->strict_overflow_p = strict_overflow_p;
1989     }
1990 }
1991
1992 /* Comparison function for qsort.  Sort entries
1993    without SSA_NAME exp first, then with SSA_NAMEs sorted
1994    by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1995    by increasing ->low and if ->low is the same, by increasing
1996    ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
1997    maximum.  */
1998
1999 static int
2000 range_entry_cmp (const void *a, const void *b)
2001 {
2002   const struct range_entry *p = (const struct range_entry *) a;
2003   const struct range_entry *q = (const struct range_entry *) b;
2004
2005   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2006     {
2007       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2008         {
2009           /* Group range_entries for the same SSA_NAME together.  */
2010           if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2011             return -1;
2012           else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2013             return 1;
2014           /* If ->low is different, NULL low goes first, then by
2015              ascending low.  */
2016           if (p->low != NULL_TREE)
2017             {
2018               if (q->low != NULL_TREE)
2019                 {
2020                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
2021                                           p->low, q->low);
2022                   if (tem && integer_onep (tem))
2023                     return -1;
2024                   tem = fold_binary (GT_EXPR, boolean_type_node,
2025                                      p->low, q->low);
2026                   if (tem && integer_onep (tem))
2027                     return 1;
2028                 }
2029               else
2030                 return 1;
2031             }
2032           else if (q->low != NULL_TREE)
2033             return -1;
2034           /* If ->high is different, NULL high goes last, before that by
2035              ascending high.  */
2036           if (p->high != NULL_TREE)
2037             {
2038               if (q->high != NULL_TREE)
2039                 {
2040                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
2041                                           p->high, q->high);
2042                   if (tem && integer_onep (tem))
2043                     return -1;
2044                   tem = fold_binary (GT_EXPR, boolean_type_node,
2045                                      p->high, q->high);
2046                   if (tem && integer_onep (tem))
2047                     return 1;
2048                 }
2049               else
2050                 return -1;
2051             }
2052           else if (p->high != NULL_TREE)
2053             return 1;
2054           /* If both ranges are the same, sort below by ascending idx.  */
2055         }
2056       else
2057         return 1;
2058     }
2059   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2060     return -1;
2061
2062   if (p->idx < q->idx)
2063     return -1;
2064   else
2065     {
2066       gcc_checking_assert (p->idx > q->idx);
2067       return 1;
2068     }
2069 }
2070
2071 /* Helper routine of optimize_range_test.
2072    [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2073    RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2074    OPCODE and OPS are arguments of optimize_range_tests.  Return
2075    true if the range merge has been successful.
2076    If OPCODE is ERROR_MARK, this is called from within
2077    maybe_optimize_range_tests and is performing inter-bb range optimization.
2078    In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2079    oe->rank.  */
2080
2081 static bool
2082 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2083                    unsigned int count, enum tree_code opcode,
2084                    vec<operand_entry_t> *ops, tree exp, bool in_p,
2085                    tree low, tree high, bool strict_overflow_p)
2086 {
2087   operand_entry_t oe = (*ops)[range->idx];
2088   tree op = oe->op;
2089   gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2090     last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2091   location_t loc = gimple_location (stmt);
2092   tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2093   tree tem = build_range_check (loc, optype, exp, in_p, low, high);
2094   enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2095   gimple_stmt_iterator gsi;
2096
2097   if (tem == NULL_TREE)
2098     return false;
2099
2100   if (strict_overflow_p && issue_strict_overflow_warning (wc))
2101     warning_at (loc, OPT_Wstrict_overflow,
2102                 "assuming signed overflow does not occur "
2103                 "when simplifying range test");
2104
2105   if (dump_file && (dump_flags & TDF_DETAILS))
2106     {
2107       struct range_entry *r;
2108       fprintf (dump_file, "Optimizing range tests ");
2109       print_generic_expr (dump_file, range->exp, 0);
2110       fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2111       print_generic_expr (dump_file, range->low, 0);
2112       fprintf (dump_file, ", ");
2113       print_generic_expr (dump_file, range->high, 0);
2114       fprintf (dump_file, "]");
2115       for (r = otherrange; r < otherrange + count; r++)
2116         {
2117           fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2118           print_generic_expr (dump_file, r->low, 0);
2119           fprintf (dump_file, ", ");
2120           print_generic_expr (dump_file, r->high, 0);
2121           fprintf (dump_file, "]");
2122         }
2123       fprintf (dump_file, "\n into ");
2124       print_generic_expr (dump_file, tem, 0);
2125       fprintf (dump_file, "\n");
2126     }
2127
2128   if (opcode == BIT_IOR_EXPR
2129       || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2130     tem = invert_truthvalue_loc (loc, tem);
2131
2132   tem = fold_convert_loc (loc, optype, tem);
2133   gsi = gsi_for_stmt (stmt);
2134   /* In rare cases range->exp can be equal to lhs of stmt.
2135      In that case we have to insert after the stmt rather then before
2136      it.  */
2137   if (op == range->exp)
2138     tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2139                                     GSI_CONTINUE_LINKING);
2140   else
2141     {
2142       tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2143                                       GSI_SAME_STMT);
2144       gsi_prev (&gsi);
2145     }
2146   for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2147     if (gimple_uid (gsi_stmt (gsi)))
2148       break;
2149     else
2150       gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2151
2152   oe->op = tem;
2153   range->exp = exp;
2154   range->low = low;
2155   range->high = high;
2156   range->in_p = in_p;
2157   range->strict_overflow_p = false;
2158
2159   for (range = otherrange; range < otherrange + count; range++)
2160     {
2161       oe = (*ops)[range->idx];
2162       /* Now change all the other range test immediate uses, so that
2163          those tests will be optimized away.  */
2164       if (opcode == ERROR_MARK)
2165         {
2166           if (oe->op)
2167             oe->op = build_int_cst (TREE_TYPE (oe->op),
2168                                     oe->rank == BIT_IOR_EXPR ? 0 : 1);
2169           else
2170             oe->op = (oe->rank == BIT_IOR_EXPR
2171                       ? boolean_false_node : boolean_true_node);
2172         }
2173       else
2174         oe->op = error_mark_node;
2175       range->exp = NULL_TREE;
2176     }
2177   return true;
2178 }
2179
2180 /* Optimize X == CST1 || X == CST2
2181    if popcount (CST1 ^ CST2) == 1 into
2182    (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2183    Similarly for ranges.  E.g.
2184    X != 2 && X != 3 && X != 10 && X != 11
2185    will be transformed by the previous optimization into
2186    !((X - 2U) <= 1U || (X - 10U) <= 1U)
2187    and this loop can transform that into
2188    !(((X & ~8) - 2U) <= 1U).  */
2189
2190 static bool
2191 optimize_range_tests_xor (enum tree_code opcode, tree type,
2192                           tree lowi, tree lowj, tree highi, tree highj,
2193                           vec<operand_entry_t> *ops,
2194                           struct range_entry *rangei,
2195                           struct range_entry *rangej)
2196 {
2197   tree lowxor, highxor, tem, exp;
2198   /* Check highi ^ lowi == highj ^ lowj and
2199      popcount (highi ^ lowi) == 1.  */
2200   lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2201   if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2202     return false;
2203   if (tree_log2 (lowxor) < 0)
2204     return false;
2205   highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2206   if (!tree_int_cst_equal (lowxor, highxor))
2207     return false;
2208
2209   tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2210   exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2211   lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2212   highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2213   if (update_range_test (rangei, rangej, 1, opcode, ops, exp,
2214                          rangei->in_p, lowj, highj,
2215                          rangei->strict_overflow_p
2216                          || rangej->strict_overflow_p))
2217     return true;
2218   return false;
2219 }
2220
2221 /* Optimize X == CST1 || X == CST2
2222    if popcount (CST2 - CST1) == 1 into
2223    ((X - CST1) & ~(CST2 - CST1)) == 0.
2224    Similarly for ranges.  E.g.
2225    X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2226    || X == 75 || X == 45
2227    will be transformed by the previous optimization into
2228    (X - 43U) <= 3U || (X - 75U) <= 3U
2229    and this loop can transform that into
2230    ((X - 43U) & ~(75U - 43U)) <= 3U.  */
2231 static bool
2232 optimize_range_tests_diff (enum tree_code opcode, tree type,
2233                             tree lowi, tree lowj, tree highi, tree highj,
2234                             vec<operand_entry_t> *ops,
2235                             struct range_entry *rangei,
2236                             struct range_entry *rangej)
2237 {
2238   tree tem1, tem2, mask;
2239   /* Check highi - lowi == highj - lowj.  */
2240   tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2241   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2242     return false;
2243   tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2244   if (!tree_int_cst_equal (tem1, tem2))
2245     return false;
2246   /* Check popcount (lowj - lowi) == 1.  */
2247   tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2248   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2249     return false;
2250   if (tree_log2 (tem1) < 0)
2251     return false;
2252
2253   mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2254   tem1 = fold_binary (MINUS_EXPR, type, rangei->exp, lowi);
2255   tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2256   lowj = build_int_cst (type, 0);
2257   if (update_range_test (rangei, rangej, 1, opcode, ops, tem1,
2258                          rangei->in_p, lowj, tem2,
2259                          rangei->strict_overflow_p
2260                          || rangej->strict_overflow_p))
2261     return true;
2262   return false;
2263 }
2264
2265 /* It does some common checks for function optimize_range_tests_xor and
2266    optimize_range_tests_diff.
2267    If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2268    Else it calls optimize_range_tests_diff.  */
2269
2270 static bool
2271 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2272                         bool optimize_xor, vec<operand_entry_t> *ops,
2273                         struct range_entry *ranges)
2274 {
2275   int i, j;
2276   bool any_changes = false;
2277   for (i = first; i < length; i++)
2278     {
2279       tree lowi, highi, lowj, highj, type, tem;
2280
2281       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2282         continue;
2283       type = TREE_TYPE (ranges[i].exp);
2284       if (!INTEGRAL_TYPE_P (type))
2285         continue;
2286       lowi = ranges[i].low;
2287       if (lowi == NULL_TREE)
2288         lowi = TYPE_MIN_VALUE (type);
2289       highi = ranges[i].high;
2290       if (highi == NULL_TREE)
2291         continue;
2292       for (j = i + 1; j < length && j < i + 64; j++)
2293         {
2294           bool changes;
2295           if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2296             continue;
2297           lowj = ranges[j].low;
2298           if (lowj == NULL_TREE)
2299             continue;
2300           highj = ranges[j].high;
2301           if (highj == NULL_TREE)
2302             highj = TYPE_MAX_VALUE (type);
2303           /* Check lowj > highi.  */
2304           tem = fold_binary (GT_EXPR, boolean_type_node,
2305                              lowj, highi);
2306           if (tem == NULL_TREE || !integer_onep (tem))
2307             continue;
2308           if (optimize_xor)
2309             changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2310                                                 highi, highj, ops,
2311                                                 ranges + i, ranges + j);
2312           else
2313             changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2314                                                  highi, highj, ops,
2315                                                  ranges + i, ranges + j);
2316           if (changes)
2317             {
2318               any_changes = true;
2319               break;
2320             }
2321         }
2322     }
2323   return any_changes;
2324 }
2325
2326 /* Optimize range tests, similarly how fold_range_test optimizes
2327    it on trees.  The tree code for the binary
2328    operation between all the operands is OPCODE.
2329    If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2330    maybe_optimize_range_tests for inter-bb range optimization.
2331    In that case if oe->op is NULL, oe->id is bb->index whose
2332    GIMPLE_COND is && or ||ed into the test, and oe->rank says
2333    the actual opcode.  */
2334
2335 static bool
2336 optimize_range_tests (enum tree_code opcode,
2337                       vec<operand_entry_t> *ops)
2338 {
2339   unsigned int length = ops->length (), i, j, first;
2340   operand_entry_t oe;
2341   struct range_entry *ranges;
2342   bool any_changes = false;
2343
2344   if (length == 1)
2345     return false;
2346
2347   ranges = XNEWVEC (struct range_entry, length);
2348   for (i = 0; i < length; i++)
2349     {
2350       oe = (*ops)[i];
2351       ranges[i].idx = i;
2352       init_range_entry (ranges + i, oe->op,
2353                         oe->op ? NULL :
2354                           last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2355       /* For | invert it now, we will invert it again before emitting
2356          the optimized expression.  */
2357       if (opcode == BIT_IOR_EXPR
2358           || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2359         ranges[i].in_p = !ranges[i].in_p;
2360     }
2361
2362   qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2363   for (i = 0; i < length; i++)
2364     if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2365       break;
2366
2367   /* Try to merge ranges.  */
2368   for (first = i; i < length; i++)
2369     {
2370       tree low = ranges[i].low;
2371       tree high = ranges[i].high;
2372       int in_p = ranges[i].in_p;
2373       bool strict_overflow_p = ranges[i].strict_overflow_p;
2374       int update_fail_count = 0;
2375
2376       for (j = i + 1; j < length; j++)
2377         {
2378           if (ranges[i].exp != ranges[j].exp)
2379             break;
2380           if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2381                              ranges[j].in_p, ranges[j].low, ranges[j].high))
2382             break;
2383           strict_overflow_p |= ranges[j].strict_overflow_p;
2384         }
2385
2386       if (j == i + 1)
2387         continue;
2388
2389       if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2390                              ops, ranges[i].exp, in_p, low, high,
2391                              strict_overflow_p))
2392         {
2393           i = j - 1;
2394           any_changes = true;
2395         }
2396       /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2397          while update_range_test would fail.  */
2398       else if (update_fail_count == 64)
2399         i = j - 1;
2400       else
2401         ++update_fail_count;
2402     }
2403
2404   any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2405                                          ops, ranges);
2406
2407   if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2408     any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2409                                            ops, ranges);
2410
2411   if (any_changes && opcode != ERROR_MARK)
2412     {
2413       j = 0;
2414       FOR_EACH_VEC_ELT (*ops, i, oe)
2415         {
2416           if (oe->op == error_mark_node)
2417             continue;
2418           else if (i != j)
2419             (*ops)[j] = oe;
2420           j++;
2421         }
2422       ops->truncate (j);
2423     }
2424
2425   XDELETEVEC (ranges);
2426   return any_changes;
2427 }
2428
2429 /* Return true if STMT is a cast like:
2430    <bb N>:
2431    ...
2432    _123 = (int) _234;
2433
2434    <bb M>:
2435    # _345 = PHI <_123(N), 1(...), 1(...)>
2436    where _234 has bool type, _123 has single use and
2437    bb N has a single successor M.  This is commonly used in
2438    the last block of a range test.  */
2439
2440 static bool
2441 final_range_test_p (gimple stmt)
2442 {
2443   basic_block bb, rhs_bb;
2444   edge e;
2445   tree lhs, rhs;
2446   use_operand_p use_p;
2447   gimple use_stmt;
2448
2449   if (!gimple_assign_cast_p (stmt))
2450     return false;
2451   bb = gimple_bb (stmt);
2452   if (!single_succ_p (bb))
2453     return false;
2454   e = single_succ_edge (bb);
2455   if (e->flags & EDGE_COMPLEX)
2456     return false;
2457
2458   lhs = gimple_assign_lhs (stmt);
2459   rhs = gimple_assign_rhs1 (stmt);
2460   if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2461       || TREE_CODE (rhs) != SSA_NAME
2462       || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2463     return false;
2464
2465   /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
2466   if (!single_imm_use (lhs, &use_p, &use_stmt))
2467     return false;
2468
2469   if (gimple_code (use_stmt) != GIMPLE_PHI
2470       || gimple_bb (use_stmt) != e->dest)
2471     return false;
2472
2473   /* And that the rhs is defined in the same loop.  */
2474   rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2475   if (rhs_bb == NULL
2476       || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2477     return false;
2478
2479   return true;
2480 }
2481
2482 /* Return true if BB is suitable basic block for inter-bb range test
2483    optimization.  If BACKWARD is true, BB should be the only predecessor
2484    of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2485    or compared with to find a common basic block to which all conditions
2486    branch to if true resp. false.  If BACKWARD is false, TEST_BB should
2487    be the only predecessor of BB.  */
2488
2489 static bool
2490 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2491                   bool backward)
2492 {
2493   edge_iterator ei, ei2;
2494   edge e, e2;
2495   gimple stmt;
2496   gimple_stmt_iterator gsi;
2497   bool other_edge_seen = false;
2498   bool is_cond;
2499
2500   if (test_bb == bb)
2501     return false;
2502   /* Check last stmt first.  */
2503   stmt = last_stmt (bb);
2504   if (stmt == NULL
2505       || (gimple_code (stmt) != GIMPLE_COND
2506           && (backward || !final_range_test_p (stmt)))
2507       || gimple_visited_p (stmt)
2508       || stmt_could_throw_p (stmt)
2509       || *other_bb == bb)
2510     return false;
2511   is_cond = gimple_code (stmt) == GIMPLE_COND;
2512   if (is_cond)
2513     {
2514       /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2515          goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2516          to *OTHER_BB (if not set yet, try to find it out).  */
2517       if (EDGE_COUNT (bb->succs) != 2)
2518         return false;
2519       FOR_EACH_EDGE (e, ei, bb->succs)
2520         {
2521           if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2522             return false;
2523           if (e->dest == test_bb)
2524             {
2525               if (backward)
2526                 continue;
2527               else
2528                 return false;
2529             }
2530           if (e->dest == bb)
2531             return false;
2532           if (*other_bb == NULL)
2533             {
2534               FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2535                 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2536                   return false;
2537                 else if (e->dest == e2->dest)
2538                   *other_bb = e->dest;
2539               if (*other_bb == NULL)
2540                 return false;
2541             }
2542           if (e->dest == *other_bb)
2543             other_edge_seen = true;
2544           else if (backward)
2545             return false;
2546         }
2547       if (*other_bb == NULL || !other_edge_seen)
2548         return false;
2549     }
2550   else if (single_succ (bb) != *other_bb)
2551     return false;
2552
2553   /* Now check all PHIs of *OTHER_BB.  */
2554   e = find_edge (bb, *other_bb);
2555   e2 = find_edge (test_bb, *other_bb);
2556   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2557     {
2558       gimple phi = gsi_stmt (gsi);
2559       /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2560          corresponding to BB and TEST_BB predecessor must be the same.  */
2561       if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2562                             gimple_phi_arg_def (phi, e2->dest_idx), 0))
2563         {
2564           /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2565              one of the PHIs should have the lhs of the last stmt in
2566              that block as PHI arg and that PHI should have 0 or 1
2567              corresponding to it in all other range test basic blocks
2568              considered.  */
2569           if (!is_cond)
2570             {
2571               if (gimple_phi_arg_def (phi, e->dest_idx)
2572                   == gimple_assign_lhs (stmt)
2573                   && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2574                       || integer_onep (gimple_phi_arg_def (phi,
2575                                                            e2->dest_idx))))
2576                 continue;
2577             }
2578           else
2579             {
2580               gimple test_last = last_stmt (test_bb);
2581               if (gimple_code (test_last) != GIMPLE_COND
2582                   && gimple_phi_arg_def (phi, e2->dest_idx)
2583                      == gimple_assign_lhs (test_last)
2584                   && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2585                       || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2586                 continue;
2587             }
2588
2589           return false;
2590         }
2591     }
2592   return true;
2593 }
2594
2595 /* Return true if BB doesn't have side-effects that would disallow
2596    range test optimization, all SSA_NAMEs set in the bb are consumed
2597    in the bb and there are no PHIs.  */
2598
2599 static bool
2600 no_side_effect_bb (basic_block bb)
2601 {
2602   gimple_stmt_iterator gsi;
2603   gimple last;
2604
2605   if (!gimple_seq_empty_p (phi_nodes (bb)))
2606     return false;
2607   last = last_stmt (bb);
2608   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2609     {
2610       gimple stmt = gsi_stmt (gsi);
2611       tree lhs;
2612       imm_use_iterator imm_iter;
2613       use_operand_p use_p;
2614
2615       if (is_gimple_debug (stmt))
2616         continue;
2617       if (gimple_has_side_effects (stmt))
2618         return false;
2619       if (stmt == last)
2620         return true;
2621       if (!is_gimple_assign (stmt))
2622         return false;
2623       lhs = gimple_assign_lhs (stmt);
2624       if (TREE_CODE (lhs) != SSA_NAME)
2625         return false;
2626       if (gimple_assign_rhs_could_trap_p (stmt))
2627         return false;
2628       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2629         {
2630           gimple use_stmt = USE_STMT (use_p);
2631           if (is_gimple_debug (use_stmt))
2632             continue;
2633           if (gimple_bb (use_stmt) != bb)
2634             return false;
2635         }
2636     }
2637   return false;
2638 }
2639
2640 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2641    return true and fill in *OPS recursively.  */
2642
2643 static bool
2644 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2645          struct loop *loop)
2646 {
2647   gimple stmt = SSA_NAME_DEF_STMT (var);
2648   tree rhs[2];
2649   int i;
2650
2651   if (!is_reassociable_op (stmt, code, loop))
2652     return false;
2653
2654   rhs[0] = gimple_assign_rhs1 (stmt);
2655   rhs[1] = gimple_assign_rhs2 (stmt);
2656   gimple_set_visited (stmt, true);
2657   for (i = 0; i < 2; i++)
2658     if (TREE_CODE (rhs[i]) == SSA_NAME
2659         && !get_ops (rhs[i], code, ops, loop)
2660         && has_single_use (rhs[i]))
2661       {
2662         operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2663
2664         oe->op = rhs[i];
2665         oe->rank = code;
2666         oe->id = 0;
2667         oe->count = 1;
2668         ops->safe_push (oe);
2669       }
2670   return true;
2671 }
2672
2673 /* Find the ops that were added by get_ops starting from VAR, see if
2674    they were changed during update_range_test and if yes, create new
2675    stmts.  */
2676
2677 static tree
2678 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2679             unsigned int *pidx, struct loop *loop)
2680 {
2681   gimple stmt = SSA_NAME_DEF_STMT (var);
2682   tree rhs[4];
2683   int i;
2684
2685   if (!is_reassociable_op (stmt, code, loop))
2686     return NULL;
2687
2688   rhs[0] = gimple_assign_rhs1 (stmt);
2689   rhs[1] = gimple_assign_rhs2 (stmt);
2690   rhs[2] = rhs[0];
2691   rhs[3] = rhs[1];
2692   for (i = 0; i < 2; i++)
2693     if (TREE_CODE (rhs[i]) == SSA_NAME)
2694       {
2695         rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2696         if (rhs[2 + i] == NULL_TREE)
2697           {
2698             if (has_single_use (rhs[i]))
2699               rhs[2 + i] = ops[(*pidx)++]->op;
2700             else
2701               rhs[2 + i] = rhs[i];
2702           }
2703       }
2704   if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2705       && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2706     {
2707       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2708       var = make_ssa_name (TREE_TYPE (var), NULL);
2709       gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
2710                                                var, rhs[2], rhs[3]);
2711       gimple_set_uid (g, gimple_uid (stmt));
2712       gimple_set_visited (g, true);
2713       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2714     }
2715   return var;
2716 }
2717
2718 /* Structure to track the initial value passed to get_ops and
2719    the range in the ops vector for each basic block.  */
2720
2721 struct inter_bb_range_test_entry
2722 {
2723   tree op;
2724   unsigned int first_idx, last_idx;
2725 };
2726
/* Inter-bb range test optimization.

   STMT is either a GIMPLE_COND or a cast satisfying final_range_test_p;
   for anything else, or if STMT could throw, the function returns
   without doing anything.  Starting from the basic block of STMT it
   determines a chain of basic blocks first_bb .. last_bb, each linked
   to the next through a single predecessor edge and each having an edge
   to a common block other_bb.  The range tests performed by those
   blocks are collected into one operand vector and handed to
   optimize_range_tests; if that reports a change, the statements and
   conditions the operands came from are rewritten accordingly.  */

static void
maybe_optimize_range_tests (gimple stmt)
{
  basic_block first_bb = gimple_bb (stmt);
  basic_block last_bb = first_bb;
  basic_block other_bb = NULL;
  basic_block bb;
  edge_iterator ei;
  edge e;
  /* Range test operands collected from all blocks of the chain.  */
  auto_vec<operand_entry_t> ops;
  /* One entry per block, pushed while walking from last_bb back to
     first_bb, describing the block's slice of OPS.  */
  auto_vec<inter_bb_range_test_entry> bbinfo;
  bool any_changes = false;

  /* Consider only basic blocks that end with GIMPLE_COND or
     a cast statement satisfying final_range_test_p.  All
     but the last bb in the first_bb .. last_bb range
     should end with GIMPLE_COND.  */
  if (gimple_code (stmt) == GIMPLE_COND)
    {
      if (EDGE_COUNT (first_bb->succs) != 2)
        return;
    }
  else if (final_range_test_p (stmt))
    other_bb = single_succ (first_bb);
  else
    return;

  if (stmt_could_throw_p (stmt))
    return;

  /* As relative ordering of post-dominator sons isn't fixed,
     maybe_optimize_range_tests can be called first on any
     bb in the range we want to optimize.  So, start searching
     backwards, if first_bb can be set to a predecessor.  */
  while (single_pred_p (first_bb))
    {
      basic_block pred_bb = single_pred (first_bb);
      if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
        break;
      if (!no_side_effect_bb (first_bb))
        break;
      first_bb = pred_bb;
    }
  /* If first_bb is last_bb, other_bb hasn't been computed yet.
     Before starting forward search in last_bb successors, find
     out the other_bb.  */
  if (first_bb == last_bb)
    {
      other_bb = NULL;
      /* As non-GIMPLE_COND last stmt always terminates the range,
         if the backward search didn't discover anything, just give up.  */
      if (gimple_code (stmt) != GIMPLE_COND)
        return;
      /* Look at both successors.  Either it ends with a GIMPLE_COND
         and satisfies suitable_cond_bb, or ends with a cast and
         other_bb is that cast's successor.  */
      FOR_EACH_EDGE (e, ei, first_bb->succs)
        if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
            || e->dest == first_bb)
          return;
        else if (single_pred_p (e->dest))
          {
            stmt = last_stmt (e->dest);
            if (stmt
                && gimple_code (stmt) == GIMPLE_COND
                && EDGE_COUNT (e->dest->succs) == 2)
              {
                if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
                  break;
                else
                  other_bb = NULL;
              }
            else if (stmt
                     && final_range_test_p (stmt)
                     && find_edge (first_bb, single_succ (e->dest)))
              {
                other_bb = single_succ (e->dest);
                if (other_bb == first_bb)
                  other_bb = NULL;
              }
          }
      if (other_bb == NULL)
        return;
    }
  /* Now do the forward search, moving last_bb to successor bbs
     that aren't other_bb.  */
  while (EDGE_COUNT (last_bb->succs) == 2)
    {
      /* Pick the successor edge that does not lead to other_bb.  */
      FOR_EACH_EDGE (e, ei, last_bb->succs)
        if (e->dest != other_bb)
          break;
      if (e == NULL)
        break;
      if (!single_pred_p (e->dest))
        break;
      if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
        break;
      if (!no_side_effect_bb (e->dest))
        break;
      last_bb = e->dest;
    }
  /* A single block leaves nothing to combine across blocks.  */
  if (first_bb == last_bb)
    return;
  /* Here basic blocks first_bb through last_bb's predecessor
     end with GIMPLE_COND, all of them have one of the edges to
     other_bb and another to another block in the range,
     all blocks except first_bb don't have side-effects and
     last_bb ends with either GIMPLE_COND, or cast satisfying
     final_range_test_p.  */
  for (bb = last_bb; ; bb = single_pred (bb))
    {
      enum tree_code code;
      tree lhs, rhs;
      inter_bb_range_test_entry bb_ent;

      /* Start with an empty slice located at the current end of OPS.  */
      bb_ent.op = NULL_TREE;
      bb_ent.first_idx = ops.length ();
      bb_ent.last_idx = bb_ent.first_idx;
      e = find_edge (bb, other_bb);
      stmt = last_stmt (bb);
      gimple_set_visited (stmt, true);
      if (gimple_code (stmt) != GIMPLE_COND)
        {
          use_operand_p use_p;
          gimple phi;
          edge e2;
          unsigned int d;

          lhs = gimple_assign_lhs (stmt);
          rhs = gimple_assign_rhs1 (stmt);
          gcc_assert (bb == last_bb);

          /* stmt is
             _123 = (int) _234;

             followed by:
             <bb M>:
             # _345 = PHI <_123(N), 1(...), 1(...)>

             or 0 instead of 1.  If it is 0, the _234
             range test is anded together with all the
             other range tests, if it is 1, it is ored with
             them.  */
          single_imm_use (lhs, &use_p, &phi);
          gcc_assert (gimple_code (phi) == GIMPLE_PHI);
          /* The PHI argument on the first_bb -> other_bb edge tells
             which constant the other blocks contribute.  */
          e2 = find_edge (first_bb, other_bb);
          d = e2->dest_idx;
          gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
          if (integer_zerop (gimple_phi_arg_def (phi, d)))
            code = BIT_AND_EXPR;
          else
            {
              gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
              code = BIT_IOR_EXPR;
            }

          /* If _234 SSA_NAME_DEF_STMT is
             _234 = _567 | _789;
             (or &, corresponding to 1/0 in the phi arguments),
             push into ops the individual range test arguments
             of the bitwise or resp. and, recursively.  */
          if (!get_ops (rhs, code, &ops,
                        loop_containing_stmt (stmt))
              && has_single_use (rhs))
            {
              /* Otherwise, push the _234 range test itself.  */
              operand_entry_t oe
                = (operand_entry_t) pool_alloc (operand_entry_pool);

              oe->op = rhs;
              oe->rank = code;
              oe->id = 0;
              oe->count = 1;
              ops.safe_push (oe);
              bb_ent.last_idx++;
            }
          else
            bb_ent.last_idx = ops.length ();
          bb_ent.op = rhs;
          bbinfo.safe_push (bb_ent);
          /* This block is last_bb, which differs from first_bb here,
             so simply step to the predecessor.  */
          continue;
        }
      /* Otherwise stmt is GIMPLE_COND.  */
      code = gimple_cond_code (stmt);
      lhs = gimple_cond_lhs (stmt);
      rhs = gimple_cond_rhs (stmt);
      if (TREE_CODE (lhs) == SSA_NAME
          && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
          && ((code != EQ_EXPR && code != NE_EXPR)
              || rhs != boolean_false_node
                 /* Either push into ops the individual bitwise
                    or resp. and operands, depending on which
                    edge is other_bb.  */
              || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
                                 ^ (code == EQ_EXPR))
                                ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
                           loop_containing_stmt (stmt))))
        {
          /* Or push the GIMPLE_COND stmt itself.  */
          operand_entry_t oe
            = (operand_entry_t) pool_alloc (operand_entry_pool);

          oe->op = NULL;
          oe->rank = (e->flags & EDGE_TRUE_VALUE)
                     ? BIT_IOR_EXPR : BIT_AND_EXPR;
          /* oe->op = NULL signals that there is no SSA_NAME
             for the range test, and oe->id instead is the
             number of the basic block at whose end the
             GIMPLE_COND is.  */
          oe->id = bb->index;
          oe->count = 1;
          ops.safe_push (oe);
          bb_ent.op = NULL;
          bb_ent.last_idx++;
        }
      else if (ops.length () > bb_ent.first_idx)
        {
          /* get_ops pushed at least one operand for LHS.  */
          bb_ent.op = lhs;
          bb_ent.last_idx = ops.length ();
        }
      bbinfo.safe_push (bb_ent);
      if (bb == first_bb)
        break;
    }
  /* A single operand cannot be combined with anything.  */
  if (ops.length () > 1)
    any_changes = optimize_range_tests (ERROR_MARK, &ops);
  if (any_changes)
    {
      unsigned int idx;
      /* update_ops relies on has_single_use predicates returning the
         same values as it did during get_ops earlier.  Additionally it
         never removes statements, only adds new ones and it should walk
         from the single imm use and check the predicate already before
         making those changes.
         On the other side, the handling of GIMPLE_COND directly can turn
         previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
         it needs to be done in a separate loop afterwards.  */
      /* First pass: blocks whose operands came from an SSA_NAME.  The
         walk visits the blocks in the order their bbinfo entries were
         pushed, so IDX indexes the entry of BB.  */
      for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
        {
          if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
              && bbinfo[idx].op != NULL_TREE)
            {
              tree new_op;

              stmt = last_stmt (bb);
              /* update_ops advances bbinfo[idx].first_idx past every
                 operand entry it consumes.  */
              new_op = update_ops (bbinfo[idx].op,
                                   (enum tree_code)
                                   ops[bbinfo[idx].first_idx]->rank,
                                   ops, &bbinfo[idx].first_idx,
                                   loop_containing_stmt (stmt));
              if (new_op == NULL_TREE)
                {
                  /* The value itself had been pushed as the single
                     operand of this block.  */
                  gcc_assert (bb == last_bb);
                  new_op = ops[bbinfo[idx].first_idx++]->op;
                }
              if (bbinfo[idx].op != new_op)
                {
                  imm_use_iterator iter;
                  use_operand_p use_p;
                  gimple use_stmt, cast_stmt = NULL;

                  /* Redirect GIMPLE_COND and PHI uses of the old value
                     to NEW_OP; a cast use is remembered and handled
                     below.  */
                  FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
                    if (is_gimple_debug (use_stmt))
                      continue;
                    else if (gimple_code (use_stmt) == GIMPLE_COND
                             || gimple_code (use_stmt) == GIMPLE_PHI)
                      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
                        SET_USE (use_p, new_op);
                    else if (gimple_assign_cast_p (use_stmt))
                      cast_stmt = use_stmt;
                    else
                      gcc_unreachable ();
                  if (cast_stmt)
                    {
                      /* Build a replacement for the cast with a fresh
                         lhs in front of the old cast and move the
                         GIMPLE_COND and PHI uses of the old lhs over
                         to it.  */
                      gcc_assert (bb == last_bb);
                      tree lhs = gimple_assign_lhs (cast_stmt);
                      tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
                      enum tree_code rhs_code
                        = gimple_assign_rhs_code (cast_stmt);
                      gimple g;
                      if (is_gimple_min_invariant (new_op))
                        {
                          new_op = fold_convert (TREE_TYPE (lhs), new_op);
                          g = gimple_build_assign (new_lhs, new_op);
                        }
                      else
                        g = gimple_build_assign_with_ops (rhs_code, new_lhs,
                                                          new_op, NULL_TREE);
                      gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
                      gimple_set_uid (g, gimple_uid (cast_stmt));
                      gimple_set_visited (g, true);
                      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
                      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
                        if (is_gimple_debug (use_stmt))
                          continue;
                        else if (gimple_code (use_stmt) == GIMPLE_COND
                                 || gimple_code (use_stmt) == GIMPLE_PHI)
                          FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
                            SET_USE (use_p, new_lhs);
                        else
                          gcc_unreachable ();
                    }
                }
            }
          if (bb == first_bb)
            break;
        }
      /* Second pass: blocks whose GIMPLE_COND was pushed itself and
         whose operand entry now carries a value.  */
      for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
        {
          if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
              && bbinfo[idx].op == NULL_TREE
              && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
            {
              stmt = last_stmt (bb);
              /* Constants make the condition unconditionally false or
                 true; anything else becomes "op != false".  */
              if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
                gimple_cond_make_false (stmt);
              else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
                gimple_cond_make_true (stmt);
              else
                {
                  gimple_cond_set_code (stmt, NE_EXPR);
                  gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
                  gimple_cond_set_rhs (stmt, boolean_false_node);
                }
              update_stmt (stmt);
            }
          if (bb == first_bb)
            break;
        }
    }
}
3060
3061 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3062    of STMT in it's operands.  This is also known as a "destructive
3063    update" operation.  */
3064
3065 static bool
3066 is_phi_for_stmt (gimple stmt, tree operand)
3067 {
3068   gimple def_stmt;
3069   tree lhs;
3070   use_operand_p arg_p;
3071   ssa_op_iter i;
3072
3073   if (TREE_CODE (operand) != SSA_NAME)
3074     return false;
3075
3076   lhs = gimple_assign_lhs (stmt);
3077
3078   def_stmt = SSA_NAME_DEF_STMT (operand);
3079   if (gimple_code (def_stmt) != GIMPLE_PHI)
3080     return false;
3081
3082   FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
3083     if (lhs == USE_FROM_PTR (arg_p))
3084       return true;
3085   return false;
3086 }
3087
3088 /* Remove def stmt of VAR if VAR has zero uses and recurse
3089    on rhs1 operand if so.  */
3090
3091 static void
3092 remove_visited_stmt_chain (tree var)
3093 {
3094   gimple stmt;
3095   gimple_stmt_iterator gsi;
3096
3097   while (1)
3098     {
3099       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3100         return;
3101       stmt = SSA_NAME_DEF_STMT (var);
3102       if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3103         {
3104           var = gimple_assign_rhs1 (stmt);
3105           gsi = gsi_for_stmt (stmt);
3106           reassoc_remove_stmt (&gsi);
3107           release_defs (stmt);
3108         }
3109       else
3110         return;
3111     }
3112 }
3113
3114 /* This function checks three consequtive operands in
3115    passed operands vector OPS starting from OPINDEX and
3116    swaps two operands if it is profitable for binary operation
3117    consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
3118
3119    We pair ops with the same rank if possible.
3120
3121    The alternative we try is to see if STMT is a destructive
3122    update style statement, which is like:
3123    b = phi (a, ...)
3124    a = c + b;
3125    In that case, we want to use the destructive update form to
3126    expose the possible vectorizer sum reduction opportunity.
3127    In that case, the third operand will be the phi node. This
3128    check is not performed if STMT is null.
3129
3130    We could, of course, try to be better as noted above, and do a
3131    lot of work to try to find these opportunities in >3 operand
3132    cases, but it is unlikely to be worth it.  */
3133
3134 static void
3135 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3136                           unsigned int opindex, gimple stmt)
3137 {
3138   operand_entry_t oe1, oe2, oe3;
3139
3140   oe1 = ops[opindex];
3141   oe2 = ops[opindex + 1];
3142   oe3 = ops[opindex + 2];
3143
3144   if ((oe1->rank == oe2->rank
3145        && oe2->rank != oe3->rank)
3146       || (stmt && is_phi_for_stmt (stmt, oe3->op)
3147           && !is_phi_for_stmt (stmt, oe1->op)
3148           && !is_phi_for_stmt (stmt, oe2->op)))
3149     {
3150       struct operand_entry temp = *oe3;
3151       oe3->op = oe1->op;
3152       oe3->rank = oe1->rank;
3153       oe1->op = temp.op;
3154       oe1->rank= temp.rank;
3155     }
3156   else if ((oe1->rank == oe3->rank
3157             && oe2->rank != oe3->rank)
3158            || (stmt && is_phi_for_stmt (stmt, oe2->op)
3159                && !is_phi_for_stmt (stmt, oe1->op)
3160                && !is_phi_for_stmt (stmt, oe3->op)))
3161     {
3162       struct operand_entry temp = *oe2;
3163       oe2->op = oe1->op;
3164       oe2->rank = oe1->rank;
3165       oe1->op = temp.op;
3166       oe1->rank = temp.rank;
3167     }
3168 }
3169
3170 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3171    two definitions, otherwise return STMT.  */
3172
3173 static inline gimple
3174 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3175 {
3176   if (TREE_CODE (rhs1) == SSA_NAME
3177       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3178     stmt = SSA_NAME_DEF_STMT (rhs1);
3179   if (TREE_CODE (rhs2) == SSA_NAME
3180       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3181     stmt = SSA_NAME_DEF_STMT (rhs2);
3182   return stmt;
3183 }
3184
3185 /* Recursively rewrite our linearized statements so that the operators
3186    match those in OPS[OPINDEX], putting the computation in rank
3187    order.  Return new lhs.  */
3188
3189 static tree
3190 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3191                    vec<operand_entry_t> ops, bool changed)
3192 {
3193   tree rhs1 = gimple_assign_rhs1 (stmt);
3194   tree rhs2 = gimple_assign_rhs2 (stmt);
3195   tree lhs = gimple_assign_lhs (stmt);
3196   operand_entry_t oe;
3197
3198   /* The final recursion case for this function is that you have
3199      exactly two operations left.
3200      If we had one exactly one op in the entire list to start with, we
3201      would have never called this function, and the tail recursion
3202      rewrites them one at a time.  */
3203   if (opindex + 2 == ops.length ())
3204     {
3205       operand_entry_t oe1, oe2;
3206
3207       oe1 = ops[opindex];
3208       oe2 = ops[opindex + 1];
3209
3210       if (rhs1 != oe1->op || rhs2 != oe2->op)
3211         {
3212           gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3213           unsigned int uid = gimple_uid (stmt);
3214
3215           if (dump_file && (dump_flags & TDF_DETAILS))
3216             {
3217               fprintf (dump_file, "Transforming ");
3218               print_gimple_stmt (dump_file, stmt, 0, 0);
3219             }
3220
3221           if (changed)
3222             {
3223               gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3224               lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3225               stmt
3226                 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3227                                                 lhs, oe1->op, oe2->op);
3228               gimple_set_uid (stmt, uid);
3229               gimple_set_visited (stmt, true);
3230               if (insert_point == gsi_stmt (gsi))
3231                 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3232               else
3233                 insert_stmt_after (stmt, insert_point);
3234             }
3235           else
3236             {
3237               gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3238                                    == stmt);
3239               gimple_assign_set_rhs1 (stmt, oe1->op);
3240               gimple_assign_set_rhs2 (stmt, oe2->op);
3241               update_stmt (stmt);
3242             }
3243
3244           if (rhs1 != oe1->op && rhs1 != oe2->op)
3245             remove_visited_stmt_chain (rhs1);
3246
3247           if (dump_file && (dump_flags & TDF_DETAILS))
3248             {
3249               fprintf (dump_file, " into ");
3250               print_gimple_stmt (dump_file, stmt, 0, 0);
3251             }
3252         }
3253       return lhs;
3254     }
3255
3256   /* If we hit here, we should have 3 or more ops left.  */
3257   gcc_assert (opindex + 2 < ops.length ());
3258
3259   /* Rewrite the next operator.  */
3260   oe = ops[opindex];
3261
3262   /* Recurse on the LHS of the binary operator, which is guaranteed to
3263      be the non-leaf side.  */
3264   tree new_rhs1
3265     = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3266                          changed || oe->op != rhs2);
3267
3268   if (oe->op != rhs2 || new_rhs1 != rhs1)
3269     {
3270       if (dump_file && (dump_flags & TDF_DETAILS))
3271         {
3272           fprintf (dump_file, "Transforming ");
3273           print_gimple_stmt (dump_file, stmt, 0, 0);
3274         }
3275
3276       /* If changed is false, this is either opindex == 0
3277          or all outer rhs2's were equal to corresponding oe->op,
3278          and powi_result is NULL.
3279          That means lhs is equivalent before and after reassociation.
3280          Otherwise ensure the old lhs SSA_NAME is not reused and
3281          create a new stmt as well, so that any debug stmts will be
3282          properly adjusted.  */
3283       if (changed)
3284         {
3285           gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3286           unsigned int uid = gimple_uid (stmt);
3287           gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3288
3289           lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3290           stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3291                                                lhs, new_rhs1, oe->op);
3292           gimple_set_uid (stmt, uid);
3293           gimple_set_visited (stmt, true);
3294           if (insert_point == gsi_stmt (gsi))
3295             gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3296           else
3297             insert_stmt_after (stmt, insert_point);
3298         }
3299       else
3300         {
3301           gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3302                                == stmt);
3303           gimple_assign_set_rhs1 (stmt, new_rhs1);
3304           gimple_assign_set_rhs2 (stmt, oe->op);
3305           update_stmt (stmt);
3306         }
3307
3308       if (dump_file && (dump_flags & TDF_DETAILS))
3309         {
3310           fprintf (dump_file, " into ");
3311           print_gimple_stmt (dump_file, stmt, 0, 0);
3312         }
3313     }
3314   return lhs;
3315 }
3316
3317 /* Find out how many cycles we need to compute statements chain.
3318    OPS_NUM holds number os statements in a chain.  CPU_WIDTH is a
3319    maximum number of independent statements we may execute per cycle.  */
3320
3321 static int
3322 get_required_cycles (int ops_num, int cpu_width)
3323 {
3324   int res;
3325   int elog;
3326   unsigned int rest;
3327
3328   /* While we have more than 2 * cpu_width operands
3329      we may reduce number of operands by cpu_width
3330      per cycle.  */
3331   res = ops_num / (2 * cpu_width);
3332
3333   /* Remained operands count may be reduced twice per cycle
3334      until we have only one operand.  */
3335   rest = (unsigned)(ops_num - res * cpu_width);
3336   elog = exact_log2 (rest);
3337   if (elog >= 0)
3338     res += elog;
3339   else
3340     res += floor_log2 (rest) + 1;
3341
3342   return res;
3343 }
3344
3345 /* Returns an optimal number of registers to use for computation of
3346    given statements.  */
3347
3348 static int
3349 get_reassociation_width (int ops_num, enum tree_code opc,
3350                          enum machine_mode mode)
3351 {
3352   int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3353   int width;
3354   int width_min;
3355   int cycles_best;
3356
3357   if (param_width > 0)
3358     width = param_width;
3359   else
3360     width = targetm.sched.reassociation_width (opc, mode);
3361
3362   if (width == 1)
3363     return width;
3364
3365   /* Get the minimal time required for sequence computation.  */
3366   cycles_best = get_required_cycles (ops_num, width);
3367
3368   /* Check if we may use less width and still compute sequence for
3369      the same time.  It will allow us to reduce registers usage.
3370      get_required_cycles is monotonically increasing with lower width
3371      so we can perform a binary search for the minimal width that still
3372      results in the optimal cycle count.  */
3373   width_min = 1;
3374   while (width > width_min)
3375     {
3376       int width_mid = (width + width_min) / 2;
3377
3378       if (get_required_cycles (ops_num, width_mid) == cycles_best)
3379         width = width_mid;
3380       else if (width_min < width_mid)
3381         width_min = width_mid;
3382       else
3383         break;
3384     }
3385
3386   return width;
3387 }
3388
3389 /* Recursively rewrite our linearized statements so that the operators
3390    match those in OPS[OPINDEX], putting the computation in rank
3391    order and trying to allow operations to be executed in
3392    parallel.  */
3393
3394 static void
3395 rewrite_expr_tree_parallel (gimple stmt, int width,
3396                             vec<operand_entry_t> ops)
3397 {
3398   enum tree_code opcode = gimple_assign_rhs_code (stmt);
3399   int op_num = ops.length ();
3400   int stmt_num = op_num - 1;
3401   gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3402   int op_index = op_num - 1;
3403   int stmt_index = 0;
3404   int ready_stmts_end = 0;
3405   int i = 0;
3406   tree last_rhs1 = gimple_assign_rhs1 (stmt);
3407
3408   /* We start expression rewriting from the top statements.
3409      So, in this loop we create a full list of statements
3410      we will work with.  */
3411   stmts[stmt_num - 1] = stmt;
3412   for (i = stmt_num - 2; i >= 0; i--)
3413     stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3414
3415   for (i = 0; i < stmt_num; i++)
3416     {
3417       tree op1, op2;
3418
3419       /* Determine whether we should use results of
3420          already handled statements or not.  */
3421       if (ready_stmts_end == 0
3422           && (i - stmt_index >= width || op_index < 1))
3423         ready_stmts_end = i;
3424
3425       /* Now we choose operands for the next statement.  Non zero
3426          value in ready_stmts_end means here that we should use
3427          the result of already generated statements as new operand.  */
3428       if (ready_stmts_end > 0)
3429         {
3430           op1 = gimple_assign_lhs (stmts[stmt_index++]);
3431           if (ready_stmts_end > stmt_index)
3432             op2 = gimple_assign_lhs (stmts[stmt_index++]);
3433           else if (op_index >= 0)
3434             op2 = ops[op_index--]->op;
3435           else
3436             {
3437               gcc_assert (stmt_index < i);
3438               op2 = gimple_assign_lhs (stmts[stmt_index++]);
3439             }
3440
3441           if (stmt_index >= ready_stmts_end)
3442             ready_stmts_end = 0;
3443         }
3444       else
3445         {
3446           if (op_index > 1)
3447             swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3448           op2 = ops[op_index--]->op;
3449           op1 = ops[op_index--]->op;
3450         }
3451
3452       /* If we emit the last statement then we should put
3453          operands into the last statement.  It will also
3454          break the loop.  */
3455       if (op_index < 0 && stmt_index == i)
3456         i = stmt_num - 1;
3457
3458       if (dump_file && (dump_flags & TDF_DETAILS))
3459         {
3460           fprintf (dump_file, "Transforming ");
3461           print_gimple_stmt (dump_file, stmts[i], 0, 0);
3462         }
3463
3464       /* We keep original statement only for the last one.  All
3465          others are recreated.  */
3466       if (i == stmt_num - 1)
3467         {
3468           gimple_assign_set_rhs1 (stmts[i], op1);
3469           gimple_assign_set_rhs2 (stmts[i], op2);
3470           update_stmt (stmts[i]);
3471         }
3472       else
3473         stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3474
3475       if (dump_file && (dump_flags & TDF_DETAILS))
3476         {
3477           fprintf (dump_file, " into ");
3478           print_gimple_stmt (dump_file, stmts[i], 0, 0);
3479         }
3480     }
3481
3482   remove_visited_stmt_chain (last_rhs1);
3483 }
3484
3485 /* Transform STMT, which is really (A +B) + (C + D) into the left
3486    linear form, ((A+B)+C)+D.
3487    Recurse on D if necessary.  */
3488
3489 static void
3490 linearize_expr (gimple stmt)
3491 {
3492   gimple_stmt_iterator gsi;
3493   gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3494   gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3495   gimple oldbinrhs = binrhs;
3496   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3497   gimple newbinrhs = NULL;
3498   struct loop *loop = loop_containing_stmt (stmt);
3499   tree lhs = gimple_assign_lhs (stmt);
3500
3501   gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3502               && is_reassociable_op (binrhs, rhscode, loop));
3503
3504   gsi = gsi_for_stmt (stmt);
3505
3506   gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3507   binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
3508                                          make_ssa_name (TREE_TYPE (lhs), NULL),
3509                                          gimple_assign_lhs (binlhs),
3510                                          gimple_assign_rhs2 (binrhs));
3511   gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3512   gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3513   gimple_set_uid (binrhs, gimple_uid (stmt));
3514
3515   if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3516     newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3517
3518   if (dump_file && (dump_flags & TDF_DETAILS))
3519     {
3520       fprintf (dump_file, "Linearized: ");
3521       print_gimple_stmt (dump_file, stmt, 0, 0);
3522     }
3523
3524   reassociate_stats.linearized++;
3525   update_stmt (stmt);
3526
3527   gsi = gsi_for_stmt (oldbinrhs);
3528   reassoc_remove_stmt (&gsi);
3529   release_defs (oldbinrhs);
3530
3531   gimple_set_visited (stmt, true);
3532   gimple_set_visited (binlhs, true);
3533   gimple_set_visited (binrhs, true);
3534
3535   /* Tail recurse on the new rhs if it still needs reassociation.  */
3536   if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3537     /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3538            want to change the algorithm while converting to tuples.  */
3539     linearize_expr (stmt);
3540 }
3541
3542 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3543    it.  Otherwise, return NULL.  */
3544
3545 static gimple
3546 get_single_immediate_use (tree lhs)
3547 {
3548   use_operand_p immuse;
3549   gimple immusestmt;
3550
3551   if (TREE_CODE (lhs) == SSA_NAME
3552       && single_imm_use (lhs, &immuse, &immusestmt)
3553       && is_gimple_assign (immusestmt))
3554     return immusestmt;
3555
3556   return NULL;
3557 }
3558
3559 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3560    representing the negated value.  Insertions of any necessary
3561    instructions go before GSI.
3562    This function is recursive in that, if you hand it "a_5" as the
3563    value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3564    transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
3565
3566 static tree
3567 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3568 {
3569   gimple negatedefstmt = NULL;
3570   tree resultofnegate;
3571   gimple_stmt_iterator gsi;
3572   unsigned int uid;
3573
3574   /* If we are trying to negate a name, defined by an add, negate the
3575      add operands instead.  */
3576   if (TREE_CODE (tonegate) == SSA_NAME)
3577     negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3578   if (TREE_CODE (tonegate) == SSA_NAME
3579       && is_gimple_assign (negatedefstmt)
3580       && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3581       && has_single_use (gimple_assign_lhs (negatedefstmt))
3582       && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3583     {
3584       tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3585       tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3586       tree lhs = gimple_assign_lhs (negatedefstmt);
3587       gimple g;
3588
3589       gsi = gsi_for_stmt (negatedefstmt);
3590       rhs1 = negate_value (rhs1, &gsi);
3591
3592       gsi = gsi_for_stmt (negatedefstmt);
3593       rhs2 = negate_value (rhs2, &gsi);
3594
3595       gsi = gsi_for_stmt (negatedefstmt);
3596       lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3597       gimple_set_visited (negatedefstmt, true);
3598       g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
3599       gimple_set_uid (g, gimple_uid (negatedefstmt));
3600       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3601       return lhs;
3602     }
3603
3604   tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3605   resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3606                                              NULL_TREE, true, GSI_SAME_STMT);
3607   gsi = *gsip;
3608   uid = gimple_uid (gsi_stmt (gsi));
3609   for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3610     {
3611       gimple stmt = gsi_stmt (gsi);
3612       if (gimple_uid (stmt) != 0)
3613         break;
3614       gimple_set_uid (stmt, uid);
3615     }
3616   return resultofnegate;
3617 }
3618
3619 /* Return true if we should break up the subtract in STMT into an add
3620    with negate.  This is true when we the subtract operands are really
3621    adds, or the subtract itself is used in an add expression.  In
3622    either case, breaking up the subtract into an add with negate
3623    exposes the adds to reassociation.  */
3624
3625 static bool
3626 should_break_up_subtract (gimple stmt)
3627 {
3628   tree lhs = gimple_assign_lhs (stmt);
3629   tree binlhs = gimple_assign_rhs1 (stmt);
3630   tree binrhs = gimple_assign_rhs2 (stmt);
3631   gimple immusestmt;
3632   struct loop *loop = loop_containing_stmt (stmt);
3633
3634   if (TREE_CODE (binlhs) == SSA_NAME
3635       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3636     return true;
3637
3638   if (TREE_CODE (binrhs) == SSA_NAME
3639       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3640     return true;
3641
3642   if (TREE_CODE (lhs) == SSA_NAME
3643       && (immusestmt = get_single_immediate_use (lhs))
3644       && is_gimple_assign (immusestmt)
3645       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3646           ||  gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3647     return true;
3648   return false;
3649 }
3650
3651 /* Transform STMT from A - B into A + -B.  */
3652
3653 static void
3654 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3655 {
3656   tree rhs1 = gimple_assign_rhs1 (stmt);
3657   tree rhs2 = gimple_assign_rhs2 (stmt);
3658
3659   if (dump_file && (dump_flags & TDF_DETAILS))
3660     {
3661       fprintf (dump_file, "Breaking up subtract ");
3662       print_gimple_stmt (dump_file, stmt, 0, 0);
3663     }
3664
3665   rhs2 = negate_value (rhs2, gsip);
3666   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3667   update_stmt (stmt);
3668 }
3669
3670 /* Determine whether STMT is a builtin call that raises an SSA name
3671    to an integer power and has only one use.  If so, and this is early
3672    reassociation and unsafe math optimizations are permitted, place
3673    the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3674    If any of these conditions does not hold, return FALSE.  */
3675
3676 static bool
3677 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3678 {
3679   tree fndecl, arg1;
3680   REAL_VALUE_TYPE c, cint;
3681
3682   if (!first_pass_instance
3683       || !flag_unsafe_math_optimizations
3684       || !is_gimple_call (stmt)
3685       || !has_single_use (gimple_call_lhs (stmt)))
3686     return false;
3687
3688   fndecl = gimple_call_fndecl (stmt);
3689
3690   if (!fndecl
3691       || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3692     return false;
3693
3694   switch (DECL_FUNCTION_CODE (fndecl))
3695     {
3696     CASE_FLT_FN (BUILT_IN_POW):
3697       *base = gimple_call_arg (stmt, 0);
3698       arg1 = gimple_call_arg (stmt, 1);
3699
3700       if (TREE_CODE (arg1) != REAL_CST)
3701         return false;
3702
3703       c = TREE_REAL_CST (arg1);
3704
3705       if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3706         return false;
3707
3708       *exponent = real_to_integer (&c);
3709       real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
3710       if (!real_identical (&c, &cint))
3711         return false;
3712
3713       break;
3714
3715     CASE_FLT_FN (BUILT_IN_POWI):
3716       *base = gimple_call_arg (stmt, 0);
3717       arg1 = gimple_call_arg (stmt, 1);
3718
3719       if (!tree_fits_shwi_p (arg1))
3720         return false;
3721
3722       *exponent = tree_to_shwi (arg1);
3723       break;
3724
3725     default:
3726       return false;
3727     }
3728
3729   /* Expanding negative exponents is generally unproductive, so we don't
3730      complicate matters with those.  Exponents of zero and one should
3731      have been handled by expression folding.  */
3732   if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3733     return false;
3734
3735   return true;
3736 }
3737
3738 /* Recursively linearize a binary expression that is the RHS of STMT.
3739    Place the operands of the expression tree in the vector named OPS.  */
3740
3741 static void
3742 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3743                      bool is_associative, bool set_visited)
3744 {
3745   tree binlhs = gimple_assign_rhs1 (stmt);
3746   tree binrhs = gimple_assign_rhs2 (stmt);
3747   gimple binlhsdef = NULL, binrhsdef = NULL;
3748   bool binlhsisreassoc = false;
3749   bool binrhsisreassoc = false;
3750   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3751   struct loop *loop = loop_containing_stmt (stmt);
3752   tree base = NULL_TREE;
3753   HOST_WIDE_INT exponent = 0;
3754
3755   if (set_visited)
3756     gimple_set_visited (stmt, true);
3757
3758   if (TREE_CODE (binlhs) == SSA_NAME)
3759     {
3760       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3761       binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3762                          && !stmt_could_throw_p (binlhsdef));
3763     }
3764
3765   if (TREE_CODE (binrhs) == SSA_NAME)
3766     {
3767       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3768       binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3769                          && !stmt_could_throw_p (binrhsdef));
3770     }
3771
3772   /* If the LHS is not reassociable, but the RHS is, we need to swap
3773      them.  If neither is reassociable, there is nothing we can do, so
3774      just put them in the ops vector.  If the LHS is reassociable,
3775      linearize it.  If both are reassociable, then linearize the RHS
3776      and the LHS.  */
3777
3778   if (!binlhsisreassoc)
3779     {
3780       tree temp;
3781
3782       /* If this is not a associative operation like division, give up.  */
3783       if (!is_associative)
3784         {
3785           add_to_ops_vec (ops, binrhs);
3786           return;
3787         }
3788
3789       if (!binrhsisreassoc)
3790         {
3791           if (rhscode == MULT_EXPR
3792               && TREE_CODE (binrhs) == SSA_NAME
3793               && acceptable_pow_call (binrhsdef, &base, &exponent))
3794             {
3795               add_repeat_to_ops_vec (ops, base, exponent);
3796               gimple_set_visited (binrhsdef, true);
3797             }
3798           else
3799             add_to_ops_vec (ops, binrhs);
3800
3801           if (rhscode == MULT_EXPR
3802               && TREE_CODE (binlhs) == SSA_NAME
3803               && acceptable_pow_call (binlhsdef, &base, &exponent))
3804             {
3805               add_repeat_to_ops_vec (ops, base, exponent);
3806               gimple_set_visited (binlhsdef, true);
3807             }
3808           else
3809             add_to_ops_vec (ops, binlhs);
3810
3811           return;
3812         }
3813
3814       if (dump_file && (dump_flags & TDF_DETAILS))
3815         {
3816           fprintf (dump_file, "swapping operands of ");
3817           print_gimple_stmt (dump_file, stmt, 0, 0);
3818         }
3819
3820       swap_ssa_operands (stmt,
3821                          gimple_assign_rhs1_ptr (stmt),
3822                          gimple_assign_rhs2_ptr (stmt));
3823       update_stmt (stmt);
3824
3825       if (dump_file && (dump_flags & TDF_DETAILS))
3826         {
3827           fprintf (dump_file, " is now ");
3828           print_gimple_stmt (dump_file, stmt, 0, 0);
3829         }
3830
3831       /* We want to make it so the lhs is always the reassociative op,
3832          so swap.  */
3833       temp = binlhs;
3834       binlhs = binrhs;
3835       binrhs = temp;
3836     }
3837   else if (binrhsisreassoc)
3838     {
3839       linearize_expr (stmt);
3840       binlhs = gimple_assign_rhs1 (stmt);
3841       binrhs = gimple_assign_rhs2 (stmt);
3842     }
3843
3844   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3845               || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3846                                       rhscode, loop));
3847   linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3848                        is_associative, set_visited);
3849
3850   if (rhscode == MULT_EXPR
3851       && TREE_CODE (binrhs) == SSA_NAME
3852       && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3853     {
3854       add_repeat_to_ops_vec (ops, base, exponent);
3855       gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3856     }
3857   else
3858     add_to_ops_vec (ops, binrhs);
3859 }
3860
3861 /* Repropagate the negates back into subtracts, since no other pass
3862    currently does it.  */
3863
3864 static void
3865 repropagate_negates (void)
3866 {
3867   unsigned int i = 0;
3868   tree negate;
3869
3870   FOR_EACH_VEC_ELT (plus_negates, i, negate)
3871     {
3872       gimple user = get_single_immediate_use (negate);
3873
3874       if (!user || !is_gimple_assign (user))
3875         continue;
3876
3877       /* The negate operand can be either operand of a PLUS_EXPR
3878          (it can be the LHS if the RHS is a constant for example).
3879
3880          Force the negate operand to the RHS of the PLUS_EXPR, then
3881          transform the PLUS_EXPR into a MINUS_EXPR.  */
3882       if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3883         {
3884           /* If the negated operand appears on the LHS of the
3885              PLUS_EXPR, exchange the operands of the PLUS_EXPR
3886              to force the negated operand to the RHS of the PLUS_EXPR.  */
3887           if (gimple_assign_rhs1 (user) == negate)
3888             {
3889               swap_ssa_operands (user,
3890                                  gimple_assign_rhs1_ptr (user),
3891                                  gimple_assign_rhs2_ptr (user));
3892             }
3893
3894           /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3895              the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
3896           if (gimple_assign_rhs2 (user) == negate)
3897             {
3898               tree rhs1 = gimple_assign_rhs1 (user);
3899               tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3900               gimple_stmt_iterator gsi = gsi_for_stmt (user);
3901               gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3902               update_stmt (user);
3903             }
3904         }
3905       else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3906         {
3907           if (gimple_assign_rhs1 (user) == negate)
3908             {
3909               /* We have
3910                    x = -a
3911                    y = x - b
3912                  which we transform into
3913                    x = a + b
3914                    y = -x .
3915                  This pushes down the negate which we possibly can merge
3916                  into some other operation, hence insert it into the
3917                  plus_negates vector.  */
3918               gimple feed = SSA_NAME_DEF_STMT (negate);
3919               tree a = gimple_assign_rhs1 (feed);
3920               tree b = gimple_assign_rhs2 (user);
3921               gimple_stmt_iterator gsi = gsi_for_stmt (feed);
3922               gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
3923               tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
3924               gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
3925               gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3926               gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
3927               user = gsi_stmt (gsi2);
3928               update_stmt (user);
3929               reassoc_remove_stmt (&gsi);
3930               release_defs (feed);
3931               plus_negates.safe_push (gimple_assign_lhs (user));
3932             }
3933           else
3934             {
3935               /* Transform "x = -a; y = b - x" into "y = b + a", getting
3936                  rid of one operation.  */
3937               gimple feed = SSA_NAME_DEF_STMT (negate);
3938               tree a = gimple_assign_rhs1 (feed);
3939               tree rhs1 = gimple_assign_rhs1 (user);
3940               gimple_stmt_iterator gsi = gsi_for_stmt (user);
3941               gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3942               update_stmt (gsi_stmt (gsi));
3943             }
3944         }
3945     }
3946 }
3947
3948 /* Returns true if OP is of a type for which we can do reassociation.
3949    That is for integral or non-saturating fixed-point types, and for
3950    floating point type when associative-math is enabled.  */
3951
3952 static bool
3953 can_reassociate_p (tree op)
3954 {
3955   tree type = TREE_TYPE (op);
3956   if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3957       || NON_SAT_FIXED_POINT_TYPE_P (type)
3958       || (flag_associative_math && FLOAT_TYPE_P (type)))
3959     return true;
3960   return false;
3961 }
3962
3963 /* Break up subtract operations in block BB.
3964
3965    We do this top down because we don't know whether the subtract is
3966    part of a possible chain of reassociation except at the top.
3967
3968    IE given
3969    d = f + g
3970    c = a + e
3971    b = c - d
3972    q = b - r
3973    k = t - q
3974
3975    we want to break up k = t - q, but we won't until we've transformed q
3976    = b - r, which won't be broken up until we transform b = c - d.
3977
3978    En passant, clear the GIMPLE visited flag on every statement
3979    and set UIDs within each basic block.  */
3980
3981 static void
3982 break_up_subtract_bb (basic_block bb)
3983 {
3984   gimple_stmt_iterator gsi;
3985   basic_block son;
3986   unsigned int uid = 1;
3987
3988   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3989     {
3990       gimple stmt = gsi_stmt (gsi);
3991       gimple_set_visited (stmt, false);
3992       gimple_set_uid (stmt, uid++);
3993
3994       if (!is_gimple_assign (stmt)
3995           || !can_reassociate_p (gimple_assign_lhs (stmt)))
3996         continue;
3997
3998       /* Look for simple gimple subtract operations.  */
3999       if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4000         {
4001           if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4002               || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4003             continue;
4004
4005           /* Check for a subtract used only in an addition.  If this
4006              is the case, transform it into add of a negate for better
4007              reassociation.  IE transform C = A-B into C = A + -B if C
4008              is only used in an addition.  */
4009           if (should_break_up_subtract (stmt))
4010             break_up_subtract (stmt, &gsi);
4011         }
4012       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4013                && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4014         plus_negates.safe_push (gimple_assign_lhs (stmt));
4015     }
4016   for (son = first_dom_son (CDI_DOMINATORS, bb);
4017        son;
4018        son = next_dom_son (CDI_DOMINATORS, son))
4019     break_up_subtract_bb (son);
4020 }
4021
/* Used for repeated factor analysis.  One entry describes a distinct
   SSA name occurring as a factor in a multiply chain.  */
struct repeat_factor_d
{
  /* An SSA name that occurs in a multiply chain.  */
  tree factor;

  /* Cached rank of the factor, copied from its operand entry.  */
  unsigned rank;

  /* Number of occurrences of the factor in the chain that have not
     yet been consumed by a generated product.  */
  HOST_WIDE_INT count;

  /* An SSA name representing the product of this factor and
     all factors appearing later in the repeated factor vector, or
     NULL_TREE while no such product has been built.  */
  tree repr;
};

/* Value, pointer and pointer-to-const aliases for repeat_factor_d.  */
typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
typedef const struct repeat_factor_d *const_repeat_factor_t;


/* Working vector of repeated factors; attempt_builtin_powi creates it
   on entry and releases it before returning a result.  */
static vec<repeat_factor> repeat_factor_vec;
4044
4045 /* Used for sorting the repeat factor vector.  Sort primarily by
4046    ascending occurrence count, secondarily by descending rank.  */
4047
4048 static int
4049 compare_repeat_factors (const void *x1, const void *x2)
4050 {
4051   const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4052   const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4053
4054   if (rf1->count != rf2->count)
4055     return rf1->count - rf2->count;
4056
4057   return rf2->rank - rf1->rank;
4058 }
4059
4060 /* Look for repeated operands in OPS in the multiply tree rooted at
4061    STMT.  Replace them with an optimal sequence of multiplies and powi
4062    builtin calls, and remove the used operands from OPS.  Return an
4063    SSA name representing the value of the replacement sequence.  */
4064
4065 static tree
4066 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4067 {
4068   unsigned i, j, vec_len;
4069   int ii;
4070   operand_entry_t oe;
4071   repeat_factor_t rf1, rf2;
4072   repeat_factor rfnew;
4073   tree result = NULL_TREE;
4074   tree target_ssa, iter_result;
4075   tree type = TREE_TYPE (gimple_get_lhs (stmt));
4076   tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4077   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4078   gimple mul_stmt, pow_stmt;
4079
4080   /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4081      target.  */
4082   if (!powi_fndecl)
4083     return NULL_TREE;
4084
4085   /* Allocate the repeated factor vector.  */
4086   repeat_factor_vec.create (10);
4087
4088   /* Scan the OPS vector for all SSA names in the product and build
4089      up a vector of occurrence counts for each factor.  */
4090   FOR_EACH_VEC_ELT (*ops, i, oe)
4091     {
4092       if (TREE_CODE (oe->op) == SSA_NAME)
4093         {
4094           FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4095             {
4096               if (rf1->factor == oe->op)
4097                 {
4098                   rf1->count += oe->count;
4099                   break;
4100                 }
4101             }
4102
4103           if (j >= repeat_factor_vec.length ())
4104             {
4105               rfnew.factor = oe->op;
4106               rfnew.rank = oe->rank;
4107               rfnew.count = oe->count;
4108               rfnew.repr = NULL_TREE;
4109               repeat_factor_vec.safe_push (rfnew);
4110             }
4111         }
4112     }
4113
4114   /* Sort the repeated factor vector by (a) increasing occurrence count,
4115      and (b) decreasing rank.  */
4116   repeat_factor_vec.qsort (compare_repeat_factors);
4117
4118   /* It is generally best to combine as many base factors as possible
4119      into a product before applying __builtin_powi to the result.
4120      However, the sort order chosen for the repeated factor vector
4121      allows us to cache partial results for the product of the base
4122      factors for subsequent use.  When we already have a cached partial
4123      result from a previous iteration, it is best to make use of it
4124      before looking for another __builtin_pow opportunity.
4125
4126      As an example, consider x * x * y * y * y * z * z * z * z.
4127      We want to first compose the product x * y * z, raise it to the
4128      second power, then multiply this by y * z, and finally multiply
4129      by z.  This can be done in 5 multiplies provided we cache y * z
4130      for use in both expressions:
4131
4132         t1 = y * z
4133         t2 = t1 * x
4134         t3 = t2 * t2
4135         t4 = t1 * t3
4136         result = t4 * z
4137
4138      If we instead ignored the cached y * z and first multiplied by
4139      the __builtin_pow opportunity z * z, we would get the inferior:
4140
4141         t1 = y * z
4142         t2 = t1 * x
4143         t3 = t2 * t2
4144         t4 = z * z
4145         t5 = t3 * t4
4146         result = t5 * y  */
4147
4148   vec_len = repeat_factor_vec.length ();
4149
4150   /* Repeatedly look for opportunities to create a builtin_powi call.  */
4151   while (true)
4152     {
4153       HOST_WIDE_INT power;
4154
4155       /* First look for the largest cached product of factors from
4156          preceding iterations.  If found, create a builtin_powi for
4157          it if the minimum occurrence count for its factors is at
4158          least 2, or just use this cached product as our next
4159          multiplicand if the minimum occurrence count is 1.  */
4160       FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4161         {
4162           if (rf1->repr && rf1->count > 0)
4163             break;
4164         }
4165
4166       if (j < vec_len)
4167         {
4168           power = rf1->count;
4169
4170           if (power == 1)
4171             {
4172               iter_result = rf1->repr;
4173
4174               if (dump_file && (dump_flags & TDF_DETAILS))
4175                 {
4176                   unsigned elt;
4177                   repeat_factor_t rf;
4178                   fputs ("Multiplying by cached product ", dump_file);
4179                   for (elt = j; elt < vec_len; elt++)
4180                     {
4181                       rf = &repeat_factor_vec[elt];
4182                       print_generic_expr (dump_file, rf->factor, 0);
4183                       if (elt < vec_len - 1)
4184                         fputs (" * ", dump_file);
4185                     }
4186                   fputs ("\n", dump_file);
4187                 }
4188             }
4189           else
4190             {
4191               iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4192               pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4193                                             build_int_cst (integer_type_node,
4194                                                            power));
4195               gimple_call_set_lhs (pow_stmt, iter_result);
4196               gimple_set_location (pow_stmt, gimple_location (stmt));
4197               gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4198
4199               if (dump_file && (dump_flags & TDF_DETAILS))
4200                 {
4201                   unsigned elt;
4202                   repeat_factor_t rf;
4203                   fputs ("Building __builtin_pow call for cached product (",
4204                          dump_file);
4205                   for (elt = j; elt < vec_len; elt++)
4206                     {
4207                       rf = &repeat_factor_vec[elt];
4208                       print_generic_expr (dump_file, rf->factor, 0);
4209                       if (elt < vec_len - 1)
4210                         fputs (" * ", dump_file);
4211                     }
4212                   fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4213                            power);
4214                 }
4215             }
4216         }
4217       else
4218         {
4219           /* Otherwise, find the first factor in the repeated factor
4220              vector whose occurrence count is at least 2.  If no such
4221              factor exists, there are no builtin_powi opportunities
4222              remaining.  */
4223           FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4224             {
4225               if (rf1->count >= 2)
4226                 break;
4227             }
4228
4229           if (j >= vec_len)
4230             break;
4231
4232           power = rf1->count;
4233
4234           if (dump_file && (dump_flags & TDF_DETAILS))
4235             {
4236               unsigned elt;
4237               repeat_factor_t rf;
4238               fputs ("Building __builtin_pow call for (", dump_file);
4239               for (elt = j; elt < vec_len; elt++)
4240                 {
4241                   rf = &repeat_factor_vec[elt];
4242                   print_generic_expr (dump_file, rf->factor, 0);
4243                   if (elt < vec_len - 1)
4244                     fputs (" * ", dump_file);
4245                 }
4246               fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4247             }
4248
4249           reassociate_stats.pows_created++;
4250
4251           /* Visit each element of the vector in reverse order (so that
4252              high-occurrence elements are visited first, and within the
4253              same occurrence count, lower-ranked elements are visited
4254              first).  Form a linear product of all elements in this order
4255              whose occurrencce count is at least that of element J.
4256              Record the SSA name representing the product of each element
4257              with all subsequent elements in the vector.  */
4258           if (j == vec_len - 1)
4259             rf1->repr = rf1->factor;
4260           else
4261             {
4262               for (ii = vec_len - 2; ii >= (int)j; ii--)
4263                 {
4264                   tree op1, op2;
4265
4266                   rf1 = &repeat_factor_vec[ii];
4267                   rf2 = &repeat_factor_vec[ii + 1];
4268
4269                   /* Init the last factor's representative to be itself.  */
4270                   if (!rf2->repr)
4271                     rf2->repr = rf2->factor;
4272
4273                   op1 = rf1->factor;
4274                   op2 = rf2->repr;
4275
4276                   target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4277                   mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4278                                                            target_ssa,
4279                                                            op1, op2);
4280                   gimple_set_location (mul_stmt, gimple_location (stmt));
4281                   gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4282                   rf1->repr = target_ssa;
4283
4284                   /* Don't reprocess the multiply we just introduced.  */
4285                   gimple_set_visited (mul_stmt, true);
4286                 }
4287             }
4288
4289           /* Form a call to __builtin_powi for the maximum product
4290              just formed, raised to the power obtained earlier.  */
4291           rf1 = &repeat_factor_vec[j];
4292           iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4293           pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4294                                         build_int_cst (integer_type_node,
4295                                                        power));
4296           gimple_call_set_lhs (pow_stmt, iter_result);
4297           gimple_set_location (pow_stmt, gimple_location (stmt));
4298           gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4299         }
4300
4301       /* If we previously formed at least one other builtin_powi call,
4302          form the product of this one and those others.  */
4303       if (result)
4304         {
4305           tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4306           mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4307                                                    result, iter_result);
4308           gimple_set_location (mul_stmt, gimple_location (stmt));
4309           gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4310           gimple_set_visited (mul_stmt, true);
4311           result = new_result;
4312         }
4313       else
4314         result = iter_result;
4315
4316       /* Decrement the occurrence count of each element in the product
4317          by the count found above, and remove this many copies of each
4318          factor from OPS.  */
4319       for (i = j; i < vec_len; i++)
4320         {
4321           unsigned k = power;
4322           unsigned n;
4323
4324           rf1 = &repeat_factor_vec[i];
4325           rf1->count -= power;
4326
4327           FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4328             {
4329               if (oe->op == rf1->factor)
4330                 {
4331                   if (oe->count <= k)
4332                     {
4333                       ops->ordered_remove (n);
4334                       k -= oe->count;
4335
4336                       if (k == 0)
4337                         break;
4338                     }
4339                   else
4340                     {
4341                       oe->count -= k;
4342                       break;
4343                     }
4344                 }
4345             }
4346         }
4347     }
4348
4349   /* At this point all elements in the repeated factor vector have a
4350      remaining occurrence count of 0 or 1, and those with a count of 1
4351      don't have cached representatives.  Re-sort the ops vector and
4352      clean up.  */
4353   ops->qsort (sort_by_operand_rank);
4354   repeat_factor_vec.release ();
4355
4356   /* Return the final product computed herein.  Note that there may
4357      still be some elements with single occurrence count left in OPS;
4358      those will be handled by the normal reassociation logic.  */
4359   return result;
4360 }
4361
4362 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
4363
4364 static void
4365 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4366 {
4367   tree rhs1;
4368
4369   if (dump_file && (dump_flags & TDF_DETAILS))
4370     {
4371       fprintf (dump_file, "Transforming ");
4372       print_gimple_stmt (dump_file, stmt, 0, 0);
4373     }
4374
4375   rhs1 = gimple_assign_rhs1 (stmt);
4376   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4377   update_stmt (stmt);
4378   remove_visited_stmt_chain (rhs1);
4379
4380   if (dump_file && (dump_flags & TDF_DETAILS))
4381     {
4382       fprintf (dump_file, " into ");
4383       print_gimple_stmt (dump_file, stmt, 0, 0);
4384     }
4385 }
4386
4387 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */
4388
4389 static void
4390 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4391                             tree rhs1, tree rhs2)
4392 {
4393   if (dump_file && (dump_flags & TDF_DETAILS))
4394     {
4395       fprintf (dump_file, "Transforming ");
4396       print_gimple_stmt (dump_file, stmt, 0, 0);
4397     }
4398
4399   gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4400   update_stmt (gsi_stmt (*gsi));
4401   remove_visited_stmt_chain (rhs1);
4402
4403   if (dump_file && (dump_flags & TDF_DETAILS))
4404     {
4405       fprintf (dump_file, " into ");
4406       print_gimple_stmt (dump_file, stmt, 0, 0);
4407     }
4408 }
4409
4410 /* Reassociate expressions in basic block BB and its post-dominator as
4411    children.  */
4412
4413 static void
4414 reassociate_bb (basic_block bb)
4415 {
4416   gimple_stmt_iterator gsi;
4417   basic_block son;
4418   gimple stmt = last_stmt (bb);
4419
4420   if (stmt && !gimple_visited_p (stmt))
4421     maybe_optimize_range_tests (stmt);
4422
4423   for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4424     {
4425       stmt = gsi_stmt (gsi);
4426
4427       if (is_gimple_assign (stmt)
4428           && !stmt_could_throw_p (stmt))
4429         {
4430           tree lhs, rhs1, rhs2;
4431           enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4432
4433           /* If this is not a gimple binary expression, there is
4434              nothing for us to do with it.  */
4435           if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4436             continue;
4437
4438           /* If this was part of an already processed statement,
4439              we don't need to touch it again. */
4440           if (gimple_visited_p (stmt))
4441             {
4442               /* This statement might have become dead because of previous
4443                  reassociations.  */
4444               if (has_zero_uses (gimple_get_lhs (stmt)))
4445                 {
4446                   reassoc_remove_stmt (&gsi);
4447                   release_defs (stmt);
4448                   /* We might end up removing the last stmt above which
4449                      places the iterator to the end of the sequence.
4450                      Reset it to the last stmt in this case which might
4451                      be the end of the sequence as well if we removed
4452                      the last statement of the sequence.  In which case
4453                      we need to bail out.  */
4454                   if (gsi_end_p (gsi))
4455                     {
4456                       gsi = gsi_last_bb (bb);
4457                       if (gsi_end_p (gsi))
4458                         break;
4459                     }
4460                 }
4461               continue;
4462             }
4463
4464           lhs = gimple_assign_lhs (stmt);
4465           rhs1 = gimple_assign_rhs1 (stmt);
4466           rhs2 = gimple_assign_rhs2 (stmt);
4467
4468           /* For non-bit or min/max operations we can't associate
4469              all types.  Verify that here.  */
4470           if (rhs_code != BIT_IOR_EXPR
4471               && rhs_code != BIT_AND_EXPR
4472               && rhs_code != BIT_XOR_EXPR
4473               && rhs_code != MIN_EXPR
4474               && rhs_code != MAX_EXPR
4475               && (!can_reassociate_p (lhs)
4476                   || !can_reassociate_p (rhs1)
4477                   || !can_reassociate_p (rhs2)))
4478             continue;
4479
4480           if (associative_tree_code (rhs_code))
4481             {
4482               auto_vec<operand_entry_t> ops;
4483               tree powi_result = NULL_TREE;
4484
4485               /* There may be no immediate uses left by the time we
4486                  get here because we may have eliminated them all.  */
4487               if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4488                 continue;
4489
4490               gimple_set_visited (stmt, true);
4491               linearize_expr_tree (&ops, stmt, true, true);
4492               ops.qsort (sort_by_operand_rank);
4493               optimize_ops_list (rhs_code, &ops);
4494               if (undistribute_ops_list (rhs_code, &ops,
4495                                          loop_containing_stmt (stmt)))
4496                 {
4497                   ops.qsort (sort_by_operand_rank);
4498                   optimize_ops_list (rhs_code, &ops);
4499                 }
4500
4501               if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4502                 optimize_range_tests (rhs_code, &ops);
4503
4504               if (first_pass_instance
4505                   && rhs_code == MULT_EXPR
4506                   && flag_unsafe_math_optimizations)
4507                 powi_result = attempt_builtin_powi (stmt, &ops);
4508
4509               /* If the operand vector is now empty, all operands were
4510                  consumed by the __builtin_powi optimization.  */
4511               if (ops.length () == 0)
4512                 transform_stmt_to_copy (&gsi, stmt, powi_result);
4513               else if (ops.length () == 1)
4514                 {
4515                   tree last_op = ops.last ()->op;
4516
4517                   if (powi_result)
4518                     transform_stmt_to_multiply (&gsi, stmt, last_op,
4519                                                 powi_result);
4520                   else
4521                     transform_stmt_to_copy (&gsi, stmt, last_op);
4522                 }
4523               else
4524                 {
4525                   enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4526                   int ops_num = ops.length ();
4527                   int width = get_reassociation_width (ops_num, rhs_code, mode);
4528                   tree new_lhs = lhs;
4529
4530                   if (dump_file && (dump_flags & TDF_DETAILS))
4531                     fprintf (dump_file,
4532                              "Width = %d was chosen for reassociation\n", width);
4533
4534                   if (width > 1
4535                       && ops.length () > 3)
4536                     rewrite_expr_tree_parallel (stmt, width, ops);
4537                   else
4538                     {
4539                       /* When there are three operands left, we want
4540                          to make sure the ones that get the double
4541                          binary op are chosen wisely.  */
4542                       int len = ops.length ();
4543                       if (len >= 3)
4544                         swap_ops_for_binary_stmt (ops, len - 3, stmt);
4545
4546                       new_lhs = rewrite_expr_tree (stmt, 0, ops,
4547                                                    powi_result != NULL);
4548                     }
4549
4550                   /* If we combined some repeated factors into a
4551                      __builtin_powi call, multiply that result by the
4552                      reassociated operands.  */
4553                   if (powi_result)
4554                     {
4555                       gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4556                       tree type = TREE_TYPE (lhs);
4557                       tree target_ssa = make_temp_ssa_name (type, NULL,
4558                                                             "reassocpow");
4559                       gimple_set_lhs (lhs_stmt, target_ssa);
4560                       update_stmt (lhs_stmt);
4561                       if (lhs != new_lhs)
4562                         target_ssa = new_lhs;
4563                       mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4564                                                                powi_result,
4565                                                                target_ssa);
4566                       gimple_set_location (mul_stmt, gimple_location (stmt));
4567                       gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4568                     }
4569                 }
4570             }
4571         }
4572     }
4573   for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4574        son;
4575        son = next_dom_son (CDI_POST_DOMINATORS, son))
4576     reassociate_bb (son);
4577 }
4578
4579 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4580 void debug_ops_vector (vec<operand_entry_t> ops);
4581
4582 /* Dump the operand entry vector OPS to FILE.  */
4583
4584 void
4585 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4586 {
4587   operand_entry_t oe;
4588   unsigned int i;
4589
4590   FOR_EACH_VEC_ELT (ops, i, oe)
4591     {
4592       fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4593       print_generic_expr (file, oe->op, 0);
4594     }
4595 }
4596
4597 /* Dump the operand entry vector OPS to STDERR.  */
4598
4599 DEBUG_FUNCTION void
4600 debug_ops_vector (vec<operand_entry_t> ops)
4601 {
4602   dump_ops_vector (stderr, ops);
4603 }
4604
4605 static void
4606 do_reassoc (void)
4607 {
4608   break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4609   reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4610 }
4611
4612 /* Initialize the reassociation pass.  */
4613
4614 static void
4615 init_reassoc (void)
4616 {
4617   int i;
4618   long rank = 2;
4619   int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4620
4621   /* Find the loops, so that we can prevent moving calculations in
4622      them.  */
4623   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4624
4625   memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4626
4627   operand_entry_pool = create_alloc_pool ("operand entry pool",
4628                                           sizeof (struct operand_entry), 30);
4629   next_operand_entry_id = 0;
4630
4631   /* Reverse RPO (Reverse Post Order) will give us something where
4632      deeper loops come later.  */
4633   pre_and_rev_post_order_compute (NULL, bbs, false);
4634   bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
4635   operand_rank = new hash_map<tree, long>;
4636
4637   /* Give each default definition a distinct rank.  This includes
4638      parameters and the static chain.  Walk backwards over all
4639      SSA names so that we get proper rank ordering according
4640      to tree_swap_operands_p.  */
4641   for (i = num_ssa_names - 1; i > 0; --i)
4642     {
4643       tree name = ssa_name (i);
4644       if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4645         insert_operand_rank (name, ++rank);
4646     }
4647
4648   /* Set up rank for each BB  */
4649   for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
4650     bb_rank[bbs[i]] = ++rank  << 16;
4651
4652   free (bbs);
4653   calculate_dominance_info (CDI_POST_DOMINATORS);
4654   plus_negates = vNULL;
4655 }
4656
4657 /* Cleanup after the reassociation pass, and print stats if
4658    requested.  */
4659
4660 static void
4661 fini_reassoc (void)
4662 {
4663   statistics_counter_event (cfun, "Linearized",
4664                             reassociate_stats.linearized);
4665   statistics_counter_event (cfun, "Constants eliminated",
4666                             reassociate_stats.constants_eliminated);
4667   statistics_counter_event (cfun, "Ops eliminated",
4668                             reassociate_stats.ops_eliminated);
4669   statistics_counter_event (cfun, "Statements rewritten",
4670                             reassociate_stats.rewritten);
4671   statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4672                             reassociate_stats.pows_encountered);
4673   statistics_counter_event (cfun, "Built-in powi calls created",
4674                             reassociate_stats.pows_created);
4675
4676   delete operand_rank;
4677   free_alloc_pool (operand_entry_pool);
4678   free (bb_rank);
4679   plus_negates.release ();
4680   free_dominance_info (CDI_POST_DOMINATORS);
4681   loop_optimizer_finalize ();
4682 }
4683
4684 /* Gate and execute functions for Reassociation.  */
4685
4686 static unsigned int
4687 execute_reassoc (void)
4688 {
4689   init_reassoc ();
4690
4691   do_reassoc ();
4692   repropagate_negates ();
4693
4694   fini_reassoc ();
4695   return 0;
4696 }
4697
4698 namespace {
4699
4700 const pass_data pass_data_reassoc =
4701 {
4702   GIMPLE_PASS, /* type */
4703   "reassoc", /* name */
4704   OPTGROUP_NONE, /* optinfo_flags */
4705   TV_TREE_REASSOC, /* tv_id */
4706   ( PROP_cfg | PROP_ssa ), /* properties_required */
4707   0, /* properties_provided */
4708   0, /* properties_destroyed */
4709   0, /* todo_flags_start */
4710   TODO_update_ssa_only_virtuals, /* todo_flags_finish */
4711 };
4712
4713 class pass_reassoc : public gimple_opt_pass
4714 {
4715 public:
4716   pass_reassoc (gcc::context *ctxt)
4717     : gimple_opt_pass (pass_data_reassoc, ctxt)
4718   {}
4719
4720   /* opt_pass methods: */
4721   opt_pass * clone () { return new pass_reassoc (m_ctxt); }
4722   virtual bool gate (function *) { return flag_tree_reassoc != 0; }
4723   virtual unsigned int execute (function *) { return execute_reassoc (); }
4724
4725 }; // class pass_reassoc
4726
4727 } // anon namespace
4728
4729 gimple_opt_pass *
4730 make_pass_reassoc (gcc::context *ctxt)
4731 {
4732   return new pass_reassoc (ctxt);
4733 }