gcc/tree-ssa-reassoc.c

   1 /* Reassociation for trees.
   2    Copyright (C) 2005-2014 Free Software Foundation, Inc.
   3    Contributed by Daniel Berlin <dan@dberlin.org>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 3, or (at your option)
  10 any later version.
  11
  12 GCC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "hash-table.h"
  25 #include "tm.h"
  26 #include "rtl.h"
  27 #include "tm_p.h"
  28 #include "tree.h"
  29 #include "stor-layout.h"
  30 #include "basic-block.h"
  31 #include "gimple-pretty-print.h"
  32 #include "tree-inline.h"
  33 #include "hash-map.h"
  34 #include "tree-ssa-alias.h"
  35 #include "internal-fn.h"
  36 #include "gimple-fold.h"
  37 #include "tree-eh.h"
  38 #include "gimple-expr.h"
  39 #include "is-a.h"
  40 #include "gimple.h"
  41 #include "gimple-iterator.h"
  42 #include "gimplify-me.h"
  43 #include "gimple-ssa.h"
  44 #include "tree-cfg.h"
  45 #include "tree-phinodes.h"
  46 #include "ssa-iterators.h"
  47 #include "stringpool.h"
  48 #include "tree-ssanames.h"
  49 #include "tree-ssa-loop-niter.h"
  50 #include "tree-ssa-loop.h"
  51 #include "expr.h"
  52 #include "tree-dfa.h"
  53 #include "tree-ssa.h"
  54 #include "tree-iterator.h"
  55 #include "tree-pass.h"
  56 #include "alloc-pool.h"
  57 #include "langhooks.h"
  58 #include "cfgloop.h"
  59 #include "flags.h"
  60 #include "target.h"
  61 #include "params.h"
  62 #include "diagnostic-core.h"
  63 #include "builtins.h"
  64
  65 /*  This is a simple global reassociation pass.  It is, in part, based
  66     on the LLVM pass of the same name (They do some things more/less
  67     than we do, in different orders, etc).
  68
  69     It consists of five steps:
  70
  71     1. Breaking up subtract operations into addition + negate, where
  72     it would promote the reassociation of adds.
  73
  74     2. Left linearization of the expression trees, so that (A+B)+(C+D)
  75     becomes (((A+B)+C)+D), which is easier for us to rewrite later.
  76     During linearization, we place the operands of the binary
  77     expressions into a vector of operand_entry_t
  78
  79     3. Optimization of the operand lists, eliminating things like a +
  80     -a, a & a, etc.
  81
  82     3a. Combine repeated factors with the same occurrence counts
  83     into a __builtin_powi call that will later be optimized into
  84     an optimal number of multiplies.
  85
  86     4. Rewrite the expression trees we linearized and optimized so
  87     they are in proper rank order.
  88
  89     5. Repropagate negates, as nothing else will clean it up ATM.
  90
  91     A bit of theory on #4, since nobody seems to write anything down
  92     about why it makes sense to do it the way they do it:
  93
  94     We could do this much nicer theoretically, but don't (for reasons
  95     explained after how to do it theoretically nice :P).
  96
  97     In order to promote the most redundancy elimination, you want
  98     binary expressions whose operands are the same rank (or
  99     preferably, the same value) exposed to the redundancy eliminator,
 100     for possible elimination.
 101
 102     So the way to do this if we really cared, is to build the new op
 103     tree from the leaves to the roots, merging as you go, and putting the
 104     new op on the end of the worklist, until you are left with one
 105     thing on the worklist.
 106
 107     IE if you have to rewrite the following set of operands (listed with
 108     rank in parentheses), with opcode PLUS_EXPR:
 109
 110     a (1),  b (1),  c (1),  d (2), e (2)
 111
 112
 113     We start with our merge worklist empty, and the ops list with all of
 114     those on it.
 115
 116     You want to first merge all leaves of the same rank, as much as
 117     possible.
 118
 119     So first build a binary op of
 120
 121     mergetmp = a + b, and put "mergetmp" on the merge worklist.
 122
 123     Because there is no three operand form of PLUS_EXPR, c is not going to
 124     be exposed to redundancy elimination as a rank 1 operand.
 125
 126     So you might as well throw it on the merge worklist (you could also
 127     consider it to now be a rank two operand, and merge it with d and e,
 128     but in this case, you then have evicted e from a binary op. So at
 129     least in this situation, you can't win.)
 130
 131     Then build a binary op of d + e
 132     mergetmp2 = d + e
 133
 134     and put mergetmp2 on the merge worklist.
 135
 136     so merge worklist = {mergetmp, c, mergetmp2}
 137
 138     Continue building binary ops of these operations until you have only
 139     one operation left on the worklist.
 140
 141     So we have
 142
 143     build binary op
 144     mergetmp3 = mergetmp + c
 145
 146     worklist = {mergetmp2, mergetmp3}
 147
 148     mergetmp4 = mergetmp2 + mergetmp3
 149
 150     worklist = {mergetmp4}
 151
 152     because we have one operation left, we can now just set the original
 153     statement equal to the result of that operation.
 154
 155     This will at least expose a + b  and d + e to redundancy elimination
 156     as binary operations.
 157
 158     For extra points, you can reuse the old statements to build the
 159     mergetmps, since you shouldn't run out.
 160
 161     So why don't we do this?
 162
 163     Because it's expensive, and rarely will help.  Most trees we are
 164     reassociating have 3 or less ops.  If they have 2 ops, they already
 165     will be written into a nice single binary op.  If you have 3 ops, a
 166     single simple check suffices to tell you whether the first two are of the
 167     same rank.  If so, you know to order it
 168
 169     mergetmp = op1 + op2
 170     newstmt = mergetmp + op3
 171
 172     instead of
 173     mergetmp = op2 + op3
 174     newstmt = mergetmp + op1
 175
 176     If all three are of the same rank, you can't expose them all in a
 177     single binary operator anyway, so the above is *still* the best you
 178     can do.
 179
 180     Thus, this is what we do.  When we have three ops left, we check to see
 181     what order to put them in, and call it a day.  As a nod to vector sum
 182     reduction, we check if any of the ops are really a phi node that is a
 183     destructive update for the associating op, and keep the destructive
 184     update together for vector sum reduction recognition.  */
 185
 186
/* Statistics gathered while the pass runs.  Every field is a plain
   event counter; field comments below are limited to what the uses
   in this file show.  */
static struct
{
  int linearized;
  int constants_eliminated;
  /* Number of operands dropped from operand lists by the
     eliminate_* helpers.  */
  int ops_eliminated;
  int rewritten;
  /* Incremented once per entry created by add_repeat_to_ops_vec.  */
  int pows_encountered;
  int pows_created;
} reassociate_stats;
 197
/* Operator, rank pair.  One entry describes one operand collected
   into an operand vector.  */
typedef struct operand_entry
{
  /* Rank of OP, as returned by get_rank when the entry is created.  */
  unsigned int rank;
  /* Unique ID taken from next_operand_entry_id; used as the final
     tie-breaker when sorting.  */
  int id;
  /* The operand itself.  */
  tree op;
  /* Repeat count: 1 for entries made by add_to_ops_vec, the REPEAT
     argument for entries made by add_repeat_to_ops_vec.  */
  unsigned int count;
} *operand_entry_t;
 206
/* Pool from which every struct operand_entry is allocated.  */
static alloc_pool operand_entry_pool;

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  Indexed by basic block index.  */
static long *bb_rank;

/* Operand->rank hashtable.  Read through find_operand_rank (which
   reports a missing entry as -1) and filled by insert_operand_rank.  */
static hash_map<tree, long> *operand_rank;

/* Forward decls.  */
static long get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
 224
 225 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
 226    possibly added by gsi_remove.  */
 227
 228 bool
 229 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
 230 {
 231   gimple stmt = gsi_stmt (*gsi);
 232
 233   if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
 234     return gsi_remove (gsi, true);
 235
 236   gimple_stmt_iterator prev = *gsi;
 237   gsi_prev (&prev);
 238   unsigned uid = gimple_uid (stmt);
 239   basic_block bb = gimple_bb (stmt);
 240   bool ret = gsi_remove (gsi, true);
 241   if (!gsi_end_p (prev))
 242     gsi_next (&prev);
 243   else
 244     prev = gsi_start_bb (bb);
 245   gimple end_stmt = gsi_stmt (*gsi);
 246   while ((stmt = gsi_stmt (prev)) != end_stmt)
 247     {
 248       gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
 249       gimple_set_uid (stmt, uid);
 250       gsi_next (&prev);
 251     }
 252   return ret;
 253 }
 254
 255 /* Bias amount for loop-carried phis.  We want this to be larger than
 256    the depth of any reassociation tree we can see, but not larger than
 257    the rank difference between two blocks.  */
 258 #define PHI_LOOP_BIAS (1 << 15)
 259
 260 /* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
 261    an innermost loop, and the phi has only a single use which is inside
 262    the loop, then the rank is the block rank of the loop latch plus an
 263    extra bias for the loop-carried dependence.  This causes expressions
 264    calculated into an accumulator variable to be independent for each
 265    iteration of the loop.  If STMT is some other phi, the rank is the
 266    block rank of its containing block.  */
 267 static long
 268 phi_rank (gimple stmt)
 269 {
 270   basic_block bb = gimple_bb (stmt);
 271   struct loop *father = bb->loop_father;
 272   tree res;
 273   unsigned i;
 274   use_operand_p use;
 275   gimple use_stmt;
 276
 277   /* We only care about real loops (those with a latch).  */
 278   if (!father->latch)
 279     return bb_rank[bb->index];
 280
 281   /* Interesting phis must be in headers of innermost loops.  */
 282   if (bb != father->header
 283       || father->inner)
 284     return bb_rank[bb->index];
 285
 286   /* Ignore virtual SSA_NAMEs.  */
 287   res = gimple_phi_result (stmt);
 288   if (virtual_operand_p (res))
 289     return bb_rank[bb->index];
 290
 291   /* The phi definition must have a single use, and that use must be
 292      within the loop.  Otherwise this isn't an accumulator pattern.  */
 293   if (!single_imm_use (res, &use, &use_stmt)
 294       || gimple_bb (use_stmt)->loop_father != father)
 295     return bb_rank[bb->index];
 296
 297   /* Look for phi arguments from within the loop.  If found, bias this phi.  */
 298   for (i = 0; i < gimple_phi_num_args (stmt); i++)
 299     {
 300       tree arg = gimple_phi_arg_def (stmt, i);
 301       if (TREE_CODE (arg) == SSA_NAME
 302           && !SSA_NAME_IS_DEFAULT_DEF (arg))
 303         {
 304           gimple def_stmt = SSA_NAME_DEF_STMT (arg);
 305           if (gimple_bb (def_stmt)->loop_father == father)
 306             return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
 307         }
 308     }
 309
 310   /* Must be an uninteresting phi.  */
 311   return bb_rank[bb->index];
 312 }
 313
 314 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
 315    loop-carried dependence of an innermost loop, return TRUE; else
 316    return FALSE.  */
 317 static bool
 318 loop_carried_phi (tree exp)
 319 {
 320   gimple phi_stmt;
 321   long block_rank;
 322
 323   if (TREE_CODE (exp) != SSA_NAME
 324       || SSA_NAME_IS_DEFAULT_DEF (exp))
 325     return false;
 326
 327   phi_stmt = SSA_NAME_DEF_STMT (exp);
 328
 329   if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
 330     return false;
 331
 332   /* Non-loop-carried phis have block rank.  Loop-carried phis have
 333      an additional bias added in.  If this phi doesn't have block rank,
 334      it's biased and should not be propagated.  */
 335   block_rank = bb_rank[gimple_bb (phi_stmt)->index];
 336
 337   if (phi_rank (phi_stmt) != block_rank)
 338     return true;
 339
 340   return false;
 341 }
 342
 343 /* Return the maximum of RANK and the rank that should be propagated
 344    from expression OP.  For most operands, this is just the rank of OP.
 345    For loop-carried phis, the value is zero to avoid undoing the bias
 346    in favor of the phi.  */
 347 static long
 348 propagate_rank (long rank, tree op)
 349 {
 350   long op_rank;
 351
 352   if (loop_carried_phi (op))
 353     return rank;
 354
 355   op_rank = get_rank (op);
 356
 357   return MAX (rank, op_rank);
 358 }
 359
 360 /* Look up the operand rank structure for expression E.  */
 361
 362 static inline long
 363 find_operand_rank (tree e)
 364 {
 365   long *slot = operand_rank->get (e);
 366   return slot ? *slot : -1;
 367 }
 368
 369 /* Insert {E,RANK} into the operand rank hashtable.  */
 370
 371 static inline void
 372 insert_operand_rank (tree e, long rank)
 373 {
 374   gcc_assert (rank > 0);
 375   gcc_assert (!operand_rank->put (e, rank));
 376 }
 377
 378 /* Given an expression E, return the rank of the expression.  */
 379
 380 static long
 381 get_rank (tree e)
 382 {
 383   /* Constants have rank 0.  */
 384   if (is_gimple_min_invariant (e))
 385     return 0;
 386
 387   /* SSA_NAME's have the rank of the expression they are the result
 388      of.
 389      For globals and uninitialized values, the rank is 0.
 390      For function arguments, use the pre-setup rank.
 391      For PHI nodes, stores, asm statements, etc, we use the rank of
 392      the BB.
 393      For simple operations, the rank is the maximum rank of any of
 394      its operands, or the bb_rank, whichever is less.
 395      I make no claims that this is optimal, however, it gives good
 396      results.  */
 397
 398   /* We make an exception to the normal ranking system to break
 399      dependences of accumulator variables in loops.  Suppose we
 400      have a simple one-block loop containing:
 401
 402        x_1 = phi(x_0, x_2)
 403        b = a + x_1
 404        c = b + d
 405        x_2 = c + e
 406
 407      As shown, each iteration of the calculation into x is fully
 408      dependent upon the iteration before it.  We would prefer to
 409      see this in the form:
 410
 411        x_1 = phi(x_0, x_2)
 412        b = a + d
 413        c = b + e
 414        x_2 = c + x_1
 415
 416      If the loop is unrolled, the calculations of b and c from
 417      different iterations can be interleaved.
 418
 419      To obtain this result during reassociation, we bias the rank
 420      of the phi definition x_1 upward, when it is recognized as an
 421      accumulator pattern.  The artificial rank causes it to be
 422      added last, providing the desired independence.  */
 423
 424   if (TREE_CODE (e) == SSA_NAME)
 425     {
 426       gimple stmt;
 427       long rank;
 428       int i, n;
 429       tree op;
 430
 431       if (SSA_NAME_IS_DEFAULT_DEF (e))
 432         return find_operand_rank (e);
 433
 434       stmt = SSA_NAME_DEF_STMT (e);
 435       if (gimple_code (stmt) == GIMPLE_PHI)
 436         return phi_rank (stmt);
 437
 438       if (!is_gimple_assign (stmt)
 439           || gimple_vdef (stmt))
 440         return bb_rank[gimple_bb (stmt)->index];
 441
 442       /* If we already have a rank for this expression, use that.  */
 443       rank = find_operand_rank (e);
 444       if (rank != -1)
 445         return rank;
 446
 447       /* Otherwise, find the maximum rank for the operands.  As an
 448          exception, remove the bias from loop-carried phis when propagating
 449          the rank so that dependent operations are not also biased.  */
 450       rank = 0;
 451       if (gimple_assign_single_p (stmt))
 452         {
 453           tree rhs = gimple_assign_rhs1 (stmt);
 454           n = TREE_OPERAND_LENGTH (rhs);
 455           if (n == 0)
 456             rank = propagate_rank (rank, rhs);
 457           else
 458             {
 459               for (i = 0; i < n; i++)
 460                 {
 461                   op = TREE_OPERAND (rhs, i);
 462
 463                   if (op != NULL_TREE)
 464                     rank = propagate_rank (rank, op);
 465                 }
 466             }
 467         }
 468       else
 469         {
 470           n = gimple_num_ops (stmt);
 471           for (i = 1; i < n; i++)
 472             {
 473               op = gimple_op (stmt, i);
 474               gcc_assert (op);
 475               rank = propagate_rank (rank, op);
 476             }
 477         }
 478
 479       if (dump_file && (dump_flags & TDF_DETAILS))
 480         {
 481           fprintf (dump_file, "Rank for ");
 482           print_generic_expr (dump_file, e, 0);
 483           fprintf (dump_file, " is %ld\n", (rank + 1));
 484         }
 485
 486       /* Note the rank in the hashtable so we don't recompute it.  */
 487       insert_operand_rank (e, (rank + 1));
 488       return (rank + 1);
 489     }
 490
 491   /* Globals, etc,  are rank 0 */
 492   return 0;
 493 }
 494
 495
 496 /* We want integer ones to end up last no matter what, since they are
 497    the ones we can do the most with.  */
 498 #define INTEGER_CONST_TYPE 1 << 3
 499 #define FLOAT_CONST_TYPE 1 << 2
 500 #define OTHER_CONST_TYPE 1 << 1
 501
 502 /* Classify an invariant tree into integer, float, or other, so that
 503    we can sort them to be near other constants of the same type.  */
 504 static inline int
 505 constant_type (tree t)
 506 {
 507   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
 508     return INTEGER_CONST_TYPE;
 509   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
 510     return FLOAT_CONST_TYPE;
 511   else
 512     return OTHER_CONST_TYPE;
 513 }
 514
 515 /* qsort comparison function to sort operand entries PA and PB by rank
 516    so that the sorted array is ordered by rank in decreasing order.  */
 517 static int
 518 sort_by_operand_rank (const void *pa, const void *pb)
 519 {
 520   const operand_entry_t oea = *(const operand_entry_t *)pa;
 521   const operand_entry_t oeb = *(const operand_entry_t *)pb;
 522
 523   /* It's nicer for optimize_expression if constants that are likely
 524      to fold when added/multiplied//whatever are put next to each
 525      other.  Since all constants have rank 0, order them by type.  */
 526   if (oeb->rank == 0 && oea->rank == 0)
 527     {
 528       if (constant_type (oeb->op) != constant_type (oea->op))
 529         return constant_type (oeb->op) - constant_type (oea->op);
 530       else
 531         /* To make sorting result stable, we use unique IDs to determine
 532            order.  */
 533         return oeb->id - oea->id;
 534     }
 535
 536   /* Lastly, make sure the versions that are the same go next to each
 537      other.  */
 538   if ((oeb->rank - oea->rank == 0)
 539       && TREE_CODE (oea->op) == SSA_NAME
 540       && TREE_CODE (oeb->op) == SSA_NAME)
 541     {
 542       /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
 543          versions of removed SSA_NAMEs, so if possible, prefer to sort
 544          based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
 545          See PR60418.  */
 546       if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
 547           && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
 548           && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
 549         {
 550           gimple stmta = SSA_NAME_DEF_STMT (oea->op);
 551           gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
 552           basic_block bba = gimple_bb (stmta);
 553           basic_block bbb = gimple_bb (stmtb);
 554           if (bbb != bba)
 555             {
 556               if (bb_rank[bbb->index] != bb_rank[bba->index])
 557                 return bb_rank[bbb->index] - bb_rank[bba->index];
 558             }
 559           else
 560             {
 561               bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
 562               bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
 563               if (da != db)
 564                 return da ? 1 : -1;
 565             }
 566         }
 567
 568       if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
 569         return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
 570       else
 571         return oeb->id - oea->id;
 572     }
 573
 574   if (oeb->rank != oea->rank)
 575     return oeb->rank - oea->rank;
 576   else
 577     return oeb->id - oea->id;
 578 }
 579
 580 /* Add an operand entry to *OPS for the tree operand OP.  */
 581
 582 static void
 583 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
 584 {
 585   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
 586
 587   oe->op = op;
 588   oe->rank = get_rank (op);
 589   oe->id = next_operand_entry_id++;
 590   oe->count = 1;
 591   ops->safe_push (oe);
 592 }
 593
 594 /* Add an operand entry to *OPS for the tree operand OP with repeat
 595    count REPEAT.  */
 596
 597 static void
 598 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
 599                        HOST_WIDE_INT repeat)
 600 {
 601   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
 602
 603   oe->op = op;
 604   oe->rank = get_rank (op);
 605   oe->id = next_operand_entry_id++;
 606   oe->count = repeat;
 607   ops->safe_push (oe);
 608
 609   reassociate_stats.pows_encountered++;
 610 }
 611
 612 /* Return true if STMT is reassociable operation containing a binary
 613    operation with tree code CODE, and is inside LOOP.  */
 614
 615 static bool
 616 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
 617 {
 618   basic_block bb = gimple_bb (stmt);
 619
 620   if (gimple_bb (stmt) == NULL)
 621     return false;
 622
 623   if (!flow_bb_inside_loop_p (loop, bb))
 624     return false;
 625
 626   if (is_gimple_assign (stmt)
 627       && gimple_assign_rhs_code (stmt) == code
 628       && has_single_use (gimple_assign_lhs (stmt)))
 629     return true;
 630
 631   return false;
 632 }
 633
 634
 635 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
 636    operand of the negate operation.  Otherwise, return NULL.  */
 637
 638 static tree
 639 get_unary_op (tree name, enum tree_code opcode)
 640 {
 641   gimple stmt = SSA_NAME_DEF_STMT (name);
 642
 643   if (!is_gimple_assign (stmt))
 644     return NULL_TREE;
 645
 646   if (gimple_assign_rhs_code (stmt) == opcode)
 647     return gimple_assign_rhs1 (stmt);
 648   return NULL_TREE;
 649 }
 650
 651 /* If CURR and LAST are a pair of ops that OPCODE allows us to
 652    eliminate through equivalences, do so, remove them from OPS, and
 653    return true.  Otherwise, return false.  */
 654
 655 static bool
 656 eliminate_duplicate_pair (enum tree_code opcode,
 657                           vec<operand_entry_t> *ops,
 658                           bool *all_done,
 659                           unsigned int i,
 660                           operand_entry_t curr,
 661                           operand_entry_t last)
 662 {
 663
 664   /* If we have two of the same op, and the opcode is & |, min, or max,
 665      we can eliminate one of them.
 666      If we have two of the same op, and the opcode is ^, we can
 667      eliminate both of them.  */
 668
 669   if (last && last->op == curr->op)
 670     {
 671       switch (opcode)
 672         {
 673         case MAX_EXPR:
 674         case MIN_EXPR:
 675         case BIT_IOR_EXPR:
 676         case BIT_AND_EXPR:
 677           if (dump_file && (dump_flags & TDF_DETAILS))
 678             {
 679               fprintf (dump_file, "Equivalence: ");
 680               print_generic_expr (dump_file, curr->op, 0);
 681               fprintf (dump_file, " [&|minmax] ");
 682               print_generic_expr (dump_file, last->op, 0);
 683               fprintf (dump_file, " -> ");
 684               print_generic_stmt (dump_file, last->op, 0);
 685             }
 686
 687           ops->ordered_remove (i);
 688           reassociate_stats.ops_eliminated ++;
 689
 690           return true;
 691
 692         case BIT_XOR_EXPR:
 693           if (dump_file && (dump_flags & TDF_DETAILS))
 694             {
 695               fprintf (dump_file, "Equivalence: ");
 696               print_generic_expr (dump_file, curr->op, 0);
 697               fprintf (dump_file, " ^ ");
 698               print_generic_expr (dump_file, last->op, 0);
 699               fprintf (dump_file, " -> nothing\n");
 700             }
 701
 702           reassociate_stats.ops_eliminated += 2;
 703
 704           if (ops->length () == 2)
 705             {
 706               ops->create (0);
 707               add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
 708               *all_done = true;
 709             }
 710           else
 711             {
 712               ops->ordered_remove (i-1);
 713               ops->ordered_remove (i-1);
 714             }
 715
 716           return true;
 717
 718         default:
 719           break;
 720         }
 721     }
 722   return false;
 723 }
 724
 725 static vec<tree> plus_negates;
 726
 727 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
 728    expression, look in OPS for a corresponding positive operation to cancel
 729    it out.  If we find one, remove the other from OPS, replace
 730    OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
 731    return false. */
 732
 733 static bool
 734 eliminate_plus_minus_pair (enum tree_code opcode,
 735                            vec<operand_entry_t> *ops,
 736                            unsigned int currindex,
 737                            operand_entry_t curr)
 738 {
 739   tree negateop;
 740   tree notop;
 741   unsigned int i;
 742   operand_entry_t oe;
 743
 744   if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
 745     return false;
 746
 747   negateop = get_unary_op (curr->op, NEGATE_EXPR);
 748   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 749   if (negateop == NULL_TREE && notop == NULL_TREE)
 750     return false;
 751
 752   /* Any non-negated version will have a rank that is one less than
 753      the current rank.  So once we hit those ranks, if we don't find
 754      one, we can stop.  */
 755
 756   for (i = currindex + 1;
 757        ops->iterate (i, &oe)
 758        && oe->rank >= curr->rank - 1 ;
 759        i++)
 760     {
 761       if (oe->op == negateop)
 762         {
 763
 764           if (dump_file && (dump_flags & TDF_DETAILS))
 765             {
 766               fprintf (dump_file, "Equivalence: ");
 767               print_generic_expr (dump_file, negateop, 0);
 768               fprintf (dump_file, " + -");
 769               print_generic_expr (dump_file, oe->op, 0);
 770               fprintf (dump_file, " -> 0\n");
 771             }
 772
 773           ops->ordered_remove (i);
 774           add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
 775           ops->ordered_remove (currindex);
 776           reassociate_stats.ops_eliminated ++;
 777
 778           return true;
 779         }
 780       else if (oe->op == notop)
 781         {
 782           tree op_type = TREE_TYPE (oe->op);
 783
 784           if (dump_file && (dump_flags & TDF_DETAILS))
 785             {
 786               fprintf (dump_file, "Equivalence: ");
 787               print_generic_expr (dump_file, notop, 0);
 788               fprintf (dump_file, " + ~");
 789               print_generic_expr (dump_file, oe->op, 0);
 790               fprintf (dump_file, " -> -1\n");
 791             }
 792
 793           ops->ordered_remove (i);
 794           add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
 795           ops->ordered_remove (currindex);
 796           reassociate_stats.ops_eliminated ++;
 797
 798           return true;
 799         }
 800     }
 801
 802   /* CURR->OP is a negate expr in a plus expr: save it for later
 803      inspection in repropagate_negates().  */
 804   if (negateop != NULL_TREE)
 805     plus_negates.safe_push (curr->op);
 806
 807   return false;
 808 }
 809
 810 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
 811    bitwise not expression, look in OPS for a corresponding operand to
 812    cancel it out.  If we find one, remove the other from OPS, replace
 813    OPS[CURRINDEX] with 0, and return true.  Otherwise, return
 814    false. */
 815
 816 static bool
 817 eliminate_not_pairs (enum tree_code opcode,
 818                      vec<operand_entry_t> *ops,
 819                      unsigned int currindex,
 820                      operand_entry_t curr)
 821 {
 822   tree notop;
 823   unsigned int i;
 824   operand_entry_t oe;
 825
 826   if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
 827       || TREE_CODE (curr->op) != SSA_NAME)
 828     return false;
 829
 830   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 831   if (notop == NULL_TREE)
 832     return false;
 833
 834   /* Any non-not version will have a rank that is one less than
 835      the current rank.  So once we hit those ranks, if we don't find
 836      one, we can stop.  */
 837
 838   for (i = currindex + 1;
 839        ops->iterate (i, &oe)
 840        && oe->rank >= curr->rank - 1;
 841        i++)
 842     {
 843       if (oe->op == notop)
 844         {
 845           if (dump_file && (dump_flags & TDF_DETAILS))
 846             {
 847               fprintf (dump_file, "Equivalence: ");
 848               print_generic_expr (dump_file, notop, 0);
 849               if (opcode == BIT_AND_EXPR)
 850                 fprintf (dump_file, " & ~");
 851               else if (opcode == BIT_IOR_EXPR)
 852                 fprintf (dump_file, " | ~");
 853               print_generic_expr (dump_file, oe->op, 0);
 854               if (opcode == BIT_AND_EXPR)
 855                 fprintf (dump_file, " -> 0\n");
 856               else if (opcode == BIT_IOR_EXPR)
 857                 fprintf (dump_file, " -> -1\n");
 858             }
 859
 860           if (opcode == BIT_AND_EXPR)
 861             oe->op = build_zero_cst (TREE_TYPE (oe->op));
 862           else if (opcode == BIT_IOR_EXPR)
 863             oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
 864
 865           reassociate_stats.ops_eliminated += ops->length () - 1;
 866           ops->truncate (0);
 867           ops->quick_push (oe);
 868           return true;
 869         }
 870     }
 871
 872   return false;
 873 }
 874
 875 /* Use constant value that may be present in OPS to try to eliminate
 876    operands.  Note that this function is only really used when we've
 877    eliminated ops for other reasons, or merged constants.  Across
 878    single statements, fold already does all of this, plus more.  There
 879    is little point in duplicating logic, so I've only included the
 880    identities that I could ever construct testcases to trigger.  */
 881
 882 static void
 883 eliminate_using_constants (enum tree_code opcode,
 884                            vec<operand_entry_t> *ops)
 885 {
 886   operand_entry_t oelast = ops->last ();
 887   tree type = TREE_TYPE (oelast->op);
 888
 889   if (oelast->rank == 0
 890       && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
 891     {
 892       switch (opcode)
 893         {
 894         case BIT_AND_EXPR:
 895           if (integer_zerop (oelast->op))
 896             {
 897               if (ops->length () != 1)
 898                 {
 899                   if (dump_file && (dump_flags & TDF_DETAILS))
 900                     fprintf (dump_file, "Found & 0, removing all other ops\n");
 901
 902                   reassociate_stats.ops_eliminated += ops->length () - 1;
 903
 904                   ops->truncate (0);
 905                   ops->quick_push (oelast);
 906                   return;
 907                 }
 908             }
 909           else if (integer_all_onesp (oelast->op))
 910             {
 911               if (ops->length () != 1)
 912                 {
 913                   if (dump_file && (dump_flags & TDF_DETAILS))
 914                     fprintf (dump_file, "Found & -1, removing\n");
 915                   ops->pop ();
 916                   reassociate_stats.ops_eliminated++;
 917                 }
 918             }
 919           break;
 920         case BIT_IOR_EXPR:
 921           if (integer_all_onesp (oelast->op))
 922             {
 923               if (ops->length () != 1)
 924                 {
 925                   if (dump_file && (dump_flags & TDF_DETAILS))
 926                     fprintf (dump_file, "Found | -1, removing all other ops\n");
 927
 928                   reassociate_stats.ops_eliminated += ops->length () - 1;
 929
 930                   ops->truncate (0);
 931                   ops->quick_push (oelast);
 932                   return;
 933                 }
 934             }
 935           else if (integer_zerop (oelast->op))
 936             {
 937               if (ops->length () != 1)
 938                 {
 939                   if (dump_file && (dump_flags & TDF_DETAILS))
 940                     fprintf (dump_file, "Found | 0, removing\n");
 941                   ops->pop ();
 942                   reassociate_stats.ops_eliminated++;
 943                 }
 944             }
 945           break;
 946         case MULT_EXPR:
 947           if (integer_zerop (oelast->op)
 948               || (FLOAT_TYPE_P (type)
 949                   && !HONOR_NANS (TYPE_MODE (type))
 950                   && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
 951                   && real_zerop (oelast->op)))
 952             {
 953               if (ops->length () != 1)
 954                 {
 955                   if (dump_file && (dump_flags & TDF_DETAILS))
 956                     fprintf (dump_file, "Found * 0, removing all other ops\n");
 957
 958                   reassociate_stats.ops_eliminated += ops->length () - 1;
 959                   ops->truncate (1);
 960                   ops->quick_push (oelast);
 961                   return;
 962                 }
 963             }
 964           else if (integer_onep (oelast->op)
 965                    || (FLOAT_TYPE_P (type)
 966                        && !HONOR_SNANS (TYPE_MODE (type))
 967                        && real_onep (oelast->op)))
 968             {
 969               if (ops->length () != 1)
 970                 {
 971                   if (dump_file && (dump_flags & TDF_DETAILS))
 972                     fprintf (dump_file, "Found * 1, removing\n");
 973                   ops->pop ();
 974                   reassociate_stats.ops_eliminated++;
 975                   return;
 976                 }
 977             }
 978           break;
 979         case BIT_XOR_EXPR:
 980         case PLUS_EXPR:
 981         case MINUS_EXPR:
 982           if (integer_zerop (oelast->op)
 983               || (FLOAT_TYPE_P (type)
 984                   && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
 985                   && fold_real_zero_addition_p (type, oelast->op,
 986                                                 opcode == MINUS_EXPR)))
 987             {
 988               if (ops->length () != 1)
 989                 {
 990                   if (dump_file && (dump_flags & TDF_DETAILS))
 991                     fprintf (dump_file, "Found [|^+] 0, removing\n");
 992                   ops->pop ();
 993                   reassociate_stats.ops_eliminated++;
 994                   return;
 995                 }
 996             }
 997           break;
 998         default:
 999           break;
1000         }
1001     }
1002 }
1003
1004
/* Forward declaration; the definition is outside this section.  */
static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
                                 bool, bool);

/* Structure for tracking and counting operands.  One entry describes an
   (operand, code) pair seen while scanning candidate chains in
   undistribute_ops_list.  */
typedef struct oecount_s {
  /* Number of times this (OP, OECODE) pair was seen.  */
  int cnt;
  /* Unique id, used by oecount_cmp to break ties between equal counts.  */
  int id;
  /* Tree code of the operation the operand takes part in.  */
  enum tree_code oecode;
  /* The operand itself.  */
  tree op;
} oecount;


/* The heap for the oecount hashtable and the sorted list of operands.  */
static vec<oecount> cvec;
1019
1020
1021 /* Oecount hashtable helpers.  */
1022
1023 struct oecount_hasher
1024 {
1025   typedef int value_type;
1026   typedef int compare_type;
1027   typedef int store_values_directly;
1028   static inline hashval_t hash (const value_type &);
1029   static inline bool equal (const value_type &, const compare_type &);
1030   static bool is_deleted (int &v) { return v == 1; }
1031   static void mark_deleted (int &e) { e = 1; }
1032   static bool is_empty (int &v) { return v == 0; }
1033   static void mark_empty (int &e) { e = 0; }
1034   static void remove (int &) {}
1035 };
1036
1037 /* Hash function for oecount.  */
1038
1039 inline hashval_t
1040 oecount_hasher::hash (const value_type &p)
1041 {
1042   const oecount *c = &cvec[p - 42];
1043   return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1044 }
1045
1046 /* Comparison function for oecount.  */
1047
1048 inline bool
1049 oecount_hasher::equal (const value_type &p1, const compare_type &p2)
1050 {
1051   const oecount *c1 = &cvec[p1 - 42];
1052   const oecount *c2 = &cvec[p2 - 42];
1053   return (c1->oecode == c2->oecode
1054           && c1->op == c2->op);
1055 }
1056
1057 /* Comparison function for qsort sorting oecount elements by count.  */
1058
1059 static int
1060 oecount_cmp (const void *p1, const void *p2)
1061 {
1062   const oecount *c1 = (const oecount *)p1;
1063   const oecount *c2 = (const oecount *)p2;
1064   if (c1->cnt != c2->cnt)
1065     return c1->cnt - c2->cnt;
1066   else
1067     /* If counts are identical, use unique IDs to stabilize qsort.  */
1068     return c1->id - c2->id;
1069 }
1070
1071 /* Return TRUE iff STMT represents a builtin call that raises OP
1072    to some exponent.  */
1073
1074 static bool
1075 stmt_is_power_of_op (gimple stmt, tree op)
1076 {
1077   tree fndecl;
1078
1079   if (!is_gimple_call (stmt))
1080     return false;
1081
1082   fndecl = gimple_call_fndecl (stmt);
1083
1084   if (!fndecl
1085       || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1086     return false;
1087
1088   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1089     {
1090     CASE_FLT_FN (BUILT_IN_POW):
1091     CASE_FLT_FN (BUILT_IN_POWI):
1092       return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1093
1094     default:
1095       return false;
1096     }
1097 }
1098
1099 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1100    in place and return the result.  Assumes that stmt_is_power_of_op
1101    was previously called for STMT and returned TRUE.  */
1102
1103 static HOST_WIDE_INT
1104 decrement_power (gimple stmt)
1105 {
1106   REAL_VALUE_TYPE c, cint;
1107   HOST_WIDE_INT power;
1108   tree arg1;
1109
1110   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1111     {
1112     CASE_FLT_FN (BUILT_IN_POW):
1113       arg1 = gimple_call_arg (stmt, 1);
1114       c = TREE_REAL_CST (arg1);
1115       power = real_to_integer (&c) - 1;
1116       real_from_integer (&cint, VOIDmode, power, SIGNED);
1117       gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1118       return power;
1119
1120     CASE_FLT_FN (BUILT_IN_POWI):
1121       arg1 = gimple_call_arg (stmt, 1);
1122       power = TREE_INT_CST_LOW (arg1) - 1;
1123       gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1124       return power;
1125
1126     default:
1127       gcc_unreachable ();
1128     }
1129 }
1130
1131 /* Find the single immediate use of STMT's LHS, and replace it
1132    with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
1133    replace *DEF with OP as well.  */
1134
1135 static void
1136 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1137 {
1138   tree lhs;
1139   gimple use_stmt;
1140   use_operand_p use;
1141   gimple_stmt_iterator gsi;
1142
1143   if (is_gimple_call (stmt))
1144     lhs = gimple_call_lhs (stmt);
1145   else
1146     lhs = gimple_assign_lhs (stmt);
1147
1148   gcc_assert (has_single_use (lhs));
1149   single_imm_use (lhs, &use, &use_stmt);
1150   if (lhs == *def)
1151     *def = op;
1152   SET_USE (use, op);
1153   if (TREE_CODE (op) != SSA_NAME)
1154     update_stmt (use_stmt);
1155   gsi = gsi_for_stmt (stmt);
1156   unlink_stmt_vdef (stmt);
1157   reassoc_remove_stmt (&gsi);
1158   release_defs (stmt);
1159 }
1160
1161 /* Walks the linear chain with result *DEF searching for an operation
1162    with operand OP and code OPCODE removing that from the chain.  *DEF
1163    is updated if there is only one operand but no operation left.  */
1164
1165 static void
1166 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1167 {
1168   gimple stmt = SSA_NAME_DEF_STMT (*def);
1169
1170   do
1171     {
1172       tree name;
1173
1174       if (opcode == MULT_EXPR
1175           && stmt_is_power_of_op (stmt, op))
1176         {
1177           if (decrement_power (stmt) == 1)
1178             propagate_op_to_single_use (op, stmt, def);
1179           return;
1180         }
1181
1182       name = gimple_assign_rhs1 (stmt);
1183
1184       /* If this is the operation we look for and one of the operands
1185          is ours simply propagate the other operand into the stmts
1186          single use.  */
1187       if (gimple_assign_rhs_code (stmt) == opcode
1188           && (name == op
1189               || gimple_assign_rhs2 (stmt) == op))
1190         {
1191           if (name == op)
1192             name = gimple_assign_rhs2 (stmt);
1193           propagate_op_to_single_use (name, stmt, def);
1194           return;
1195         }
1196
1197       /* We might have a multiply of two __builtin_pow* calls, and
1198          the operand might be hiding in the rightmost one.  */
1199       if (opcode == MULT_EXPR
1200           && gimple_assign_rhs_code (stmt) == opcode
1201           && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1202           && has_single_use (gimple_assign_rhs2 (stmt)))
1203         {
1204           gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1205           if (stmt_is_power_of_op (stmt2, op))
1206             {
1207               if (decrement_power (stmt2) == 1)
1208                 propagate_op_to_single_use (op, stmt2, def);
1209               return;
1210             }
1211         }
1212
1213       /* Continue walking the chain.  */
1214       gcc_assert (name != op
1215                   && TREE_CODE (name) == SSA_NAME);
1216       stmt = SSA_NAME_DEF_STMT (name);
1217     }
1218   while (1);
1219 }
1220
1221 /* Returns true if statement S1 dominates statement S2.  Like
1222    stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */
1223
1224 static bool
1225 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1226 {
1227   basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1228
1229   /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1230      SSA_NAME.  Assume it lives at the beginning of function and
1231      thus dominates everything.  */
1232   if (!bb1 || s1 == s2)
1233     return true;
1234
1235   /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
1236   if (!bb2)
1237     return false;
1238
1239   if (bb1 == bb2)
1240     {
1241       /* PHIs in the same basic block are assumed to be
1242          executed all in parallel, if only one stmt is a PHI,
1243          it dominates the other stmt in the same basic block.  */
1244       if (gimple_code (s1) == GIMPLE_PHI)
1245         return true;
1246
1247       if (gimple_code (s2) == GIMPLE_PHI)
1248         return false;
1249
1250       gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1251
1252       if (gimple_uid (s1) < gimple_uid (s2))
1253         return true;
1254
1255       if (gimple_uid (s1) > gimple_uid (s2))
1256         return false;
1257
1258       gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1259       unsigned int uid = gimple_uid (s1);
1260       for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1261         {
1262           gimple s = gsi_stmt (gsi);
1263           if (gimple_uid (s) != uid)
1264             break;
1265           if (s == s2)
1266             return true;
1267         }
1268
1269       return false;
1270     }
1271
1272   return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1273 }
1274
1275 /* Insert STMT after INSERT_POINT.  */
1276
1277 static void
1278 insert_stmt_after (gimple stmt, gimple insert_point)
1279 {
1280   gimple_stmt_iterator gsi;
1281   basic_block bb;
1282
1283   if (gimple_code (insert_point) == GIMPLE_PHI)
1284     bb = gimple_bb (insert_point);
1285   else if (!stmt_ends_bb_p (insert_point))
1286     {
1287       gsi = gsi_for_stmt (insert_point);
1288       gimple_set_uid (stmt, gimple_uid (insert_point));
1289       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1290       return;
1291     }
1292   else
1293     /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1294        thus if it must end a basic block, it should be a call that can
1295        throw, or some assignment that can throw.  If it throws, the LHS
1296        of it will not be initialized though, so only valid places using
1297        the SSA_NAME should be dominated by the fallthru edge.  */
1298     bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1299   gsi = gsi_after_labels (bb);
1300   if (gsi_end_p (gsi))
1301     {
1302       gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1303       gimple_set_uid (stmt,
1304                       gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1305     }
1306   else
1307     gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1308   gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1309 }
1310
1311 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1312    the result.  Places the statement after the definition of either
1313    OP1 or OP2.  Returns the new statement.  */
1314
1315 static gimple
1316 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1317 {
1318   gimple op1def = NULL, op2def = NULL;
1319   gimple_stmt_iterator gsi;
1320   tree op;
1321   gimple_assign sum;
1322
1323   /* Create the addition statement.  */
1324   op = make_ssa_name (type, NULL);
1325   sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1326
1327   /* Find an insertion place and insert.  */
1328   if (TREE_CODE (op1) == SSA_NAME)
1329     op1def = SSA_NAME_DEF_STMT (op1);
1330   if (TREE_CODE (op2) == SSA_NAME)
1331     op2def = SSA_NAME_DEF_STMT (op2);
1332   if ((!op1def || gimple_nop_p (op1def))
1333       && (!op2def || gimple_nop_p (op2def)))
1334     {
1335       gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1336       if (gsi_end_p (gsi))
1337         {
1338           gimple_stmt_iterator gsi2
1339             = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1340           gimple_set_uid (sum,
1341                           gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1342         }
1343       else
1344         gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1345       gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1346     }
1347   else
1348     {
1349       gimple insert_point;
1350       if ((!op1def || gimple_nop_p (op1def))
1351            || (op2def && !gimple_nop_p (op2def)
1352                && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1353         insert_point = op2def;
1354       else
1355         insert_point = op1def;
1356       insert_stmt_after (sum, insert_point);
1357     }
1358   update_stmt (sum);
1359
1360   return sum;
1361 }
1362
1363 /* Perform un-distribution of divisions and multiplications.
1364    A * X + B * X is transformed into (A + B) * X and A / X + B / X
1365    to (A + B) / X for real X.
1366
1367    The algorithm is organized as follows.
1368
1369     - First we walk the addition chain *OPS looking for summands that
1370       are defined by a multiplication or a real division.  This results
1371       in the candidates bitmap with relevant indices into *OPS.
1372
1373     - Second we build the chains of multiplications or divisions for
1374       these candidates, counting the number of occurrences of (operand, code)
1375       pairs in all of the candidates chains.
1376
1377     - Third we sort the (operand, code) pairs by number of occurrence and
1378       process them starting with the pair with the most uses.
1379
1380       * For each such pair we walk the candidates again to build a
1381         second candidate bitmap noting all multiplication/division chains
1382         that have at least one occurrence of (operand, code).
1383
1384       * We build an alternate addition chain only covering these
1385         candidates with one (operand, code) operation removed from their
1386         multiplication/division chain.
1387
1388       * The first candidate gets replaced by the alternate addition chain
1389         multiplied/divided by the operand.
1390
1391       * All candidate chains get disabled for further processing and
1392         processing of (operand, code) pairs continues.
1393
1394   The alternate addition chains built are re-processed by the main
1395   reassociation algorithm which allows optimizing a * x * y + b * y * x
1396   to (a + b ) * x * y in one invocation of the reassociation pass.  */
1397
1398 static bool
1399 undistribute_ops_list (enum tree_code opcode,
1400                        vec<operand_entry_t> *ops, struct loop *loop)
1401 {
1402   unsigned int length = ops->length ();
1403   operand_entry_t oe1;
1404   unsigned i, j;
1405   sbitmap candidates, candidates2;
1406   unsigned nr_candidates, nr_candidates2;
1407   sbitmap_iterator sbi0;
1408   vec<operand_entry_t> *subops;
1409   bool changed = false;
1410   int next_oecount_id = 0;
1411
1412   if (length <= 1
1413       || opcode != PLUS_EXPR)
1414     return false;
1415
1416   /* Build a list of candidates to process.  */
1417   candidates = sbitmap_alloc (length);
1418   bitmap_clear (candidates);
1419   nr_candidates = 0;
1420   FOR_EACH_VEC_ELT (*ops, i, oe1)
1421     {
1422       enum tree_code dcode;
1423       gimple oe1def;
1424
1425       if (TREE_CODE (oe1->op) != SSA_NAME)
1426         continue;
1427       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1428       if (!is_gimple_assign (oe1def))
1429         continue;
1430       dcode = gimple_assign_rhs_code (oe1def);
1431       if ((dcode != MULT_EXPR
1432            && dcode != RDIV_EXPR)
1433           || !is_reassociable_op (oe1def, dcode, loop))
1434         continue;
1435
1436       bitmap_set_bit (candidates, i);
1437       nr_candidates++;
1438     }
1439
1440   if (nr_candidates < 2)
1441     {
1442       sbitmap_free (candidates);
1443       return false;
1444     }
1445
1446   if (dump_file && (dump_flags & TDF_DETAILS))
1447     {
1448       fprintf (dump_file, "searching for un-distribute opportunities ");
1449       print_generic_expr (dump_file,
1450         (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1451       fprintf (dump_file, " %d\n", nr_candidates);
1452     }
1453
1454   /* Build linearized sub-operand lists and the counting table.  */
1455   cvec.create (0);
1456
1457   hash_table<oecount_hasher> ctable (15);
1458
1459   /* ??? Macro arguments cannot have multi-argument template types in
1460      them.  This typedef is needed to workaround that limitation.  */
1461   typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1462   subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1463   EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1464     {
1465       gimple oedef;
1466       enum tree_code oecode;
1467       unsigned j;
1468
1469       oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1470       oecode = gimple_assign_rhs_code (oedef);
1471       linearize_expr_tree (&subops[i], oedef,
1472                            associative_tree_code (oecode), false);
1473
1474       FOR_EACH_VEC_ELT (subops[i], j, oe1)
1475         {
1476           oecount c;
1477           int *slot;
1478           int idx;
1479           c.oecode = oecode;
1480           c.cnt = 1;
1481           c.id = next_oecount_id++;
1482           c.op = oe1->op;
1483           cvec.safe_push (c);
1484           idx = cvec.length () + 41;
1485           slot = ctable.find_slot (idx, INSERT);
1486           if (!*slot)
1487             {
1488               *slot = idx;
1489             }
1490           else
1491             {
1492               cvec.pop ();
1493               cvec[*slot - 42].cnt++;
1494             }
1495         }
1496     }
1497
1498   /* Sort the counting table.  */
1499   cvec.qsort (oecount_cmp);
1500
1501   if (dump_file && (dump_flags & TDF_DETAILS))
1502     {
1503       oecount *c;
1504       fprintf (dump_file, "Candidates:\n");
1505       FOR_EACH_VEC_ELT (cvec, j, c)
1506         {
1507           fprintf (dump_file, "  %u %s: ", c->cnt,
1508                    c->oecode == MULT_EXPR
1509                    ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1510           print_generic_expr (dump_file, c->op, 0);
1511           fprintf (dump_file, "\n");
1512         }
1513     }
1514
1515   /* Process the (operand, code) pairs in order of most occurrence.  */
1516   candidates2 = sbitmap_alloc (length);
1517   while (!cvec.is_empty ())
1518     {
1519       oecount *c = &cvec.last ();
1520       if (c->cnt < 2)
1521         break;
1522
1523       /* Now collect the operands in the outer chain that contain
1524          the common operand in their inner chain.  */
1525       bitmap_clear (candidates2);
1526       nr_candidates2 = 0;
1527       EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1528         {
1529           gimple oedef;
1530           enum tree_code oecode;
1531           unsigned j;
1532           tree op = (*ops)[i]->op;
1533
1534           /* If we undistributed in this chain already this may be
1535              a constant.  */
1536           if (TREE_CODE (op) != SSA_NAME)
1537             continue;
1538
1539           oedef = SSA_NAME_DEF_STMT (op);
1540           oecode = gimple_assign_rhs_code (oedef);
1541           if (oecode != c->oecode)
1542             continue;
1543
1544           FOR_EACH_VEC_ELT (subops[i], j, oe1)
1545             {
1546               if (oe1->op == c->op)
1547                 {
1548                   bitmap_set_bit (candidates2, i);
1549                   ++nr_candidates2;
1550                   break;
1551                 }
1552             }
1553         }
1554
1555       if (nr_candidates2 >= 2)
1556         {
1557           operand_entry_t oe1, oe2;
1558           gimple prod;
1559           int first = bitmap_first_set_bit (candidates2);
1560
1561           /* Build the new addition chain.  */
1562           oe1 = (*ops)[first];
1563           if (dump_file && (dump_flags & TDF_DETAILS))
1564             {
1565               fprintf (dump_file, "Building (");
1566               print_generic_expr (dump_file, oe1->op, 0);
1567             }
1568           zero_one_operation (&oe1->op, c->oecode, c->op);
1569           EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1570             {
1571               gimple sum;
1572               oe2 = (*ops)[i];
1573               if (dump_file && (dump_flags & TDF_DETAILS))
1574                 {
1575                   fprintf (dump_file, " + ");
1576                   print_generic_expr (dump_file, oe2->op, 0);
1577                 }
1578               zero_one_operation (&oe2->op, c->oecode, c->op);
1579               sum = build_and_add_sum (TREE_TYPE (oe1->op),
1580                                        oe1->op, oe2->op, opcode);
1581               oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1582               oe2->rank = 0;
1583               oe1->op = gimple_get_lhs (sum);
1584             }
1585
1586           /* Apply the multiplication/division.  */
1587           prod = build_and_add_sum (TREE_TYPE (oe1->op),
1588                                     oe1->op, c->op, c->oecode);
1589           if (dump_file && (dump_flags & TDF_DETAILS))
1590             {
1591               fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1592               print_generic_expr (dump_file, c->op, 0);
1593               fprintf (dump_file, "\n");
1594             }
1595
1596           /* Record it in the addition chain and disable further
1597              undistribution with this op.  */
1598           oe1->op = gimple_assign_lhs (prod);
1599           oe1->rank = get_rank (oe1->op);
1600           subops[first].release ();
1601
1602           changed = true;
1603         }
1604
1605       cvec.pop ();
1606     }
1607
1608   for (i = 0; i < ops->length (); ++i)
1609     subops[i].release ();
1610   free (subops);
1611   cvec.release ();
1612   sbitmap_free (candidates);
1613   sbitmap_free (candidates2);
1614
1615   return changed;
1616 }
1617
1618 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1619    expression, examine the other OPS to see if any of them are comparisons
1620    of the same values, which we may be able to combine or eliminate.
1621    For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
1622
1623 static bool
1624 eliminate_redundant_comparison (enum tree_code opcode,
1625                                 vec<operand_entry_t> *ops,
1626                                 unsigned int currindex,
1627                                 operand_entry_t curr)
1628 {
1629   tree op1, op2;
1630   enum tree_code lcode, rcode;
1631   gimple def1, def2;
1632   int i;
1633   operand_entry_t oe;
1634
1635   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1636     return false;
1637
1638   /* Check that CURR is a comparison.  */
1639   if (TREE_CODE (curr->op) != SSA_NAME)
1640     return false;
1641   def1 = SSA_NAME_DEF_STMT (curr->op);
1642   if (!is_gimple_assign (def1))
1643     return false;
1644   lcode = gimple_assign_rhs_code (def1);
1645   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1646     return false;
1647   op1 = gimple_assign_rhs1 (def1);
1648   op2 = gimple_assign_rhs2 (def1);
1649
1650   /* Now look for a similar comparison in the remaining OPS.  */
1651   for (i = currindex + 1; ops->iterate (i, &oe); i++)
1652     {
1653       tree t;
1654
1655       if (TREE_CODE (oe->op) != SSA_NAME)
1656         continue;
1657       def2 = SSA_NAME_DEF_STMT (oe->op);
1658       if (!is_gimple_assign (def2))
1659         continue;
1660       rcode = gimple_assign_rhs_code (def2);
1661       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1662         continue;
1663
1664       /* If we got here, we have a match.  See if we can combine the
1665          two comparisons.  */
1666       if (opcode == BIT_IOR_EXPR)
1667         t = maybe_fold_or_comparisons (lcode, op1, op2,
1668                                        rcode, gimple_assign_rhs1 (def2),
1669                                        gimple_assign_rhs2 (def2));
1670       else
1671         t = maybe_fold_and_comparisons (lcode, op1, op2,
1672                                         rcode, gimple_assign_rhs1 (def2),
1673                                         gimple_assign_rhs2 (def2));
1674       if (!t)
1675         continue;
1676
1677       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1678          always give us a boolean_type_node value back.  If the original
1679          BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1680          we need to convert.  */
1681       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1682         t = fold_convert (TREE_TYPE (curr->op), t);
1683
1684       if (TREE_CODE (t) != INTEGER_CST
1685           && !operand_equal_p (t, curr->op, 0))
1686         {
1687           enum tree_code subcode;
1688           tree newop1, newop2;
1689           if (!COMPARISON_CLASS_P (t))
1690             continue;
1691           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1692           STRIP_USELESS_TYPE_CONVERSION (newop1);
1693           STRIP_USELESS_TYPE_CONVERSION (newop2);
1694           if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1695             continue;
1696         }
1697
1698       if (dump_file && (dump_flags & TDF_DETAILS))
1699         {
1700           fprintf (dump_file, "Equivalence: ");
1701           print_generic_expr (dump_file, curr->op, 0);
1702           fprintf (dump_file, " %s ", op_symbol_code (opcode));
1703           print_generic_expr (dump_file, oe->op, 0);
1704           fprintf (dump_file, " -> ");
1705           print_generic_expr (dump_file, t, 0);
1706           fprintf (dump_file, "\n");
1707         }
1708
1709       /* Now we can delete oe, as it has been subsumed by the new combined
1710          expression t.  */
1711       ops->ordered_remove (i);
1712       reassociate_stats.ops_eliminated ++;
1713
1714       /* If t is the same as curr->op, we're done.  Otherwise we must
1715          replace curr->op with t.  Special case is if we got a constant
1716          back, in which case we add it to the end instead of in place of
1717          the current entry.  */
1718       if (TREE_CODE (t) == INTEGER_CST)
1719         {
1720           ops->ordered_remove (currindex);
1721           add_to_ops_vec (ops, t);
1722         }
1723       else if (!operand_equal_p (t, curr->op, 0))
1724         {
1725           gimple sum;
1726           enum tree_code subcode;
1727           tree newop1;
1728           tree newop2;
1729           gcc_assert (COMPARISON_CLASS_P (t));
1730           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1731           STRIP_USELESS_TYPE_CONVERSION (newop1);
1732           STRIP_USELESS_TYPE_CONVERSION (newop2);
1733           gcc_checking_assert (is_gimple_val (newop1)
1734                                && is_gimple_val (newop2));
1735           sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1736           curr->op = gimple_get_lhs (sum);
1737         }
1738       return true;
1739     }
1740
1741   return false;
1742 }
1743
1744 /* Perform various identities and other optimizations on the list of
1745    operand entries, stored in OPS.  The tree code for the binary
1746    operation between all the operands is OPCODE.  */
1747
1748 static void
1749 optimize_ops_list (enum tree_code opcode,
1750                    vec<operand_entry_t> *ops)
1751 {
1752   unsigned int length = ops->length ();
1753   unsigned int i;
1754   operand_entry_t oe;
1755   operand_entry_t oelast = NULL;
1756   bool iterate = false;
1757
1758   if (length == 1)
1759     return;
1760
1761   oelast = ops->last ();
1762
1763   /* If the last two are constants, pop the constants off, merge them
1764      and try the next two.  */
1765   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1766     {
1767       operand_entry_t oelm1 = (*ops)[length - 2];
1768
1769       if (oelm1->rank == 0
1770           && is_gimple_min_invariant (oelm1->op)
1771           && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1772                                        TREE_TYPE (oelast->op)))
1773         {
1774           tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1775                                      oelm1->op, oelast->op);
1776
1777           if (folded && is_gimple_min_invariant (folded))
1778             {
1779               if (dump_file && (dump_flags & TDF_DETAILS))
1780                 fprintf (dump_file, "Merging constants\n");
1781
1782               ops->pop ();
1783               ops->pop ();
1784
1785               add_to_ops_vec (ops, folded);
1786               reassociate_stats.constants_eliminated++;
1787
1788               optimize_ops_list (opcode, ops);
1789               return;
1790             }
1791         }
1792     }
1793
1794   eliminate_using_constants (opcode, ops);
1795   oelast = NULL;
1796
1797   for (i = 0; ops->iterate (i, &oe);)
1798     {
1799       bool done = false;
1800
1801       if (eliminate_not_pairs (opcode, ops, i, oe))
1802         return;
1803       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1804           || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1805           || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1806         {
1807           if (done)
1808             return;
1809           iterate = true;
1810           oelast = NULL;
1811           continue;
1812         }
1813       oelast = oe;
1814       i++;
1815     }
1816
1817   length = ops->length ();
1818   oelast = ops->last ();
1819
1820   if (iterate)
1821     optimize_ops_list (opcode, ops);
1822 }
1823
1824 /* The following functions are subroutines to optimize_range_tests and allow
1825    it to try to change a logical combination of comparisons into a range
1826    test.
1827
1828    For example, both
1829         X == 2 || X == 5 || X == 3 || X == 4
1830    and
1831         X >= 2 && X <= 5
1832    are converted to
1833         (unsigned) (X - 2) <= 3
1834
1835    For more information see comments above fold_test_range in fold-const.c,
1836    this implementation is for GIMPLE.  */
1837
1838 struct range_entry
1839 {
1840   tree exp;
1841   tree low;
1842   tree high;
1843   bool in_p;
1844   bool strict_overflow_p;
1845   unsigned int idx, next;
1846 };
1847
1848 /* This is similar to make_range in fold-const.c, but on top of
1849    GIMPLE instead of trees.  If EXP is non-NULL, it should be
1850    an SSA_NAME and STMT argument is ignored, otherwise STMT
1851    argument should be a GIMPLE_COND.  */
1852
1853 static void
1854 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1855 {
1856   int in_p;
1857   tree low, high;
1858   bool is_bool, strict_overflow_p;
1859
1860   r->exp = NULL_TREE;
1861   r->in_p = false;
1862   r->strict_overflow_p = false;
1863   r->low = NULL_TREE;
1864   r->high = NULL_TREE;
1865   if (exp != NULL_TREE
1866       && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1867     return;
1868
1869   /* Start with simply saying "EXP != 0" and then look at the code of EXP
1870      and see if we can refine the range.  Some of the cases below may not
1871      happen, but it doesn't seem worth worrying about this.  We "continue"
1872      the outer loop when we've changed something; otherwise we "break"
1873      the switch, which will "break" the while.  */
1874   low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1875   high = low;
1876   in_p = 0;
1877   strict_overflow_p = false;
1878   is_bool = false;
1879   if (exp == NULL_TREE)
1880     is_bool = true;
1881   else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1882     {
1883       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1884         is_bool = true;
1885       else
1886         return;
1887     }
1888   else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1889     is_bool = true;
1890
1891   while (1)
1892     {
1893       enum tree_code code;
1894       tree arg0, arg1, exp_type;
1895       tree nexp;
1896       location_t loc;
1897
1898       if (exp != NULL_TREE)
1899         {
1900           if (TREE_CODE (exp) != SSA_NAME
1901               || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1902             break;
1903
1904           stmt = SSA_NAME_DEF_STMT (exp);
1905           if (!is_gimple_assign (stmt))
1906             break;
1907
1908           code = gimple_assign_rhs_code (stmt);
1909           arg0 = gimple_assign_rhs1 (stmt);
1910           arg1 = gimple_assign_rhs2 (stmt);
1911           exp_type = TREE_TYPE (exp);
1912         }
1913       else
1914         {
1915           code = gimple_cond_code (stmt);
1916           arg0 = gimple_cond_lhs (stmt);
1917           arg1 = gimple_cond_rhs (stmt);
1918           exp_type = boolean_type_node;
1919         }
1920
1921       if (TREE_CODE (arg0) != SSA_NAME)
1922         break;
1923       loc = gimple_location (stmt);
1924       switch (code)
1925         {
1926         case BIT_NOT_EXPR:
1927           if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1928               /* Ensure the range is either +[-,0], +[0,0],
1929                  -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1930                  -[1,1].  If it is e.g. +[-,-] or -[-,-]
1931                  or similar expression of unconditional true or
1932                  false, it should not be negated.  */
1933               && ((high && integer_zerop (high))
1934                   || (low && integer_onep (low))))
1935             {
1936               in_p = !in_p;
1937               exp = arg0;
1938               continue;
1939             }
1940           break;
1941         case SSA_NAME:
1942           exp = arg0;
1943           continue;
1944         CASE_CONVERT:
1945           if (is_bool)
1946             goto do_default;
1947           if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1948             {
1949               if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1950                 is_bool = true;
1951               else
1952                 return;
1953             }
1954           else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1955             is_bool = true;
1956           goto do_default;
1957         case EQ_EXPR:
1958         case NE_EXPR:
1959         case LT_EXPR:
1960         case LE_EXPR:
1961         case GE_EXPR:
1962         case GT_EXPR:
1963           is_bool = true;
1964           /* FALLTHRU */
1965         default:
1966           if (!is_bool)
1967             return;
1968         do_default:
1969           nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1970                                   &low, &high, &in_p,
1971                                   &strict_overflow_p);
1972           if (nexp != NULL_TREE)
1973             {
1974               exp = nexp;
1975               gcc_assert (TREE_CODE (exp) == SSA_NAME);
1976               continue;
1977             }
1978           break;
1979         }
1980       break;
1981     }
1982   if (is_bool)
1983     {
1984       r->exp = exp;
1985       r->in_p = in_p;
1986       r->low = low;
1987       r->high = high;
1988       r->strict_overflow_p = strict_overflow_p;
1989     }
1990 }
1991
1992 /* Comparison function for qsort.  Sort entries
1993    without SSA_NAME exp first, then with SSA_NAMEs sorted
1994    by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1995    by increasing ->low and if ->low is the same, by increasing
1996    ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
1997    maximum.  */
1998
1999 static int
2000 range_entry_cmp (const void *a, const void *b)
2001 {
2002   const struct range_entry *p = (const struct range_entry *) a;
2003   const struct range_entry *q = (const struct range_entry *) b;
2004
2005   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2006     {
2007       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2008         {
2009           /* Group range_entries for the same SSA_NAME together.  */
2010           if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2011             return -1;
2012           else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2013             return 1;
2014           /* If ->low is different, NULL low goes first, then by
2015              ascending low.  */
2016           if (p->low != NULL_TREE)
2017             {
2018               if (q->low != NULL_TREE)
2019                 {
2020                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
2021                                           p->low, q->low);
2022                   if (tem && integer_onep (tem))
2023                     return -1;
2024                   tem = fold_binary (GT_EXPR, boolean_type_node,
2025                                      p->low, q->low);
2026                   if (tem && integer_onep (tem))
2027                     return 1;
2028                 }
2029               else
2030                 return 1;
2031             }
2032           else if (q->low != NULL_TREE)
2033             return -1;
2034           /* If ->high is different, NULL high goes last, before that by
2035              ascending high.  */
2036           if (p->high != NULL_TREE)
2037             {
2038               if (q->high != NULL_TREE)
2039                 {
2040                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
2041                                           p->high, q->high);
2042                   if (tem && integer_onep (tem))
2043                     return -1;
2044                   tem = fold_binary (GT_EXPR, boolean_type_node,
2045                                      p->high, q->high);
2046                   if (tem && integer_onep (tem))
2047                     return 1;
2048                 }
2049               else
2050                 return -1;
2051             }
2052           else if (p->high != NULL_TREE)
2053             return 1;
2054           /* If both ranges are the same, sort below by ascending idx.  */
2055         }
2056       else
2057         return 1;
2058     }
2059   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2060     return -1;
2061
2062   if (p->idx < q->idx)
2063     return -1;
2064   else
2065     {
2066       gcc_checking_assert (p->idx > q->idx);
2067       return 1;
2068     }
2069 }
2070
2071 /* Helper routine of optimize_range_test.
2072    [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2073    RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2074    OPCODE and OPS are arguments of optimize_range_tests.  Return
2075    true if the range merge has been successful.
2076    If OPCODE is ERROR_MARK, this is called from within
2077    maybe_optimize_range_tests and is performing inter-bb range optimization.
2078    In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2079    oe->rank.  */
2080
2081 static bool
2082 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2083                    unsigned int count, enum tree_code opcode,
2084                    vec<operand_entry_t> *ops, tree exp, bool in_p,
2085                    tree low, tree high, bool strict_overflow_p)
2086 {
2087   operand_entry_t oe = (*ops)[range->idx];
2088   tree op = oe->op;
2089   gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2090     last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2091   location_t loc = gimple_location (stmt);
2092   tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2093   tree tem = build_range_check (loc, optype, exp, in_p, low, high);
2094   enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2095   gimple_stmt_iterator gsi;
2096
2097   if (tem == NULL_TREE)
2098     return false;
2099
2100   if (strict_overflow_p && issue_strict_overflow_warning (wc))
2101     warning_at (loc, OPT_Wstrict_overflow,
2102                 "assuming signed overflow does not occur "
2103                 "when simplifying range test");
2104
2105   if (dump_file && (dump_flags & TDF_DETAILS))
2106     {
2107       struct range_entry *r;
2108       fprintf (dump_file, "Optimizing range tests ");
2109       print_generic_expr (dump_file, range->exp, 0);
2110       fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2111       print_generic_expr (dump_file, range->low, 0);
2112       fprintf (dump_file, ", ");
2113       print_generic_expr (dump_file, range->high, 0);
2114       fprintf (dump_file, "]");
2115       for (r = otherrange; r < otherrange + count; r++)
2116         {
2117           fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2118           print_generic_expr (dump_file, r->low, 0);
2119           fprintf (dump_file, ", ");
2120           print_generic_expr (dump_file, r->high, 0);
2121           fprintf (dump_file, "]");
2122         }
2123       fprintf (dump_file, "\n into ");
2124       print_generic_expr (dump_file, tem, 0);
2125       fprintf (dump_file, "\n");
2126     }
2127
2128   if (opcode == BIT_IOR_EXPR
2129       || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2130     tem = invert_truthvalue_loc (loc, tem);
2131
2132   tem = fold_convert_loc (loc, optype, tem);
2133   gsi = gsi_for_stmt (stmt);
2134   /* In rare cases range->exp can be equal to lhs of stmt.
2135      In that case we have to insert after the stmt rather then before
2136      it.  */
2137   if (op == range->exp)
2138     tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2139                                     GSI_CONTINUE_LINKING);
2140   else
2141     {
2142       tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2143                                       GSI_SAME_STMT);
2144       gsi_prev (&gsi);
2145     }
2146   for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2147     if (gimple_uid (gsi_stmt (gsi)))
2148       break;
2149     else
2150       gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2151
2152   oe->op = tem;
2153   range->exp = exp;
2154   range->low = low;
2155   range->high = high;
2156   range->in_p = in_p;
2157   range->strict_overflow_p = false;
2158
2159   for (range = otherrange; range < otherrange + count; range++)
2160     {
2161       oe = (*ops)[range->idx];
2162       /* Now change all the other range test immediate uses, so that
2163          those tests will be optimized away.  */
2164       if (opcode == ERROR_MARK)
2165         {
2166           if (oe->op)
2167             oe->op = build_int_cst (TREE_TYPE (oe->op),
2168                                     oe->rank == BIT_IOR_EXPR ? 0 : 1);
2169           else
2170             oe->op = (oe->rank == BIT_IOR_EXPR
2171                       ? boolean_false_node : boolean_true_node);
2172         }
2173       else
2174         oe->op = error_mark_node;
2175       range->exp = NULL_TREE;
2176     }
2177   return true;
2178 }
2179
2180 /* Optimize X == CST1 || X == CST2
2181    if popcount (CST1 ^ CST2) == 1 into
2182    (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2183    Similarly for ranges.  E.g.
2184    X != 2 && X != 3 && X != 10 && X != 11
2185    will be transformed by the previous optimization into
2186    !((X - 2U) <= 1U || (X - 10U) <= 1U)
2187    and this loop can transform that into
2188    !(((X & ~8) - 2U) <= 1U).  */
2189
2190 static bool
2191 optimize_range_tests_xor (enum tree_code opcode, tree type,
2192                           tree lowi, tree lowj, tree highi, tree highj,
2193                           vec<operand_entry_t> *ops,
2194                           struct range_entry *rangei,
2195                           struct range_entry *rangej)
2196 {
2197   tree lowxor, highxor, tem, exp;
2198   /* Check highi ^ lowi == highj ^ lowj and
2199      popcount (highi ^ lowi) == 1.  */
2200   lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2201   if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2202     return false;
2203   if (tree_log2 (lowxor) < 0)
2204     return false;
2205   highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2206   if (!tree_int_cst_equal (lowxor, highxor))
2207     return false;
2208
2209   tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2210   exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2211   lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2212   highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2213   if (update_range_test (rangei, rangej, 1, opcode, ops, exp,
2214                          rangei->in_p, lowj, highj,
2215                          rangei->strict_overflow_p
2216                          || rangej->strict_overflow_p))
2217     return true;
2218   return false;
2219 }
2220
2221 /* Optimize X == CST1 || X == CST2
2222    if popcount (CST2 - CST1) == 1 into
2223    ((X - CST1) & ~(CST2 - CST1)) == 0.
2224    Similarly for ranges.  E.g.
2225    X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2226    || X == 75 || X == 45
2227    will be transformed by the previous optimization into
2228    (X - 43U) <= 3U || (X - 75U) <= 3U
2229    and this loop can transform that into
2230    ((X - 43U) & ~(75U - 43U)) <= 3U.  */
2231 static bool
2232 optimize_range_tests_diff (enum tree_code opcode, tree type,
2233                             tree lowi, tree lowj, tree highi, tree highj,
2234                             vec<operand_entry_t> *ops,
2235                             struct range_entry *rangei,
2236                             struct range_entry *rangej)
2237 {
2238   tree tem1, tem2, mask;
2239   /* Check highi - lowi == highj - lowj.  */
2240   tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2241   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2242     return false;
2243   tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2244   if (!tree_int_cst_equal (tem1, tem2))
2245     return false;
2246   /* Check popcount (lowj - lowi) == 1.  */
2247   tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2248   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2249     return false;
2250   if (tree_log2 (tem1) < 0)
2251     return false;
2252
2253   mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2254   tem1 = fold_binary (MINUS_EXPR, type, rangei->exp, lowi);
2255   tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2256   lowj = build_int_cst (type, 0);
2257   if (update_range_test (rangei, rangej, 1, opcode, ops, tem1,
2258                          rangei->in_p, lowj, tem2,
2259                          rangei->strict_overflow_p
2260                          || rangej->strict_overflow_p))
2261     return true;
2262   return false;
2263 }
2264
2265 /* It does some common checks for function optimize_range_tests_xor and
2266    optimize_range_tests_diff.
2267    If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2268    Else it calls optimize_range_tests_diff.  */
2269
2270 static bool
2271 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2272                         bool optimize_xor, vec<operand_entry_t> *ops,
2273                         struct range_entry *ranges)
2274 {
2275   int i, j;
2276   bool any_changes = false;
2277   for (i = first; i < length; i++)
2278     {
2279       tree lowi, highi, lowj, highj, type, tem;
2280
2281       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2282         continue;
2283       type = TREE_TYPE (ranges[i].exp);
2284       if (!INTEGRAL_TYPE_P (type))
2285         continue;
2286       lowi = ranges[i].low;
2287       if (lowi == NULL_TREE)
2288         lowi = TYPE_MIN_VALUE (type);
2289       highi = ranges[i].high;
2290       if (highi == NULL_TREE)
2291         continue;
2292       for (j = i + 1; j < length && j < i + 64; j++)
2293         {
2294           bool changes;
2295           if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2296             continue;
2297           lowj = ranges[j].low;
2298           if (lowj == NULL_TREE)
2299             continue;
2300           highj = ranges[j].high;
2301           if (highj == NULL_TREE)
2302             highj = TYPE_MAX_VALUE (type);
2303           /* Check lowj > highi.  */
2304           tem = fold_binary (GT_EXPR, boolean_type_node,
2305                              lowj, highi);
2306           if (tem == NULL_TREE || !integer_onep (tem))
2307             continue;
2308           if (optimize_xor)
2309             changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2310                                                 highi, highj, ops,
2311                                                 ranges + i, ranges + j);
2312           else
2313             changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2314                                                  highi, highj, ops,
2315                                                  ranges + i, ranges + j);
2316           if (changes)
2317             {
2318               any_changes = true;
2319               break;
2320             }
2321         }
2322     }
2323   return any_changes;
2324 }
2325
2326 /* Optimize range tests, similarly how fold_range_test optimizes
2327    it on trees.  The tree code for the binary
2328    operation between all the operands is OPCODE.
2329    If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2330    maybe_optimize_range_tests for inter-bb range optimization.
2331    In that case if oe->op is NULL, oe->id is bb->index whose
2332    GIMPLE_COND is && or ||ed into the test, and oe->rank says
2333    the actual opcode.  */
2334
2335 static bool
2336 optimize_range_tests (enum tree_code opcode,
2337                       vec<operand_entry_t> *ops)
2338 {
2339   unsigned int length = ops->length (), i, j, first;
2340   operand_entry_t oe;
2341   struct range_entry *ranges;
2342   bool any_changes = false;
2343
2344   if (length == 1)
2345     return false;
2346
2347   ranges = XNEWVEC (struct range_entry, length);
2348   for (i = 0; i < length; i++)
2349     {
2350       oe = (*ops)[i];
2351       ranges[i].idx = i;
2352       init_range_entry (ranges + i, oe->op,
2353                         oe->op ? NULL :
2354                           last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2355       /* For | invert it now, we will invert it again before emitting
2356          the optimized expression.  */
2357       if (opcode == BIT_IOR_EXPR
2358           || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2359         ranges[i].in_p = !ranges[i].in_p;
2360     }
2361
2362   qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2363   for (i = 0; i < length; i++)
2364     if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2365       break;
2366
2367   /* Try to merge ranges.  */
2368   for (first = i; i < length; i++)
2369     {
2370       tree low = ranges[i].low;
2371       tree high = ranges[i].high;
2372       int in_p = ranges[i].in_p;
2373       bool strict_overflow_p = ranges[i].strict_overflow_p;
2374       int update_fail_count = 0;
2375
2376       for (j = i + 1; j < length; j++)
2377         {
2378           if (ranges[i].exp != ranges[j].exp)
2379             break;
2380           if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2381                              ranges[j].in_p, ranges[j].low, ranges[j].high))
2382             break;
2383           strict_overflow_p |= ranges[j].strict_overflow_p;
2384         }
2385
2386       if (j == i + 1)
2387         continue;
2388
2389       if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2390                              ops, ranges[i].exp, in_p, low, high,
2391                              strict_overflow_p))
2392         {
2393           i = j - 1;
2394           any_changes = true;
2395         }
2396       /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2397          while update_range_test would fail.  */
2398       else if (update_fail_count == 64)
2399         i = j - 1;
2400       else
2401         ++update_fail_count;
2402     }
2403
2404   any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2405                                          ops, ranges);
2406
2407   if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2408     any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2409                                            ops, ranges);
2410
2411   if (any_changes && opcode != ERROR_MARK)
2412     {
2413       j = 0;
2414       FOR_EACH_VEC_ELT (*ops, i, oe)
2415         {
2416           if (oe->op == error_mark_node)
2417             continue;
2418           else if (i != j)
2419             (*ops)[j] = oe;
2420           j++;
2421         }
2422       ops->truncate (j);
2423     }
2424
2425   XDELETEVEC (ranges);
2426   return any_changes;
2427 }
2428
2429 /* Return true if STMT is a cast like:
2430    <bb N>:
2431    ...
2432    _123 = (int) _234;
2433
2434    <bb M>:
2435    # _345 = PHI <_123(N), 1(...), 1(...)>
2436    where _234 has bool type, _123 has single use and
2437    bb N has a single successor M.  This is commonly used in
2438    the last block of a range test.  */
2439
2440 static bool
2441 final_range_test_p (gimple stmt)
2442 {
2443   basic_block bb, rhs_bb;
2444   edge e;
2445   tree lhs, rhs;
2446   use_operand_p use_p;
2447   gimple use_stmt;
2448
2449   if (!gimple_assign_cast_p (stmt))
2450     return false;
2451   bb = gimple_bb (stmt);
2452   if (!single_succ_p (bb))
2453     return false;
2454   e = single_succ_edge (bb);
2455   if (e->flags & EDGE_COMPLEX)
2456     return false;
2457
2458   lhs = gimple_assign_lhs (stmt);
2459   rhs = gimple_assign_rhs1 (stmt);
2460   if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2461       || TREE_CODE (rhs) != SSA_NAME
2462       || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2463     return false;
2464
2465   /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
2466   if (!single_imm_use (lhs, &use_p, &use_stmt))
2467     return false;
2468
2469   if (gimple_code (use_stmt) != GIMPLE_PHI
2470       || gimple_bb (use_stmt) != e->dest)
2471     return false;
2472
2473   /* And that the rhs is defined in the same loop.  */
2474   rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2475   if (rhs_bb == NULL
2476       || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2477     return false;
2478
2479   return true;
2480 }
2481
2482 /* Return true if BB is suitable basic block for inter-bb range test
2483    optimization.  If BACKWARD is true, BB should be the only predecessor
2484    of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2485    or compared with to find a common basic block to which all conditions
2486    branch to if true resp. false.  If BACKWARD is false, TEST_BB should
2487    be the only predecessor of BB.  */
2488
2489 static bool
2490 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2491                   bool backward)
2492 {
2493   edge_iterator ei, ei2;
2494   edge e, e2;
2495   gimple stmt;
2496   gimple_phi_iterator gsi;
2497   bool other_edge_seen = false;
2498   bool is_cond;
2499
2500   if (test_bb == bb)
2501     return false;
2502   /* Check last stmt first.  */
2503   stmt = last_stmt (bb);
2504   if (stmt == NULL
2505       || (gimple_code (stmt) != GIMPLE_COND
2506           && (backward || !final_range_test_p (stmt)))
2507       || gimple_visited_p (stmt)
2508       || stmt_could_throw_p (stmt)
2509       || *other_bb == bb)
2510     return false;
2511   is_cond = gimple_code (stmt) == GIMPLE_COND;
2512   if (is_cond)
2513     {
2514       /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2515          goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2516          to *OTHER_BB (if not set yet, try to find it out).  */
2517       if (EDGE_COUNT (bb->succs) != 2)
2518         return false;
2519       FOR_EACH_EDGE (e, ei, bb->succs)
2520         {
2521           if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2522             return false;
2523           if (e->dest == test_bb)
2524             {
2525               if (backward)
2526                 continue;
2527               else
2528                 return false;
2529             }
2530           if (e->dest == bb)
2531             return false;
2532           if (*other_bb == NULL)
2533             {
2534               FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2535                 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2536                   return false;
2537                 else if (e->dest == e2->dest)
2538                   *other_bb = e->dest;
2539               if (*other_bb == NULL)
2540                 return false;
2541             }
2542           if (e->dest == *other_bb)
2543             other_edge_seen = true;
2544           else if (backward)
2545             return false;
2546         }
2547       if (*other_bb == NULL || !other_edge_seen)
2548         return false;
2549     }
2550   else if (single_succ (bb) != *other_bb)
2551     return false;
2552
2553   /* Now check all PHIs of *OTHER_BB.  */
2554   e = find_edge (bb, *other_bb);
2555   e2 = find_edge (test_bb, *other_bb);
2556   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2557     {
2558       gimple_phi phi = gsi.phi ();
2559       /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2560          corresponding to BB and TEST_BB predecessor must be the same.  */
2561       if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2562                             gimple_phi_arg_def (phi, e2->dest_idx), 0))
2563         {
2564           /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2565              one of the PHIs should have the lhs of the last stmt in
2566              that block as PHI arg and that PHI should have 0 or 1
2567              corresponding to it in all other range test basic blocks
2568              considered.  */
2569           if (!is_cond)
2570             {
2571               if (gimple_phi_arg_def (phi, e->dest_idx)
2572                   == gimple_assign_lhs (stmt)
2573                   && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2574                       || integer_onep (gimple_phi_arg_def (phi,
2575                                                            e2->dest_idx))))
2576                 continue;
2577             }
2578           else
2579             {
2580               gimple test_last = last_stmt (test_bb);
2581               if (gimple_code (test_last) != GIMPLE_COND
2582                   && gimple_phi_arg_def (phi, e2->dest_idx)
2583                      == gimple_assign_lhs (test_last)
2584                   && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2585                       || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2586                 continue;
2587             }
2588
2589           return false;
2590         }
2591     }
2592   return true;
2593 }
2594
2595 /* Return true if BB doesn't have side-effects that would disallow
2596    range test optimization, all SSA_NAMEs set in the bb are consumed
2597    in the bb and there are no PHIs.  */
2598
2599 static bool
2600 no_side_effect_bb (basic_block bb)
2601 {
2602   gimple_stmt_iterator gsi;
2603   gimple last;
2604
2605   if (!gimple_seq_empty_p (phi_nodes (bb)))
2606     return false;
2607   last = last_stmt (bb);
2608   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2609     {
2610       gimple stmt = gsi_stmt (gsi);
2611       tree lhs;
2612       imm_use_iterator imm_iter;
2613       use_operand_p use_p;
2614
2615       if (is_gimple_debug (stmt))
2616         continue;
2617       if (gimple_has_side_effects (stmt))
2618         return false;
2619       if (stmt == last)
2620         return true;
2621       if (!is_gimple_assign (stmt))
2622         return false;
2623       lhs = gimple_assign_lhs (stmt);
2624       if (TREE_CODE (lhs) != SSA_NAME)
2625         return false;
2626       if (gimple_assign_rhs_could_trap_p (stmt))
2627         return false;
2628       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2629         {
2630           gimple use_stmt = USE_STMT (use_p);
2631           if (is_gimple_debug (use_stmt))
2632             continue;
2633           if (gimple_bb (use_stmt) != bb)
2634             return false;
2635         }
2636     }
2637   return false;
2638 }
2639
2640 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2641    return true and fill in *OPS recursively.  */
2642
2643 static bool
2644 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2645          struct loop *loop)
2646 {
2647   gimple stmt = SSA_NAME_DEF_STMT (var);
2648   tree rhs[2];
2649   int i;
2650
2651   if (!is_reassociable_op (stmt, code, loop))
2652     return false;
2653
2654   rhs[0] = gimple_assign_rhs1 (stmt);
2655   rhs[1] = gimple_assign_rhs2 (stmt);
2656   gimple_set_visited (stmt, true);
2657   for (i = 0; i < 2; i++)
2658     if (TREE_CODE (rhs[i]) == SSA_NAME
2659         && !get_ops (rhs[i], code, ops, loop)
2660         && has_single_use (rhs[i]))
2661       {
2662         operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2663
2664         oe->op = rhs[i];
2665         oe->rank = code;
2666         oe->id = 0;
2667         oe->count = 1;
2668         ops->safe_push (oe);
2669       }
2670   return true;
2671 }
2672
2673 /* Find the ops that were added by get_ops starting from VAR, see if
2674    they were changed during update_range_test and if yes, create new
2675    stmts.  */
2676
2677 static tree
2678 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2679             unsigned int *pidx, struct loop *loop)
2680 {
2681   gimple stmt = SSA_NAME_DEF_STMT (var);
2682   tree rhs[4];
2683   int i;
2684
2685   if (!is_reassociable_op (stmt, code, loop))
2686     return NULL;
2687
2688   rhs[0] = gimple_assign_rhs1 (stmt);
2689   rhs[1] = gimple_assign_rhs2 (stmt);
2690   rhs[2] = rhs[0];
2691   rhs[3] = rhs[1];
2692   for (i = 0; i < 2; i++)
2693     if (TREE_CODE (rhs[i]) == SSA_NAME)
2694       {
2695         rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2696         if (rhs[2 + i] == NULL_TREE)
2697           {
2698             if (has_single_use (rhs[i]))
2699               rhs[2 + i] = ops[(*pidx)++]->op;
2700             else
2701               rhs[2 + i] = rhs[i];
2702           }
2703       }
2704   if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2705       && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2706     {
2707       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2708       var = make_ssa_name (TREE_TYPE (var), NULL);
2709       gimple_assign g =
2710         gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
2711                                       var, rhs[2], rhs[3]);
2712       gimple_set_uid (g, gimple_uid (stmt));
2713       gimple_set_visited (g, true);
2714       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2715     }
2716   return var;
2717 }
2718
2719 /* Structure to track the initial value passed to get_ops and
2720    the range in the ops vector for each basic block.  */
2721
2722 struct inter_bb_range_test_entry
2723 {
2724   tree op;
2725   unsigned int first_idx, last_idx;
2726 };
2727
/* Inter-bb range test optimization.

   STMT is the last statement of a basic block and must be either a
   GIMPLE_COND or a statement satisfying final_range_test_p, otherwise
   nothing is done.  Starting from STMT's block, find a chain of blocks
   first_bb .. last_bb in which each block is the single predecessor of
   the next one, every block but the last ends with a GIMPLE_COND that
   has one edge to a common block OTHER_BB, and every block but the
   first satisfies no_side_effect_bb.  The range tests of the chain are
   collected into a single operand vector and handed to
   optimize_range_tests; if that reports changes, the conditions and the
   statements feeding them are rewritten to use the updated operands.  */

static void
maybe_optimize_range_tests (gimple stmt)
{
  basic_block first_bb = gimple_bb (stmt);
  basic_block last_bb = first_bb;
  basic_block other_bb = NULL;
  basic_block bb;
  edge_iterator ei;
  edge e;
  auto_vec<operand_entry_t> ops;
  auto_vec<inter_bb_range_test_entry> bbinfo;
  bool any_changes = false;

  /* Consider only basic blocks that end with GIMPLE_COND or
     a cast statement satisfying final_range_test_p.  All
     but the last bb in the first_bb .. last_bb range
     should end with GIMPLE_COND.  */
  if (gimple_code (stmt) == GIMPLE_COND)
    {
      if (EDGE_COUNT (first_bb->succs) != 2)
        return;
    }
  else if (final_range_test_p (stmt))
    other_bb = single_succ (first_bb);
  else
    return;

  if (stmt_could_throw_p (stmt))
    return;

  /* As relative ordering of post-dominator sons isn't fixed,
     maybe_optimize_range_tests can be called first on any
     bb in the range we want to optimize.  So, start searching
     backwards, if first_bb can be set to a predecessor.  */
  while (single_pred_p (first_bb))
    {
      basic_block pred_bb = single_pred (first_bb);
      if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
        break;
      if (!no_side_effect_bb (first_bb))
        break;
      first_bb = pred_bb;
    }
  /* If first_bb is last_bb, other_bb hasn't been computed yet.
     Before starting forward search in last_bb successors, find
     out the other_bb.  */
  if (first_bb == last_bb)
    {
      other_bb = NULL;
      /* As non-GIMPLE_COND last stmt always terminates the range,
         if backward search didn't discover anything, just give up.  */
      if (gimple_code (stmt) != GIMPLE_COND)
        return;
      /* Look at both successors.  Either it ends with a GIMPLE_COND
         and satisfies suitable_cond_bb, or ends with a cast and
         other_bb is that cast's successor.  */
      FOR_EACH_EDGE (e, ei, first_bb->succs)
        if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
            || e->dest == first_bb)
          return;
        else if (single_pred_p (e->dest))
          {
            stmt = last_stmt (e->dest);
            if (stmt
                && gimple_code (stmt) == GIMPLE_COND
                && EDGE_COUNT (e->dest->succs) == 2)
              {
                if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
                  break;
                else
                  other_bb = NULL;
              }
            else if (stmt
                     && final_range_test_p (stmt)
                     && find_edge (first_bb, single_succ (e->dest)))
              {
                other_bb = single_succ (e->dest);
                if (other_bb == first_bb)
                  other_bb = NULL;
              }
          }
      if (other_bb == NULL)
        return;
    }
  /* Now do the forward search, moving last_bb to successor bbs
     that aren't other_bb.  */
  while (EDGE_COUNT (last_bb->succs) == 2)
    {
      FOR_EACH_EDGE (e, ei, last_bb->succs)
        if (e->dest != other_bb)
          break;
      if (e == NULL)
        break;
      if (!single_pred_p (e->dest))
        break;
      if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
        break;
      if (!no_side_effect_bb (e->dest))
        break;
      last_bb = e->dest;
    }
  /* A range consisting of a single block offers nothing to merge.  */
  if (first_bb == last_bb)
    return;
  /* Here basic blocks first_bb through last_bb's predecessor
     end with GIMPLE_COND, all of them have one of the edges to
     other_bb and another to another block in the range,
     all blocks except first_bb don't have side-effects and
     last_bb ends with either GIMPLE_COND, or cast satisfying
     final_range_test_p.  */
  /* Walk the range from last_bb back to first_bb, pushing the range
     tests of each block into ops and recording in bbinfo which slice
     of ops the block contributed.  */
  for (bb = last_bb; ; bb = single_pred (bb))
    {
      enum tree_code code;
      tree lhs, rhs;
      inter_bb_range_test_entry bb_ent;

      bb_ent.op = NULL_TREE;
      bb_ent.first_idx = ops.length ();
      bb_ent.last_idx = bb_ent.first_idx;
      e = find_edge (bb, other_bb);
      stmt = last_stmt (bb);
      gimple_set_visited (stmt, true);
      if (gimple_code (stmt) != GIMPLE_COND)
        {
          use_operand_p use_p;
          gimple phi;
          edge e2;
          unsigned int d;

          lhs = gimple_assign_lhs (stmt);
          rhs = gimple_assign_rhs1 (stmt);
          gcc_assert (bb == last_bb);

          /* stmt is
             _123 = (int) _234;

             followed by:
             <bb M>:
             # _345 = PHI <_123(N), 1(...), 1(...)>

             or 0 instead of 1.  If it is 0, the _234
             range test is anded together with all the
             other range tests, if it is 1, it is ored with
             them.  */
          single_imm_use (lhs, &use_p, &phi);
          gcc_assert (gimple_code (phi) == GIMPLE_PHI);
          e2 = find_edge (first_bb, other_bb);
          d = e2->dest_idx;
          gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
          if (integer_zerop (gimple_phi_arg_def (phi, d)))
            code = BIT_AND_EXPR;
          else
            {
              gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
              code = BIT_IOR_EXPR;
            }

          /* If _234 SSA_NAME_DEF_STMT is
             _234 = _567 | _789;
             (or &, corresponding to 1/0 in the phi arguments),
             push into ops the individual range test arguments
             of the bitwise or resp. and, recursively.  */
          if (!get_ops (rhs, code, &ops,
                        loop_containing_stmt (stmt))
              && has_single_use (rhs))
            {
              /* Otherwise, push the _234 range test itself.  */
              operand_entry_t oe
                = (operand_entry_t) pool_alloc (operand_entry_pool);

              oe->op = rhs;
              oe->rank = code;
              oe->id = 0;
              oe->count = 1;
              ops.safe_push (oe);
              bb_ent.last_idx++;
            }
          else
            bb_ent.last_idx = ops.length ();
          bb_ent.op = rhs;
          bbinfo.safe_push (bb_ent);
          /* This branch is only taken for last_bb (asserted above),
             which differs from first_bb here, so go on to the
             predecessor.  */
          continue;
        }
      /* Otherwise stmt is GIMPLE_COND.  */
      code = gimple_cond_code (stmt);
      lhs = gimple_cond_lhs (stmt);
      rhs = gimple_cond_rhs (stmt);
      if (TREE_CODE (lhs) == SSA_NAME
          && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
          && ((code != EQ_EXPR && code != NE_EXPR)
              || rhs != boolean_false_node
                 /* Either push into ops the individual bitwise
                    or resp. and operands, depending on which
                    edge is other_bb.  */
              || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
                                 ^ (code == EQ_EXPR))
                                ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
                           loop_containing_stmt (stmt))))
        {
          /* Or push the GIMPLE_COND stmt itself.  */
          operand_entry_t oe
            = (operand_entry_t) pool_alloc (operand_entry_pool);

          oe->op = NULL;
          oe->rank = (e->flags & EDGE_TRUE_VALUE)
                     ? BIT_IOR_EXPR : BIT_AND_EXPR;
          /* oe->op = NULL signs that there is no SSA_NAME
             for the range test, and oe->id instead is the
             basic block number, at which's end the GIMPLE_COND
             is.  */
          oe->id = bb->index;
          oe->count = 1;
          ops.safe_push (oe);
          bb_ent.op = NULL;
          bb_ent.last_idx++;
        }
      else if (ops.length () > bb_ent.first_idx)
        {
          /* get_ops pushed at least one operand for this block;
             remember the name it started from.  */
          bb_ent.op = lhs;
          bb_ent.last_idx = ops.length ();
        }
      bbinfo.safe_push (bb_ent);
      if (bb == first_bb)
        break;
    }
  if (ops.length () > 1)
    any_changes = optimize_range_tests (ERROR_MARK, &ops);
  if (any_changes)
    {
      unsigned int idx;
      /* update_ops relies on has_single_use predicates returning the
         same values as it did during get_ops earlier.  Additionally it
         never removes statements, only adds new ones and it should walk
         from the single imm use and check the predicate already before
         making those changes.
         On the other side, the handling of GIMPLE_COND directly can turn
         previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
         it needs to be done in a separate loop afterwards.  */
      /* First pass: blocks whose operands came from an SSA_NAME
         (bbinfo op is set).  */
      for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
        {
          if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
              && bbinfo[idx].op != NULL_TREE)
            {
              tree new_op;

              stmt = last_stmt (bb);
              new_op = update_ops (bbinfo[idx].op,
                                   (enum tree_code)
                                   ops[bbinfo[idx].first_idx]->rank,
                                   ops, &bbinfo[idx].first_idx,
                                   loop_containing_stmt (stmt));
              if (new_op == NULL_TREE)
                {
                  gcc_assert (bb == last_bb);
                  new_op = ops[bbinfo[idx].first_idx++]->op;
                }
              if (bbinfo[idx].op != new_op)
                {
                  imm_use_iterator iter;
                  use_operand_p use_p;
                  gimple use_stmt, cast_stmt = NULL;

                  /* Redirect GIMPLE_COND and PHI uses of the old name
                     to new_op; a cast use is remembered and handled
                     below.  */
                  FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
                    if (is_gimple_debug (use_stmt))
                      continue;
                    else if (gimple_code (use_stmt) == GIMPLE_COND
                             || gimple_code (use_stmt) == GIMPLE_PHI)
                      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
                        SET_USE (use_p, new_op);
                    else if (gimple_assign_cast_p (use_stmt))
                      cast_stmt = use_stmt;
                    else
                      gcc_unreachable ();
                  if (cast_stmt)
                    {
                      /* Build a replacement for the cast with a new
                         lhs, insert it before the old cast and
                         redirect the uses of the old lhs to it.  */
                      gcc_assert (bb == last_bb);
                      tree lhs = gimple_assign_lhs (cast_stmt);
                      tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
                      enum tree_code rhs_code
                        = gimple_assign_rhs_code (cast_stmt);
                      gimple_assign g;
                      if (is_gimple_min_invariant (new_op))
                        {
                          new_op = fold_convert (TREE_TYPE (lhs), new_op);
                          g = gimple_build_assign (new_lhs, new_op);
                        }
                      else
                        g = gimple_build_assign_with_ops (rhs_code, new_lhs,
                                                          new_op, NULL_TREE);
                      gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
                      gimple_set_uid (g, gimple_uid (cast_stmt));
                      gimple_set_visited (g, true);
                      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
                      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
                        if (is_gimple_debug (use_stmt))
                          continue;
                        else if (gimple_code (use_stmt) == GIMPLE_COND
                                 || gimple_code (use_stmt) == GIMPLE_PHI)
                          FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
                            SET_USE (use_p, new_lhs);
                        else
                          gcc_unreachable ();
                    }
                }
            }
          if (bb == first_bb)
            break;
        }
      /* Second pass: blocks whose GIMPLE_COND was pushed directly
         (bbinfo op is NULL_TREE) and whose ops entry now carries an
         operand get their condition rewritten from that operand.  */
      for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
        {
          if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
              && bbinfo[idx].op == NULL_TREE
              && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
            {
              stmt = last_stmt (bb);
              if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
                gimple_cond_make_false (stmt);
              else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
                gimple_cond_make_true (stmt);
              else
                {
                  gimple_cond_set_code (stmt, NE_EXPR);
                  gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
                  gimple_cond_set_rhs (stmt, boolean_false_node);
                }
              update_stmt (stmt);
            }
          if (bb == first_bb)
            break;
        }
    }
}
3061
3062 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3063    of STMT in it's operands.  This is also known as a "destructive
3064    update" operation.  */
3065
3066 static bool
3067 is_phi_for_stmt (gimple stmt, tree operand)
3068 {
3069   gimple def_stmt;
3070   tree lhs;
3071   use_operand_p arg_p;
3072   ssa_op_iter i;
3073
3074   if (TREE_CODE (operand) != SSA_NAME)
3075     return false;
3076
3077   lhs = gimple_assign_lhs (stmt);
3078
3079   def_stmt = SSA_NAME_DEF_STMT (operand);
3080   if (gimple_code (def_stmt) != GIMPLE_PHI)
3081     return false;
3082
3083   FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
3084     if (lhs == USE_FROM_PTR (arg_p))
3085       return true;
3086   return false;
3087 }
3088
3089 /* Remove def stmt of VAR if VAR has zero uses and recurse
3090    on rhs1 operand if so.  */
3091
3092 static void
3093 remove_visited_stmt_chain (tree var)
3094 {
3095   gimple stmt;
3096   gimple_stmt_iterator gsi;
3097
3098   while (1)
3099     {
3100       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3101         return;
3102       stmt = SSA_NAME_DEF_STMT (var);
3103       if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3104         {
3105           var = gimple_assign_rhs1 (stmt);
3106           gsi = gsi_for_stmt (stmt);
3107           reassoc_remove_stmt (&gsi);
3108           release_defs (stmt);
3109         }
3110       else
3111         return;
3112     }
3113 }
3114
3115 /* This function checks three consequtive operands in
3116    passed operands vector OPS starting from OPINDEX and
3117    swaps two operands if it is profitable for binary operation
3118    consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
3119
3120    We pair ops with the same rank if possible.
3121
3122    The alternative we try is to see if STMT is a destructive
3123    update style statement, which is like:
3124    b = phi (a, ...)
3125    a = c + b;
3126    In that case, we want to use the destructive update form to
3127    expose the possible vectorizer sum reduction opportunity.
3128    In that case, the third operand will be the phi node. This
3129    check is not performed if STMT is null.
3130
3131    We could, of course, try to be better as noted above, and do a
3132    lot of work to try to find these opportunities in >3 operand
3133    cases, but it is unlikely to be worth it.  */
3134
3135 static void
3136 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3137                           unsigned int opindex, gimple stmt)
3138 {
3139   operand_entry_t oe1, oe2, oe3;
3140
3141   oe1 = ops[opindex];
3142   oe2 = ops[opindex + 1];
3143   oe3 = ops[opindex + 2];
3144
3145   if ((oe1->rank == oe2->rank
3146        && oe2->rank != oe3->rank)
3147       || (stmt && is_phi_for_stmt (stmt, oe3->op)
3148           && !is_phi_for_stmt (stmt, oe1->op)
3149           && !is_phi_for_stmt (stmt, oe2->op)))
3150     {
3151       struct operand_entry temp = *oe3;
3152       oe3->op = oe1->op;
3153       oe3->rank = oe1->rank;
3154       oe1->op = temp.op;
3155       oe1->rank= temp.rank;
3156     }
3157   else if ((oe1->rank == oe3->rank
3158             && oe2->rank != oe3->rank)
3159            || (stmt && is_phi_for_stmt (stmt, oe2->op)
3160                && !is_phi_for_stmt (stmt, oe1->op)
3161                && !is_phi_for_stmt (stmt, oe3->op)))
3162     {
3163       struct operand_entry temp = *oe2;
3164       oe2->op = oe1->op;
3165       oe2->rank = oe1->rank;
3166       oe1->op = temp.op;
3167       oe1->rank = temp.rank;
3168     }
3169 }
3170
3171 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3172    two definitions, otherwise return STMT.  */
3173
3174 static inline gimple
3175 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3176 {
3177   if (TREE_CODE (rhs1) == SSA_NAME
3178       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3179     stmt = SSA_NAME_DEF_STMT (rhs1);
3180   if (TREE_CODE (rhs2) == SSA_NAME
3181       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3182     stmt = SSA_NAME_DEF_STMT (rhs2);
3183   return stmt;
3184 }
3185
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order.  Return new lhs.

   STMT is the statement to rewrite.  When exactly two entries of OPS
   remain from OPINDEX on, they become rhs1 and rhs2 of STMT.  Otherwise
   OPS[OPINDEX] becomes rhs2 and rhs1 is the result of rewriting the
   statement defining the current rhs1 with OPINDEX + 1.  CHANGED says
   whether a new statement with a new lhs SSA_NAME has to be created
   instead of updating STMT in place.  The returned lhs is the original
   one unless such a new statement was created.  */

static tree
rewrite_expr_tree (gimple stmt, unsigned int opindex,
                   vec<operand_entry_t> ops, bool changed)
{
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree lhs = gimple_assign_lhs (stmt);
  operand_entry_t oe;

  /* The final recursion case for this function is that you have
     exactly two operations left.
     If we had exactly one op in the entire list to start with, we
     would have never called this function, and the tail recursion
     rewrites them one at a time.  */
  if (opindex + 2 == ops.length ())
    {
      operand_entry_t oe1, oe2;

      oe1 = ops[opindex];
      oe2 = ops[opindex + 1];

      if (rhs1 != oe1->op || rhs2 != oe2->op)
        {
          /* Capture the position and uid of the original statement
             before STMT is possibly replaced below.  */
          gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
          unsigned int uid = gimple_uid (stmt);

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Transforming ");
              print_gimple_stmt (dump_file, stmt, 0, 0);
            }

          if (changed)
            {
              gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
              lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
              stmt
                = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
                                                lhs, oe1->op, oe2->op);
              gimple_set_uid (stmt, uid);
              gimple_set_visited (stmt, true);
              /* gsi still refers to the original statement.  */
              if (insert_point == gsi_stmt (gsi))
                gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
              else
                insert_stmt_after (stmt, insert_point);
            }
          else
            {
              gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
                                   == stmt);
              gimple_assign_set_rhs1 (stmt, oe1->op);
              gimple_assign_set_rhs2 (stmt, oe2->op);
              update_stmt (stmt);
            }

          /* The old rhs1 is no longer an operand here; drop its
             defining statements if they became unused.  */
          if (rhs1 != oe1->op && rhs1 != oe2->op)
            remove_visited_stmt_chain (rhs1);

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, " into ");
              print_gimple_stmt (dump_file, stmt, 0, 0);
            }
        }
      return lhs;
    }

  /* If we hit here, we should have 3 or more ops left.  */
  gcc_assert (opindex + 2 < ops.length ());

  /* Rewrite the next operator.  */
  oe = ops[opindex];

  /* Recurse on the LHS of the binary operator, which is guaranteed to
     be the non-leaf side.  */
  tree new_rhs1
    = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
                         changed || oe->op != rhs2);

  if (oe->op != rhs2 || new_rhs1 != rhs1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Transforming ");
          print_gimple_stmt (dump_file, stmt, 0, 0);
        }

      /* If changed is false, this is either opindex == 0
         or all outer rhs2's were equal to corresponding oe->op,
         and powi_result is NULL.
         That means lhs is equivalent before and after reassociation.
         Otherwise ensure the old lhs SSA_NAME is not reused and
         create a new stmt as well, so that any debug stmts will be
         properly adjusted.  */
      if (changed)
        {
          gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
          unsigned int uid = gimple_uid (stmt);
          gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);

          lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
          stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
                                               lhs, new_rhs1, oe->op);
          gimple_set_uid (stmt, uid);
          gimple_set_visited (stmt, true);
          if (insert_point == gsi_stmt (gsi))
            gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
          else
            insert_stmt_after (stmt, insert_point);
        }
      else
        {
          gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
                               == stmt);
          gimple_assign_set_rhs1 (stmt, new_rhs1);
          gimple_assign_set_rhs2 (stmt, oe->op);
          update_stmt (stmt);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, " into ");
          print_gimple_stmt (dump_file, stmt, 0, 0);
        }
    }
  return lhs;
}
3317
3318 /* Find out how many cycles we need to compute statements chain.
3319    OPS_NUM holds number os statements in a chain.  CPU_WIDTH is a
3320    maximum number of independent statements we may execute per cycle.  */
3321
3322 static int
3323 get_required_cycles (int ops_num, int cpu_width)
3324 {
3325   int res;
3326   int elog;
3327   unsigned int rest;
3328
3329   /* While we have more than 2 * cpu_width operands
3330      we may reduce number of operands by cpu_width
3331      per cycle.  */
3332   res = ops_num / (2 * cpu_width);
3333
3334   /* Remained operands count may be reduced twice per cycle
3335      until we have only one operand.  */
3336   rest = (unsigned)(ops_num - res * cpu_width);
3337   elog = exact_log2 (rest);
3338   if (elog >= 0)
3339     res += elog;
3340   else
3341     res += floor_log2 (rest) + 1;
3342
3343   return res;
3344 }
3345
3346 /* Returns an optimal number of registers to use for computation of
3347    given statements.  */
3348
3349 static int
3350 get_reassociation_width (int ops_num, enum tree_code opc,
3351                          enum machine_mode mode)
3352 {
3353   int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3354   int width;
3355   int width_min;
3356   int cycles_best;
3357
3358   if (param_width > 0)
3359     width = param_width;
3360   else
3361     width = targetm.sched.reassociation_width (opc, mode);
3362
3363   if (width == 1)
3364     return width;
3365
3366   /* Get the minimal time required for sequence computation.  */
3367   cycles_best = get_required_cycles (ops_num, width);
3368
3369   /* Check if we may use less width and still compute sequence for
3370      the same time.  It will allow us to reduce registers usage.
3371      get_required_cycles is monotonically increasing with lower width
3372      so we can perform a binary search for the minimal width that still
3373      results in the optimal cycle count.  */
3374   width_min = 1;
3375   while (width > width_min)
3376     {
3377       int width_mid = (width + width_min) / 2;
3378
3379       if (get_required_cycles (ops_num, width_mid) == cycles_best)
3380         width = width_mid;
3381       else if (width_min < width_mid)
3382         width_min = width_mid;
3383       else
3384         break;
3385     }
3386
3387   return width;
3388 }
3389
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order and trying to allow operations to be executed in
   parallel.

   STMT is the last statement of the chain; the earlier ones are found
   by following rhs1 definitions.  OPS holds the operands, which are
   consumed from the end of the vector.  WIDTH limits how many not yet
   consumed statement results may be pending before new statements
   start combining those results instead of taking fresh operands.  */

static void
rewrite_expr_tree_parallel (gimple_assign stmt, int width,
                            vec<operand_entry_t> ops)
{
  enum tree_code opcode = gimple_assign_rhs_code (stmt);
  int op_num = ops.length ();
  int stmt_num = op_num - 1;
  gimple *stmts = XALLOCAVEC (gimple, stmt_num);
  /* Index of the next operand of OPS to consume.  */
  int op_index = op_num - 1;
  /* Index of the next statement whose result has not been used yet.  */
  int stmt_index = 0;
  /* When non-zero, results of statements below this index are to be
     combined before fresh operands are taken again.  */
  int ready_stmts_end = 0;
  int i = 0;
  tree last_rhs1 = gimple_assign_rhs1 (stmt);

  /* We start expression rewriting from the top statements.
     So, in this loop we create a full list of statements
     we will work with.  */
  stmts[stmt_num - 1] = stmt;
  for (i = stmt_num - 2; i >= 0; i--)
    stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));

  for (i = 0; i < stmt_num; i++)
    {
      tree op1, op2;

      /* Determine whether we should use results of
         already handled statements or not.  */
      if (ready_stmts_end == 0
          && (i - stmt_index >= width || op_index < 1))
        ready_stmts_end = i;

      /* Now we choose operands for the next statement.  Non zero
         value in ready_stmts_end means here that we should use
         the result of already generated statements as new operand.  */
      if (ready_stmts_end > 0)
        {
          op1 = gimple_assign_lhs (stmts[stmt_index++]);
          if (ready_stmts_end > stmt_index)
            op2 = gimple_assign_lhs (stmts[stmt_index++]);
          else if (op_index >= 0)
            op2 = ops[op_index--]->op;
          else
            {
              gcc_assert (stmt_index < i);
              op2 = gimple_assign_lhs (stmts[stmt_index++]);
            }

          if (stmt_index >= ready_stmts_end)
            ready_stmts_end = 0;
        }
      else
        {
          if (op_index > 1)
            swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
          op2 = ops[op_index--]->op;
          op1 = ops[op_index--]->op;
        }

      /* If we emit the last statement then we should put
         operands into the last statement.  It will also
         break the loop.  */
      if (op_index < 0 && stmt_index == i)
        i = stmt_num - 1;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Transforming ");
          print_gimple_stmt (dump_file, stmts[i], 0, 0);
        }

      /* We keep original statement only for the last one.  All
         others are recreated.  */
      if (i == stmt_num - 1)
        {
          gimple_assign_set_rhs1 (stmts[i], op1);
          gimple_assign_set_rhs2 (stmts[i], op2);
          update_stmt (stmts[i]);
        }
      else
        stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, " into ");
          print_gimple_stmt (dump_file, stmts[i], 0, 0);
        }
    }

  /* The original rhs1 of STMT was replaced above; drop its defining
     statements if they became unused.  */
  remove_visited_stmt_chain (last_rhs1);
}
3485
3486 /* Transform STMT, which is really (A +B) + (C + D) into the left
3487    linear form, ((A+B)+C)+D.
3488    Recurse on D if necessary.  */
3489
3490 static void
3491 linearize_expr (gimple stmt)
3492 {
3493   gimple_stmt_iterator gsi;
3494   gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3495   gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3496   gimple oldbinrhs = binrhs;
3497   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3498   gimple newbinrhs = NULL;
3499   struct loop *loop = loop_containing_stmt (stmt);
3500   tree lhs = gimple_assign_lhs (stmt);
3501
3502   gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3503               && is_reassociable_op (binrhs, rhscode, loop));
3504
3505   gsi = gsi_for_stmt (stmt);
3506
3507   gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3508   binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
3509                                          make_ssa_name (TREE_TYPE (lhs), NULL),
3510                                          gimple_assign_lhs (binlhs),
3511                                          gimple_assign_rhs2 (binrhs));
3512   gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3513   gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3514   gimple_set_uid (binrhs, gimple_uid (stmt));
3515
3516   if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3517     newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3518
3519   if (dump_file && (dump_flags & TDF_DETAILS))
3520     {
3521       fprintf (dump_file, "Linearized: ");
3522       print_gimple_stmt (dump_file, stmt, 0, 0);
3523     }
3524
3525   reassociate_stats.linearized++;
3526   update_stmt (stmt);
3527
3528   gsi = gsi_for_stmt (oldbinrhs);
3529   reassoc_remove_stmt (&gsi);
3530   release_defs (oldbinrhs);
3531
3532   gimple_set_visited (stmt, true);
3533   gimple_set_visited (binlhs, true);
3534   gimple_set_visited (binrhs, true);
3535
3536   /* Tail recurse on the new rhs if it still needs reassociation.  */
3537   if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3538     /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3539            want to change the algorithm while converting to tuples.  */
3540     linearize_expr (stmt);
3541 }
3542
3543 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3544    it.  Otherwise, return NULL.  */
3545
3546 static gimple
3547 get_single_immediate_use (tree lhs)
3548 {
3549   use_operand_p immuse;
3550   gimple immusestmt;
3551
3552   if (TREE_CODE (lhs) == SSA_NAME
3553       && single_imm_use (lhs, &immuse, &immusestmt)
3554       && is_gimple_assign (immusestmt))
3555     return immusestmt;
3556
3557   return NULL;
3558 }
3559
3560 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3561    representing the negated value.  Insertions of any necessary
3562    instructions go before GSI.
3563    This function is recursive in that, if you hand it "a_5" as the
3564    value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3565    transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
3566
3567 static tree
3568 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3569 {
3570   gimple negatedefstmt = NULL;
3571   tree resultofnegate;
3572   gimple_stmt_iterator gsi;
3573   unsigned int uid;
3574
3575   /* If we are trying to negate a name, defined by an add, negate the
3576      add operands instead.  */
3577   if (TREE_CODE (tonegate) == SSA_NAME)
3578     negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3579   if (TREE_CODE (tonegate) == SSA_NAME
3580       && is_gimple_assign (negatedefstmt)
3581       && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3582       && has_single_use (gimple_assign_lhs (negatedefstmt))
3583       && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3584     {
3585       tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3586       tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3587       tree lhs = gimple_assign_lhs (negatedefstmt);
3588       gimple g;
3589
3590       gsi = gsi_for_stmt (negatedefstmt);
3591       rhs1 = negate_value (rhs1, &gsi);
3592
3593       gsi = gsi_for_stmt (negatedefstmt);
3594       rhs2 = negate_value (rhs2, &gsi);
3595
3596       gsi = gsi_for_stmt (negatedefstmt);
3597       lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3598       gimple_set_visited (negatedefstmt, true);
3599       g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
3600       gimple_set_uid (g, gimple_uid (negatedefstmt));
3601       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3602       return lhs;
3603     }
3604
3605   tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3606   resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3607                                              NULL_TREE, true, GSI_SAME_STMT);
3608   gsi = *gsip;
3609   uid = gimple_uid (gsi_stmt (gsi));
3610   for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3611     {
3612       gimple stmt = gsi_stmt (gsi);
3613       if (gimple_uid (stmt) != 0)
3614         break;
3615       gimple_set_uid (stmt, uid);
3616     }
3617   return resultofnegate;
3618 }
3619
3620 /* Return true if we should break up the subtract in STMT into an add
3621    with negate.  This is true when we the subtract operands are really
3622    adds, or the subtract itself is used in an add expression.  In
3623    either case, breaking up the subtract into an add with negate
3624    exposes the adds to reassociation.  */
3625
3626 static bool
3627 should_break_up_subtract (gimple stmt)
3628 {
3629   tree lhs = gimple_assign_lhs (stmt);
3630   tree binlhs = gimple_assign_rhs1 (stmt);
3631   tree binrhs = gimple_assign_rhs2 (stmt);
3632   gimple immusestmt;
3633   struct loop *loop = loop_containing_stmt (stmt);
3634
3635   if (TREE_CODE (binlhs) == SSA_NAME
3636       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3637     return true;
3638
3639   if (TREE_CODE (binrhs) == SSA_NAME
3640       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3641     return true;
3642
3643   if (TREE_CODE (lhs) == SSA_NAME
3644       && (immusestmt = get_single_immediate_use (lhs))
3645       && is_gimple_assign (immusestmt)
3646       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3647           ||  gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3648     return true;
3649   return false;
3650 }
3651
3652 /* Transform STMT from A - B into A + -B.  */
3653
3654 static void
3655 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3656 {
3657   tree rhs1 = gimple_assign_rhs1 (stmt);
3658   tree rhs2 = gimple_assign_rhs2 (stmt);
3659
3660   if (dump_file && (dump_flags & TDF_DETAILS))
3661     {
3662       fprintf (dump_file, "Breaking up subtract ");
3663       print_gimple_stmt (dump_file, stmt, 0, 0);
3664     }
3665
3666   rhs2 = negate_value (rhs2, gsip);
3667   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3668   update_stmt (stmt);
3669 }
3670
3671 /* Determine whether STMT is a builtin call that raises an SSA name
3672    to an integer power and has only one use.  If so, and this is early
3673    reassociation and unsafe math optimizations are permitted, place
3674    the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3675    If any of these conditions does not hold, return FALSE.  */
3676
3677 static bool
3678 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3679 {
3680   tree fndecl, arg1;
3681   REAL_VALUE_TYPE c, cint;
3682
3683   if (!first_pass_instance
3684       || !flag_unsafe_math_optimizations
3685       || !is_gimple_call (stmt)
3686       || !has_single_use (gimple_call_lhs (stmt)))
3687     return false;
3688
3689   fndecl = gimple_call_fndecl (stmt);
3690
3691   if (!fndecl
3692       || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3693     return false;
3694
3695   switch (DECL_FUNCTION_CODE (fndecl))
3696     {
3697     CASE_FLT_FN (BUILT_IN_POW):
3698       *base = gimple_call_arg (stmt, 0);
3699       arg1 = gimple_call_arg (stmt, 1);
3700
3701       if (TREE_CODE (arg1) != REAL_CST)
3702         return false;
3703
3704       c = TREE_REAL_CST (arg1);
3705
3706       if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3707         return false;
3708
3709       *exponent = real_to_integer (&c);
3710       real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
3711       if (!real_identical (&c, &cint))
3712         return false;
3713
3714       break;
3715
3716     CASE_FLT_FN (BUILT_IN_POWI):
3717       *base = gimple_call_arg (stmt, 0);
3718       arg1 = gimple_call_arg (stmt, 1);
3719
3720       if (!tree_fits_shwi_p (arg1))
3721         return false;
3722
3723       *exponent = tree_to_shwi (arg1);
3724       break;
3725
3726     default:
3727       return false;
3728     }
3729
3730   /* Expanding negative exponents is generally unproductive, so we don't
3731      complicate matters with those.  Exponents of zero and one should
3732      have been handled by expression folding.  */
3733   if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3734     return false;
3735
3736   return true;
3737 }
3738
3739 /* Recursively linearize a binary expression that is the RHS of STMT.
3740    Place the operands of the expression tree in the vector named OPS.  */
3741
3742 static void
3743 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3744                      bool is_associative, bool set_visited)
3745 {
3746   tree binlhs = gimple_assign_rhs1 (stmt);
3747   tree binrhs = gimple_assign_rhs2 (stmt);
3748   gimple binlhsdef = NULL, binrhsdef = NULL;
3749   bool binlhsisreassoc = false;
3750   bool binrhsisreassoc = false;
3751   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3752   struct loop *loop = loop_containing_stmt (stmt);
3753   tree base = NULL_TREE;
3754   HOST_WIDE_INT exponent = 0;
3755
3756   if (set_visited)
3757     gimple_set_visited (stmt, true);
3758
3759   if (TREE_CODE (binlhs) == SSA_NAME)
3760     {
3761       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3762       binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3763                          && !stmt_could_throw_p (binlhsdef));
3764     }
3765
3766   if (TREE_CODE (binrhs) == SSA_NAME)
3767     {
3768       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3769       binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3770                          && !stmt_could_throw_p (binrhsdef));
3771     }
3772
3773   /* If the LHS is not reassociable, but the RHS is, we need to swap
3774      them.  If neither is reassociable, there is nothing we can do, so
3775      just put them in the ops vector.  If the LHS is reassociable,
3776      linearize it.  If both are reassociable, then linearize the RHS
3777      and the LHS.  */
3778
3779   if (!binlhsisreassoc)
3780     {
3781       tree temp;
3782
3783       /* If this is not a associative operation like division, give up.  */
3784       if (!is_associative)
3785         {
3786           add_to_ops_vec (ops, binrhs);
3787           return;
3788         }
3789
3790       if (!binrhsisreassoc)
3791         {
3792           if (rhscode == MULT_EXPR
3793               && TREE_CODE (binrhs) == SSA_NAME
3794               && acceptable_pow_call (binrhsdef, &base, &exponent))
3795             {
3796               add_repeat_to_ops_vec (ops, base, exponent);
3797               gimple_set_visited (binrhsdef, true);
3798             }
3799           else
3800             add_to_ops_vec (ops, binrhs);
3801
3802           if (rhscode == MULT_EXPR
3803               && TREE_CODE (binlhs) == SSA_NAME
3804               && acceptable_pow_call (binlhsdef, &base, &exponent))
3805             {
3806               add_repeat_to_ops_vec (ops, base, exponent);
3807               gimple_set_visited (binlhsdef, true);
3808             }
3809           else
3810             add_to_ops_vec (ops, binlhs);
3811
3812           return;
3813         }
3814
3815       if (dump_file && (dump_flags & TDF_DETAILS))
3816         {
3817           fprintf (dump_file, "swapping operands of ");
3818           print_gimple_stmt (dump_file, stmt, 0, 0);
3819         }
3820
3821       swap_ssa_operands (stmt,
3822                          gimple_assign_rhs1_ptr (stmt),
3823                          gimple_assign_rhs2_ptr (stmt));
3824       update_stmt (stmt);
3825
3826       if (dump_file && (dump_flags & TDF_DETAILS))
3827         {
3828           fprintf (dump_file, " is now ");
3829           print_gimple_stmt (dump_file, stmt, 0, 0);
3830         }
3831
3832       /* We want to make it so the lhs is always the reassociative op,
3833          so swap.  */
3834       temp = binlhs;
3835       binlhs = binrhs;
3836       binrhs = temp;
3837     }
3838   else if (binrhsisreassoc)
3839     {
3840       linearize_expr (stmt);
3841       binlhs = gimple_assign_rhs1 (stmt);
3842       binrhs = gimple_assign_rhs2 (stmt);
3843     }
3844
3845   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3846               || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3847                                       rhscode, loop));
3848   linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3849                        is_associative, set_visited);
3850
3851   if (rhscode == MULT_EXPR
3852       && TREE_CODE (binrhs) == SSA_NAME
3853       && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3854     {
3855       add_repeat_to_ops_vec (ops, base, exponent);
3856       gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3857     }
3858   else
3859     add_to_ops_vec (ops, binrhs);
3860 }
3861
3862 /* Repropagate the negates back into subtracts, since no other pass
3863    currently does it.  */
3864
3865 static void
3866 repropagate_negates (void)
3867 {
3868   unsigned int i = 0;
3869   tree negate;
3870
3871   FOR_EACH_VEC_ELT (plus_negates, i, negate)
3872     {
3873       gimple user = get_single_immediate_use (negate);
3874
3875       if (!user || !is_gimple_assign (user))
3876         continue;
3877
3878       /* The negate operand can be either operand of a PLUS_EXPR
3879          (it can be the LHS if the RHS is a constant for example).
3880
3881          Force the negate operand to the RHS of the PLUS_EXPR, then
3882          transform the PLUS_EXPR into a MINUS_EXPR.  */
3883       if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3884         {
3885           /* If the negated operand appears on the LHS of the
3886              PLUS_EXPR, exchange the operands of the PLUS_EXPR
3887              to force the negated operand to the RHS of the PLUS_EXPR.  */
3888           if (gimple_assign_rhs1 (user) == negate)
3889             {
3890               swap_ssa_operands (user,
3891                                  gimple_assign_rhs1_ptr (user),
3892                                  gimple_assign_rhs2_ptr (user));
3893             }
3894
3895           /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3896              the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
3897           if (gimple_assign_rhs2 (user) == negate)
3898             {
3899               tree rhs1 = gimple_assign_rhs1 (user);
3900               tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3901               gimple_stmt_iterator gsi = gsi_for_stmt (user);
3902               gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3903               update_stmt (user);
3904             }
3905         }
3906       else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3907         {
3908           if (gimple_assign_rhs1 (user) == negate)
3909             {
3910               /* We have
3911                    x = -a
3912                    y = x - b
3913                  which we transform into
3914                    x = a + b
3915                    y = -x .
3916                  This pushes down the negate which we possibly can merge
3917                  into some other operation, hence insert it into the
3918                  plus_negates vector.  */
3919               gimple feed = SSA_NAME_DEF_STMT (negate);
3920               tree a = gimple_assign_rhs1 (feed);
3921               tree b = gimple_assign_rhs2 (user);
3922               gimple_stmt_iterator gsi = gsi_for_stmt (feed);
3923               gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
3924               tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
3925               gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
3926               gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3927               gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
3928               user = gsi_stmt (gsi2);
3929               update_stmt (user);
3930               reassoc_remove_stmt (&gsi);
3931               release_defs (feed);
3932               plus_negates.safe_push (gimple_assign_lhs (user));
3933             }
3934           else
3935             {
3936               /* Transform "x = -a; y = b - x" into "y = b + a", getting
3937                  rid of one operation.  */
3938               gimple feed = SSA_NAME_DEF_STMT (negate);
3939               tree a = gimple_assign_rhs1 (feed);
3940               tree rhs1 = gimple_assign_rhs1 (user);
3941               gimple_stmt_iterator gsi = gsi_for_stmt (user);
3942               gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3943               update_stmt (gsi_stmt (gsi));
3944             }
3945         }
3946     }
3947 }
3948
3949 /* Returns true if OP is of a type for which we can do reassociation.
3950    That is for integral or non-saturating fixed-point types, and for
3951    floating point type when associative-math is enabled.  */
3952
3953 static bool
3954 can_reassociate_p (tree op)
3955 {
3956   tree type = TREE_TYPE (op);
3957   if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3958       || NON_SAT_FIXED_POINT_TYPE_P (type)
3959       || (flag_associative_math && FLOAT_TYPE_P (type)))
3960     return true;
3961   return false;
3962 }
3963
3964 /* Break up subtract operations in block BB.
3965
3966    We do this top down because we don't know whether the subtract is
3967    part of a possible chain of reassociation except at the top.
3968
3969    IE given
3970    d = f + g
3971    c = a + e
3972    b = c - d
3973    q = b - r
3974    k = t - q
3975
3976    we want to break up k = t - q, but we won't until we've transformed q
3977    = b - r, which won't be broken up until we transform b = c - d.
3978
3979    En passant, clear the GIMPLE visited flag on every statement
3980    and set UIDs within each basic block.  */
3981
3982 static void
3983 break_up_subtract_bb (basic_block bb)
3984 {
3985   gimple_stmt_iterator gsi;
3986   basic_block son;
3987   unsigned int uid = 1;
3988
3989   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3990     {
3991       gimple stmt = gsi_stmt (gsi);
3992       gimple_set_visited (stmt, false);
3993       gimple_set_uid (stmt, uid++);
3994
3995       if (!is_gimple_assign (stmt)
3996           || !can_reassociate_p (gimple_assign_lhs (stmt)))
3997         continue;
3998
3999       /* Look for simple gimple subtract operations.  */
4000       if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4001         {
4002           if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4003               || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4004             continue;
4005
4006           /* Check for a subtract used only in an addition.  If this
4007              is the case, transform it into add of a negate for better
4008              reassociation.  IE transform C = A-B into C = A + -B if C
4009              is only used in an addition.  */
4010           if (should_break_up_subtract (stmt))
4011             break_up_subtract (stmt, &gsi);
4012         }
4013       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4014                && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4015         plus_negates.safe_push (gimple_assign_lhs (stmt));
4016     }
4017   for (son = first_dom_son (CDI_DOMINATORS, bb);
4018        son;
4019        son = next_dom_son (CDI_DOMINATORS, son))
4020     break_up_subtract_bb (son);
4021 }
4022
4023 /* Used for repeated factor analysis.  */
4024 struct repeat_factor_d
4025 {
4026   /* An SSA name that occurs in a multiply chain.  */
4027   tree factor;
4028
4029   /* Cached rank of the factor.  */
4030   unsigned rank;
4031
4032   /* Number of occurrences of the factor in the chain.  */
4033   HOST_WIDE_INT count;
4034
4035   /* An SSA name representing the product of this factor and
4036      all factors appearing later in the repeated factor vector.  */
4037   tree repr;
4038 };
4039
4040 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4041 typedef const struct repeat_factor_d *const_repeat_factor_t;
4042
4043
4044 static vec<repeat_factor> repeat_factor_vec;
4045
4046 /* Used for sorting the repeat factor vector.  Sort primarily by
4047    ascending occurrence count, secondarily by descending rank.  */
4048
4049 static int
4050 compare_repeat_factors (const void *x1, const void *x2)
4051 {
4052   const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4053   const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4054
4055   if (rf1->count != rf2->count)
4056     return rf1->count - rf2->count;
4057
4058   return rf2->rank - rf1->rank;
4059 }
4060
4061 /* Look for repeated operands in OPS in the multiply tree rooted at
4062    STMT.  Replace them with an optimal sequence of multiplies and powi
4063    builtin calls, and remove the used operands from OPS.  Return an
4064    SSA name representing the value of the replacement sequence.  */
4065
4066 static tree
4067 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4068 {
4069   unsigned i, j, vec_len;
4070   int ii;
4071   operand_entry_t oe;
4072   repeat_factor_t rf1, rf2;
4073   repeat_factor rfnew;
4074   tree result = NULL_TREE;
4075   tree target_ssa, iter_result;
4076   tree type = TREE_TYPE (gimple_get_lhs (stmt));
4077   tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4078   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4079   gimple mul_stmt, pow_stmt;
4080
4081   /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4082      target.  */
4083   if (!powi_fndecl)
4084     return NULL_TREE;
4085
4086   /* Allocate the repeated factor vector.  */
4087   repeat_factor_vec.create (10);
4088
4089   /* Scan the OPS vector for all SSA names in the product and build
4090      up a vector of occurrence counts for each factor.  */
4091   FOR_EACH_VEC_ELT (*ops, i, oe)
4092     {
4093       if (TREE_CODE (oe->op) == SSA_NAME)
4094         {
4095           FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4096             {
4097               if (rf1->factor == oe->op)
4098                 {
4099                   rf1->count += oe->count;
4100                   break;
4101                 }
4102             }
4103
4104           if (j >= repeat_factor_vec.length ())
4105             {
4106               rfnew.factor = oe->op;
4107               rfnew.rank = oe->rank;
4108               rfnew.count = oe->count;
4109               rfnew.repr = NULL_TREE;
4110               repeat_factor_vec.safe_push (rfnew);
4111             }
4112         }
4113     }
4114
4115   /* Sort the repeated factor vector by (a) increasing occurrence count,
4116      and (b) decreasing rank.  */
4117   repeat_factor_vec.qsort (compare_repeat_factors);
4118
4119   /* It is generally best to combine as many base factors as possible
4120      into a product before applying __builtin_powi to the result.
4121      However, the sort order chosen for the repeated factor vector
4122      allows us to cache partial results for the product of the base
4123      factors for subsequent use.  When we already have a cached partial
4124      result from a previous iteration, it is best to make use of it
4125      before looking for another __builtin_pow opportunity.
4126
4127      As an example, consider x * x * y * y * y * z * z * z * z.
4128      We want to first compose the product x * y * z, raise it to the
4129      second power, then multiply this by y * z, and finally multiply
4130      by z.  This can be done in 5 multiplies provided we cache y * z
4131      for use in both expressions:
4132
4133         t1 = y * z
4134         t2 = t1 * x
4135         t3 = t2 * t2
4136         t4 = t1 * t3
4137         result = t4 * z
4138
4139      If we instead ignored the cached y * z and first multiplied by
4140      the __builtin_pow opportunity z * z, we would get the inferior:
4141
4142         t1 = y * z
4143         t2 = t1 * x
4144         t3 = t2 * t2
4145         t4 = z * z
4146         t5 = t3 * t4
4147         result = t5 * y  */
4148
4149   vec_len = repeat_factor_vec.length ();
4150
4151   /* Repeatedly look for opportunities to create a builtin_powi call.  */
4152   while (true)
4153     {
4154       HOST_WIDE_INT power;
4155
4156       /* First look for the largest cached product of factors from
4157          preceding iterations.  If found, create a builtin_powi for
4158          it if the minimum occurrence count for its factors is at
4159          least 2, or just use this cached product as our next
4160          multiplicand if the minimum occurrence count is 1.  */
4161       FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4162         {
4163           if (rf1->repr && rf1->count > 0)
4164             break;
4165         }
4166
4167       if (j < vec_len)
4168         {
4169           power = rf1->count;
4170
4171           if (power == 1)
4172             {
4173               iter_result = rf1->repr;
4174
4175               if (dump_file && (dump_flags & TDF_DETAILS))
4176                 {
4177                   unsigned elt;
4178                   repeat_factor_t rf;
4179                   fputs ("Multiplying by cached product ", dump_file);
4180                   for (elt = j; elt < vec_len; elt++)
4181                     {
4182                       rf = &repeat_factor_vec[elt];
4183                       print_generic_expr (dump_file, rf->factor, 0);
4184                       if (elt < vec_len - 1)
4185                         fputs (" * ", dump_file);
4186                     }
4187                   fputs ("\n", dump_file);
4188                 }
4189             }
4190           else
4191             {
4192               iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4193               pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4194                                             build_int_cst (integer_type_node,
4195                                                            power));
4196               gimple_call_set_lhs (pow_stmt, iter_result);
4197               gimple_set_location (pow_stmt, gimple_location (stmt));
4198               gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4199
4200               if (dump_file && (dump_flags & TDF_DETAILS))
4201                 {
4202                   unsigned elt;
4203                   repeat_factor_t rf;
4204                   fputs ("Building __builtin_pow call for cached product (",
4205                          dump_file);
4206                   for (elt = j; elt < vec_len; elt++)
4207                     {
4208                       rf = &repeat_factor_vec[elt];
4209                       print_generic_expr (dump_file, rf->factor, 0);
4210                       if (elt < vec_len - 1)
4211                         fputs (" * ", dump_file);
4212                     }
4213                   fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4214                            power);
4215                 }
4216             }
4217         }
4218       else
4219         {
4220           /* Otherwise, find the first factor in the repeated factor
4221              vector whose occurrence count is at least 2.  If no such
4222              factor exists, there are no builtin_powi opportunities
4223              remaining.  */
4224           FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4225             {
4226               if (rf1->count >= 2)
4227                 break;
4228             }
4229
4230           if (j >= vec_len)
4231             break;
4232
4233           power = rf1->count;
4234
4235           if (dump_file && (dump_flags & TDF_DETAILS))
4236             {
4237               unsigned elt;
4238               repeat_factor_t rf;
4239               fputs ("Building __builtin_pow call for (", dump_file);
4240               for (elt = j; elt < vec_len; elt++)
4241                 {
4242                   rf = &repeat_factor_vec[elt];
4243                   print_generic_expr (dump_file, rf->factor, 0);
4244                   if (elt < vec_len - 1)
4245                     fputs (" * ", dump_file);
4246                 }
4247               fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4248             }
4249
4250           reassociate_stats.pows_created++;
4251
4252           /* Visit each element of the vector in reverse order (so that
4253              high-occurrence elements are visited first, and within the
4254              same occurrence count, lower-ranked elements are visited
4255              first).  Form a linear product of all elements in this order
4256              whose occurrencce count is at least that of element J.
4257              Record the SSA name representing the product of each element
4258              with all subsequent elements in the vector.  */
4259           if (j == vec_len - 1)
4260             rf1->repr = rf1->factor;
4261           else
4262             {
4263               for (ii = vec_len - 2; ii >= (int)j; ii--)
4264                 {
4265                   tree op1, op2;
4266
4267                   rf1 = &repeat_factor_vec[ii];
4268                   rf2 = &repeat_factor_vec[ii + 1];
4269
4270                   /* Init the last factor's representative to be itself.  */
4271                   if (!rf2->repr)
4272                     rf2->repr = rf2->factor;
4273
4274                   op1 = rf1->factor;
4275                   op2 = rf2->repr;
4276
4277                   target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4278                   mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4279                                                            target_ssa,
4280                                                            op1, op2);
4281                   gimple_set_location (mul_stmt, gimple_location (stmt));
4282                   gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4283                   rf1->repr = target_ssa;
4284
4285                   /* Don't reprocess the multiply we just introduced.  */
4286                   gimple_set_visited (mul_stmt, true);
4287                 }
4288             }
4289
4290           /* Form a call to __builtin_powi for the maximum product
4291              just formed, raised to the power obtained earlier.  */
4292           rf1 = &repeat_factor_vec[j];
4293           iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4294           pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4295                                         build_int_cst (integer_type_node,
4296                                                        power));
4297           gimple_call_set_lhs (pow_stmt, iter_result);
4298           gimple_set_location (pow_stmt, gimple_location (stmt));
4299           gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4300         }
4301
4302       /* If we previously formed at least one other builtin_powi call,
4303          form the product of this one and those others.  */
4304       if (result)
4305         {
4306           tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4307           mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4308                                                    result, iter_result);
4309           gimple_set_location (mul_stmt, gimple_location (stmt));
4310           gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4311           gimple_set_visited (mul_stmt, true);
4312           result = new_result;
4313         }
4314       else
4315         result = iter_result;
4316
4317       /* Decrement the occurrence count of each element in the product
4318          by the count found above, and remove this many copies of each
4319          factor from OPS.  */
4320       for (i = j; i < vec_len; i++)
4321         {
4322           unsigned k = power;
4323           unsigned n;
4324
4325           rf1 = &repeat_factor_vec[i];
4326           rf1->count -= power;
4327
4328           FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4329             {
4330               if (oe->op == rf1->factor)
4331                 {
4332                   if (oe->count <= k)
4333                     {
4334                       ops->ordered_remove (n);
4335                       k -= oe->count;
4336
4337                       if (k == 0)
4338                         break;
4339                     }
4340                   else
4341                     {
4342                       oe->count -= k;
4343                       break;
4344                     }
4345                 }
4346             }
4347         }
4348     }
4349
4350   /* At this point all elements in the repeated factor vector have a
4351      remaining occurrence count of 0 or 1, and those with a count of 1
4352      don't have cached representatives.  Re-sort the ops vector and
4353      clean up.  */
4354   ops->qsort (sort_by_operand_rank);
4355   repeat_factor_vec.release ();
4356
4357   /* Return the final product computed herein.  Note that there may
4358      still be some elements with single occurrence count left in OPS;
4359      those will be handled by the normal reassociation logic.  */
4360   return result;
4361 }
4362
4363 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
4364
4365 static void
4366 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4367 {
4368   tree rhs1;
4369
4370   if (dump_file && (dump_flags & TDF_DETAILS))
4371     {
4372       fprintf (dump_file, "Transforming ");
4373       print_gimple_stmt (dump_file, stmt, 0, 0);
4374     }
4375
4376   rhs1 = gimple_assign_rhs1 (stmt);
4377   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4378   update_stmt (stmt);
4379   remove_visited_stmt_chain (rhs1);
4380
4381   if (dump_file && (dump_flags & TDF_DETAILS))
4382     {
4383       fprintf (dump_file, " into ");
4384       print_gimple_stmt (dump_file, stmt, 0, 0);
4385     }
4386 }
4387
4388 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */
4389
4390 static void
4391 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4392                             tree rhs1, tree rhs2)
4393 {
4394   if (dump_file && (dump_flags & TDF_DETAILS))
4395     {
4396       fprintf (dump_file, "Transforming ");
4397       print_gimple_stmt (dump_file, stmt, 0, 0);
4398     }
4399
4400   gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4401   update_stmt (gsi_stmt (*gsi));
4402   remove_visited_stmt_chain (rhs1);
4403
4404   if (dump_file && (dump_flags & TDF_DETAILS))
4405     {
4406       fprintf (dump_file, " into ");
4407       print_gimple_stmt (dump_file, stmt, 0, 0);
4408     }
4409 }
4410
/* Reassociate expressions in basic block BB, then recurse into every
   son of BB in the post-dominator tree.

   The statements of BB are walked from last to first.  Each binary
   assignment that cannot throw and has an associative code is
   linearized into an operand vector, the vector is optimized, and the
   statement is rewritten from what remains.  Statements already marked
   visited are only checked for having become dead, in which case they
   are removed.  */

static void
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  gimple stmt = last_stmt (bb);

  /* Hand the last statement of BB, if there is one and it has not been
     visited yet, to maybe_optimize_range_tests.  */
  if (stmt && !gimple_visited_p (stmt))
    maybe_optimize_range_tests (stmt);

  /* Walk the statements of BB backwards.  */
  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      stmt = gsi_stmt (gsi);

      /* Only assignments that cannot throw are considered.  */
      if (is_gimple_assign (stmt)
          && !stmt_could_throw_p (stmt))
        {
          tree lhs, rhs1, rhs2;
          enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

          /* If this is not a gimple binary expression, there is
             nothing for us to do with it.  */
          if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
            continue;

          /* If this was part of an already processed statement,
             we don't need to touch it again.  */
          if (gimple_visited_p (stmt))
            {
              /* This statement might have become dead because of previous
                 reassociations.  */
              if (has_zero_uses (gimple_get_lhs (stmt)))
                {
                  reassoc_remove_stmt (&gsi);
                  release_defs (stmt);
                  /* We might end up removing the last stmt above which
                     places the iterator to the end of the sequence.
                     Reset it to the last stmt in this case which might
                     be the end of the sequence as well if we removed
                     the last statement of the sequence.  In which case
                     we need to bail out.
                     NOTE(review): the `continue' below still runs the
                     gsi_prev of the loop header, so the statement the
                     iterator is reset to here is itself not examined
                     in this walk -- confirm that this is intended.  */
                  if (gsi_end_p (gsi))
                    {
                      gsi = gsi_last_bb (bb);
                      if (gsi_end_p (gsi))
                        break;
                    }
                }
              continue;
            }

          lhs = gimple_assign_lhs (stmt);
          rhs1 = gimple_assign_rhs1 (stmt);
          rhs2 = gimple_assign_rhs2 (stmt);

          /* For non-bit or min/max operations we can't associate
             all types.  Verify that here.  */
          if (rhs_code != BIT_IOR_EXPR
              && rhs_code != BIT_AND_EXPR
              && rhs_code != BIT_XOR_EXPR
              && rhs_code != MIN_EXPR
              && rhs_code != MAX_EXPR
              && (!can_reassociate_p (lhs)
                  || !can_reassociate_p (rhs1)
                  || !can_reassociate_p (rhs2)))
            continue;

          if (associative_tree_code (rhs_code))
            {
              auto_vec<operand_entry_t> ops;
              tree powi_result = NULL_TREE;

              /* There may be no immediate uses left by the time we
                 get here because we may have eliminated them all.  */
              if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
                continue;

              /* Mark STMT visited, collect the operands of the
                 expression rooted at STMT into OPS, sort them by rank
                 and optimize the list.  If undistribution changed
                 anything, sort and optimize once more.  */
              gimple_set_visited (stmt, true);
              linearize_expr_tree (&ops, stmt, true, true);
              ops.qsort (sort_by_operand_rank);
              optimize_ops_list (rhs_code, &ops);
              if (undistribute_ops_list (rhs_code, &ops,
                                         loop_containing_stmt (stmt)))
                {
                  ops.qsort (sort_by_operand_rank);
                  optimize_ops_list (rhs_code, &ops);
                }

              /* Range test optimization applies to | and & only.  */
              if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
                optimize_range_tests (rhs_code, &ops);

              /* The __builtin_powi conversion is attempted only for
                 multiplications, in the first instance of the pass,
                 and when flag_unsafe_math_optimizations is set.  */
              if (first_pass_instance
                  && rhs_code == MULT_EXPR
                  && flag_unsafe_math_optimizations)
                powi_result = attempt_builtin_powi (stmt, &ops);

              /* If the operand vector is now empty, all operands were
                 consumed by the __builtin_powi optimization.  */
              if (ops.length () == 0)
                transform_stmt_to_copy (&gsi, stmt, powi_result);
              else if (ops.length () == 1)
                {
                  /* A single operand is left: STMT becomes either the
                     product of that operand and the powi result, or a
                     copy of the operand.  */
                  tree last_op = ops.last ()->op;

                  if (powi_result)
                    transform_stmt_to_multiply (&gsi, stmt, last_op,
                                                powi_result);
                  else
                    transform_stmt_to_copy (&gsi, stmt, last_op);
                }
              else
                {
                  /* Two or more operands are left: rewrite the
                     expression tree from OPS.  */
                  enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
                  int ops_num = ops.length ();
                  int width = get_reassociation_width (ops_num, rhs_code, mode);
                  tree new_lhs = lhs;

                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file,
                             "Width = %d was chosen for reassociation\n", width);

                  /* The parallel rewrite is used only when the chosen
                     width exceeds 1 and more than three operands are
                     left; NEW_LHS keeps the value LHS on that path.  */
                  if (width > 1
                      && ops.length () > 3)
                    rewrite_expr_tree_parallel (as_a <gimple_assign> (stmt),
                                                width, ops);
                  else
                    {
                      /* When there are three operands left, we want
                         to make sure the ones that get the double
                         binary op are chosen wisely.  */
                      int len = ops.length ();
                      if (len >= 3)
                        swap_ops_for_binary_stmt (ops, len - 3, stmt);

                      new_lhs = rewrite_expr_tree (stmt, 0, ops,
                                                   powi_result != NULL);
                    }

                  /* If we combined some repeated factors into a
                     __builtin_powi call, multiply that result by the
                     reassociated operands.  The statement defining LHS
                     is redirected to a fresh temporary, and a new
                     multiply that defines LHS is inserted after the
                     current statement.  Its second operand is that
                     temporary, or NEW_LHS when rewrite_expr_tree
                     returned a name different from LHS.  */
                  if (powi_result)
                    {
                      gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
                      tree type = TREE_TYPE (lhs);
                      tree target_ssa = make_temp_ssa_name (type, NULL,
                                                            "reassocpow");
                      gimple_set_lhs (lhs_stmt, target_ssa);
                      update_stmt (lhs_stmt);
                      if (lhs != new_lhs)
                        target_ssa = new_lhs;
                      mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
                                                               powi_result,
                                                               target_ssa);
                      gimple_set_location (mul_stmt, gimple_location (stmt));
                      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
                    }
                }
            }
        }
    }
  /* Recurse into the sons of BB in the post-dominator tree.  */
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    reassociate_bb (son);
}
4580
4581 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4582 void debug_ops_vector (vec<operand_entry_t> ops);
4583
4584 /* Dump the operand entry vector OPS to FILE.  */
4585
4586 void
4587 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4588 {
4589   operand_entry_t oe;
4590   unsigned int i;
4591
4592   FOR_EACH_VEC_ELT (ops, i, oe)
4593     {
4594       fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4595       print_generic_expr (file, oe->op, 0);
4596     }
4597 }
4598
4599 /* Dump the operand entry vector OPS to STDERR.  */
4600
4601 DEBUG_FUNCTION void
4602 debug_ops_vector (vec<operand_entry_t> ops)
4603 {
4604   dump_ops_vector (stderr, ops);
4605 }
4606
4607 static void
4608 do_reassoc (void)
4609 {
4610   break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4611   reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4612 }
4613
4614 /* Initialize the reassociation pass.  */
4615
4616 static void
4617 init_reassoc (void)
4618 {
4619   int i;
4620   long rank = 2;
4621   int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4622
4623   /* Find the loops, so that we can prevent moving calculations in
4624      them.  */
4625   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4626
4627   memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4628
4629   operand_entry_pool = create_alloc_pool ("operand entry pool",
4630                                           sizeof (struct operand_entry), 30);
4631   next_operand_entry_id = 0;
4632
4633   /* Reverse RPO (Reverse Post Order) will give us something where
4634      deeper loops come later.  */
4635   pre_and_rev_post_order_compute (NULL, bbs, false);
4636   bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
4637   operand_rank = new hash_map<tree, long>;
4638
4639   /* Give each default definition a distinct rank.  This includes
4640      parameters and the static chain.  Walk backwards over all
4641      SSA names so that we get proper rank ordering according
4642      to tree_swap_operands_p.  */
4643   for (i = num_ssa_names - 1; i > 0; --i)
4644     {
4645       tree name = ssa_name (i);
4646       if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4647         insert_operand_rank (name, ++rank);
4648     }
4649
4650   /* Set up rank for each BB  */
4651   for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
4652     bb_rank[bbs[i]] = ++rank  << 16;
4653
4654   free (bbs);
4655   calculate_dominance_info (CDI_POST_DOMINATORS);
4656   plus_negates = vNULL;
4657 }
4658
4659 /* Cleanup after the reassociation pass, and print stats if
4660    requested.  */
4661
4662 static void
4663 fini_reassoc (void)
4664 {
4665   statistics_counter_event (cfun, "Linearized",
4666                             reassociate_stats.linearized);
4667   statistics_counter_event (cfun, "Constants eliminated",
4668                             reassociate_stats.constants_eliminated);
4669   statistics_counter_event (cfun, "Ops eliminated",
4670                             reassociate_stats.ops_eliminated);
4671   statistics_counter_event (cfun, "Statements rewritten",
4672                             reassociate_stats.rewritten);
4673   statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4674                             reassociate_stats.pows_encountered);
4675   statistics_counter_event (cfun, "Built-in powi calls created",
4676                             reassociate_stats.pows_created);
4677
4678   delete operand_rank;
4679   free_alloc_pool (operand_entry_pool);
4680   free (bb_rank);
4681   plus_negates.release ();
4682   free_dominance_info (CDI_POST_DOMINATORS);
4683   loop_optimizer_finalize ();
4684 }
4685
4686 /* Gate and execute functions for Reassociation.  */
4687
4688 static unsigned int
4689 execute_reassoc (void)
4690 {
4691   init_reassoc ();
4692
4693   do_reassoc ();
4694   repropagate_negates ();
4695
4696   fini_reassoc ();
4697   return 0;
4698 }
4699
4700 namespace {
4701
4702 const pass_data pass_data_reassoc =
4703 {
4704   GIMPLE_PASS, /* type */
4705   "reassoc", /* name */
4706   OPTGROUP_NONE, /* optinfo_flags */
4707   TV_TREE_REASSOC, /* tv_id */
4708   ( PROP_cfg | PROP_ssa ), /* properties_required */
4709   0, /* properties_provided */
4710   0, /* properties_destroyed */
4711   0, /* todo_flags_start */
4712   TODO_update_ssa_only_virtuals, /* todo_flags_finish */
4713 };
4714
4715 class pass_reassoc : public gimple_opt_pass
4716 {
4717 public:
4718   pass_reassoc (gcc::context *ctxt)
4719     : gimple_opt_pass (pass_data_reassoc, ctxt)
4720   {}
4721
4722   /* opt_pass methods: */
4723   opt_pass * clone () { return new pass_reassoc (m_ctxt); }
4724   virtual bool gate (function *) { return flag_tree_reassoc != 0; }
4725   virtual unsigned int execute (function *) { return execute_reassoc (); }
4726
4727 }; // class pass_reassoc
4728
4729 } // anon namespace
4730
4731 gimple_opt_pass *
4732 make_pass_reassoc (gcc::context *ctxt)
4733 {
4734   return new pass_reassoc (ctxt);
4735 }