gcc/tree-ssa-reassoc.c

   1 /* Reassociation for trees.
   2    Copyright (C) 2005-2014 Free Software Foundation, Inc.
   3    Contributed by Daniel Berlin <dan@dberlin.org>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 3, or (at your option)
  10 any later version.
  11
  12 GCC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "hash-table.h"
  25 #include "tm.h"
  26 #include "rtl.h"
  27 #include "tm_p.h"
  28 #include "tree.h"
  29 #include "stor-layout.h"
  30 #include "basic-block.h"
  31 #include "gimple-pretty-print.h"
  32 #include "tree-inline.h"
  33 #include "hash-map.h"
  34 #include "tree-ssa-alias.h"
  35 #include "internal-fn.h"
  36 #include "gimple-fold.h"
  37 #include "tree-eh.h"
  38 #include "gimple-expr.h"
  39 #include "is-a.h"
  40 #include "gimple.h"
  41 #include "gimple-iterator.h"
  42 #include "gimplify-me.h"
  43 #include "gimple-ssa.h"
  44 #include "tree-cfg.h"
  45 #include "tree-phinodes.h"
  46 #include "ssa-iterators.h"
  47 #include "stringpool.h"
  48 #include "tree-ssanames.h"
  49 #include "tree-ssa-loop-niter.h"
  50 #include "tree-ssa-loop.h"
  51 #include "expr.h"
  52 #include "tree-dfa.h"
  53 #include "tree-ssa.h"
  54 #include "tree-iterator.h"
  55 #include "tree-pass.h"
  56 #include "alloc-pool.h"
  57 #include "langhooks.h"
  58 #include "cfgloop.h"
  59 #include "flags.h"
  60 #include "target.h"
  61 #include "params.h"
  62 #include "diagnostic-core.h"
  63 #include "builtins.h"
  64
  65 /*  This is a simple global reassociation pass.  It is, in part, based
  66     on the LLVM pass of the same name (They do some things more/less
  67     than we do, in different orders, etc).
  68
  69     It consists of five steps:
  70
  71     1. Breaking up subtract operations into addition + negate, where
  72     it would promote the reassociation of adds.
  73
  74     2. Left linearization of the expression trees, so that (A+B)+(C+D)
  75     becomes (((A+B)+C)+D), which is easier for us to rewrite later.
  76     During linearization, we place the operands of the binary
  77     expressions into a vector of operand_entry_t
  78
  79     3. Optimization of the operand lists, eliminating things like a +
  80     -a, a & a, etc.
  81
  82     3a. Combine repeated factors with the same occurrence counts
  83     into a __builtin_powi call that will later be optimized into
  84     an optimal number of multiplies.
  85
  86     4. Rewrite the expression trees we linearized and optimized so
  87     they are in proper rank order.
  88
  89     5. Repropagate negates, as nothing else will clean it up ATM.
  90
  91     A bit of theory on #4, since nobody seems to write anything down
  92     about why it makes sense to do it the way they do it:
  93
  94     We could do this much nicer theoretically, but don't (for reasons
  95     explained after how to do it theoretically nice :P).
  96
  97     In order to promote the most redundancy elimination, you want
  98     binary expressions whose operands are the same rank (or
  99     preferably, the same value) exposed to the redundancy eliminator,
 100     for possible elimination.
 101
 102     So the way to do this if we really cared, is to build the new op
 103     tree from the leaves to the roots, merging as you go, and putting the
 104     new op on the end of the worklist, until you are left with one
 105     thing on the worklist.
 106
 107     IE if you have to rewrite the following set of operands (listed with
 108     rank in parentheses), with opcode PLUS_EXPR:
 109
 110     a (1),  b (1),  c (1),  d (2), e (2)
 111
 112
 113     We start with our merge worklist empty, and the ops list with all of
 114     those on it.
 115
 116     You want to first merge all leaves of the same rank, as much as
 117     possible.
 118
 119     So first build a binary op of
 120
 121     mergetmp = a + b, and put "mergetmp" on the merge worklist.
 122
 123     Because there is no three operand form of PLUS_EXPR, c is not going to
 124     be exposed to redundancy elimination as a rank 1 operand.
 125
 126     So you might as well throw it on the merge worklist (you could also
 127     consider it to now be a rank two operand, and merge it with d and e,
 128     but in this case, you then have evicted e from a binary op. So at
 129     least in this situation, you can't win.)
 130
 131     Then build a binary op of d + e
 132     mergetmp2 = d + e
 133
 134     and put mergetmp2 on the merge worklist.
 135
 136     so merge worklist = {mergetmp, c, mergetmp2}
 137
 138     Continue building binary ops of these operations until you have only
 139     one operation left on the worklist.
 140
 141     So we have
 142
 143     build binary op
 144     mergetmp3 = mergetmp + c
 145
 146     worklist = {mergetmp2, mergetmp3}
 147
 148     mergetmp4 = mergetmp2 + mergetmp3
 149
 150     worklist = {mergetmp4}
 151
 152     because we have one operation left, we can now just set the original
 153     statement equal to the result of that operation.
 154
 155     This will at least expose a + b  and d + e to redundancy elimination
 156     as binary operations.
 157
 158     For extra points, you can reuse the old statements to build the
 159     mergetmps, since you shouldn't run out.
 160
 161     So why don't we do this?
 162
 163     Because it's expensive, and rarely will help.  Most trees we are
 164     reassociating have 3 or less ops.  If they have 2 ops, they already
 165     will be written into a nice single binary op.  If you have 3 ops, a
 166     single simple check suffices to tell you whether the first two are of the
 167     same rank.  If so, you know to order it
 168
 169     mergetmp = op1 + op2
 170     newstmt = mergetmp + op3
 171
 172     instead of
 173     mergetmp = op2 + op3
 174     newstmt = mergetmp + op1
 175
 176     If all three are of the same rank, you can't expose them all in a
 177     single binary operator anyway, so the above is *still* the best you
 178     can do.
 179
 180     Thus, this is what we do.  When we have three ops left, we check to see
 181     what order to put them in, and call it a day.  As a nod to vector sum
 182     reduction, we check if any of the ops are really a phi node that is a
 183     destructive update for the associating op, and keep the destructive
 184     update together for vector sum reduction recognition.  */
 185
 186
 187 /* Statistics */
 188 static struct
 189 {
 190   int linearized;
 191   int constants_eliminated;
 192   int ops_eliminated;
 193   int rewritten;
 194   int pows_encountered;
 195   int pows_created;
 196 } reassociate_stats;
 197
 198 /* Operator, rank pair.  */
 199 typedef struct operand_entry
 200 {
 201   unsigned int rank;
 202   int id;
 203   tree op;
 204   unsigned int count;
 205 } *operand_entry_t;
 206
 207 static alloc_pool operand_entry_pool;
 208
 209 /* This is used to assign a unique ID to each struct operand_entry
 210    so that qsort results are identical on different hosts.  */
 211 static int next_operand_entry_id;
 212
 213 /* Starting rank number for a given basic block, so that we can rank
 214    operations using unmovable instructions in that BB based on the bb
 215    depth.  */
 216 static long *bb_rank;
 217
 218 /* Operand->rank hashtable.  */
 219 static hash_map<tree, long> *operand_rank;
 220
 221 /* Forward decls.  */
 222 static long get_rank (tree);
 223 static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
 224
 225 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
 226    possibly added by gsi_remove.  */
 227
 228 bool
 229 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
 230 {
 231   gimple stmt = gsi_stmt (*gsi);
 232
 233   if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
 234     return gsi_remove (gsi, true);
 235
 236   gimple_stmt_iterator prev = *gsi;
 237   gsi_prev (&prev);
 238   unsigned uid = gimple_uid (stmt);
 239   basic_block bb = gimple_bb (stmt);
 240   bool ret = gsi_remove (gsi, true);
 241   if (!gsi_end_p (prev))
 242     gsi_next (&prev);
 243   else
 244     prev = gsi_start_bb (bb);
 245   gimple end_stmt = gsi_stmt (*gsi);
 246   while ((stmt = gsi_stmt (prev)) != end_stmt)
 247     {
 248       gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
 249       gimple_set_uid (stmt, uid);
 250       gsi_next (&prev);
 251     }
 252   return ret;
 253 }
 254
 255 /* Bias amount for loop-carried phis.  We want this to be larger than
 256    the depth of any reassociation tree we can see, but not larger than
 257    the rank difference between two blocks.  */
 258 #define PHI_LOOP_BIAS (1 << 15)
 259
 260 /* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
 261    an innermost loop, and the phi has only a single use which is inside
 262    the loop, then the rank is the block rank of the loop latch plus an
 263    extra bias for the loop-carried dependence.  This causes expressions
 264    calculated into an accumulator variable to be independent for each
 265    iteration of the loop.  If STMT is some other phi, the rank is the
 266    block rank of its containing block.  */
 267 static long
 268 phi_rank (gimple stmt)
 269 {
 270   basic_block bb = gimple_bb (stmt);
 271   struct loop *father = bb->loop_father;
 272   tree res;
 273   unsigned i;
 274   use_operand_p use;
 275   gimple use_stmt;
 276
 277   /* We only care about real loops (those with a latch).  */
 278   if (!father->latch)
 279     return bb_rank[bb->index];
 280
 281   /* Interesting phis must be in headers of innermost loops.  */
 282   if (bb != father->header
 283       || father->inner)
 284     return bb_rank[bb->index];
 285
 286   /* Ignore virtual SSA_NAMEs.  */
 287   res = gimple_phi_result (stmt);
 288   if (virtual_operand_p (res))
 289     return bb_rank[bb->index];
 290
 291   /* The phi definition must have a single use, and that use must be
 292      within the loop.  Otherwise this isn't an accumulator pattern.  */
 293   if (!single_imm_use (res, &use, &use_stmt)
 294       || gimple_bb (use_stmt)->loop_father != father)
 295     return bb_rank[bb->index];
 296
 297   /* Look for phi arguments from within the loop.  If found, bias this phi.  */
 298   for (i = 0; i < gimple_phi_num_args (stmt); i++)
 299     {
 300       tree arg = gimple_phi_arg_def (stmt, i);
 301       if (TREE_CODE (arg) == SSA_NAME
 302           && !SSA_NAME_IS_DEFAULT_DEF (arg))
 303         {
 304           gimple def_stmt = SSA_NAME_DEF_STMT (arg);
 305           if (gimple_bb (def_stmt)->loop_father == father)
 306             return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
 307         }
 308     }
 309
 310   /* Must be an uninteresting phi.  */
 311   return bb_rank[bb->index];
 312 }
 313
 314 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
 315    loop-carried dependence of an innermost loop, return TRUE; else
 316    return FALSE.  */
 317 static bool
 318 loop_carried_phi (tree exp)
 319 {
 320   gimple phi_stmt;
 321   long block_rank;
 322
 323   if (TREE_CODE (exp) != SSA_NAME
 324       || SSA_NAME_IS_DEFAULT_DEF (exp))
 325     return false;
 326
 327   phi_stmt = SSA_NAME_DEF_STMT (exp);
 328
 329   if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
 330     return false;
 331
 332   /* Non-loop-carried phis have block rank.  Loop-carried phis have
 333      an additional bias added in.  If this phi doesn't have block rank,
 334      it's biased and should not be propagated.  */
 335   block_rank = bb_rank[gimple_bb (phi_stmt)->index];
 336
 337   if (phi_rank (phi_stmt) != block_rank)
 338     return true;
 339
 340   return false;
 341 }
 342
 343 /* Return the maximum of RANK and the rank that should be propagated
 344    from expression OP.  For most operands, this is just the rank of OP.
 345    For loop-carried phis, the value is zero to avoid undoing the bias
 346    in favor of the phi.  */
 347 static long
 348 propagate_rank (long rank, tree op)
 349 {
 350   long op_rank;
 351
 352   if (loop_carried_phi (op))
 353     return rank;
 354
 355   op_rank = get_rank (op);
 356
 357   return MAX (rank, op_rank);
 358 }
 359
 360 /* Look up the operand rank structure for expression E.  */
 361
 362 static inline long
 363 find_operand_rank (tree e)
 364 {
 365   long *slot = operand_rank->get (e);
 366   return slot ? *slot : -1;
 367 }
 368
 369 /* Insert {E,RANK} into the operand rank hashtable.  */
 370
 371 static inline void
 372 insert_operand_rank (tree e, long rank)
 373 {
 374   gcc_assert (rank > 0);
 375   gcc_assert (!operand_rank->put (e, rank));
 376 }
 377
 378 /* Given an expression E, return the rank of the expression.  */
 379
 380 static long
 381 get_rank (tree e)
 382 {
 383   /* Constants have rank 0.  */
 384   if (is_gimple_min_invariant (e))
 385     return 0;
 386
 387   /* SSA_NAME's have the rank of the expression they are the result
 388      of.
 389      For globals and uninitialized values, the rank is 0.
 390      For function arguments, use the pre-setup rank.
 391      For PHI nodes, stores, asm statements, etc, we use the rank of
 392      the BB.
 393      For simple operations, the rank is the maximum rank of any of
 394      its operands, or the bb_rank, whichever is less.
 395      I make no claims that this is optimal, however, it gives good
 396      results.  */
 397
 398   /* We make an exception to the normal ranking system to break
 399      dependences of accumulator variables in loops.  Suppose we
 400      have a simple one-block loop containing:
 401
 402        x_1 = phi(x_0, x_2)
 403        b = a + x_1
 404        c = b + d
 405        x_2 = c + e
 406
 407      As shown, each iteration of the calculation into x is fully
 408      dependent upon the iteration before it.  We would prefer to
 409      see this in the form:
 410
 411        x_1 = phi(x_0, x_2)
 412        b = a + d
 413        c = b + e
 414        x_2 = c + x_1
 415
 416      If the loop is unrolled, the calculations of b and c from
 417      different iterations can be interleaved.
 418
 419      To obtain this result during reassociation, we bias the rank
 420      of the phi definition x_1 upward, when it is recognized as an
 421      accumulator pattern.  The artificial rank causes it to be
 422      added last, providing the desired independence.  */
 423
 424   if (TREE_CODE (e) == SSA_NAME)
 425     {
 426       gimple stmt;
 427       long rank;
 428       int i, n;
 429       tree op;
 430
 431       if (SSA_NAME_IS_DEFAULT_DEF (e))
 432         return find_operand_rank (e);
 433
 434       stmt = SSA_NAME_DEF_STMT (e);
 435       if (gimple_code (stmt) == GIMPLE_PHI)
 436         return phi_rank (stmt);
 437
 438       if (!is_gimple_assign (stmt)
 439           || gimple_vdef (stmt))
 440         return bb_rank[gimple_bb (stmt)->index];
 441
 442       /* If we already have a rank for this expression, use that.  */
 443       rank = find_operand_rank (e);
 444       if (rank != -1)
 445         return rank;
 446
 447       /* Otherwise, find the maximum rank for the operands.  As an
 448          exception, remove the bias from loop-carried phis when propagating
 449          the rank so that dependent operations are not also biased.  */
 450       rank = 0;
 451       if (gimple_assign_single_p (stmt))
 452         {
 453           tree rhs = gimple_assign_rhs1 (stmt);
 454           n = TREE_OPERAND_LENGTH (rhs);
 455           if (n == 0)
 456             rank = propagate_rank (rank, rhs);
 457           else
 458             {
 459               for (i = 0; i < n; i++)
 460                 {
 461                   op = TREE_OPERAND (rhs, i);
 462
 463                   if (op != NULL_TREE)
 464                     rank = propagate_rank (rank, op);
 465                 }
 466             }
 467         }
 468       else
 469         {
 470           n = gimple_num_ops (stmt);
 471           for (i = 1; i < n; i++)
 472             {
 473               op = gimple_op (stmt, i);
 474               gcc_assert (op);
 475               rank = propagate_rank (rank, op);
 476             }
 477         }
 478
 479       if (dump_file && (dump_flags & TDF_DETAILS))
 480         {
 481           fprintf (dump_file, "Rank for ");
 482           print_generic_expr (dump_file, e, 0);
 483           fprintf (dump_file, " is %ld\n", (rank + 1));
 484         }
 485
 486       /* Note the rank in the hashtable so we don't recompute it.  */
 487       insert_operand_rank (e, (rank + 1));
 488       return (rank + 1);
 489     }
 490
 491   /* Globals, etc,  are rank 0 */
 492   return 0;
 493 }
 494
 495
 496 /* We want integer ones to end up last no matter what, since they are
 497    the ones we can do the most with.  */
 498 #define INTEGER_CONST_TYPE 1 << 3
 499 #define FLOAT_CONST_TYPE 1 << 2
 500 #define OTHER_CONST_TYPE 1 << 1
 501
 502 /* Classify an invariant tree into integer, float, or other, so that
 503    we can sort them to be near other constants of the same type.  */
 504 static inline int
 505 constant_type (tree t)
 506 {
 507   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
 508     return INTEGER_CONST_TYPE;
 509   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
 510     return FLOAT_CONST_TYPE;
 511   else
 512     return OTHER_CONST_TYPE;
 513 }
 514
 515 /* qsort comparison function to sort operand entries PA and PB by rank
 516    so that the sorted array is ordered by rank in decreasing order.  */
 517 static int
 518 sort_by_operand_rank (const void *pa, const void *pb)
 519 {
 520   const operand_entry_t oea = *(const operand_entry_t *)pa;
 521   const operand_entry_t oeb = *(const operand_entry_t *)pb;
 522
 523   /* It's nicer for optimize_expression if constants that are likely
 524      to fold when added/multiplied//whatever are put next to each
 525      other.  Since all constants have rank 0, order them by type.  */
 526   if (oeb->rank == 0 && oea->rank == 0)
 527     {
 528       if (constant_type (oeb->op) != constant_type (oea->op))
 529         return constant_type (oeb->op) - constant_type (oea->op);
 530       else
 531         /* To make sorting result stable, we use unique IDs to determine
 532            order.  */
 533         return oeb->id - oea->id;
 534     }
 535
 536   /* Lastly, make sure the versions that are the same go next to each
 537      other.  */
 538   if ((oeb->rank - oea->rank == 0)
 539       && TREE_CODE (oea->op) == SSA_NAME
 540       && TREE_CODE (oeb->op) == SSA_NAME)
 541     {
 542       /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
 543          versions of removed SSA_NAMEs, so if possible, prefer to sort
 544          based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
 545          See PR60418.  */
 546       if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
 547           && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
 548           && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
 549         {
 550           gimple stmta = SSA_NAME_DEF_STMT (oea->op);
 551           gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
 552           basic_block bba = gimple_bb (stmta);
 553           basic_block bbb = gimple_bb (stmtb);
 554           if (bbb != bba)
 555             {
 556               if (bb_rank[bbb->index] != bb_rank[bba->index])
 557                 return bb_rank[bbb->index] - bb_rank[bba->index];
 558             }
 559           else
 560             {
 561               bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
 562               bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
 563               if (da != db)
 564                 return da ? 1 : -1;
 565             }
 566         }
 567
 568       if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
 569         return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
 570       else
 571         return oeb->id - oea->id;
 572     }
 573
 574   if (oeb->rank != oea->rank)
 575     return oeb->rank - oea->rank;
 576   else
 577     return oeb->id - oea->id;
 578 }
 579
 580 /* Add an operand entry to *OPS for the tree operand OP.  */
 581
 582 static void
 583 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
 584 {
 585   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
 586
 587   oe->op = op;
 588   oe->rank = get_rank (op);
 589   oe->id = next_operand_entry_id++;
 590   oe->count = 1;
 591   ops->safe_push (oe);
 592 }
 593
 594 /* Add an operand entry to *OPS for the tree operand OP with repeat
 595    count REPEAT.  */
 596
 597 static void
 598 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
 599                        HOST_WIDE_INT repeat)
 600 {
 601   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
 602
 603   oe->op = op;
 604   oe->rank = get_rank (op);
 605   oe->id = next_operand_entry_id++;
 606   oe->count = repeat;
 607   ops->safe_push (oe);
 608
 609   reassociate_stats.pows_encountered++;
 610 }
 611
 612 /* Return true if STMT is reassociable operation containing a binary
 613    operation with tree code CODE, and is inside LOOP.  */
 614
 615 static bool
 616 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
 617 {
 618   basic_block bb = gimple_bb (stmt);
 619
 620   if (gimple_bb (stmt) == NULL)
 621     return false;
 622
 623   if (!flow_bb_inside_loop_p (loop, bb))
 624     return false;
 625
 626   if (is_gimple_assign (stmt)
 627       && gimple_assign_rhs_code (stmt) == code
 628       && has_single_use (gimple_assign_lhs (stmt)))
 629     return true;
 630
 631   return false;
 632 }
 633
 634
 635 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
 636    operand of the negate operation.  Otherwise, return NULL.  */
 637
 638 static tree
 639 get_unary_op (tree name, enum tree_code opcode)
 640 {
 641   gimple stmt = SSA_NAME_DEF_STMT (name);
 642
 643   if (!is_gimple_assign (stmt))
 644     return NULL_TREE;
 645
 646   if (gimple_assign_rhs_code (stmt) == opcode)
 647     return gimple_assign_rhs1 (stmt);
 648   return NULL_TREE;
 649 }
 650
 651 /* If CURR and LAST are a pair of ops that OPCODE allows us to
 652    eliminate through equivalences, do so, remove them from OPS, and
 653    return true.  Otherwise, return false.  */
 654
 655 static bool
 656 eliminate_duplicate_pair (enum tree_code opcode,
 657                           vec<operand_entry_t> *ops,
 658                           bool *all_done,
 659                           unsigned int i,
 660                           operand_entry_t curr,
 661                           operand_entry_t last)
 662 {
 663
 664   /* If we have two of the same op, and the opcode is & |, min, or max,
 665      we can eliminate one of them.
 666      If we have two of the same op, and the opcode is ^, we can
 667      eliminate both of them.  */
 668
 669   if (last && last->op == curr->op)
 670     {
 671       switch (opcode)
 672         {
 673         case MAX_EXPR:
 674         case MIN_EXPR:
 675         case BIT_IOR_EXPR:
 676         case BIT_AND_EXPR:
 677           if (dump_file && (dump_flags & TDF_DETAILS))
 678             {
 679               fprintf (dump_file, "Equivalence: ");
 680               print_generic_expr (dump_file, curr->op, 0);
 681               fprintf (dump_file, " [&|minmax] ");
 682               print_generic_expr (dump_file, last->op, 0);
 683               fprintf (dump_file, " -> ");
 684               print_generic_stmt (dump_file, last->op, 0);
 685             }
 686
 687           ops->ordered_remove (i);
 688           reassociate_stats.ops_eliminated ++;
 689
 690           return true;
 691
 692         case BIT_XOR_EXPR:
 693           if (dump_file && (dump_flags & TDF_DETAILS))
 694             {
 695               fprintf (dump_file, "Equivalence: ");
 696               print_generic_expr (dump_file, curr->op, 0);
 697               fprintf (dump_file, " ^ ");
 698               print_generic_expr (dump_file, last->op, 0);
 699               fprintf (dump_file, " -> nothing\n");
 700             }
 701
 702           reassociate_stats.ops_eliminated += 2;
 703
 704           if (ops->length () == 2)
 705             {
 706               ops->create (0);
 707               add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
 708               *all_done = true;
 709             }
 710           else
 711             {
 712               ops->ordered_remove (i-1);
 713               ops->ordered_remove (i-1);
 714             }
 715
 716           return true;
 717
 718         default:
 719           break;
 720         }
 721     }
 722   return false;
 723 }
 724
 725 static vec<tree> plus_negates;
 726
 727 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
 728    expression, look in OPS for a corresponding positive operation to cancel
 729    it out.  If we find one, remove the other from OPS, replace
 730    OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
 731    return false. */
 732
 733 static bool
 734 eliminate_plus_minus_pair (enum tree_code opcode,
 735                            vec<operand_entry_t> *ops,
 736                            unsigned int currindex,
 737                            operand_entry_t curr)
 738 {
 739   tree negateop;
 740   tree notop;
 741   unsigned int i;
 742   operand_entry_t oe;
 743
 744   if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
 745     return false;
 746
 747   negateop = get_unary_op (curr->op, NEGATE_EXPR);
 748   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 749   if (negateop == NULL_TREE && notop == NULL_TREE)
 750     return false;
 751
 752   /* Any non-negated version will have a rank that is one less than
 753      the current rank.  So once we hit those ranks, if we don't find
 754      one, we can stop.  */
 755
 756   for (i = currindex + 1;
 757        ops->iterate (i, &oe)
 758        && oe->rank >= curr->rank - 1 ;
 759        i++)
 760     {
 761       if (oe->op == negateop)
 762         {
 763
 764           if (dump_file && (dump_flags & TDF_DETAILS))
 765             {
 766               fprintf (dump_file, "Equivalence: ");
 767               print_generic_expr (dump_file, negateop, 0);
 768               fprintf (dump_file, " + -");
 769               print_generic_expr (dump_file, oe->op, 0);
 770               fprintf (dump_file, " -> 0\n");
 771             }
 772
 773           ops->ordered_remove (i);
 774           add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
 775           ops->ordered_remove (currindex);
 776           reassociate_stats.ops_eliminated ++;
 777
 778           return true;
 779         }
 780       else if (oe->op == notop)
 781         {
 782           tree op_type = TREE_TYPE (oe->op);
 783
 784           if (dump_file && (dump_flags & TDF_DETAILS))
 785             {
 786               fprintf (dump_file, "Equivalence: ");
 787               print_generic_expr (dump_file, notop, 0);
 788               fprintf (dump_file, " + ~");
 789               print_generic_expr (dump_file, oe->op, 0);
 790               fprintf (dump_file, " -> -1\n");
 791             }
 792
 793           ops->ordered_remove (i);
 794           add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
 795           ops->ordered_remove (currindex);
 796           reassociate_stats.ops_eliminated ++;
 797
 798           return true;
 799         }
 800     }
 801
 802   /* CURR->OP is a negate expr in a plus expr: save it for later
 803      inspection in repropagate_negates().  */
 804   if (negateop != NULL_TREE)
 805     plus_negates.safe_push (curr->op);
 806
 807   return false;
 808 }
 809
 810 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
 811    bitwise not expression, look in OPS for a corresponding operand to
 812    cancel it out.  If we find one, remove the other from OPS, replace
 813    OPS[CURRINDEX] with 0, and return true.  Otherwise, return
 814    false. */
 815
 816 static bool
 817 eliminate_not_pairs (enum tree_code opcode,
 818                      vec<operand_entry_t> *ops,
 819                      unsigned int currindex,
 820                      operand_entry_t curr)
 821 {
 822   tree notop;
 823   unsigned int i;
 824   operand_entry_t oe;
 825
 826   if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
 827       || TREE_CODE (curr->op) != SSA_NAME)
 828     return false;
 829
 830   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 831   if (notop == NULL_TREE)
 832     return false;
 833
 834   /* Any non-not version will have a rank that is one less than
 835      the current rank.  So once we hit those ranks, if we don't find
 836      one, we can stop.  */
 837
 838   for (i = currindex + 1;
 839        ops->iterate (i, &oe)
 840        && oe->rank >= curr->rank - 1;
 841        i++)
 842     {
 843       if (oe->op == notop)
 844         {
 845           if (dump_file && (dump_flags & TDF_DETAILS))
 846             {
 847               fprintf (dump_file, "Equivalence: ");
 848               print_generic_expr (dump_file, notop, 0);
 849               if (opcode == BIT_AND_EXPR)
 850                 fprintf (dump_file, " & ~");
 851               else if (opcode == BIT_IOR_EXPR)
 852                 fprintf (dump_file, " | ~");
 853               print_generic_expr (dump_file, oe->op, 0);
 854               if (opcode == BIT_AND_EXPR)
 855                 fprintf (dump_file, " -> 0\n");
 856               else if (opcode == BIT_IOR_EXPR)
 857                 fprintf (dump_file, " -> -1\n");
 858             }
 859
 860           if (opcode == BIT_AND_EXPR)
 861             oe->op = build_zero_cst (TREE_TYPE (oe->op));
 862           else if (opcode == BIT_IOR_EXPR)
 863             oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
 864
 865           reassociate_stats.ops_eliminated += ops->length () - 1;
 866           ops->truncate (0);
 867           ops->quick_push (oe);
 868           return true;
 869         }
 870     }
 871
 872   return false;
 873 }
 874
 875 /* Use constant value that may be present in OPS to try to eliminate
 876    operands.  Note that this function is only really used when we've
 877    eliminated ops for other reasons, or merged constants.  Across
 878    single statements, fold already does all of this, plus more.  There
 879    is little point in duplicating logic, so I've only included the
 880    identities that I could ever construct testcases to trigger.  */
 881
 882 static void
 883 eliminate_using_constants (enum tree_code opcode,
 884                            vec<operand_entry_t> *ops)
 885 {
 886   operand_entry_t oelast = ops->last ();
 887   tree type = TREE_TYPE (oelast->op);
 888
 889   if (oelast->rank == 0
 890       && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
 891     {
 892       switch (opcode)
 893         {
 894         case BIT_AND_EXPR:
 895           if (integer_zerop (oelast->op))
 896             {
 897               if (ops->length () != 1)
 898                 {
 899                   if (dump_file && (dump_flags & TDF_DETAILS))
 900                     fprintf (dump_file, "Found & 0, removing all other ops\n");
 901
 902                   reassociate_stats.ops_eliminated += ops->length () - 1;
 903
 904                   ops->truncate (0);
 905                   ops->quick_push (oelast);
 906                   return;
 907                 }
 908             }
 909           else if (integer_all_onesp (oelast->op))
 910             {
 911               if (ops->length () != 1)
 912                 {
 913                   if (dump_file && (dump_flags & TDF_DETAILS))
 914                     fprintf (dump_file, "Found & -1, removing\n");
 915                   ops->pop ();
 916                   reassociate_stats.ops_eliminated++;
 917                 }
 918             }
 919           break;
 920         case BIT_IOR_EXPR:
 921           if (integer_all_onesp (oelast->op))
 922             {
 923               if (ops->length () != 1)
 924                 {
 925                   if (dump_file && (dump_flags & TDF_DETAILS))
 926                     fprintf (dump_file, "Found | -1, removing all other ops\n");
 927
 928                   reassociate_stats.ops_eliminated += ops->length () - 1;
 929
 930                   ops->truncate (0);
 931                   ops->quick_push (oelast);
 932                   return;
 933                 }
 934             }
 935           else if (integer_zerop (oelast->op))
 936             {
 937               if (ops->length () != 1)
 938                 {
 939                   if (dump_file && (dump_flags & TDF_DETAILS))
 940                     fprintf (dump_file, "Found | 0, removing\n");
 941                   ops->pop ();
 942                   reassociate_stats.ops_eliminated++;
 943                 }
 944             }
 945           break;
 946         case MULT_EXPR:
 947           if (integer_zerop (oelast->op)
 948               || (FLOAT_TYPE_P (type)
 949                   && !HONOR_NANS (TYPE_MODE (type))
 950                   && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
 951                   && real_zerop (oelast->op)))
 952             {
 953               if (ops->length () != 1)
 954                 {
 955                   if (dump_file && (dump_flags & TDF_DETAILS))
 956                     fprintf (dump_file, "Found * 0, removing all other ops\n");
 957
 958                   reassociate_stats.ops_eliminated += ops->length () - 1;
 959                   ops->truncate (1);
 960                   ops->quick_push (oelast);
 961                   return;
 962                 }
 963             }
 964           else if (integer_onep (oelast->op)
 965                    || (FLOAT_TYPE_P (type)
 966                        && !HONOR_SNANS (TYPE_MODE (type))
 967                        && real_onep (oelast->op)))
 968             {
 969               if (ops->length () != 1)
 970                 {
 971                   if (dump_file && (dump_flags & TDF_DETAILS))
 972                     fprintf (dump_file, "Found * 1, removing\n");
 973                   ops->pop ();
 974                   reassociate_stats.ops_eliminated++;
 975                   return;
 976                 }
 977             }
 978           break;
 979         case BIT_XOR_EXPR:
 980         case PLUS_EXPR:
 981         case MINUS_EXPR:
 982           if (integer_zerop (oelast->op)
 983               || (FLOAT_TYPE_P (type)
 984                   && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
 985                   && fold_real_zero_addition_p (type, oelast->op,
 986                                                 opcode == MINUS_EXPR)))
 987             {
 988               if (ops->length () != 1)
 989                 {
 990                   if (dump_file && (dump_flags & TDF_DETAILS))
 991                     fprintf (dump_file, "Found [|^+] 0, removing\n");
 992                   ops->pop ();
 993                   reassociate_stats.ops_eliminated++;
 994                   return;
 995                 }
 996             }
 997           break;
 998         default:
 999           break;
1000         }
1001     }
1002 }
1003
1004
1005 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
1006                                  bool, bool);
1007
1008 /* Structure for tracking and counting operands.  */
1009 typedef struct oecount_s {
1010   int cnt;
1011   int id;
1012   enum tree_code oecode;
1013   tree op;
1014 } oecount;
1015
1016
1017 /* The heap for the oecount hashtable and the sorted list of operands.  */
1018 static vec<oecount> cvec;
1019
1020
1021 /* Oecount hashtable helpers.  */
1022
1023 struct oecount_hasher
1024 {
1025   typedef int value_type;
1026   typedef int compare_type;
1027   typedef int store_values_directly;
1028   static inline hashval_t hash (const value_type &);
1029   static inline bool equal (const value_type &, const compare_type &);
1030   static bool is_deleted (int &v) { return v == 1; }
1031   static void mark_deleted (int &e) { e = 1; }
1032   static bool is_empty (int &v) { return v == 0; }
1033   static void mark_empty (int &e) { e = 0; }
1034   static void remove (int &) {}
1035 };
1036
1037 /* Hash function for oecount.  */
1038
1039 inline hashval_t
1040 oecount_hasher::hash (const value_type &p)
1041 {
1042   const oecount *c = &cvec[p - 42];
1043   return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1044 }
1045
1046 /* Comparison function for oecount.  */
1047
1048 inline bool
1049 oecount_hasher::equal (const value_type &p1, const compare_type &p2)
1050 {
1051   const oecount *c1 = &cvec[p1 - 42];
1052   const oecount *c2 = &cvec[p2 - 42];
1053   return (c1->oecode == c2->oecode
1054           && c1->op == c2->op);
1055 }
1056
1057 /* Comparison function for qsort sorting oecount elements by count.  */
1058
1059 static int
1060 oecount_cmp (const void *p1, const void *p2)
1061 {
1062   const oecount *c1 = (const oecount *)p1;
1063   const oecount *c2 = (const oecount *)p2;
1064   if (c1->cnt != c2->cnt)
1065     return c1->cnt - c2->cnt;
1066   else
1067     /* If counts are identical, use unique IDs to stabilize qsort.  */
1068     return c1->id - c2->id;
1069 }
1070
1071 /* Return TRUE iff STMT represents a builtin call that raises OP
1072    to some exponent.  */
1073
1074 static bool
1075 stmt_is_power_of_op (gimple stmt, tree op)
1076 {
1077   tree fndecl;
1078
1079   if (!is_gimple_call (stmt))
1080     return false;
1081
1082   fndecl = gimple_call_fndecl (stmt);
1083
1084   if (!fndecl
1085       || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1086     return false;
1087
1088   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1089     {
1090     CASE_FLT_FN (BUILT_IN_POW):
1091     CASE_FLT_FN (BUILT_IN_POWI):
1092       return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1093
1094     default:
1095       return false;
1096     }
1097 }
1098
1099 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1100    in place and return the result.  Assumes that stmt_is_power_of_op
1101    was previously called for STMT and returned TRUE.  */
1102
1103 static HOST_WIDE_INT
1104 decrement_power (gimple stmt)
1105 {
1106   REAL_VALUE_TYPE c, cint;
1107   HOST_WIDE_INT power;
1108   tree arg1;
1109
1110   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1111     {
1112     CASE_FLT_FN (BUILT_IN_POW):
1113       arg1 = gimple_call_arg (stmt, 1);
1114       c = TREE_REAL_CST (arg1);
1115       power = real_to_integer (&c) - 1;
1116       real_from_integer (&cint, VOIDmode, power, SIGNED);
1117       gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1118       return power;
1119
1120     CASE_FLT_FN (BUILT_IN_POWI):
1121       arg1 = gimple_call_arg (stmt, 1);
1122       power = TREE_INT_CST_LOW (arg1) - 1;
1123       gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1124       return power;
1125
1126     default:
1127       gcc_unreachable ();
1128     }
1129 }
1130
1131 /* Find the single immediate use of STMT's LHS, and replace it
1132    with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
1133    replace *DEF with OP as well.  */
1134
1135 static void
1136 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1137 {
1138   tree lhs;
1139   gimple use_stmt;
1140   use_operand_p use;
1141   gimple_stmt_iterator gsi;
1142
1143   if (is_gimple_call (stmt))
1144     lhs = gimple_call_lhs (stmt);
1145   else
1146     lhs = gimple_assign_lhs (stmt);
1147
1148   gcc_assert (has_single_use (lhs));
1149   single_imm_use (lhs, &use, &use_stmt);
1150   if (lhs == *def)
1151     *def = op;
1152   SET_USE (use, op);
1153   if (TREE_CODE (op) != SSA_NAME)
1154     update_stmt (use_stmt);
1155   gsi = gsi_for_stmt (stmt);
1156   unlink_stmt_vdef (stmt);
1157   reassoc_remove_stmt (&gsi);
1158   release_defs (stmt);
1159 }
1160
1161 /* Walks the linear chain with result *DEF searching for an operation
1162    with operand OP and code OPCODE removing that from the chain.  *DEF
1163    is updated if there is only one operand but no operation left.  */
1164
1165 static void
1166 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1167 {
1168   gimple stmt = SSA_NAME_DEF_STMT (*def);
1169
1170   do
1171     {
1172       tree name;
1173
1174       if (opcode == MULT_EXPR
1175           && stmt_is_power_of_op (stmt, op))
1176         {
1177           if (decrement_power (stmt) == 1)
1178             propagate_op_to_single_use (op, stmt, def);
1179           return;
1180         }
1181
1182       name = gimple_assign_rhs1 (stmt);
1183
1184       /* If this is the operation we look for and one of the operands
1185          is ours simply propagate the other operand into the stmts
1186          single use.  */
1187       if (gimple_assign_rhs_code (stmt) == opcode
1188           && (name == op
1189               || gimple_assign_rhs2 (stmt) == op))
1190         {
1191           if (name == op)
1192             name = gimple_assign_rhs2 (stmt);
1193           propagate_op_to_single_use (name, stmt, def);
1194           return;
1195         }
1196
1197       /* We might have a multiply of two __builtin_pow* calls, and
1198          the operand might be hiding in the rightmost one.  */
1199       if (opcode == MULT_EXPR
1200           && gimple_assign_rhs_code (stmt) == opcode
1201           && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1202           && has_single_use (gimple_assign_rhs2 (stmt)))
1203         {
1204           gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1205           if (stmt_is_power_of_op (stmt2, op))
1206             {
1207               if (decrement_power (stmt2) == 1)
1208                 propagate_op_to_single_use (op, stmt2, def);
1209               return;
1210             }
1211         }
1212
1213       /* Continue walking the chain.  */
1214       gcc_assert (name != op
1215                   && TREE_CODE (name) == SSA_NAME);
1216       stmt = SSA_NAME_DEF_STMT (name);
1217     }
1218   while (1);
1219 }
1220
1221 /* Returns true if statement S1 dominates statement S2.  Like
1222    stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */
1223
1224 static bool
1225 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1226 {
1227   basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1228
1229   /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1230      SSA_NAME.  Assume it lives at the beginning of function and
1231      thus dominates everything.  */
1232   if (!bb1 || s1 == s2)
1233     return true;
1234
1235   /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
1236   if (!bb2)
1237     return false;
1238
1239   if (bb1 == bb2)
1240     {
1241       /* PHIs in the same basic block are assumed to be
1242          executed all in parallel, if only one stmt is a PHI,
1243          it dominates the other stmt in the same basic block.  */
1244       if (gimple_code (s1) == GIMPLE_PHI)
1245         return true;
1246
1247       if (gimple_code (s2) == GIMPLE_PHI)
1248         return false;
1249
1250       gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1251
1252       if (gimple_uid (s1) < gimple_uid (s2))
1253         return true;
1254
1255       if (gimple_uid (s1) > gimple_uid (s2))
1256         return false;
1257
1258       gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1259       unsigned int uid = gimple_uid (s1);
1260       for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1261         {
1262           gimple s = gsi_stmt (gsi);
1263           if (gimple_uid (s) != uid)
1264             break;
1265           if (s == s2)
1266             return true;
1267         }
1268
1269       return false;
1270     }
1271
1272   return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1273 }
1274
1275 /* Insert STMT after INSERT_POINT.  */
1276
1277 static void
1278 insert_stmt_after (gimple stmt, gimple insert_point)
1279 {
1280   gimple_stmt_iterator gsi;
1281   basic_block bb;
1282
1283   if (gimple_code (insert_point) == GIMPLE_PHI)
1284     bb = gimple_bb (insert_point);
1285   else if (!stmt_ends_bb_p (insert_point))
1286     {
1287       gsi = gsi_for_stmt (insert_point);
1288       gimple_set_uid (stmt, gimple_uid (insert_point));
1289       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1290       return;
1291     }
1292   else
1293     /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1294        thus if it must end a basic block, it should be a call that can
1295        throw, or some assignment that can throw.  If it throws, the LHS
1296        of it will not be initialized though, so only valid places using
1297        the SSA_NAME should be dominated by the fallthru edge.  */
1298     bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1299   gsi = gsi_after_labels (bb);
1300   if (gsi_end_p (gsi))
1301     {
1302       gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1303       gimple_set_uid (stmt,
1304                       gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1305     }
1306   else
1307     gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1308   gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1309 }
1310
1311 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1312    the result.  Places the statement after the definition of either
1313    OP1 or OP2.  Returns the new statement.  */
1314
1315 static gimple
1316 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1317 {
1318   gimple op1def = NULL, op2def = NULL;
1319   gimple_stmt_iterator gsi;
1320   tree op;
1321   gimple sum;
1322
1323   /* Create the addition statement.  */
1324   op = make_ssa_name (type, NULL);
1325   sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1326
1327   /* Find an insertion place and insert.  */
1328   if (TREE_CODE (op1) == SSA_NAME)
1329     op1def = SSA_NAME_DEF_STMT (op1);
1330   if (TREE_CODE (op2) == SSA_NAME)
1331     op2def = SSA_NAME_DEF_STMT (op2);
1332   if ((!op1def || gimple_nop_p (op1def))
1333       && (!op2def || gimple_nop_p (op2def)))
1334     {
1335       gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1336       if (gsi_end_p (gsi))
1337         {
1338           gimple_stmt_iterator gsi2
1339             = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1340           gimple_set_uid (sum,
1341                           gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1342         }
1343       else
1344         gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1345       gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1346     }
1347   else
1348     {
1349       gimple insert_point;
1350       if ((!op1def || gimple_nop_p (op1def))
1351            || (op2def && !gimple_nop_p (op2def)
1352                && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1353         insert_point = op2def;
1354       else
1355         insert_point = op1def;
1356       insert_stmt_after (sum, insert_point);
1357     }
1358   update_stmt (sum);
1359
1360   return sum;
1361 }
1362
1363 /* Perform un-distribution of divisions and multiplications.
1364    A * X + B * X is transformed into (A + B) * X and A / X + B / X
1365    to (A + B) / X for real X.
1366
1367    The algorithm is organized as follows.
1368
1369     - First we walk the addition chain *OPS looking for summands that
1370       are defined by a multiplication or a real division.  This results
1371       in the candidates bitmap with relevant indices into *OPS.
1372
1373     - Second we build the chains of multiplications or divisions for
1374       these candidates, counting the number of occurrences of (operand, code)
1375       pairs in all of the candidates chains.
1376
1377     - Third we sort the (operand, code) pairs by number of occurrence and
1378       process them starting with the pair with the most uses.
1379
1380       * For each such pair we walk the candidates again to build a
1381         second candidate bitmap noting all multiplication/division chains
1382         that have at least one occurrence of (operand, code).
1383
1384       * We build an alternate addition chain only covering these
1385         candidates with one (operand, code) operation removed from their
1386         multiplication/division chain.
1387
1388       * The first candidate gets replaced by the alternate addition chain
1389         multiplied/divided by the operand.
1390
1391       * All candidate chains get disabled for further processing and
1392         processing of (operand, code) pairs continues.
1393
1394   The alternate addition chains built are re-processed by the main
1395   reassociation algorithm which allows optimizing a * x * y + b * y * x
1396   to (a + b ) * x * y in one invocation of the reassociation pass.  */
1397
1398 static bool
1399 undistribute_ops_list (enum tree_code opcode,
1400                        vec<operand_entry_t> *ops, struct loop *loop)
1401 {
1402   unsigned int length = ops->length ();
1403   operand_entry_t oe1;
1404   unsigned i, j;
1405   sbitmap candidates, candidates2;
1406   unsigned nr_candidates, nr_candidates2;
1407   sbitmap_iterator sbi0;
1408   vec<operand_entry_t> *subops;
1409   bool changed = false;
1410   int next_oecount_id = 0;
1411
1412   if (length <= 1
1413       || opcode != PLUS_EXPR)
1414     return false;
1415
1416   /* Build a list of candidates to process.  */
1417   candidates = sbitmap_alloc (length);
1418   bitmap_clear (candidates);
1419   nr_candidates = 0;
1420   FOR_EACH_VEC_ELT (*ops, i, oe1)
1421     {
1422       enum tree_code dcode;
1423       gimple oe1def;
1424
1425       if (TREE_CODE (oe1->op) != SSA_NAME)
1426         continue;
1427       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1428       if (!is_gimple_assign (oe1def))
1429         continue;
1430       dcode = gimple_assign_rhs_code (oe1def);
1431       if ((dcode != MULT_EXPR
1432            && dcode != RDIV_EXPR)
1433           || !is_reassociable_op (oe1def, dcode, loop))
1434         continue;
1435
1436       bitmap_set_bit (candidates, i);
1437       nr_candidates++;
1438     }
1439
1440   if (nr_candidates < 2)
1441     {
1442       sbitmap_free (candidates);
1443       return false;
1444     }
1445
1446   if (dump_file && (dump_flags & TDF_DETAILS))
1447     {
1448       fprintf (dump_file, "searching for un-distribute opportunities ");
1449       print_generic_expr (dump_file,
1450         (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1451       fprintf (dump_file, " %d\n", nr_candidates);
1452     }
1453
1454   /* Build linearized sub-operand lists and the counting table.  */
1455   cvec.create (0);
1456
1457   hash_table<oecount_hasher> ctable (15);
1458
1459   /* ??? Macro arguments cannot have multi-argument template types in
1460      them.  This typedef is needed to workaround that limitation.  */
1461   typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1462   subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1463   EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1464     {
1465       gimple oedef;
1466       enum tree_code oecode;
1467       unsigned j;
1468
1469       oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1470       oecode = gimple_assign_rhs_code (oedef);
1471       linearize_expr_tree (&subops[i], oedef,
1472                            associative_tree_code (oecode), false);
1473
1474       FOR_EACH_VEC_ELT (subops[i], j, oe1)
1475         {
1476           oecount c;
1477           int *slot;
1478           int idx;
1479           c.oecode = oecode;
1480           c.cnt = 1;
1481           c.id = next_oecount_id++;
1482           c.op = oe1->op;
1483           cvec.safe_push (c);
1484           idx = cvec.length () + 41;
1485           slot = ctable.find_slot (idx, INSERT);
1486           if (!*slot)
1487             {
1488               *slot = idx;
1489             }
1490           else
1491             {
1492               cvec.pop ();
1493               cvec[*slot - 42].cnt++;
1494             }
1495         }
1496     }
1497
1498   /* Sort the counting table.  */
1499   cvec.qsort (oecount_cmp);
1500
1501   if (dump_file && (dump_flags & TDF_DETAILS))
1502     {
1503       oecount *c;
1504       fprintf (dump_file, "Candidates:\n");
1505       FOR_EACH_VEC_ELT (cvec, j, c)
1506         {
1507           fprintf (dump_file, "  %u %s: ", c->cnt,
1508                    c->oecode == MULT_EXPR
1509                    ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1510           print_generic_expr (dump_file, c->op, 0);
1511           fprintf (dump_file, "\n");
1512         }
1513     }
1514
1515   /* Process the (operand, code) pairs in order of most occurrence.  */
1516   candidates2 = sbitmap_alloc (length);
1517   while (!cvec.is_empty ())
1518     {
1519       oecount *c = &cvec.last ();
1520       if (c->cnt < 2)
1521         break;
1522
1523       /* Now collect the operands in the outer chain that contain
1524          the common operand in their inner chain.  */
1525       bitmap_clear (candidates2);
1526       nr_candidates2 = 0;
1527       EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1528         {
1529           gimple oedef;
1530           enum tree_code oecode;
1531           unsigned j;
1532           tree op = (*ops)[i]->op;
1533
1534           /* If we undistributed in this chain already this may be
1535              a constant.  */
1536           if (TREE_CODE (op) != SSA_NAME)
1537             continue;
1538
1539           oedef = SSA_NAME_DEF_STMT (op);
1540           oecode = gimple_assign_rhs_code (oedef);
1541           if (oecode != c->oecode)
1542             continue;
1543
1544           FOR_EACH_VEC_ELT (subops[i], j, oe1)
1545             {
1546               if (oe1->op == c->op)
1547                 {
1548                   bitmap_set_bit (candidates2, i);
1549                   ++nr_candidates2;
1550                   break;
1551                 }
1552             }
1553         }
1554
1555       if (nr_candidates2 >= 2)
1556         {
1557           operand_entry_t oe1, oe2;
1558           gimple prod;
1559           int first = bitmap_first_set_bit (candidates2);
1560
1561           /* Build the new addition chain.  */
1562           oe1 = (*ops)[first];
1563           if (dump_file && (dump_flags & TDF_DETAILS))
1564             {
1565               fprintf (dump_file, "Building (");
1566               print_generic_expr (dump_file, oe1->op, 0);
1567             }
1568           zero_one_operation (&oe1->op, c->oecode, c->op);
1569           EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1570             {
1571               gimple sum;
1572               oe2 = (*ops)[i];
1573               if (dump_file && (dump_flags & TDF_DETAILS))
1574                 {
1575                   fprintf (dump_file, " + ");
1576                   print_generic_expr (dump_file, oe2->op, 0);
1577                 }
1578               zero_one_operation (&oe2->op, c->oecode, c->op);
1579               sum = build_and_add_sum (TREE_TYPE (oe1->op),
1580                                        oe1->op, oe2->op, opcode);
1581               oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1582               oe2->rank = 0;
1583               oe1->op = gimple_get_lhs (sum);
1584             }
1585
1586           /* Apply the multiplication/division.  */
1587           prod = build_and_add_sum (TREE_TYPE (oe1->op),
1588                                     oe1->op, c->op, c->oecode);
1589           if (dump_file && (dump_flags & TDF_DETAILS))
1590             {
1591               fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1592               print_generic_expr (dump_file, c->op, 0);
1593               fprintf (dump_file, "\n");
1594             }
1595
1596           /* Record it in the addition chain and disable further
1597              undistribution with this op.  */
1598           oe1->op = gimple_assign_lhs (prod);
1599           oe1->rank = get_rank (oe1->op);
1600           subops[first].release ();
1601
1602           changed = true;
1603         }
1604
1605       cvec.pop ();
1606     }
1607
1608   for (i = 0; i < ops->length (); ++i)
1609     subops[i].release ();
1610   free (subops);
1611   cvec.release ();
1612   sbitmap_free (candidates);
1613   sbitmap_free (candidates2);
1614
1615   return changed;
1616 }
1617
1618 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1619    expression, examine the other OPS to see if any of them are comparisons
1620    of the same values, which we may be able to combine or eliminate.
1621    For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
1622
1623 static bool
1624 eliminate_redundant_comparison (enum tree_code opcode,
1625                                 vec<operand_entry_t> *ops,
1626                                 unsigned int currindex,
1627                                 operand_entry_t curr)
1628 {
1629   tree op1, op2;
1630   enum tree_code lcode, rcode;
1631   gimple def1, def2;
1632   int i;
1633   operand_entry_t oe;
1634
1635   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1636     return false;
1637
1638   /* Check that CURR is a comparison.  */
1639   if (TREE_CODE (curr->op) != SSA_NAME)
1640     return false;
1641   def1 = SSA_NAME_DEF_STMT (curr->op);
1642   if (!is_gimple_assign (def1))
1643     return false;
1644   lcode = gimple_assign_rhs_code (def1);
1645   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1646     return false;
1647   op1 = gimple_assign_rhs1 (def1);
1648   op2 = gimple_assign_rhs2 (def1);
1649
1650   /* Now look for a similar comparison in the remaining OPS.  */
1651   for (i = currindex + 1; ops->iterate (i, &oe); i++)
1652     {
1653       tree t;
1654
1655       if (TREE_CODE (oe->op) != SSA_NAME)
1656         continue;
1657       def2 = SSA_NAME_DEF_STMT (oe->op);
1658       if (!is_gimple_assign (def2))
1659         continue;
1660       rcode = gimple_assign_rhs_code (def2);
1661       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1662         continue;
1663
1664       /* If we got here, we have a match.  See if we can combine the
1665          two comparisons.  */
1666       if (opcode == BIT_IOR_EXPR)
1667         t = maybe_fold_or_comparisons (lcode, op1, op2,
1668                                        rcode, gimple_assign_rhs1 (def2),
1669                                        gimple_assign_rhs2 (def2));
1670       else
1671         t = maybe_fold_and_comparisons (lcode, op1, op2,
1672                                         rcode, gimple_assign_rhs1 (def2),
1673                                         gimple_assign_rhs2 (def2));
1674       if (!t)
1675         continue;
1676
1677       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1678          always give us a boolean_type_node value back.  If the original
1679          BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1680          we need to convert.  */
1681       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1682         t = fold_convert (TREE_TYPE (curr->op), t);
1683
1684       if (TREE_CODE (t) != INTEGER_CST
1685           && !operand_equal_p (t, curr->op, 0))
1686         {
1687           enum tree_code subcode;
1688           tree newop1, newop2;
1689           if (!COMPARISON_CLASS_P (t))
1690             continue;
1691           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1692           STRIP_USELESS_TYPE_CONVERSION (newop1);
1693           STRIP_USELESS_TYPE_CONVERSION (newop2);
1694           if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1695             continue;
1696         }
1697
1698       if (dump_file && (dump_flags & TDF_DETAILS))
1699         {
1700           fprintf (dump_file, "Equivalence: ");
1701           print_generic_expr (dump_file, curr->op, 0);
1702           fprintf (dump_file, " %s ", op_symbol_code (opcode));
1703           print_generic_expr (dump_file, oe->op, 0);
1704           fprintf (dump_file, " -> ");
1705           print_generic_expr (dump_file, t, 0);
1706           fprintf (dump_file, "\n");
1707         }
1708
1709       /* Now we can delete oe, as it has been subsumed by the new combined
1710          expression t.  */
1711       ops->ordered_remove (i);
1712       reassociate_stats.ops_eliminated ++;
1713
1714       /* If t is the same as curr->op, we're done.  Otherwise we must
1715          replace curr->op with t.  Special case is if we got a constant
1716          back, in which case we add it to the end instead of in place of
1717          the current entry.  */
1718       if (TREE_CODE (t) == INTEGER_CST)
1719         {
1720           ops->ordered_remove (currindex);
1721           add_to_ops_vec (ops, t);
1722         }
1723       else if (!operand_equal_p (t, curr->op, 0))
1724         {
1725           gimple sum;
1726           enum tree_code subcode;
1727           tree newop1;
1728           tree newop2;
1729           gcc_assert (COMPARISON_CLASS_P (t));
1730           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1731           STRIP_USELESS_TYPE_CONVERSION (newop1);
1732           STRIP_USELESS_TYPE_CONVERSION (newop2);
1733           gcc_checking_assert (is_gimple_val (newop1)
1734                                && is_gimple_val (newop2));
1735           sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1736           curr->op = gimple_get_lhs (sum);
1737         }
1738       return true;
1739     }
1740
1741   return false;
1742 }
1743
1744 /* Perform various identities and other optimizations on the list of
1745    operand entries, stored in OPS.  The tree code for the binary
1746    operation between all the operands is OPCODE.  */
1747
1748 static void
1749 optimize_ops_list (enum tree_code opcode,
1750                    vec<operand_entry_t> *ops)
1751 {
1752   unsigned int length = ops->length ();
1753   unsigned int i;
1754   operand_entry_t oe;
1755   operand_entry_t oelast = NULL;
1756   bool iterate = false;
1757
1758   if (length == 1)
1759     return;
1760
1761   oelast = ops->last ();
1762
1763   /* If the last two are constants, pop the constants off, merge them
1764      and try the next two.  */
1765   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1766     {
1767       operand_entry_t oelm1 = (*ops)[length - 2];
1768
1769       if (oelm1->rank == 0
1770           && is_gimple_min_invariant (oelm1->op)
1771           && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1772                                        TREE_TYPE (oelast->op)))
1773         {
1774           tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1775                                      oelm1->op, oelast->op);
1776
1777           if (folded && is_gimple_min_invariant (folded))
1778             {
1779               if (dump_file && (dump_flags & TDF_DETAILS))
1780                 fprintf (dump_file, "Merging constants\n");
1781
1782               ops->pop ();
1783               ops->pop ();
1784
1785               add_to_ops_vec (ops, folded);
1786               reassociate_stats.constants_eliminated++;
1787
1788               optimize_ops_list (opcode, ops);
1789               return;
1790             }
1791         }
1792     }
1793
1794   eliminate_using_constants (opcode, ops);
1795   oelast = NULL;
1796
1797   for (i = 0; ops->iterate (i, &oe);)
1798     {
1799       bool done = false;
1800
1801       if (eliminate_not_pairs (opcode, ops, i, oe))
1802         return;
1803       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1804           || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1805           || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1806         {
1807           if (done)
1808             return;
1809           iterate = true;
1810           oelast = NULL;
1811           continue;
1812         }
1813       oelast = oe;
1814       i++;
1815     }
1816
1817   length = ops->length ();
1818   oelast = ops->last ();
1819
1820   if (iterate)
1821     optimize_ops_list (opcode, ops);
1822 }
1823
1824 /* The following functions are subroutines to optimize_range_tests and allow
1825    it to try to change a logical combination of comparisons into a range
1826    test.
1827
1828    For example, both
1829         X == 2 || X == 5 || X == 3 || X == 4
1830    and
1831         X >= 2 && X <= 5
1832    are converted to
1833         (unsigned) (X - 2) <= 3
1834
1835    For more information see comments above fold_test_range in fold-const.c,
1836    this implementation is for GIMPLE.  */
1837
1838 struct range_entry
1839 {
1840   tree exp;
1841   tree low;
1842   tree high;
1843   bool in_p;
1844   bool strict_overflow_p;
1845   unsigned int idx, next;
1846 };
1847
1848 /* This is similar to make_range in fold-const.c, but on top of
1849    GIMPLE instead of trees.  If EXP is non-NULL, it should be
1850    an SSA_NAME and STMT argument is ignored, otherwise STMT
1851    argument should be a GIMPLE_COND.  */
1852
1853 static void
1854 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1855 {
1856   int in_p;
1857   tree low, high;
1858   bool is_bool, strict_overflow_p;
1859
1860   r->exp = NULL_TREE;
1861   r->in_p = false;
1862   r->strict_overflow_p = false;
1863   r->low = NULL_TREE;
1864   r->high = NULL_TREE;
1865   if (exp != NULL_TREE
1866       && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1867     return;
1868
1869   /* Start with simply saying "EXP != 0" and then look at the code of EXP
1870      and see if we can refine the range.  Some of the cases below may not
1871      happen, but it doesn't seem worth worrying about this.  We "continue"
1872      the outer loop when we've changed something; otherwise we "break"
1873      the switch, which will "break" the while.  */
1874   low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1875   high = low;
1876   in_p = 0;
1877   strict_overflow_p = false;
1878   is_bool = false;
1879   if (exp == NULL_TREE)
1880     is_bool = true;
1881   else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1882     {
1883       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1884         is_bool = true;
1885       else
1886         return;
1887     }
1888   else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1889     is_bool = true;
1890
1891   while (1)
1892     {
1893       enum tree_code code;
1894       tree arg0, arg1, exp_type;
1895       tree nexp;
1896       location_t loc;
1897
1898       if (exp != NULL_TREE)
1899         {
1900           if (TREE_CODE (exp) != SSA_NAME
1901               || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1902             break;
1903
1904           stmt = SSA_NAME_DEF_STMT (exp);
1905           if (!is_gimple_assign (stmt))
1906             break;
1907
1908           code = gimple_assign_rhs_code (stmt);
1909           arg0 = gimple_assign_rhs1 (stmt);
1910           arg1 = gimple_assign_rhs2 (stmt);
1911           exp_type = TREE_TYPE (exp);
1912         }
1913       else
1914         {
1915           code = gimple_cond_code (stmt);
1916           arg0 = gimple_cond_lhs (stmt);
1917           arg1 = gimple_cond_rhs (stmt);
1918           exp_type = boolean_type_node;
1919         }
1920
1921       if (TREE_CODE (arg0) != SSA_NAME)
1922         break;
1923       loc = gimple_location (stmt);
1924       switch (code)
1925         {
1926         case BIT_NOT_EXPR:
1927           if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1928               /* Ensure the range is either +[-,0], +[0,0],
1929                  -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1930                  -[1,1].  If it is e.g. +[-,-] or -[-,-]
1931                  or similar expression of unconditional true or
1932                  false, it should not be negated.  */
1933               && ((high && integer_zerop (high))
1934                   || (low && integer_onep (low))))
1935             {
1936               in_p = !in_p;
1937               exp = arg0;
1938               continue;
1939             }
1940           break;
1941         case SSA_NAME:
1942           exp = arg0;
1943           continue;
1944         CASE_CONVERT:
1945           if (is_bool)
1946             goto do_default;
1947           if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1948             {
1949               if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1950                 is_bool = true;
1951               else
1952                 return;
1953             }
1954           else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1955             is_bool = true;
1956           goto do_default;
1957         case EQ_EXPR:
1958         case NE_EXPR:
1959         case LT_EXPR:
1960         case LE_EXPR:
1961         case GE_EXPR:
1962         case GT_EXPR:
1963           is_bool = true;
1964           /* FALLTHRU */
1965         default:
1966           if (!is_bool)
1967             return;
1968         do_default:
1969           nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1970                                   &low, &high, &in_p,
1971                                   &strict_overflow_p);
1972           if (nexp != NULL_TREE)
1973             {
1974               exp = nexp;
1975               gcc_assert (TREE_CODE (exp) == SSA_NAME);
1976               continue;
1977             }
1978           break;
1979         }
1980       break;
1981     }
1982   if (is_bool)
1983     {
1984       r->exp = exp;
1985       r->in_p = in_p;
1986       r->low = low;
1987       r->high = high;
1988       r->strict_overflow_p = strict_overflow_p;
1989     }
1990 }
1991
1992 /* Comparison function for qsort.  Sort entries
1993    without SSA_NAME exp first, then with SSA_NAMEs sorted
1994    by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1995    by increasing ->low and if ->low is the same, by increasing
1996    ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
1997    maximum.  */
1998
1999 static int
2000 range_entry_cmp (const void *a, const void *b)
2001 {
2002   const struct range_entry *p = (const struct range_entry *) a;
2003   const struct range_entry *q = (const struct range_entry *) b;
2004
2005   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2006     {
2007       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2008         {
2009           /* Group range_entries for the same SSA_NAME together.  */
2010           if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2011             return -1;
2012           else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2013             return 1;
2014           /* If ->low is different, NULL low goes first, then by
2015              ascending low.  */
2016           if (p->low != NULL_TREE)
2017             {
2018               if (q->low != NULL_TREE)
2019                 {
2020                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
2021                                           p->low, q->low);
2022                   if (tem && integer_onep (tem))
2023                     return -1;
2024                   tem = fold_binary (GT_EXPR, boolean_type_node,
2025                                      p->low, q->low);
2026                   if (tem && integer_onep (tem))
2027                     return 1;
2028                 }
2029               else
2030                 return 1;
2031             }
2032           else if (q->low != NULL_TREE)
2033             return -1;
2034           /* If ->high is different, NULL high goes last, before that by
2035              ascending high.  */
2036           if (p->high != NULL_TREE)
2037             {
2038               if (q->high != NULL_TREE)
2039                 {
2040                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
2041                                           p->high, q->high);
2042                   if (tem && integer_onep (tem))
2043                     return -1;
2044                   tem = fold_binary (GT_EXPR, boolean_type_node,
2045                                      p->high, q->high);
2046                   if (tem && integer_onep (tem))
2047                     return 1;
2048                 }
2049               else
2050                 return -1;
2051             }
2052           else if (p->high != NULL_TREE)
2053             return 1;
2054           /* If both ranges are the same, sort below by ascending idx.  */
2055         }
2056       else
2057         return 1;
2058     }
2059   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2060     return -1;
2061
2062   if (p->idx < q->idx)
2063     return -1;
2064   else
2065     {
2066       gcc_checking_assert (p->idx > q->idx);
2067       return 1;
2068     }
2069 }
2070
2071 /* Helper routine of optimize_range_test.
2072    [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2073    RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2074    OPCODE and OPS are arguments of optimize_range_tests.  Return
2075    true if the range merge has been successful.
2076    If OPCODE is ERROR_MARK, this is called from within
2077    maybe_optimize_range_tests and is performing inter-bb range optimization.
2078    In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2079    oe->rank.  */
2080
2081 static bool
2082 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2083                    unsigned int count, enum tree_code opcode,
2084                    vec<operand_entry_t> *ops, tree exp, bool in_p,
2085                    tree low, tree high, bool strict_overflow_p)
2086 {
2087   operand_entry_t oe = (*ops)[range->idx];
2088   tree op = oe->op;
2089   gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2090     last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2091   location_t loc = gimple_location (stmt);
2092   tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2093   tree tem = build_range_check (loc, optype, exp, in_p, low, high);
2094   enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2095   gimple_stmt_iterator gsi;
2096
2097   if (tem == NULL_TREE)
2098     return false;
2099
2100   if (strict_overflow_p && issue_strict_overflow_warning (wc))
2101     warning_at (loc, OPT_Wstrict_overflow,
2102                 "assuming signed overflow does not occur "
2103                 "when simplifying range test");
2104
2105   if (dump_file && (dump_flags & TDF_DETAILS))
2106     {
2107       struct range_entry *r;
2108       fprintf (dump_file, "Optimizing range tests ");
2109       print_generic_expr (dump_file, range->exp, 0);
2110       fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2111       print_generic_expr (dump_file, range->low, 0);
2112       fprintf (dump_file, ", ");
2113       print_generic_expr (dump_file, range->high, 0);
2114       fprintf (dump_file, "]");
2115       for (r = otherrange; r < otherrange + count; r++)
2116         {
2117           fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2118           print_generic_expr (dump_file, r->low, 0);
2119           fprintf (dump_file, ", ");
2120           print_generic_expr (dump_file, r->high, 0);
2121           fprintf (dump_file, "]");
2122         }
2123       fprintf (dump_file, "\n into ");
2124       print_generic_expr (dump_file, tem, 0);
2125       fprintf (dump_file, "\n");
2126     }
2127
2128   if (opcode == BIT_IOR_EXPR
2129       || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2130     tem = invert_truthvalue_loc (loc, tem);
2131
2132   tem = fold_convert_loc (loc, optype, tem);
2133   gsi = gsi_for_stmt (stmt);
2134   /* In rare cases range->exp can be equal to lhs of stmt.
2135      In that case we have to insert after the stmt rather then before
2136      it.  */
2137   if (op == range->exp)
2138     tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2139                                     GSI_CONTINUE_LINKING);
2140   else
2141     {
2142       tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2143                                       GSI_SAME_STMT);
2144       gsi_prev (&gsi);
2145     }
2146   for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2147     if (gimple_uid (gsi_stmt (gsi)))
2148       break;
2149     else
2150       gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2151
2152   oe->op = tem;
2153   range->exp = exp;
2154   range->low = low;
2155   range->high = high;
2156   range->in_p = in_p;
2157   range->strict_overflow_p = false;
2158
2159   for (range = otherrange; range < otherrange + count; range++)
2160     {
2161       oe = (*ops)[range->idx];
2162       /* Now change all the other range test immediate uses, so that
2163          those tests will be optimized away.  */
2164       if (opcode == ERROR_MARK)
2165         {
2166           if (oe->op)
2167             oe->op = build_int_cst (TREE_TYPE (oe->op),
2168                                     oe->rank == BIT_IOR_EXPR ? 0 : 1);
2169           else
2170             oe->op = (oe->rank == BIT_IOR_EXPR
2171                       ? boolean_false_node : boolean_true_node);
2172         }
2173       else
2174         oe->op = error_mark_node;
2175       range->exp = NULL_TREE;
2176     }
2177   return true;
2178 }
2179
2180 /* Optimize X == CST1 || X == CST2
2181    if popcount (CST1 ^ CST2) == 1 into
2182    (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2183    Similarly for ranges.  E.g.
2184    X != 2 && X != 3 && X != 10 && X != 11
2185    will be transformed by the previous optimization into
2186    !((X - 2U) <= 1U || (X - 10U) <= 1U)
2187    and this loop can transform that into
2188    !(((X & ~8) - 2U) <= 1U).  */
2189
2190 static bool
2191 optimize_range_tests_xor (enum tree_code opcode, tree type,
2192                           tree lowi, tree lowj, tree highi, tree highj,
2193                           vec<operand_entry_t> *ops,
2194                           struct range_entry *rangei,
2195                           struct range_entry *rangej)
2196 {
2197   tree lowxor, highxor, tem, exp;
2198   /* Check highi ^ lowi == highj ^ lowj and
2199      popcount (highi ^ lowi) == 1.  */
2200   lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2201   if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2202     return false;
2203   if (tree_log2 (lowxor) < 0)
2204     return false;
2205   highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2206   if (!tree_int_cst_equal (lowxor, highxor))
2207     return false;
2208
2209   tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2210   exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2211   lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2212   highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2213   if (update_range_test (rangei, rangej, 1, opcode, ops, exp,
2214                          rangei->in_p, lowj, highj,
2215                          rangei->strict_overflow_p
2216                          || rangej->strict_overflow_p))
2217     return true;
2218   return false;
2219 }
2220
2221 /* Optimize X == CST1 || X == CST2
2222    if popcount (CST2 - CST1) == 1 into
2223    ((X - CST1) & ~(CST2 - CST1)) == 0.
2224    Similarly for ranges.  E.g.
2225    X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2226    || X == 75 || X == 45
2227    will be transformed by the previous optimization into
2228    (X - 43U) <= 3U || (X - 75U) <= 3U
2229    and this loop can transform that into
2230    ((X - 43U) & ~(75U - 43U)) <= 3U.  */
2231 static bool
2232 optimize_range_tests_diff (enum tree_code opcode, tree type,
2233                             tree lowi, tree lowj, tree highi, tree highj,
2234                             vec<operand_entry_t> *ops,
2235                             struct range_entry *rangei,
2236                             struct range_entry *rangej)
2237 {
2238   tree tem1, tem2, mask;
2239   /* Check highi - lowi == highj - lowj.  */
2240   tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2241   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2242     return false;
2243   tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2244   if (!tree_int_cst_equal (tem1, tem2))
2245     return false;
2246   /* Check popcount (lowj - lowi) == 1.  */
2247   tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2248   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2249     return false;
2250   if (tree_log2 (tem1) < 0)
2251     return false;
2252
2253   type = unsigned_type_for (type);
2254   tem1 = fold_convert (type, tem1);
2255   tem2 = fold_convert (type, tem2);
2256   lowi = fold_convert (type, lowi);
2257   mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2258   tem1 = fold_binary (MINUS_EXPR, type,
2259                       fold_convert (type, rangei->exp), lowi);
2260   tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2261   lowj = build_int_cst (type, 0);
2262   if (update_range_test (rangei, rangej, 1, opcode, ops, tem1,
2263                          rangei->in_p, lowj, tem2,
2264                          rangei->strict_overflow_p
2265                          || rangej->strict_overflow_p))
2266     return true;
2267   return false;
2268 }
2269
2270 /* It does some common checks for function optimize_range_tests_xor and
2271    optimize_range_tests_diff.
2272    If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2273    Else it calls optimize_range_tests_diff.  */
2274
2275 static bool
2276 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2277                         bool optimize_xor, vec<operand_entry_t> *ops,
2278                         struct range_entry *ranges)
2279 {
2280   int i, j;
2281   bool any_changes = false;
2282   for (i = first; i < length; i++)
2283     {
2284       tree lowi, highi, lowj, highj, type, tem;
2285
2286       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2287         continue;
2288       type = TREE_TYPE (ranges[i].exp);
2289       if (!INTEGRAL_TYPE_P (type))
2290         continue;
2291       lowi = ranges[i].low;
2292       if (lowi == NULL_TREE)
2293         lowi = TYPE_MIN_VALUE (type);
2294       highi = ranges[i].high;
2295       if (highi == NULL_TREE)
2296         continue;
2297       for (j = i + 1; j < length && j < i + 64; j++)
2298         {
2299           bool changes;
2300           if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2301             continue;
2302           lowj = ranges[j].low;
2303           if (lowj == NULL_TREE)
2304             continue;
2305           highj = ranges[j].high;
2306           if (highj == NULL_TREE)
2307             highj = TYPE_MAX_VALUE (type);
2308           /* Check lowj > highi.  */
2309           tem = fold_binary (GT_EXPR, boolean_type_node,
2310                              lowj, highi);
2311           if (tem == NULL_TREE || !integer_onep (tem))
2312             continue;
2313           if (optimize_xor)
2314             changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2315                                                 highi, highj, ops,
2316                                                 ranges + i, ranges + j);
2317           else
2318             changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2319                                                  highi, highj, ops,
2320                                                  ranges + i, ranges + j);
2321           if (changes)
2322             {
2323               any_changes = true;
2324               break;
2325             }
2326         }
2327     }
2328   return any_changes;
2329 }
2330
2331 /* Optimize range tests, similarly how fold_range_test optimizes
2332    it on trees.  The tree code for the binary
2333    operation between all the operands is OPCODE.
2334    If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2335    maybe_optimize_range_tests for inter-bb range optimization.
2336    In that case if oe->op is NULL, oe->id is bb->index whose
2337    GIMPLE_COND is && or ||ed into the test, and oe->rank says
2338    the actual opcode.  */
2339
2340 static bool
2341 optimize_range_tests (enum tree_code opcode,
2342                       vec<operand_entry_t> *ops)
2343 {
2344   unsigned int length = ops->length (), i, j, first;
2345   operand_entry_t oe;
2346   struct range_entry *ranges;
2347   bool any_changes = false;
2348
2349   if (length == 1)
2350     return false;
2351
2352   ranges = XNEWVEC (struct range_entry, length);
2353   for (i = 0; i < length; i++)
2354     {
2355       oe = (*ops)[i];
2356       ranges[i].idx = i;
2357       init_range_entry (ranges + i, oe->op,
2358                         oe->op ? NULL :
2359                           last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2360       /* For | invert it now, we will invert it again before emitting
2361          the optimized expression.  */
2362       if (opcode == BIT_IOR_EXPR
2363           || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2364         ranges[i].in_p = !ranges[i].in_p;
2365     }
2366
2367   qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2368   for (i = 0; i < length; i++)
2369     if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2370       break;
2371
2372   /* Try to merge ranges.  */
2373   for (first = i; i < length; i++)
2374     {
2375       tree low = ranges[i].low;
2376       tree high = ranges[i].high;
2377       int in_p = ranges[i].in_p;
2378       bool strict_overflow_p = ranges[i].strict_overflow_p;
2379       int update_fail_count = 0;
2380
2381       for (j = i + 1; j < length; j++)
2382         {
2383           if (ranges[i].exp != ranges[j].exp)
2384             break;
2385           if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2386                              ranges[j].in_p, ranges[j].low, ranges[j].high))
2387             break;
2388           strict_overflow_p |= ranges[j].strict_overflow_p;
2389         }
2390
2391       if (j == i + 1)
2392         continue;
2393
2394       if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2395                              ops, ranges[i].exp, in_p, low, high,
2396                              strict_overflow_p))
2397         {
2398           i = j - 1;
2399           any_changes = true;
2400         }
2401       /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2402          while update_range_test would fail.  */
2403       else if (update_fail_count == 64)
2404         i = j - 1;
2405       else
2406         ++update_fail_count;
2407     }
2408
2409   any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2410                                          ops, ranges);
2411
2412   if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2413     any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2414                                            ops, ranges);
2415
2416   if (any_changes && opcode != ERROR_MARK)
2417     {
2418       j = 0;
2419       FOR_EACH_VEC_ELT (*ops, i, oe)
2420         {
2421           if (oe->op == error_mark_node)
2422             continue;
2423           else if (i != j)
2424             (*ops)[j] = oe;
2425           j++;
2426         }
2427       ops->truncate (j);
2428     }
2429
2430   XDELETEVEC (ranges);
2431   return any_changes;
2432 }
2433
2434 /* Return true if STMT is a cast like:
2435    <bb N>:
2436    ...
2437    _123 = (int) _234;
2438
2439    <bb M>:
2440    # _345 = PHI <_123(N), 1(...), 1(...)>
2441    where _234 has bool type, _123 has single use and
2442    bb N has a single successor M.  This is commonly used in
2443    the last block of a range test.  */
2444
2445 static bool
2446 final_range_test_p (gimple stmt)
2447 {
2448   basic_block bb, rhs_bb;
2449   edge e;
2450   tree lhs, rhs;
2451   use_operand_p use_p;
2452   gimple use_stmt;
2453
2454   if (!gimple_assign_cast_p (stmt))
2455     return false;
2456   bb = gimple_bb (stmt);
2457   if (!single_succ_p (bb))
2458     return false;
2459   e = single_succ_edge (bb);
2460   if (e->flags & EDGE_COMPLEX)
2461     return false;
2462
2463   lhs = gimple_assign_lhs (stmt);
2464   rhs = gimple_assign_rhs1 (stmt);
2465   if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2466       || TREE_CODE (rhs) != SSA_NAME
2467       || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2468     return false;
2469
2470   /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
2471   if (!single_imm_use (lhs, &use_p, &use_stmt))
2472     return false;
2473
2474   if (gimple_code (use_stmt) != GIMPLE_PHI
2475       || gimple_bb (use_stmt) != e->dest)
2476     return false;
2477
2478   /* And that the rhs is defined in the same loop.  */
2479   rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2480   if (rhs_bb == NULL
2481       || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2482     return false;
2483
2484   return true;
2485 }
2486
2487 /* Return true if BB is suitable basic block for inter-bb range test
2488    optimization.  If BACKWARD is true, BB should be the only predecessor
2489    of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2490    or compared with to find a common basic block to which all conditions
2491    branch to if true resp. false.  If BACKWARD is false, TEST_BB should
2492    be the only predecessor of BB.  */
2493
2494 static bool
2495 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2496                   bool backward)
2497 {
2498   edge_iterator ei, ei2;
2499   edge e, e2;
2500   gimple stmt;
2501   gimple_stmt_iterator gsi;
2502   bool other_edge_seen = false;
2503   bool is_cond;
2504
2505   if (test_bb == bb)
2506     return false;
2507   /* Check last stmt first.  */
2508   stmt = last_stmt (bb);
2509   if (stmt == NULL
2510       || (gimple_code (stmt) != GIMPLE_COND
2511           && (backward || !final_range_test_p (stmt)))
2512       || gimple_visited_p (stmt)
2513       || stmt_could_throw_p (stmt)
2514       || *other_bb == bb)
2515     return false;
2516   is_cond = gimple_code (stmt) == GIMPLE_COND;
2517   if (is_cond)
2518     {
2519       /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2520          goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2521          to *OTHER_BB (if not set yet, try to find it out).  */
2522       if (EDGE_COUNT (bb->succs) != 2)
2523         return false;
2524       FOR_EACH_EDGE (e, ei, bb->succs)
2525         {
2526           if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2527             return false;
2528           if (e->dest == test_bb)
2529             {
2530               if (backward)
2531                 continue;
2532               else
2533                 return false;
2534             }
2535           if (e->dest == bb)
2536             return false;
2537           if (*other_bb == NULL)
2538             {
2539               FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2540                 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2541                   return false;
2542                 else if (e->dest == e2->dest)
2543                   *other_bb = e->dest;
2544               if (*other_bb == NULL)
2545                 return false;
2546             }
2547           if (e->dest == *other_bb)
2548             other_edge_seen = true;
2549           else if (backward)
2550             return false;
2551         }
2552       if (*other_bb == NULL || !other_edge_seen)
2553         return false;
2554     }
2555   else if (single_succ (bb) != *other_bb)
2556     return false;
2557
2558   /* Now check all PHIs of *OTHER_BB.  */
2559   e = find_edge (bb, *other_bb);
2560   e2 = find_edge (test_bb, *other_bb);
2561   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2562     {
2563       gimple phi = gsi_stmt (gsi);
2564       /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2565          corresponding to BB and TEST_BB predecessor must be the same.  */
2566       if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2567                             gimple_phi_arg_def (phi, e2->dest_idx), 0))
2568         {
2569           /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2570              one of the PHIs should have the lhs of the last stmt in
2571              that block as PHI arg and that PHI should have 0 or 1
2572              corresponding to it in all other range test basic blocks
2573              considered.  */
2574           if (!is_cond)
2575             {
2576               if (gimple_phi_arg_def (phi, e->dest_idx)
2577                   == gimple_assign_lhs (stmt)
2578                   && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2579                       || integer_onep (gimple_phi_arg_def (phi,
2580                                                            e2->dest_idx))))
2581                 continue;
2582             }
2583           else
2584             {
2585               gimple test_last = last_stmt (test_bb);
2586               if (gimple_code (test_last) != GIMPLE_COND
2587                   && gimple_phi_arg_def (phi, e2->dest_idx)
2588                      == gimple_assign_lhs (test_last)
2589                   && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2590                       || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2591                 continue;
2592             }
2593
2594           return false;
2595         }
2596     }
2597   return true;
2598 }
2599
2600 /* Return true if BB doesn't have side-effects that would disallow
2601    range test optimization, all SSA_NAMEs set in the bb are consumed
2602    in the bb and there are no PHIs.  */
2603
2604 static bool
2605 no_side_effect_bb (basic_block bb)
2606 {
2607   gimple_stmt_iterator gsi;
2608   gimple last;
2609
2610   if (!gimple_seq_empty_p (phi_nodes (bb)))
2611     return false;
2612   last = last_stmt (bb);
2613   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2614     {
2615       gimple stmt = gsi_stmt (gsi);
2616       tree lhs;
2617       imm_use_iterator imm_iter;
2618       use_operand_p use_p;
2619
2620       if (is_gimple_debug (stmt))
2621         continue;
2622       if (gimple_has_side_effects (stmt))
2623         return false;
2624       if (stmt == last)
2625         return true;
2626       if (!is_gimple_assign (stmt))
2627         return false;
2628       lhs = gimple_assign_lhs (stmt);
2629       if (TREE_CODE (lhs) != SSA_NAME)
2630         return false;
2631       if (gimple_assign_rhs_could_trap_p (stmt))
2632         return false;
2633       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2634         {
2635           gimple use_stmt = USE_STMT (use_p);
2636           if (is_gimple_debug (use_stmt))
2637             continue;
2638           if (gimple_bb (use_stmt) != bb)
2639             return false;
2640         }
2641     }
2642   return false;
2643 }
2644
2645 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2646    return true and fill in *OPS recursively.  */
2647
2648 static bool
2649 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2650          struct loop *loop)
2651 {
2652   gimple stmt = SSA_NAME_DEF_STMT (var);
2653   tree rhs[2];
2654   int i;
2655
2656   if (!is_reassociable_op (stmt, code, loop))
2657     return false;
2658
2659   rhs[0] = gimple_assign_rhs1 (stmt);
2660   rhs[1] = gimple_assign_rhs2 (stmt);
2661   gimple_set_visited (stmt, true);
2662   for (i = 0; i < 2; i++)
2663     if (TREE_CODE (rhs[i]) == SSA_NAME
2664         && !get_ops (rhs[i], code, ops, loop)
2665         && has_single_use (rhs[i]))
2666       {
2667         operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2668
2669         oe->op = rhs[i];
2670         oe->rank = code;
2671         oe->id = 0;
2672         oe->count = 1;
2673         ops->safe_push (oe);
2674       }
2675   return true;
2676 }
2677
2678 /* Find the ops that were added by get_ops starting from VAR, see if
2679    they were changed during update_range_test and if yes, create new
2680    stmts.  */
2681
2682 static tree
2683 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2684             unsigned int *pidx, struct loop *loop)
2685 {
2686   gimple stmt = SSA_NAME_DEF_STMT (var);
2687   tree rhs[4];
2688   int i;
2689
2690   if (!is_reassociable_op (stmt, code, loop))
2691     return NULL;
2692
2693   rhs[0] = gimple_assign_rhs1 (stmt);
2694   rhs[1] = gimple_assign_rhs2 (stmt);
2695   rhs[2] = rhs[0];
2696   rhs[3] = rhs[1];
2697   for (i = 0; i < 2; i++)
2698     if (TREE_CODE (rhs[i]) == SSA_NAME)
2699       {
2700         rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2701         if (rhs[2 + i] == NULL_TREE)
2702           {
2703             if (has_single_use (rhs[i]))
2704               rhs[2 + i] = ops[(*pidx)++]->op;
2705             else
2706               rhs[2 + i] = rhs[i];
2707           }
2708       }
2709   if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2710       && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2711     {
2712       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2713       var = make_ssa_name (TREE_TYPE (var), NULL);
2714       gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
2715                                                var, rhs[2], rhs[3]);
2716       gimple_set_uid (g, gimple_uid (stmt));
2717       gimple_set_visited (g, true);
2718       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2719     }
2720   return var;
2721 }
2722
2723 /* Structure to track the initial value passed to get_ops and
2724    the range in the ops vector for each basic block.  */
2725
2726 struct inter_bb_range_test_entry
2727 {
2728   tree op;
2729   unsigned int first_idx, last_idx;
2730 };
2731
2732 /* Inter-bb range test optimization.  */
2733
2734 static void
2735 maybe_optimize_range_tests (gimple stmt)
2736 {
2737   basic_block first_bb = gimple_bb (stmt);
2738   basic_block last_bb = first_bb;
2739   basic_block other_bb = NULL;
2740   basic_block bb;
2741   edge_iterator ei;
2742   edge e;
2743   auto_vec<operand_entry_t> ops;
2744   auto_vec<inter_bb_range_test_entry> bbinfo;
2745   bool any_changes = false;
2746
2747   /* Consider only basic blocks that end with GIMPLE_COND or
2748      a cast statement satisfying final_range_test_p.  All
2749      but the last bb in the first_bb .. last_bb range
2750      should end with GIMPLE_COND.  */
2751   if (gimple_code (stmt) == GIMPLE_COND)
2752     {
2753       if (EDGE_COUNT (first_bb->succs) != 2)
2754         return;
2755     }
2756   else if (final_range_test_p (stmt))
2757     other_bb = single_succ (first_bb);
2758   else
2759     return;
2760
2761   if (stmt_could_throw_p (stmt))
2762     return;
2763
2764   /* As relative ordering of post-dominator sons isn't fixed,
2765      maybe_optimize_range_tests can be called first on any
2766      bb in the range we want to optimize.  So, start searching
2767      backwards, if first_bb can be set to a predecessor.  */
2768   while (single_pred_p (first_bb))
2769     {
2770       basic_block pred_bb = single_pred (first_bb);
2771       if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2772         break;
2773       if (!no_side_effect_bb (first_bb))
2774         break;
2775       first_bb = pred_bb;
2776     }
2777   /* If first_bb is last_bb, other_bb hasn't been computed yet.
2778      Before starting forward search in last_bb successors, find
2779      out the other_bb.  */
2780   if (first_bb == last_bb)
2781     {
2782       other_bb = NULL;
2783       /* As non-GIMPLE_COND last stmt always terminates the range,
2784          if forward search didn't discover anything, just give up.  */
2785       if (gimple_code (stmt) != GIMPLE_COND)
2786         return;
2787       /* Look at both successors.  Either it ends with a GIMPLE_COND
2788          and satisfies suitable_cond_bb, or ends with a cast and
2789          other_bb is that cast's successor.  */
2790       FOR_EACH_EDGE (e, ei, first_bb->succs)
2791         if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2792             || e->dest == first_bb)
2793           return;
2794         else if (single_pred_p (e->dest))
2795           {
2796             stmt = last_stmt (e->dest);
2797             if (stmt
2798                 && gimple_code (stmt) == GIMPLE_COND
2799                 && EDGE_COUNT (e->dest->succs) == 2)
2800               {
2801                 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2802                   break;
2803                 else
2804                   other_bb = NULL;
2805               }
2806             else if (stmt
2807                      && final_range_test_p (stmt)
2808                      && find_edge (first_bb, single_succ (e->dest)))
2809               {
2810                 other_bb = single_succ (e->dest);
2811                 if (other_bb == first_bb)
2812                   other_bb = NULL;
2813               }
2814           }
2815       if (other_bb == NULL)
2816         return;
2817     }
2818   /* Now do the forward search, moving last_bb to successor bbs
2819      that aren't other_bb.  */
2820   while (EDGE_COUNT (last_bb->succs) == 2)
2821     {
2822       FOR_EACH_EDGE (e, ei, last_bb->succs)
2823         if (e->dest != other_bb)
2824           break;
2825       if (e == NULL)
2826         break;
2827       if (!single_pred_p (e->dest))
2828         break;
2829       if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2830         break;
2831       if (!no_side_effect_bb (e->dest))
2832         break;
2833       last_bb = e->dest;
2834     }
2835   if (first_bb == last_bb)
2836     return;
2837   /* Here basic blocks first_bb through last_bb's predecessor
2838      end with GIMPLE_COND, all of them have one of the edges to
2839      other_bb and another to another block in the range,
2840      all blocks except first_bb don't have side-effects and
2841      last_bb ends with either GIMPLE_COND, or cast satisfying
2842      final_range_test_p.  */
2843   for (bb = last_bb; ; bb = single_pred (bb))
2844     {
2845       enum tree_code code;
2846       tree lhs, rhs;
2847       inter_bb_range_test_entry bb_ent;
2848
2849       bb_ent.op = NULL_TREE;
2850       bb_ent.first_idx = ops.length ();
2851       bb_ent.last_idx = bb_ent.first_idx;
2852       e = find_edge (bb, other_bb);
2853       stmt = last_stmt (bb);
2854       gimple_set_visited (stmt, true);
2855       if (gimple_code (stmt) != GIMPLE_COND)
2856         {
2857           use_operand_p use_p;
2858           gimple phi;
2859           edge e2;
2860           unsigned int d;
2861
2862           lhs = gimple_assign_lhs (stmt);
2863           rhs = gimple_assign_rhs1 (stmt);
2864           gcc_assert (bb == last_bb);
2865
2866           /* stmt is
2867              _123 = (int) _234;
2868
2869              followed by:
2870              <bb M>:
2871              # _345 = PHI <_123(N), 1(...), 1(...)>
2872
2873              or 0 instead of 1.  If it is 0, the _234
2874              range test is anded together with all the
2875              other range tests, if it is 1, it is ored with
2876              them.  */
2877           single_imm_use (lhs, &use_p, &phi);
2878           gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2879           e2 = find_edge (first_bb, other_bb);
2880           d = e2->dest_idx;
2881           gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2882           if (integer_zerop (gimple_phi_arg_def (phi, d)))
2883             code = BIT_AND_EXPR;
2884           else
2885             {
2886               gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2887               code = BIT_IOR_EXPR;
2888             }
2889
2890           /* If _234 SSA_NAME_DEF_STMT is
2891              _234 = _567 | _789;
2892              (or &, corresponding to 1/0 in the phi arguments,
2893              push into ops the individual range test arguments
2894              of the bitwise or resp. and, recursively.  */
2895           if (!get_ops (rhs, code, &ops,
2896                         loop_containing_stmt (stmt))
2897               && has_single_use (rhs))
2898             {
2899               /* Otherwise, push the _234 range test itself.  */
2900               operand_entry_t oe
2901                 = (operand_entry_t) pool_alloc (operand_entry_pool);
2902
2903               oe->op = rhs;
2904               oe->rank = code;
2905               oe->id = 0;
2906               oe->count = 1;
2907               ops.safe_push (oe);
2908               bb_ent.last_idx++;
2909             }
2910           else
2911             bb_ent.last_idx = ops.length ();
2912           bb_ent.op = rhs;
2913           bbinfo.safe_push (bb_ent);
2914           continue;
2915         }
2916       /* Otherwise stmt is GIMPLE_COND.  */
2917       code = gimple_cond_code (stmt);
2918       lhs = gimple_cond_lhs (stmt);
2919       rhs = gimple_cond_rhs (stmt);
2920       if (TREE_CODE (lhs) == SSA_NAME
2921           && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2922           && ((code != EQ_EXPR && code != NE_EXPR)
2923               || rhs != boolean_false_node
2924                  /* Either push into ops the individual bitwise
2925                     or resp. and operands, depending on which
2926                     edge is other_bb.  */
2927               || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2928                                  ^ (code == EQ_EXPR))
2929                                 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2930                            loop_containing_stmt (stmt))))
2931         {
2932           /* Or push the GIMPLE_COND stmt itself.  */
2933           operand_entry_t oe
2934             = (operand_entry_t) pool_alloc (operand_entry_pool);
2935
2936           oe->op = NULL;
2937           oe->rank = (e->flags & EDGE_TRUE_VALUE)
2938                      ? BIT_IOR_EXPR : BIT_AND_EXPR;
2939           /* oe->op = NULL signs that there is no SSA_NAME
2940              for the range test, and oe->id instead is the
2941              basic block number, at which's end the GIMPLE_COND
2942              is.  */
2943           oe->id = bb->index;
2944           oe->count = 1;
2945           ops.safe_push (oe);
2946           bb_ent.op = NULL;
2947           bb_ent.last_idx++;
2948         }
2949       else if (ops.length () > bb_ent.first_idx)
2950         {
2951           bb_ent.op = lhs;
2952           bb_ent.last_idx = ops.length ();
2953         }
2954       bbinfo.safe_push (bb_ent);
2955       if (bb == first_bb)
2956         break;
2957     }
2958   if (ops.length () > 1)
2959     any_changes = optimize_range_tests (ERROR_MARK, &ops);
2960   if (any_changes)
2961     {
2962       unsigned int idx;
2963       /* update_ops relies on has_single_use predicates returning the
2964          same values as it did during get_ops earlier.  Additionally it
2965          never removes statements, only adds new ones and it should walk
2966          from the single imm use and check the predicate already before
2967          making those changes.
2968          On the other side, the handling of GIMPLE_COND directly can turn
2969          previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
2970          it needs to be done in a separate loop afterwards.  */
2971       for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2972         {
2973           if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2974               && bbinfo[idx].op != NULL_TREE)
2975             {
2976               tree new_op;
2977
2978               stmt = last_stmt (bb);
2979               new_op = update_ops (bbinfo[idx].op,
2980                                    (enum tree_code)
2981                                    ops[bbinfo[idx].first_idx]->rank,
2982                                    ops, &bbinfo[idx].first_idx,
2983                                    loop_containing_stmt (stmt));
2984               if (new_op == NULL_TREE)
2985                 {
2986                   gcc_assert (bb == last_bb);
2987                   new_op = ops[bbinfo[idx].first_idx++]->op;
2988                 }
2989               if (bbinfo[idx].op != new_op)
2990                 {
2991                   imm_use_iterator iter;
2992                   use_operand_p use_p;
2993                   gimple use_stmt, cast_stmt = NULL;
2994
2995                   FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
2996                     if (is_gimple_debug (use_stmt))
2997                       continue;
2998                     else if (gimple_code (use_stmt) == GIMPLE_COND
2999                              || gimple_code (use_stmt) == GIMPLE_PHI)
3000                       FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3001                         SET_USE (use_p, new_op);
3002                     else if (gimple_assign_cast_p (use_stmt))
3003                       cast_stmt = use_stmt;
3004                     else
3005                       gcc_unreachable ();
3006                   if (cast_stmt)
3007                     {
3008                       gcc_assert (bb == last_bb);
3009                       tree lhs = gimple_assign_lhs (cast_stmt);
3010                       tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3011                       enum tree_code rhs_code
3012                         = gimple_assign_rhs_code (cast_stmt);
3013                       gimple g;
3014                       if (is_gimple_min_invariant (new_op))
3015                         {
3016                           new_op = fold_convert (TREE_TYPE (lhs), new_op);
3017                           g = gimple_build_assign (new_lhs, new_op);
3018                         }
3019                       else
3020                         g = gimple_build_assign_with_ops (rhs_code, new_lhs,
3021                                                           new_op, NULL_TREE);
3022                       gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3023                       gimple_set_uid (g, gimple_uid (cast_stmt));
3024                       gimple_set_visited (g, true);
3025                       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3026                       FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3027                         if (is_gimple_debug (use_stmt))
3028                           continue;
3029                         else if (gimple_code (use_stmt) == GIMPLE_COND
3030                                  || gimple_code (use_stmt) == GIMPLE_PHI)
3031                           FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3032                             SET_USE (use_p, new_lhs);
3033                         else
3034                           gcc_unreachable ();
3035                     }
3036                 }
3037             }
3038           if (bb == first_bb)
3039             break;
3040         }
3041       for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3042         {
3043           if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3044               && bbinfo[idx].op == NULL_TREE
3045               && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3046             {
3047               stmt = last_stmt (bb);
3048               if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3049                 gimple_cond_make_false (stmt);
3050               else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3051                 gimple_cond_make_true (stmt);
3052               else
3053                 {
3054                   gimple_cond_set_code (stmt, NE_EXPR);
3055                   gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
3056                   gimple_cond_set_rhs (stmt, boolean_false_node);
3057                 }
3058               update_stmt (stmt);
3059             }
3060           if (bb == first_bb)
3061             break;
3062         }
3063     }
3064 }
3065
3066 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3067    of STMT in it's operands.  This is also known as a "destructive
3068    update" operation.  */
3069
3070 static bool
3071 is_phi_for_stmt (gimple stmt, tree operand)
3072 {
3073   gimple def_stmt;
3074   tree lhs;
3075   use_operand_p arg_p;
3076   ssa_op_iter i;
3077
3078   if (TREE_CODE (operand) != SSA_NAME)
3079     return false;
3080
3081   lhs = gimple_assign_lhs (stmt);
3082
3083   def_stmt = SSA_NAME_DEF_STMT (operand);
3084   if (gimple_code (def_stmt) != GIMPLE_PHI)
3085     return false;
3086
3087   FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
3088     if (lhs == USE_FROM_PTR (arg_p))
3089       return true;
3090   return false;
3091 }
3092
3093 /* Remove def stmt of VAR if VAR has zero uses and recurse
3094    on rhs1 operand if so.  */
3095
3096 static void
3097 remove_visited_stmt_chain (tree var)
3098 {
3099   gimple stmt;
3100   gimple_stmt_iterator gsi;
3101
3102   while (1)
3103     {
3104       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3105         return;
3106       stmt = SSA_NAME_DEF_STMT (var);
3107       if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3108         {
3109           var = gimple_assign_rhs1 (stmt);
3110           gsi = gsi_for_stmt (stmt);
3111           reassoc_remove_stmt (&gsi);
3112           release_defs (stmt);
3113         }
3114       else
3115         return;
3116     }
3117 }
3118
3119 /* This function checks three consequtive operands in
3120    passed operands vector OPS starting from OPINDEX and
3121    swaps two operands if it is profitable for binary operation
3122    consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
3123
3124    We pair ops with the same rank if possible.
3125
3126    The alternative we try is to see if STMT is a destructive
3127    update style statement, which is like:
3128    b = phi (a, ...)
3129    a = c + b;
3130    In that case, we want to use the destructive update form to
3131    expose the possible vectorizer sum reduction opportunity.
3132    In that case, the third operand will be the phi node. This
3133    check is not performed if STMT is null.
3134
3135    We could, of course, try to be better as noted above, and do a
3136    lot of work to try to find these opportunities in >3 operand
3137    cases, but it is unlikely to be worth it.  */
3138
3139 static void
3140 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3141                           unsigned int opindex, gimple stmt)
3142 {
3143   operand_entry_t oe1, oe2, oe3;
3144
3145   oe1 = ops[opindex];
3146   oe2 = ops[opindex + 1];
3147   oe3 = ops[opindex + 2];
3148
3149   if ((oe1->rank == oe2->rank
3150        && oe2->rank != oe3->rank)
3151       || (stmt && is_phi_for_stmt (stmt, oe3->op)
3152           && !is_phi_for_stmt (stmt, oe1->op)
3153           && !is_phi_for_stmt (stmt, oe2->op)))
3154     {
3155       struct operand_entry temp = *oe3;
3156       oe3->op = oe1->op;
3157       oe3->rank = oe1->rank;
3158       oe1->op = temp.op;
3159       oe1->rank= temp.rank;
3160     }
3161   else if ((oe1->rank == oe3->rank
3162             && oe2->rank != oe3->rank)
3163            || (stmt && is_phi_for_stmt (stmt, oe2->op)
3164                && !is_phi_for_stmt (stmt, oe1->op)
3165                && !is_phi_for_stmt (stmt, oe3->op)))
3166     {
3167       struct operand_entry temp = *oe2;
3168       oe2->op = oe1->op;
3169       oe2->rank = oe1->rank;
3170       oe1->op = temp.op;
3171       oe1->rank = temp.rank;
3172     }
3173 }
3174
3175 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3176    two definitions, otherwise return STMT.  */
3177
3178 static inline gimple
3179 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3180 {
3181   if (TREE_CODE (rhs1) == SSA_NAME
3182       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3183     stmt = SSA_NAME_DEF_STMT (rhs1);
3184   if (TREE_CODE (rhs2) == SSA_NAME
3185       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3186     stmt = SSA_NAME_DEF_STMT (rhs2);
3187   return stmt;
3188 }
3189
3190 /* Recursively rewrite our linearized statements so that the operators
3191    match those in OPS[OPINDEX], putting the computation in rank
3192    order.  Return new lhs.  */
3193
3194 static tree
3195 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3196                    vec<operand_entry_t> ops, bool changed)
3197 {
3198   tree rhs1 = gimple_assign_rhs1 (stmt);
3199   tree rhs2 = gimple_assign_rhs2 (stmt);
3200   tree lhs = gimple_assign_lhs (stmt);
3201   operand_entry_t oe;
3202
3203   /* The final recursion case for this function is that you have
3204      exactly two operations left.
3205      If we had one exactly one op in the entire list to start with, we
3206      would have never called this function, and the tail recursion
3207      rewrites them one at a time.  */
3208   if (opindex + 2 == ops.length ())
3209     {
3210       operand_entry_t oe1, oe2;
3211
3212       oe1 = ops[opindex];
3213       oe2 = ops[opindex + 1];
3214
3215       if (rhs1 != oe1->op || rhs2 != oe2->op)
3216         {
3217           gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3218           unsigned int uid = gimple_uid (stmt);
3219
3220           if (dump_file && (dump_flags & TDF_DETAILS))
3221             {
3222               fprintf (dump_file, "Transforming ");
3223               print_gimple_stmt (dump_file, stmt, 0, 0);
3224             }
3225
3226           if (changed)
3227             {
3228               gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3229               lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3230               stmt
3231                 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3232                                                 lhs, oe1->op, oe2->op);
3233               gimple_set_uid (stmt, uid);
3234               gimple_set_visited (stmt, true);
3235               if (insert_point == gsi_stmt (gsi))
3236                 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3237               else
3238                 insert_stmt_after (stmt, insert_point);
3239             }
3240           else
3241             {
3242               gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3243                                    == stmt);
3244               gimple_assign_set_rhs1 (stmt, oe1->op);
3245               gimple_assign_set_rhs2 (stmt, oe2->op);
3246               update_stmt (stmt);
3247             }
3248
3249           if (rhs1 != oe1->op && rhs1 != oe2->op)
3250             remove_visited_stmt_chain (rhs1);
3251
3252           if (dump_file && (dump_flags & TDF_DETAILS))
3253             {
3254               fprintf (dump_file, " into ");
3255               print_gimple_stmt (dump_file, stmt, 0, 0);
3256             }
3257         }
3258       return lhs;
3259     }
3260
3261   /* If we hit here, we should have 3 or more ops left.  */
3262   gcc_assert (opindex + 2 < ops.length ());
3263
3264   /* Rewrite the next operator.  */
3265   oe = ops[opindex];
3266
3267   /* Recurse on the LHS of the binary operator, which is guaranteed to
3268      be the non-leaf side.  */
3269   tree new_rhs1
3270     = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3271                          changed || oe->op != rhs2);
3272
3273   if (oe->op != rhs2 || new_rhs1 != rhs1)
3274     {
3275       if (dump_file && (dump_flags & TDF_DETAILS))
3276         {
3277           fprintf (dump_file, "Transforming ");
3278           print_gimple_stmt (dump_file, stmt, 0, 0);
3279         }
3280
3281       /* If changed is false, this is either opindex == 0
3282          or all outer rhs2's were equal to corresponding oe->op,
3283          and powi_result is NULL.
3284          That means lhs is equivalent before and after reassociation.
3285          Otherwise ensure the old lhs SSA_NAME is not reused and
3286          create a new stmt as well, so that any debug stmts will be
3287          properly adjusted.  */
3288       if (changed)
3289         {
3290           gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3291           unsigned int uid = gimple_uid (stmt);
3292           gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3293
3294           lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3295           stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3296                                                lhs, new_rhs1, oe->op);
3297           gimple_set_uid (stmt, uid);
3298           gimple_set_visited (stmt, true);
3299           if (insert_point == gsi_stmt (gsi))
3300             gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3301           else
3302             insert_stmt_after (stmt, insert_point);
3303         }
3304       else
3305         {
3306           gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3307                                == stmt);
3308           gimple_assign_set_rhs1 (stmt, new_rhs1);
3309           gimple_assign_set_rhs2 (stmt, oe->op);
3310           update_stmt (stmt);
3311         }
3312
3313       if (dump_file && (dump_flags & TDF_DETAILS))
3314         {
3315           fprintf (dump_file, " into ");
3316           print_gimple_stmt (dump_file, stmt, 0, 0);
3317         }
3318     }
3319   return lhs;
3320 }
3321
3322 /* Find out how many cycles we need to compute statements chain.
3323    OPS_NUM holds number os statements in a chain.  CPU_WIDTH is a
3324    maximum number of independent statements we may execute per cycle.  */
3325
3326 static int
3327 get_required_cycles (int ops_num, int cpu_width)
3328 {
3329   int res;
3330   int elog;
3331   unsigned int rest;
3332
3333   /* While we have more than 2 * cpu_width operands
3334      we may reduce number of operands by cpu_width
3335      per cycle.  */
3336   res = ops_num / (2 * cpu_width);
3337
3338   /* Remained operands count may be reduced twice per cycle
3339      until we have only one operand.  */
3340   rest = (unsigned)(ops_num - res * cpu_width);
3341   elog = exact_log2 (rest);
3342   if (elog >= 0)
3343     res += elog;
3344   else
3345     res += floor_log2 (rest) + 1;
3346
3347   return res;
3348 }
3349
3350 /* Returns an optimal number of registers to use for computation of
3351    given statements.  */
3352
3353 static int
3354 get_reassociation_width (int ops_num, enum tree_code opc,
3355                          enum machine_mode mode)
3356 {
3357   int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3358   int width;
3359   int width_min;
3360   int cycles_best;
3361
3362   if (param_width > 0)
3363     width = param_width;
3364   else
3365     width = targetm.sched.reassociation_width (opc, mode);
3366
3367   if (width == 1)
3368     return width;
3369
3370   /* Get the minimal time required for sequence computation.  */
3371   cycles_best = get_required_cycles (ops_num, width);
3372
3373   /* Check if we may use less width and still compute sequence for
3374      the same time.  It will allow us to reduce registers usage.
3375      get_required_cycles is monotonically increasing with lower width
3376      so we can perform a binary search for the minimal width that still
3377      results in the optimal cycle count.  */
3378   width_min = 1;
3379   while (width > width_min)
3380     {
3381       int width_mid = (width + width_min) / 2;
3382
3383       if (get_required_cycles (ops_num, width_mid) == cycles_best)
3384         width = width_mid;
3385       else if (width_min < width_mid)
3386         width_min = width_mid;
3387       else
3388         break;
3389     }
3390
3391   return width;
3392 }
3393
3394 /* Recursively rewrite our linearized statements so that the operators
3395    match those in OPS[OPINDEX], putting the computation in rank
3396    order and trying to allow operations to be executed in
3397    parallel.  */
3398
3399 static void
3400 rewrite_expr_tree_parallel (gimple stmt, int width,
3401                             vec<operand_entry_t> ops)
3402 {
3403   enum tree_code opcode = gimple_assign_rhs_code (stmt);
3404   int op_num = ops.length ();
3405   int stmt_num = op_num - 1;
3406   gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3407   int op_index = op_num - 1;
3408   int stmt_index = 0;
3409   int ready_stmts_end = 0;
3410   int i = 0;
3411   tree last_rhs1 = gimple_assign_rhs1 (stmt);
3412
3413   /* We start expression rewriting from the top statements.
3414      So, in this loop we create a full list of statements
3415      we will work with.  */
3416   stmts[stmt_num - 1] = stmt;
3417   for (i = stmt_num - 2; i >= 0; i--)
3418     stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3419
3420   for (i = 0; i < stmt_num; i++)
3421     {
3422       tree op1, op2;
3423
3424       /* Determine whether we should use results of
3425          already handled statements or not.  */
3426       if (ready_stmts_end == 0
3427           && (i - stmt_index >= width || op_index < 1))
3428         ready_stmts_end = i;
3429
3430       /* Now we choose operands for the next statement.  Non zero
3431          value in ready_stmts_end means here that we should use
3432          the result of already generated statements as new operand.  */
3433       if (ready_stmts_end > 0)
3434         {
3435           op1 = gimple_assign_lhs (stmts[stmt_index++]);
3436           if (ready_stmts_end > stmt_index)
3437             op2 = gimple_assign_lhs (stmts[stmt_index++]);
3438           else if (op_index >= 0)
3439             op2 = ops[op_index--]->op;
3440           else
3441             {
3442               gcc_assert (stmt_index < i);
3443               op2 = gimple_assign_lhs (stmts[stmt_index++]);
3444             }
3445
3446           if (stmt_index >= ready_stmts_end)
3447             ready_stmts_end = 0;
3448         }
3449       else
3450         {
3451           if (op_index > 1)
3452             swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3453           op2 = ops[op_index--]->op;
3454           op1 = ops[op_index--]->op;
3455         }
3456
3457       /* If we emit the last statement then we should put
3458          operands into the last statement.  It will also
3459          break the loop.  */
3460       if (op_index < 0 && stmt_index == i)
3461         i = stmt_num - 1;
3462
3463       if (dump_file && (dump_flags & TDF_DETAILS))
3464         {
3465           fprintf (dump_file, "Transforming ");
3466           print_gimple_stmt (dump_file, stmts[i], 0, 0);
3467         }
3468
3469       /* We keep original statement only for the last one.  All
3470          others are recreated.  */
3471       if (i == stmt_num - 1)
3472         {
3473           gimple_assign_set_rhs1 (stmts[i], op1);
3474           gimple_assign_set_rhs2 (stmts[i], op2);
3475           update_stmt (stmts[i]);
3476         }
3477       else
3478         stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3479
3480       if (dump_file && (dump_flags & TDF_DETAILS))
3481         {
3482           fprintf (dump_file, " into ");
3483           print_gimple_stmt (dump_file, stmts[i], 0, 0);
3484         }
3485     }
3486
3487   remove_visited_stmt_chain (last_rhs1);
3488 }
3489
3490 /* Transform STMT, which is really (A +B) + (C + D) into the left
3491    linear form, ((A+B)+C)+D.
3492    Recurse on D if necessary.  */
3493
3494 static void
3495 linearize_expr (gimple stmt)
3496 {
3497   gimple_stmt_iterator gsi;
3498   gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3499   gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3500   gimple oldbinrhs = binrhs;
3501   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3502   gimple newbinrhs = NULL;
3503   struct loop *loop = loop_containing_stmt (stmt);
3504   tree lhs = gimple_assign_lhs (stmt);
3505
3506   gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3507               && is_reassociable_op (binrhs, rhscode, loop));
3508
3509   gsi = gsi_for_stmt (stmt);
3510
3511   gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3512   binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
3513                                          make_ssa_name (TREE_TYPE (lhs), NULL),
3514                                          gimple_assign_lhs (binlhs),
3515                                          gimple_assign_rhs2 (binrhs));
3516   gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3517   gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3518   gimple_set_uid (binrhs, gimple_uid (stmt));
3519
3520   if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3521     newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3522
3523   if (dump_file && (dump_flags & TDF_DETAILS))
3524     {
3525       fprintf (dump_file, "Linearized: ");
3526       print_gimple_stmt (dump_file, stmt, 0, 0);
3527     }
3528
3529   reassociate_stats.linearized++;
3530   update_stmt (stmt);
3531
3532   gsi = gsi_for_stmt (oldbinrhs);
3533   reassoc_remove_stmt (&gsi);
3534   release_defs (oldbinrhs);
3535
3536   gimple_set_visited (stmt, true);
3537   gimple_set_visited (binlhs, true);
3538   gimple_set_visited (binrhs, true);
3539
3540   /* Tail recurse on the new rhs if it still needs reassociation.  */
3541   if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3542     /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3543            want to change the algorithm while converting to tuples.  */
3544     linearize_expr (stmt);
3545 }
3546
3547 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3548    it.  Otherwise, return NULL.  */
3549
3550 static gimple
3551 get_single_immediate_use (tree lhs)
3552 {
3553   use_operand_p immuse;
3554   gimple immusestmt;
3555
3556   if (TREE_CODE (lhs) == SSA_NAME
3557       && single_imm_use (lhs, &immuse, &immusestmt)
3558       && is_gimple_assign (immusestmt))
3559     return immusestmt;
3560
3561   return NULL;
3562 }
3563
3564 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3565    representing the negated value.  Insertions of any necessary
3566    instructions go before GSI.
3567    This function is recursive in that, if you hand it "a_5" as the
3568    value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3569    transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
3570
3571 static tree
3572 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3573 {
3574   gimple negatedefstmt = NULL;
3575   tree resultofnegate;
3576   gimple_stmt_iterator gsi;
3577   unsigned int uid;
3578
3579   /* If we are trying to negate a name, defined by an add, negate the
3580      add operands instead.  */
3581   if (TREE_CODE (tonegate) == SSA_NAME)
3582     negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3583   if (TREE_CODE (tonegate) == SSA_NAME
3584       && is_gimple_assign (negatedefstmt)
3585       && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3586       && has_single_use (gimple_assign_lhs (negatedefstmt))
3587       && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3588     {
3589       tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3590       tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3591       tree lhs = gimple_assign_lhs (negatedefstmt);
3592       gimple g;
3593
3594       gsi = gsi_for_stmt (negatedefstmt);
3595       rhs1 = negate_value (rhs1, &gsi);
3596
3597       gsi = gsi_for_stmt (negatedefstmt);
3598       rhs2 = negate_value (rhs2, &gsi);
3599
3600       gsi = gsi_for_stmt (negatedefstmt);
3601       lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3602       gimple_set_visited (negatedefstmt, true);
3603       g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
3604       gimple_set_uid (g, gimple_uid (negatedefstmt));
3605       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3606       return lhs;
3607     }
3608
3609   tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3610   resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3611                                              NULL_TREE, true, GSI_SAME_STMT);
3612   gsi = *gsip;
3613   uid = gimple_uid (gsi_stmt (gsi));
3614   for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3615     {
3616       gimple stmt = gsi_stmt (gsi);
3617       if (gimple_uid (stmt) != 0)
3618         break;
3619       gimple_set_uid (stmt, uid);
3620     }
3621   return resultofnegate;
3622 }
3623
3624 /* Return true if we should break up the subtract in STMT into an add
3625    with negate.  This is true when we the subtract operands are really
3626    adds, or the subtract itself is used in an add expression.  In
3627    either case, breaking up the subtract into an add with negate
3628    exposes the adds to reassociation.  */
3629
3630 static bool
3631 should_break_up_subtract (gimple stmt)
3632 {
3633   tree lhs = gimple_assign_lhs (stmt);
3634   tree binlhs = gimple_assign_rhs1 (stmt);
3635   tree binrhs = gimple_assign_rhs2 (stmt);
3636   gimple immusestmt;
3637   struct loop *loop = loop_containing_stmt (stmt);
3638
3639   if (TREE_CODE (binlhs) == SSA_NAME
3640       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3641     return true;
3642
3643   if (TREE_CODE (binrhs) == SSA_NAME
3644       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3645     return true;
3646
3647   if (TREE_CODE (lhs) == SSA_NAME
3648       && (immusestmt = get_single_immediate_use (lhs))
3649       && is_gimple_assign (immusestmt)
3650       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3651           ||  gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3652     return true;
3653   return false;
3654 }
3655
3656 /* Transform STMT from A - B into A + -B.  */
3657
3658 static void
3659 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3660 {
3661   tree rhs1 = gimple_assign_rhs1 (stmt);
3662   tree rhs2 = gimple_assign_rhs2 (stmt);
3663
3664   if (dump_file && (dump_flags & TDF_DETAILS))
3665     {
3666       fprintf (dump_file, "Breaking up subtract ");
3667       print_gimple_stmt (dump_file, stmt, 0, 0);
3668     }
3669
3670   rhs2 = negate_value (rhs2, gsip);
3671   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3672   update_stmt (stmt);
3673 }
3674
3675 /* Determine whether STMT is a builtin call that raises an SSA name
3676    to an integer power and has only one use.  If so, and this is early
3677    reassociation and unsafe math optimizations are permitted, place
3678    the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3679    If any of these conditions does not hold, return FALSE.  */
3680
3681 static bool
3682 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3683 {
3684   tree fndecl, arg1;
3685   REAL_VALUE_TYPE c, cint;
3686
3687   if (!first_pass_instance
3688       || !flag_unsafe_math_optimizations
3689       || !is_gimple_call (stmt)
3690       || !has_single_use (gimple_call_lhs (stmt)))
3691     return false;
3692
3693   fndecl = gimple_call_fndecl (stmt);
3694
3695   if (!fndecl
3696       || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3697     return false;
3698
3699   switch (DECL_FUNCTION_CODE (fndecl))
3700     {
3701     CASE_FLT_FN (BUILT_IN_POW):
3702       *base = gimple_call_arg (stmt, 0);
3703       arg1 = gimple_call_arg (stmt, 1);
3704
3705       if (TREE_CODE (arg1) != REAL_CST)
3706         return false;
3707
3708       c = TREE_REAL_CST (arg1);
3709
3710       if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3711         return false;
3712
3713       *exponent = real_to_integer (&c);
3714       real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
3715       if (!real_identical (&c, &cint))
3716         return false;
3717
3718       break;
3719
3720     CASE_FLT_FN (BUILT_IN_POWI):
3721       *base = gimple_call_arg (stmt, 0);
3722       arg1 = gimple_call_arg (stmt, 1);
3723
3724       if (!tree_fits_shwi_p (arg1))
3725         return false;
3726
3727       *exponent = tree_to_shwi (arg1);
3728       break;
3729
3730     default:
3731       return false;
3732     }
3733
3734   /* Expanding negative exponents is generally unproductive, so we don't
3735      complicate matters with those.  Exponents of zero and one should
3736      have been handled by expression folding.  */
3737   if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3738     return false;
3739
3740   return true;
3741 }
3742
3743 /* Recursively linearize a binary expression that is the RHS of STMT.
3744    Place the operands of the expression tree in the vector named OPS.  */
3745
3746 static void
3747 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3748                      bool is_associative, bool set_visited)
3749 {
3750   tree binlhs = gimple_assign_rhs1 (stmt);
3751   tree binrhs = gimple_assign_rhs2 (stmt);
3752   gimple binlhsdef = NULL, binrhsdef = NULL;
3753   bool binlhsisreassoc = false;
3754   bool binrhsisreassoc = false;
3755   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3756   struct loop *loop = loop_containing_stmt (stmt);
3757   tree base = NULL_TREE;
3758   HOST_WIDE_INT exponent = 0;
3759
3760   if (set_visited)
3761     gimple_set_visited (stmt, true);
3762
3763   if (TREE_CODE (binlhs) == SSA_NAME)
3764     {
3765       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3766       binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3767                          && !stmt_could_throw_p (binlhsdef));
3768     }
3769
3770   if (TREE_CODE (binrhs) == SSA_NAME)
3771     {
3772       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3773       binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3774                          && !stmt_could_throw_p (binrhsdef));
3775     }
3776
3777   /* If the LHS is not reassociable, but the RHS is, we need to swap
3778      them.  If neither is reassociable, there is nothing we can do, so
3779      just put them in the ops vector.  If the LHS is reassociable,
3780      linearize it.  If both are reassociable, then linearize the RHS
3781      and the LHS.  */
3782
3783   if (!binlhsisreassoc)
3784     {
3785       tree temp;
3786
3787       /* If this is not a associative operation like division, give up.  */
3788       if (!is_associative)
3789         {
3790           add_to_ops_vec (ops, binrhs);
3791           return;
3792         }
3793
3794       if (!binrhsisreassoc)
3795         {
3796           if (rhscode == MULT_EXPR
3797               && TREE_CODE (binrhs) == SSA_NAME
3798               && acceptable_pow_call (binrhsdef, &base, &exponent))
3799             {
3800               add_repeat_to_ops_vec (ops, base, exponent);
3801               gimple_set_visited (binrhsdef, true);
3802             }
3803           else
3804             add_to_ops_vec (ops, binrhs);
3805
3806           if (rhscode == MULT_EXPR
3807               && TREE_CODE (binlhs) == SSA_NAME
3808               && acceptable_pow_call (binlhsdef, &base, &exponent))
3809             {
3810               add_repeat_to_ops_vec (ops, base, exponent);
3811               gimple_set_visited (binlhsdef, true);
3812             }
3813           else
3814             add_to_ops_vec (ops, binlhs);
3815
3816           return;
3817         }
3818
3819       if (dump_file && (dump_flags & TDF_DETAILS))
3820         {
3821           fprintf (dump_file, "swapping operands of ");
3822           print_gimple_stmt (dump_file, stmt, 0, 0);
3823         }
3824
3825       swap_ssa_operands (stmt,
3826                          gimple_assign_rhs1_ptr (stmt),
3827                          gimple_assign_rhs2_ptr (stmt));
3828       update_stmt (stmt);
3829
3830       if (dump_file && (dump_flags & TDF_DETAILS))
3831         {
3832           fprintf (dump_file, " is now ");
3833           print_gimple_stmt (dump_file, stmt, 0, 0);
3834         }
3835
3836       /* We want to make it so the lhs is always the reassociative op,
3837          so swap.  */
3838       temp = binlhs;
3839       binlhs = binrhs;
3840       binrhs = temp;
3841     }
3842   else if (binrhsisreassoc)
3843     {
3844       linearize_expr (stmt);
3845       binlhs = gimple_assign_rhs1 (stmt);
3846       binrhs = gimple_assign_rhs2 (stmt);
3847     }
3848
3849   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3850               || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3851                                       rhscode, loop));
3852   linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3853                        is_associative, set_visited);
3854
3855   if (rhscode == MULT_EXPR
3856       && TREE_CODE (binrhs) == SSA_NAME
3857       && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3858     {
3859       add_repeat_to_ops_vec (ops, base, exponent);
3860       gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3861     }
3862   else
3863     add_to_ops_vec (ops, binrhs);
3864 }
3865
3866 /* Repropagate the negates back into subtracts, since no other pass
3867    currently does it.  */
3868
3869 static void
3870 repropagate_negates (void)
3871 {
3872   unsigned int i = 0;
3873   tree negate;
3874
3875   FOR_EACH_VEC_ELT (plus_negates, i, negate)
3876     {
3877       gimple user = get_single_immediate_use (negate);
3878
3879       if (!user || !is_gimple_assign (user))
3880         continue;
3881
3882       /* The negate operand can be either operand of a PLUS_EXPR
3883          (it can be the LHS if the RHS is a constant for example).
3884
3885          Force the negate operand to the RHS of the PLUS_EXPR, then
3886          transform the PLUS_EXPR into a MINUS_EXPR.  */
3887       if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3888         {
3889           /* If the negated operand appears on the LHS of the
3890              PLUS_EXPR, exchange the operands of the PLUS_EXPR
3891              to force the negated operand to the RHS of the PLUS_EXPR.  */
3892           if (gimple_assign_rhs1 (user) == negate)
3893             {
3894               swap_ssa_operands (user,
3895                                  gimple_assign_rhs1_ptr (user),
3896                                  gimple_assign_rhs2_ptr (user));
3897             }
3898
3899           /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3900              the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
3901           if (gimple_assign_rhs2 (user) == negate)
3902             {
3903               tree rhs1 = gimple_assign_rhs1 (user);
3904               tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3905               gimple_stmt_iterator gsi = gsi_for_stmt (user);
3906               gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3907               update_stmt (user);
3908             }
3909         }
3910       else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3911         {
3912           if (gimple_assign_rhs1 (user) == negate)
3913             {
3914               /* We have
3915                    x = -a
3916                    y = x - b
3917                  which we transform into
3918                    x = a + b
3919                    y = -x .
3920                  This pushes down the negate which we possibly can merge
3921                  into some other operation, hence insert it into the
3922                  plus_negates vector.  */
3923               gimple feed = SSA_NAME_DEF_STMT (negate);
3924               tree a = gimple_assign_rhs1 (feed);
3925               tree b = gimple_assign_rhs2 (user);
3926               gimple_stmt_iterator gsi = gsi_for_stmt (feed);
3927               gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
3928               tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
3929               gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
3930               gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3931               gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
3932               user = gsi_stmt (gsi2);
3933               update_stmt (user);
3934               reassoc_remove_stmt (&gsi);
3935               release_defs (feed);
3936               plus_negates.safe_push (gimple_assign_lhs (user));
3937             }
3938           else
3939             {
3940               /* Transform "x = -a; y = b - x" into "y = b + a", getting
3941                  rid of one operation.  */
3942               gimple feed = SSA_NAME_DEF_STMT (negate);
3943               tree a = gimple_assign_rhs1 (feed);
3944               tree rhs1 = gimple_assign_rhs1 (user);
3945               gimple_stmt_iterator gsi = gsi_for_stmt (user);
3946               gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3947               update_stmt (gsi_stmt (gsi));
3948             }
3949         }
3950     }
3951 }
3952
3953 /* Returns true if OP is of a type for which we can do reassociation.
3954    That is for integral or non-saturating fixed-point types, and for
3955    floating point type when associative-math is enabled.  */
3956
3957 static bool
3958 can_reassociate_p (tree op)
3959 {
3960   tree type = TREE_TYPE (op);
3961   if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3962       || NON_SAT_FIXED_POINT_TYPE_P (type)
3963       || (flag_associative_math && FLOAT_TYPE_P (type)))
3964     return true;
3965   return false;
3966 }
3967
3968 /* Break up subtract operations in block BB.
3969
3970    We do this top down because we don't know whether the subtract is
3971    part of a possible chain of reassociation except at the top.
3972
3973    IE given
3974    d = f + g
3975    c = a + e
3976    b = c - d
3977    q = b - r
3978    k = t - q
3979
3980    we want to break up k = t - q, but we won't until we've transformed q
3981    = b - r, which won't be broken up until we transform b = c - d.
3982
3983    En passant, clear the GIMPLE visited flag on every statement
3984    and set UIDs within each basic block.  */
3985
3986 static void
3987 break_up_subtract_bb (basic_block bb)
3988 {
3989   gimple_stmt_iterator gsi;
3990   basic_block son;
3991   unsigned int uid = 1;
3992
3993   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3994     {
3995       gimple stmt = gsi_stmt (gsi);
3996       gimple_set_visited (stmt, false);
3997       gimple_set_uid (stmt, uid++);
3998
3999       if (!is_gimple_assign (stmt)
4000           || !can_reassociate_p (gimple_assign_lhs (stmt)))
4001         continue;
4002
4003       /* Look for simple gimple subtract operations.  */
4004       if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4005         {
4006           if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4007               || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4008             continue;
4009
4010           /* Check for a subtract used only in an addition.  If this
4011              is the case, transform it into add of a negate for better
4012              reassociation.  IE transform C = A-B into C = A + -B if C
4013              is only used in an addition.  */
4014           if (should_break_up_subtract (stmt))
4015             break_up_subtract (stmt, &gsi);
4016         }
4017       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4018                && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4019         plus_negates.safe_push (gimple_assign_lhs (stmt));
4020     }
4021   for (son = first_dom_son (CDI_DOMINATORS, bb);
4022        son;
4023        son = next_dom_son (CDI_DOMINATORS, son))
4024     break_up_subtract_bb (son);
4025 }
4026
4027 /* Used for repeated factor analysis.  */
4028 struct repeat_factor_d
4029 {
4030   /* An SSA name that occurs in a multiply chain.  */
4031   tree factor;
4032
4033   /* Cached rank of the factor.  */
4034   unsigned rank;
4035
4036   /* Number of occurrences of the factor in the chain.  */
4037   HOST_WIDE_INT count;
4038
4039   /* An SSA name representing the product of this factor and
4040      all factors appearing later in the repeated factor vector.  */
4041   tree repr;
4042 };
4043
4044 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4045 typedef const struct repeat_factor_d *const_repeat_factor_t;
4046
4047
4048 static vec<repeat_factor> repeat_factor_vec;
4049
4050 /* Used for sorting the repeat factor vector.  Sort primarily by
4051    ascending occurrence count, secondarily by descending rank.  */
4052
4053 static int
4054 compare_repeat_factors (const void *x1, const void *x2)
4055 {
4056   const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4057   const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4058
4059   if (rf1->count != rf2->count)
4060     return rf1->count - rf2->count;
4061
4062   return rf2->rank - rf1->rank;
4063 }
4064
4065 /* Look for repeated operands in OPS in the multiply tree rooted at
4066    STMT.  Replace them with an optimal sequence of multiplies and powi
4067    builtin calls, and remove the used operands from OPS.  Return an
4068    SSA name representing the value of the replacement sequence.  */
4069
4070 static tree
4071 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4072 {
4073   unsigned i, j, vec_len;
4074   int ii;
4075   operand_entry_t oe;
4076   repeat_factor_t rf1, rf2;
4077   repeat_factor rfnew;
4078   tree result = NULL_TREE;
4079   tree target_ssa, iter_result;
4080   tree type = TREE_TYPE (gimple_get_lhs (stmt));
4081   tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4082   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4083   gimple mul_stmt, pow_stmt;
4084
4085   /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4086      target.  */
4087   if (!powi_fndecl)
4088     return NULL_TREE;
4089
4090   /* Allocate the repeated factor vector.  */
4091   repeat_factor_vec.create (10);
4092
4093   /* Scan the OPS vector for all SSA names in the product and build
4094      up a vector of occurrence counts for each factor.  */
4095   FOR_EACH_VEC_ELT (*ops, i, oe)
4096     {
4097       if (TREE_CODE (oe->op) == SSA_NAME)
4098         {
4099           FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4100             {
4101               if (rf1->factor == oe->op)
4102                 {
4103                   rf1->count += oe->count;
4104                   break;
4105                 }
4106             }
4107
4108           if (j >= repeat_factor_vec.length ())
4109             {
4110               rfnew.factor = oe->op;
4111               rfnew.rank = oe->rank;
4112               rfnew.count = oe->count;
4113               rfnew.repr = NULL_TREE;
4114               repeat_factor_vec.safe_push (rfnew);
4115             }
4116         }
4117     }
4118
4119   /* Sort the repeated factor vector by (a) increasing occurrence count,
4120      and (b) decreasing rank.  */
4121   repeat_factor_vec.qsort (compare_repeat_factors);
4122
4123   /* It is generally best to combine as many base factors as possible
4124      into a product before applying __builtin_powi to the result.
4125      However, the sort order chosen for the repeated factor vector
4126      allows us to cache partial results for the product of the base
4127      factors for subsequent use.  When we already have a cached partial
4128      result from a previous iteration, it is best to make use of it
4129      before looking for another __builtin_pow opportunity.
4130
4131      As an example, consider x * x * y * y * y * z * z * z * z.
4132      We want to first compose the product x * y * z, raise it to the
4133      second power, then multiply this by y * z, and finally multiply
4134      by z.  This can be done in 5 multiplies provided we cache y * z
4135      for use in both expressions:
4136
4137         t1 = y * z
4138         t2 = t1 * x
4139         t3 = t2 * t2
4140         t4 = t1 * t3
4141         result = t4 * z
4142
4143      If we instead ignored the cached y * z and first multiplied by
4144      the __builtin_pow opportunity z * z, we would get the inferior:
4145
4146         t1 = y * z
4147         t2 = t1 * x
4148         t3 = t2 * t2
4149         t4 = z * z
4150         t5 = t3 * t4
4151         result = t5 * y  */
4152
4153   vec_len = repeat_factor_vec.length ();
4154
4155   /* Repeatedly look for opportunities to create a builtin_powi call.  */
4156   while (true)
4157     {
4158       HOST_WIDE_INT power;
4159
4160       /* First look for the largest cached product of factors from
4161          preceding iterations.  If found, create a builtin_powi for
4162          it if the minimum occurrence count for its factors is at
4163          least 2, or just use this cached product as our next
4164          multiplicand if the minimum occurrence count is 1.  */
4165       FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4166         {
4167           if (rf1->repr && rf1->count > 0)
4168             break;
4169         }
4170
4171       if (j < vec_len)
4172         {
4173           power = rf1->count;
4174
4175           if (power == 1)
4176             {
4177               iter_result = rf1->repr;
4178
4179               if (dump_file && (dump_flags & TDF_DETAILS))
4180                 {
4181                   unsigned elt;
4182                   repeat_factor_t rf;
4183                   fputs ("Multiplying by cached product ", dump_file);
4184                   for (elt = j; elt < vec_len; elt++)
4185                     {
4186                       rf = &repeat_factor_vec[elt];
4187                       print_generic_expr (dump_file, rf->factor, 0);
4188                       if (elt < vec_len - 1)
4189                         fputs (" * ", dump_file);
4190                     }
4191                   fputs ("\n", dump_file);
4192                 }
4193             }
4194           else
4195             {
4196               iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4197               pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4198                                             build_int_cst (integer_type_node,
4199                                                            power));
4200               gimple_call_set_lhs (pow_stmt, iter_result);
4201               gimple_set_location (pow_stmt, gimple_location (stmt));
4202               gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4203
4204               if (dump_file && (dump_flags & TDF_DETAILS))
4205                 {
4206                   unsigned elt;
4207                   repeat_factor_t rf;
4208                   fputs ("Building __builtin_pow call for cached product (",
4209                          dump_file);
4210                   for (elt = j; elt < vec_len; elt++)
4211                     {
4212                       rf = &repeat_factor_vec[elt];
4213                       print_generic_expr (dump_file, rf->factor, 0);
4214                       if (elt < vec_len - 1)
4215                         fputs (" * ", dump_file);
4216                     }
4217                   fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4218                            power);
4219                 }
4220             }
4221         }
4222       else
4223         {
4224           /* Otherwise, find the first factor in the repeated factor
4225              vector whose occurrence count is at least 2.  If no such
4226              factor exists, there are no builtin_powi opportunities
4227              remaining.  */
4228           FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4229             {
4230               if (rf1->count >= 2)
4231                 break;
4232             }
4233
4234           if (j >= vec_len)
4235             break;
4236
4237           power = rf1->count;
4238
4239           if (dump_file && (dump_flags & TDF_DETAILS))
4240             {
4241               unsigned elt;
4242               repeat_factor_t rf;
4243               fputs ("Building __builtin_pow call for (", dump_file);
4244               for (elt = j; elt < vec_len; elt++)
4245                 {
4246                   rf = &repeat_factor_vec[elt];
4247                   print_generic_expr (dump_file, rf->factor, 0);
4248                   if (elt < vec_len - 1)
4249                     fputs (" * ", dump_file);
4250                 }
4251               fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4252             }
4253
4254           reassociate_stats.pows_created++;
4255
4256           /* Visit each element of the vector in reverse order (so that
4257              high-occurrence elements are visited first, and within the
4258              same occurrence count, lower-ranked elements are visited
4259              first).  Form a linear product of all elements in this order
4260              whose occurrencce count is at least that of element J.
4261              Record the SSA name representing the product of each element
4262              with all subsequent elements in the vector.  */
4263           if (j == vec_len - 1)
4264             rf1->repr = rf1->factor;
4265           else
4266             {
4267               for (ii = vec_len - 2; ii >= (int)j; ii--)
4268                 {
4269                   tree op1, op2;
4270
4271                   rf1 = &repeat_factor_vec[ii];
4272                   rf2 = &repeat_factor_vec[ii + 1];
4273
4274                   /* Init the last factor's representative to be itself.  */
4275                   if (!rf2->repr)
4276                     rf2->repr = rf2->factor;
4277
4278                   op1 = rf1->factor;
4279                   op2 = rf2->repr;
4280
4281                   target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4282                   mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4283                                                            target_ssa,
4284                                                            op1, op2);
4285                   gimple_set_location (mul_stmt, gimple_location (stmt));
4286                   gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4287                   rf1->repr = target_ssa;
4288
4289                   /* Don't reprocess the multiply we just introduced.  */
4290                   gimple_set_visited (mul_stmt, true);
4291                 }
4292             }
4293
4294           /* Form a call to __builtin_powi for the maximum product
4295              just formed, raised to the power obtained earlier.  */
4296           rf1 = &repeat_factor_vec[j];
4297           iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4298           pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4299                                         build_int_cst (integer_type_node,
4300                                                        power));
4301           gimple_call_set_lhs (pow_stmt, iter_result);
4302           gimple_set_location (pow_stmt, gimple_location (stmt));
4303           gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4304         }
4305
4306       /* If we previously formed at least one other builtin_powi call,
4307          form the product of this one and those others.  */
4308       if (result)
4309         {
4310           tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4311           mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4312                                                    result, iter_result);
4313           gimple_set_location (mul_stmt, gimple_location (stmt));
4314           gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4315           gimple_set_visited (mul_stmt, true);
4316           result = new_result;
4317         }
4318       else
4319         result = iter_result;
4320
4321       /* Decrement the occurrence count of each element in the product
4322          by the count found above, and remove this many copies of each
4323          factor from OPS.  */
4324       for (i = j; i < vec_len; i++)
4325         {
4326           unsigned k = power;
4327           unsigned n;
4328
4329           rf1 = &repeat_factor_vec[i];
4330           rf1->count -= power;
4331
4332           FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4333             {
4334               if (oe->op == rf1->factor)
4335                 {
4336                   if (oe->count <= k)
4337                     {
4338                       ops->ordered_remove (n);
4339                       k -= oe->count;
4340
4341                       if (k == 0)
4342                         break;
4343                     }
4344                   else
4345                     {
4346                       oe->count -= k;
4347                       break;
4348                     }
4349                 }
4350             }
4351         }
4352     }
4353
4354   /* At this point all elements in the repeated factor vector have a
4355      remaining occurrence count of 0 or 1, and those with a count of 1
4356      don't have cached representatives.  Re-sort the ops vector and
4357      clean up.  */
4358   ops->qsort (sort_by_operand_rank);
4359   repeat_factor_vec.release ();
4360
4361   /* Return the final product computed herein.  Note that there may
4362      still be some elements with single occurrence count left in OPS;
4363      those will be handled by the normal reassociation logic.  */
4364   return result;
4365 }
4366
4367 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
4368
4369 static void
4370 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4371 {
4372   tree rhs1;
4373
4374   if (dump_file && (dump_flags & TDF_DETAILS))
4375     {
4376       fprintf (dump_file, "Transforming ");
4377       print_gimple_stmt (dump_file, stmt, 0, 0);
4378     }
4379
4380   rhs1 = gimple_assign_rhs1 (stmt);
4381   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4382   update_stmt (stmt);
4383   remove_visited_stmt_chain (rhs1);
4384
4385   if (dump_file && (dump_flags & TDF_DETAILS))
4386     {
4387       fprintf (dump_file, " into ");
4388       print_gimple_stmt (dump_file, stmt, 0, 0);
4389     }
4390 }
4391
4392 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */
4393
4394 static void
4395 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4396                             tree rhs1, tree rhs2)
4397 {
4398   if (dump_file && (dump_flags & TDF_DETAILS))
4399     {
4400       fprintf (dump_file, "Transforming ");
4401       print_gimple_stmt (dump_file, stmt, 0, 0);
4402     }
4403
4404   gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4405   update_stmt (gsi_stmt (*gsi));
4406   remove_visited_stmt_chain (rhs1);
4407
4408   if (dump_file && (dump_flags & TDF_DETAILS))
4409     {
4410       fprintf (dump_file, " into ");
4411       print_gimple_stmt (dump_file, stmt, 0, 0);
4412     }
4413 }
4414
4415 /* Reassociate expressions in basic block BB and its post-dominator as
4416    children.  */
4417
4418 static void
4419 reassociate_bb (basic_block bb)
4420 {
4421   gimple_stmt_iterator gsi;
4422   basic_block son;
4423   gimple stmt = last_stmt (bb);
4424
4425   if (stmt && !gimple_visited_p (stmt))
4426     maybe_optimize_range_tests (stmt);
4427
4428   for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4429     {
4430       stmt = gsi_stmt (gsi);
4431
4432       if (is_gimple_assign (stmt)
4433           && !stmt_could_throw_p (stmt))
4434         {
4435           tree lhs, rhs1, rhs2;
4436           enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4437
4438           /* If this is not a gimple binary expression, there is
4439              nothing for us to do with it.  */
4440           if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4441             continue;
4442
4443           /* If this was part of an already processed statement,
4444              we don't need to touch it again. */
4445           if (gimple_visited_p (stmt))
4446             {
4447               /* This statement might have become dead because of previous
4448                  reassociations.  */
4449               if (has_zero_uses (gimple_get_lhs (stmt)))
4450                 {
4451                   reassoc_remove_stmt (&gsi);
4452                   release_defs (stmt);
4453                   /* We might end up removing the last stmt above which
4454                      places the iterator to the end of the sequence.
4455                      Reset it to the last stmt in this case which might
4456                      be the end of the sequence as well if we removed
4457                      the last statement of the sequence.  In which case
4458                      we need to bail out.  */
4459                   if (gsi_end_p (gsi))
4460                     {
4461                       gsi = gsi_last_bb (bb);
4462                       if (gsi_end_p (gsi))
4463                         break;
4464                     }
4465                 }
4466               continue;
4467             }
4468
4469           lhs = gimple_assign_lhs (stmt);
4470           rhs1 = gimple_assign_rhs1 (stmt);
4471           rhs2 = gimple_assign_rhs2 (stmt);
4472
4473           /* For non-bit or min/max operations we can't associate
4474              all types.  Verify that here.  */
4475           if (rhs_code != BIT_IOR_EXPR
4476               && rhs_code != BIT_AND_EXPR
4477               && rhs_code != BIT_XOR_EXPR
4478               && rhs_code != MIN_EXPR
4479               && rhs_code != MAX_EXPR
4480               && (!can_reassociate_p (lhs)
4481                   || !can_reassociate_p (rhs1)
4482                   || !can_reassociate_p (rhs2)))
4483             continue;
4484
4485           if (associative_tree_code (rhs_code))
4486             {
4487               auto_vec<operand_entry_t> ops;
4488               tree powi_result = NULL_TREE;
4489
4490               /* There may be no immediate uses left by the time we
4491                  get here because we may have eliminated them all.  */
4492               if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4493                 continue;
4494
4495               gimple_set_visited (stmt, true);
4496               linearize_expr_tree (&ops, stmt, true, true);
4497               ops.qsort (sort_by_operand_rank);
4498               optimize_ops_list (rhs_code, &ops);
4499               if (undistribute_ops_list (rhs_code, &ops,
4500                                          loop_containing_stmt (stmt)))
4501                 {
4502                   ops.qsort (sort_by_operand_rank);
4503                   optimize_ops_list (rhs_code, &ops);
4504                 }
4505
4506               if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4507                 optimize_range_tests (rhs_code, &ops);
4508
4509               if (first_pass_instance
4510                   && rhs_code == MULT_EXPR
4511                   && flag_unsafe_math_optimizations)
4512                 powi_result = attempt_builtin_powi (stmt, &ops);
4513
4514               /* If the operand vector is now empty, all operands were
4515                  consumed by the __builtin_powi optimization.  */
4516               if (ops.length () == 0)
4517                 transform_stmt_to_copy (&gsi, stmt, powi_result);
4518               else if (ops.length () == 1)
4519                 {
4520                   tree last_op = ops.last ()->op;
4521
4522                   if (powi_result)
4523                     transform_stmt_to_multiply (&gsi, stmt, last_op,
4524                                                 powi_result);
4525                   else
4526                     transform_stmt_to_copy (&gsi, stmt, last_op);
4527                 }
4528               else
4529                 {
4530                   enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4531                   int ops_num = ops.length ();
4532                   int width = get_reassociation_width (ops_num, rhs_code, mode);
4533                   tree new_lhs = lhs;
4534
4535                   if (dump_file && (dump_flags & TDF_DETAILS))
4536                     fprintf (dump_file,
4537                              "Width = %d was chosen for reassociation\n", width);
4538
4539                   if (width > 1
4540                       && ops.length () > 3)
4541                     rewrite_expr_tree_parallel (stmt, width, ops);
4542                   else
4543                     {
4544                       /* When there are three operands left, we want
4545                          to make sure the ones that get the double
4546                          binary op are chosen wisely.  */
4547                       int len = ops.length ();
4548                       if (len >= 3)
4549                         swap_ops_for_binary_stmt (ops, len - 3, stmt);
4550
4551                       new_lhs = rewrite_expr_tree (stmt, 0, ops,
4552                                                    powi_result != NULL);
4553                     }
4554
4555                   /* If we combined some repeated factors into a
4556                      __builtin_powi call, multiply that result by the
4557                      reassociated operands.  */
4558                   if (powi_result)
4559                     {
4560                       gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4561                       tree type = TREE_TYPE (lhs);
4562                       tree target_ssa = make_temp_ssa_name (type, NULL,
4563                                                             "reassocpow");
4564                       gimple_set_lhs (lhs_stmt, target_ssa);
4565                       update_stmt (lhs_stmt);
4566                       if (lhs != new_lhs)
4567                         target_ssa = new_lhs;
4568                       mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4569                                                                powi_result,
4570                                                                target_ssa);
4571                       gimple_set_location (mul_stmt, gimple_location (stmt));
4572                       gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4573                     }
4574                 }
4575             }
4576         }
4577     }
4578   for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4579        son;
4580        son = next_dom_son (CDI_POST_DOMINATORS, son))
4581     reassociate_bb (son);
4582 }
4583
4584 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4585 void debug_ops_vector (vec<operand_entry_t> ops);
4586
4587 /* Dump the operand entry vector OPS to FILE.  */
4588
4589 void
4590 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4591 {
4592   operand_entry_t oe;
4593   unsigned int i;
4594
4595   FOR_EACH_VEC_ELT (ops, i, oe)
4596     {
4597       fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4598       print_generic_expr (file, oe->op, 0);
4599     }
4600 }
4601
4602 /* Dump the operand entry vector OPS to STDERR.  */
4603
4604 DEBUG_FUNCTION void
4605 debug_ops_vector (vec<operand_entry_t> ops)
4606 {
4607   dump_ops_vector (stderr, ops);
4608 }
4609
4610 static void
4611 do_reassoc (void)
4612 {
4613   break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4614   reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4615 }
4616
4617 /* Initialize the reassociation pass.  */
4618
4619 static void
4620 init_reassoc (void)
4621 {
4622   int i;
4623   long rank = 2;
4624   int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4625
4626   /* Find the loops, so that we can prevent moving calculations in
4627      them.  */
4628   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4629
4630   memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4631
4632   operand_entry_pool = create_alloc_pool ("operand entry pool",
4633                                           sizeof (struct operand_entry), 30);
4634   next_operand_entry_id = 0;
4635
4636   /* Reverse RPO (Reverse Post Order) will give us something where
4637      deeper loops come later.  */
4638   pre_and_rev_post_order_compute (NULL, bbs, false);
4639   bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
4640   operand_rank = new hash_map<tree, long>;
4641
4642   /* Give each default definition a distinct rank.  This includes
4643      parameters and the static chain.  Walk backwards over all
4644      SSA names so that we get proper rank ordering according
4645      to tree_swap_operands_p.  */
4646   for (i = num_ssa_names - 1; i > 0; --i)
4647     {
4648       tree name = ssa_name (i);
4649       if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4650         insert_operand_rank (name, ++rank);
4651     }
4652
4653   /* Set up rank for each BB  */
4654   for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
4655     bb_rank[bbs[i]] = ++rank  << 16;
4656
4657   free (bbs);
4658   calculate_dominance_info (CDI_POST_DOMINATORS);
4659   plus_negates = vNULL;
4660 }
4661
4662 /* Cleanup after the reassociation pass, and print stats if
4663    requested.  */
4664
4665 static void
4666 fini_reassoc (void)
4667 {
4668   statistics_counter_event (cfun, "Linearized",
4669                             reassociate_stats.linearized);
4670   statistics_counter_event (cfun, "Constants eliminated",
4671                             reassociate_stats.constants_eliminated);
4672   statistics_counter_event (cfun, "Ops eliminated",
4673                             reassociate_stats.ops_eliminated);
4674   statistics_counter_event (cfun, "Statements rewritten",
4675                             reassociate_stats.rewritten);
4676   statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4677                             reassociate_stats.pows_encountered);
4678   statistics_counter_event (cfun, "Built-in powi calls created",
4679                             reassociate_stats.pows_created);
4680
4681   delete operand_rank;
4682   free_alloc_pool (operand_entry_pool);
4683   free (bb_rank);
4684   plus_negates.release ();
4685   free_dominance_info (CDI_POST_DOMINATORS);
4686   loop_optimizer_finalize ();
4687 }
4688
4689 /* Gate and execute functions for Reassociation.  */
4690
4691 static unsigned int
4692 execute_reassoc (void)
4693 {
4694   init_reassoc ();
4695
4696   do_reassoc ();
4697   repropagate_negates ();
4698
4699   fini_reassoc ();
4700   return 0;
4701 }
4702
4703 namespace {
4704
4705 const pass_data pass_data_reassoc =
4706 {
4707   GIMPLE_PASS, /* type */
4708   "reassoc", /* name */
4709   OPTGROUP_NONE, /* optinfo_flags */
4710   TV_TREE_REASSOC, /* tv_id */
4711   ( PROP_cfg | PROP_ssa ), /* properties_required */
4712   0, /* properties_provided */
4713   0, /* properties_destroyed */
4714   0, /* todo_flags_start */
4715   TODO_update_ssa_only_virtuals, /* todo_flags_finish */
4716 };
4717
4718 class pass_reassoc : public gimple_opt_pass
4719 {
4720 public:
4721   pass_reassoc (gcc::context *ctxt)
4722     : gimple_opt_pass (pass_data_reassoc, ctxt)
4723   {}
4724
4725   /* opt_pass methods: */
4726   opt_pass * clone () { return new pass_reassoc (m_ctxt); }
4727   virtual bool gate (function *) { return flag_tree_reassoc != 0; }
4728   virtual unsigned int execute (function *) { return execute_reassoc (); }
4729
4730 }; // class pass_reassoc
4731
4732 } // anon namespace
4733
4734 gimple_opt_pass *
4735 make_pass_reassoc (gcc::context *ctxt)
4736 {
4737   return new pass_reassoc (ctxt);
4738 }