gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2014 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "tree.h"
29 #include "stor-layout.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-inline.h"
33 #include "pointer-set.h"
34 #include "tree-ssa-alias.h"
35 #include "internal-fn.h"
36 #include "gimple-fold.h"
37 #include "tree-eh.h"
38 #include "gimple-expr.h"
39 #include "is-a.h"
40 #include "gimple.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "gimple-ssa.h"
44 #include "tree-cfg.h"
45 #include "tree-phinodes.h"
46 #include "ssa-iterators.h"
47 #include "stringpool.h"
48 #include "tree-ssanames.h"
49 #include "tree-ssa-loop-niter.h"
50 #include "tree-ssa-loop.h"
51 #include "expr.h"
52 #include "tree-dfa.h"
53 #include "tree-ssa.h"
54 #include "tree-iterator.h"
55 #include "tree-pass.h"
56 #include "alloc-pool.h"
57 #include "langhooks.h"
58 #include "cfgloop.h"
59 #include "flags.h"
60 #include "target.h"
61 #include "params.h"
62 #include "diagnostic-core.h"
63 #include "builtins.h"
65 /* This is a simple global reassociation pass. It is, in part, based
66 on the LLVM pass of the same name (They do some things more/less
67 than we do, in different orders, etc).
69 It consists of five steps:
71 1. Breaking up subtract operations into addition + negate, where
72 it would promote the reassociation of adds.
74 2. Left linearization of the expression trees, so that (A+B)+(C+D)
75 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
76 During linearization, we place the operands of the binary
77 expressions into a vector of operand_entry_t
79 3. Optimization of the operand lists, eliminating things like a +
80 -a, a & a, etc.
82 3a. Combine repeated factors with the same occurrence counts
83 into a __builtin_powi call that will later be optimized into
84 an optimal number of multiplies.
86 4. Rewrite the expression trees we linearized and optimized so
87 they are in proper rank order.
89 5. Repropagate negates, as nothing else will clean it up ATM.
91 A bit of theory on #4, since nobody seems to write anything down
92 about why it makes sense to do it the way they do it:
94 We could do this much nicer theoretically, but don't (for reasons
95 explained after how to do it theoretically nice :P).
97 In order to promote the most redundancy elimination, you want
98 binary expressions whose operands are the same rank (or
99 preferably, the same value) exposed to the redundancy eliminator,
100 for possible elimination.
102 So the way to do this if we really cared, is to build the new op
103 tree from the leaves to the roots, merging as you go, and putting the
104 new op on the end of the worklist, until you are left with one
105 thing on the worklist.
107 IE if you have to rewrite the following set of operands (listed with
108 rank in parentheses), with opcode PLUS_EXPR:
110 a (1), b (1), c (1), d (2), e (2)
113 We start with our merge worklist empty, and the ops list with all of
114 those on it.
116 You want to first merge all leaves of the same rank, as much as
117 possible.
119 So first build a binary op of
121 mergetmp = a + b, and put "mergetmp" on the merge worklist.
123 Because there is no three operand form of PLUS_EXPR, c is not going to
124 be exposed to redundancy elimination as a rank 1 operand.
126 So you might as well throw it on the merge worklist (you could also
127 consider it to now be a rank two operand, and merge it with d and e,
128 but in this case, you then have evicted e from a binary op. So at
129 least in this situation, you can't win.)
131 Then build a binary op of d + e
132 mergetmp2 = d + e
134 and put mergetmp2 on the merge worklist.
136 so merge worklist = {mergetmp, c, mergetmp2}
138 Continue building binary ops of these operations until you have only
139 one operation left on the worklist.
141 So we have
143 build binary op
144 mergetmp3 = mergetmp + c
146 worklist = {mergetmp2, mergetmp3}
148 mergetmp4 = mergetmp2 + mergetmp3
150 worklist = {mergetmp4}
152 because we have one operation left, we can now just set the original
153 statement equal to the result of that operation.
155 This will at least expose a + b and d + e to redundancy elimination
156 as binary operations.
158 For extra points, you can reuse the old statements to build the
159 mergetmps, since you shouldn't run out.
161 So why don't we do this?
163 Because it's expensive, and rarely will help. Most trees we are
164 reassociating have 3 or fewer ops. If they have 2 ops, they already
165 will be written into a nice single binary op. If you have 3 ops, a
166 single simple check suffices to tell you whether the first two are of the
167 same rank. If so, you know to order it
169 mergetmp = op1 + op2
170 newstmt = mergetmp + op3
172 instead of
173 mergetmp = op2 + op3
174 newstmt = mergetmp + op1
176 If all three are of the same rank, you can't expose them all in a
177 single binary operator anyway, so the above is *still* the best you
178 can do.
180 Thus, this is what we do. When we have three ops left, we check to see
181 what order to put them in, and call it a day. As a nod to vector sum
182 reduction, we check if any of the ops are really a phi node that is a
183 destructive update for the associating op, and keep the destructive
184 update together for vector sum reduction recognition. */
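/* For illustration only, a rough source-level sketch of the steps above on
   a tiny expression (the pass really works on GIMPLE statements; the names
   here are invented for the sketch):

     t = (a - b) + (b + c);

   Step 1 breaks up the subtraction:        t = (a + -b) + (b + c)
   Step 2 linearizes into an operand list:  { a, -b, b, c }  with opcode PLUS_EXPR
   Step 3 spots the a + -a style pair -b, b and cancels it to a zero
   constant, which the constant handling then folds away:  { a, c }
   Step 4 rewrites the remaining operands back into left-linear binary
   statements in rank order, and step 5 repropagates any surviving negates.  */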
187 /* Statistics */
188 static struct
190 int linearized;
191 int constants_eliminated;
192 int ops_eliminated;
193 int rewritten;
194 int pows_encountered;
195 int pows_created;
196 } reassociate_stats;
198 /* Operator, rank pair. */
199 typedef struct operand_entry
201 unsigned int rank;
202 int id;
203 tree op;
204 unsigned int count;
205 } *operand_entry_t;
207 static alloc_pool operand_entry_pool;
209 /* This is used to assign a unique ID to each struct operand_entry
210 so that qsort results are identical on different hosts. */
211 static int next_operand_entry_id;
213 /* Starting rank number for a given basic block, so that we can rank
214 operations using unmovable instructions in that BB based on the bb
215 depth. */
216 static long *bb_rank;
218 /* Operand->rank hashtable. */
219 static struct pointer_map_t *operand_rank;
221 /* Forward decls. */
222 static long get_rank (tree);
223 static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
225 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
226 possibly added by gsi_remove. */
228 bool
229 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
231 gimple stmt = gsi_stmt (*gsi);
233 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
234 return gsi_remove (gsi, true);
236 gimple_stmt_iterator prev = *gsi;
237 gsi_prev (&prev);
238 unsigned uid = gimple_uid (stmt);
239 basic_block bb = gimple_bb (stmt);
240 bool ret = gsi_remove (gsi, true);
241 if (!gsi_end_p (prev))
242 gsi_next (&prev);
243 else
244 prev = gsi_start_bb (bb);
245 gimple end_stmt = gsi_stmt (*gsi);
246 while ((stmt = gsi_stmt (prev)) != end_stmt)
248 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
249 gimple_set_uid (stmt, uid);
250 gsi_next (&prev);
252 return ret;
255 /* Bias amount for loop-carried phis. We want this to be larger than
256 the depth of any reassociation tree we can see, but not larger than
257 the rank difference between two blocks. */
258 #define PHI_LOOP_BIAS (1 << 15)
260 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
261 an innermost loop, and the phi has only a single use which is inside
262 the loop, then the rank is the block rank of the loop latch plus an
263 extra bias for the loop-carried dependence. This causes expressions
264 calculated into an accumulator variable to be independent for each
265 iteration of the loop. If STMT is some other phi, the rank is the
266 block rank of its containing block. */
267 static long
268 phi_rank (gimple stmt)
270 basic_block bb = gimple_bb (stmt);
271 struct loop *father = bb->loop_father;
272 tree res;
273 unsigned i;
274 use_operand_p use;
275 gimple use_stmt;
277 /* We only care about real loops (those with a latch). */
278 if (!father->latch)
279 return bb_rank[bb->index];
281 /* Interesting phis must be in headers of innermost loops. */
282 if (bb != father->header
283 || father->inner)
284 return bb_rank[bb->index];
286 /* Ignore virtual SSA_NAMEs. */
287 res = gimple_phi_result (stmt);
288 if (virtual_operand_p (res))
289 return bb_rank[bb->index];
291 /* The phi definition must have a single use, and that use must be
292 within the loop. Otherwise this isn't an accumulator pattern. */
293 if (!single_imm_use (res, &use, &use_stmt)
294 || gimple_bb (use_stmt)->loop_father != father)
295 return bb_rank[bb->index];
297 /* Look for phi arguments from within the loop. If found, bias this phi. */
298 for (i = 0; i < gimple_phi_num_args (stmt); i++)
300 tree arg = gimple_phi_arg_def (stmt, i);
301 if (TREE_CODE (arg) == SSA_NAME
302 && !SSA_NAME_IS_DEFAULT_DEF (arg))
304 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
305 if (gimple_bb (def_stmt)->loop_father == father)
306 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
310 /* Must be an uninteresting phi. */
311 return bb_rank[bb->index];
314 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
315 loop-carried dependence of an innermost loop, return TRUE; else
316 return FALSE. */
317 static bool
318 loop_carried_phi (tree exp)
320 gimple phi_stmt;
321 long block_rank;
323 if (TREE_CODE (exp) != SSA_NAME
324 || SSA_NAME_IS_DEFAULT_DEF (exp))
325 return false;
327 phi_stmt = SSA_NAME_DEF_STMT (exp);
329 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
330 return false;
332 /* Non-loop-carried phis have block rank. Loop-carried phis have
333 an additional bias added in. If this phi doesn't have block rank,
334 it's biased and should not be propagated. */
335 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
337 if (phi_rank (phi_stmt) != block_rank)
338 return true;
340 return false;
343 /* Return the maximum of RANK and the rank that should be propagated
344 from expression OP. For most operands, this is just the rank of OP.
345 For loop-carried phis, the value is zero to avoid undoing the bias
346 in favor of the phi. */
347 static long
348 propagate_rank (long rank, tree op)
350 long op_rank;
352 if (loop_carried_phi (op))
353 return rank;
355 op_rank = get_rank (op);
357 return MAX (rank, op_rank);
360 /* Look up the operand rank structure for expression E. */
362 static inline long
363 find_operand_rank (tree e)
365 void **slot = pointer_map_contains (operand_rank, e);
366 return slot ? (long) (intptr_t) *slot : -1;
369 /* Insert {E,RANK} into the operand rank hashtable. */
371 static inline void
372 insert_operand_rank (tree e, long rank)
374 void **slot;
375 gcc_assert (rank > 0);
376 slot = pointer_map_insert (operand_rank, e);
377 gcc_assert (!*slot);
378 *slot = (void *) (intptr_t) rank;
381 /* Given an expression E, return the rank of the expression. */
383 static long
384 get_rank (tree e)
386 /* Constants have rank 0. */
387 if (is_gimple_min_invariant (e))
388 return 0;
390 /* SSA_NAME's have the rank of the expression they are the result of.
392 For globals and uninitialized values, the rank is 0.
393 For function arguments, use the pre-setup rank.
394 For PHI nodes, stores, asm statements, etc, we use the rank of
395 the BB.
396 For simple operations, the rank is the maximum rank of any of
397 its operands, or the bb_rank, whichever is less.
398 I make no claims that this is optimal, however, it gives good
399 results. */
401 /* We make an exception to the normal ranking system to break
402 dependences of accumulator variables in loops. Suppose we
403 have a simple one-block loop containing:
405 x_1 = phi(x_0, x_2)
406 b = a + x_1
407 c = b + d
408 x_2 = c + e
410 As shown, each iteration of the calculation into x is fully
411 dependent upon the iteration before it. We would prefer to
412 see this in the form:
414 x_1 = phi(x_0, x_2)
415 b = a + d
416 c = b + e
417 x_2 = c + x_1
419 If the loop is unrolled, the calculations of b and c from
420 different iterations can be interleaved.
422 To obtain this result during reassociation, we bias the rank
423 of the phi definition x_1 upward, when it is recognized as an
424 accumulator pattern. The artificial rank causes it to be
425 added last, providing the desired independence. */
427 if (TREE_CODE (e) == SSA_NAME)
429 gimple stmt;
430 long rank;
431 int i, n;
432 tree op;
434 if (SSA_NAME_IS_DEFAULT_DEF (e))
435 return find_operand_rank (e);
437 stmt = SSA_NAME_DEF_STMT (e);
438 if (gimple_code (stmt) == GIMPLE_PHI)
439 return phi_rank (stmt);
441 if (!is_gimple_assign (stmt)
442 || gimple_vdef (stmt))
443 return bb_rank[gimple_bb (stmt)->index];
445 /* If we already have a rank for this expression, use that. */
446 rank = find_operand_rank (e);
447 if (rank != -1)
448 return rank;
450 /* Otherwise, find the maximum rank for the operands. As an
451 exception, remove the bias from loop-carried phis when propagating
452 the rank so that dependent operations are not also biased. */
453 rank = 0;
454 if (gimple_assign_single_p (stmt))
456 tree rhs = gimple_assign_rhs1 (stmt);
457 n = TREE_OPERAND_LENGTH (rhs);
458 if (n == 0)
459 rank = propagate_rank (rank, rhs);
460 else
462 for (i = 0; i < n; i++)
464 op = TREE_OPERAND (rhs, i);
466 if (op != NULL_TREE)
467 rank = propagate_rank (rank, op);
471 else
473 n = gimple_num_ops (stmt);
474 for (i = 1; i < n; i++)
476 op = gimple_op (stmt, i);
477 gcc_assert (op);
478 rank = propagate_rank (rank, op);
482 if (dump_file && (dump_flags & TDF_DETAILS))
484 fprintf (dump_file, "Rank for ");
485 print_generic_expr (dump_file, e, 0);
486 fprintf (dump_file, " is %ld\n", (rank + 1));
489 /* Note the rank in the hashtable so we don't recompute it. */
490 insert_operand_rank (e, (rank + 1));
491 return (rank + 1);
494 /* Globals, etc, are rank 0 */
495 return 0;
499 /* We want integer constants to end up last no matter what, since they are
500 the ones we can do the most with. */
501 #define INTEGER_CONST_TYPE 1 << 3
502 #define FLOAT_CONST_TYPE 1 << 2
503 #define OTHER_CONST_TYPE 1 << 1
505 /* Classify an invariant tree into integer, float, or other, so that
506 we can sort them to be near other constants of the same type. */
507 static inline int
508 constant_type (tree t)
510 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
511 return INTEGER_CONST_TYPE;
512 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
513 return FLOAT_CONST_TYPE;
514 else
515 return OTHER_CONST_TYPE;
518 /* qsort comparison function to sort operand entries PA and PB by rank
519 so that the sorted array is ordered by rank in decreasing order. */
520 static int
521 sort_by_operand_rank (const void *pa, const void *pb)
523 const operand_entry_t oea = *(const operand_entry_t *)pa;
524 const operand_entry_t oeb = *(const operand_entry_t *)pb;
526 /* It's nicer for optimize_expression if constants that are likely
527 to fold when added/multiplied/whatever are put next to each
528 other. Since all constants have rank 0, order them by type. */
529 if (oeb->rank == 0 && oea->rank == 0)
531 if (constant_type (oeb->op) != constant_type (oea->op))
532 return constant_type (oeb->op) - constant_type (oea->op);
533 else
534 /* To make sorting result stable, we use unique IDs to determine
535 order. */
536 return oeb->id - oea->id;
539 /* Lastly, make sure the versions that are the same go next to each
540 other. */
541 if ((oeb->rank - oea->rank == 0)
542 && TREE_CODE (oea->op) == SSA_NAME
543 && TREE_CODE (oeb->op) == SSA_NAME)
545 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
546 versions of removed SSA_NAMEs, so if possible prefer to sort
547 based on the basic block and gimple_uid of the SSA_NAME_DEF_STMT.
548 See PR60418. */
549 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
550 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
551 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
553 gimple stmta = SSA_NAME_DEF_STMT (oea->op);
554 gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
555 basic_block bba = gimple_bb (stmta);
556 basic_block bbb = gimple_bb (stmtb);
557 if (bbb != bba)
559 if (bb_rank[bbb->index] != bb_rank[bba->index])
560 return bb_rank[bbb->index] - bb_rank[bba->index];
562 else
564 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
565 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
566 if (da != db)
567 return da ? 1 : -1;
571 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
572 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
573 else
574 return oeb->id - oea->id;
577 if (oeb->rank != oea->rank)
578 return oeb->rank - oea->rank;
579 else
580 return oeb->id - oea->id;
583 /* Add an operand entry to *OPS for the tree operand OP. */
585 static void
586 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
588 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
590 oe->op = op;
591 oe->rank = get_rank (op);
592 oe->id = next_operand_entry_id++;
593 oe->count = 1;
594 ops->safe_push (oe);
597 /* Add an operand entry to *OPS for the tree operand OP with repeat
598 count REPEAT. */
600 static void
601 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
602 HOST_WIDE_INT repeat)
604 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
606 oe->op = op;
607 oe->rank = get_rank (op);
608 oe->id = next_operand_entry_id++;
609 oe->count = repeat;
610 ops->safe_push (oe);
612 reassociate_stats.pows_encountered++;
615 /* Return true if STMT is a reassociable operation containing a binary
616 operation with tree code CODE, and is inside LOOP. */
618 static bool
619 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
621 basic_block bb = gimple_bb (stmt);
623 if (gimple_bb (stmt) == NULL)
624 return false;
626 if (!flow_bb_inside_loop_p (loop, bb))
627 return false;
629 if (is_gimple_assign (stmt)
630 && gimple_assign_rhs_code (stmt) == code
631 && has_single_use (gimple_assign_lhs (stmt)))
632 return true;
634 return false;
638 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
639 operand of that operation. Otherwise, return NULL. */
641 static tree
642 get_unary_op (tree name, enum tree_code opcode)
644 gimple stmt = SSA_NAME_DEF_STMT (name);
646 if (!is_gimple_assign (stmt))
647 return NULL_TREE;
649 if (gimple_assign_rhs_code (stmt) == opcode)
650 return gimple_assign_rhs1 (stmt);
651 return NULL_TREE;
654 /* If CURR and LAST are a pair of ops that OPCODE allows us to
655 eliminate through equivalences, do so, remove them from OPS, and
656 return true. Otherwise, return false. */
658 static bool
659 eliminate_duplicate_pair (enum tree_code opcode,
660 vec<operand_entry_t> *ops,
661 bool *all_done,
662 unsigned int i,
663 operand_entry_t curr,
664 operand_entry_t last)
667 /* If we have two of the same op, and the opcode is & |, min, or max,
668 we can eliminate one of them.
669 If we have two of the same op, and the opcode is ^, we can
670 eliminate both of them. */
672 if (last && last->op == curr->op)
674 switch (opcode)
676 case MAX_EXPR:
677 case MIN_EXPR:
678 case BIT_IOR_EXPR:
679 case BIT_AND_EXPR:
680 if (dump_file && (dump_flags & TDF_DETAILS))
682 fprintf (dump_file, "Equivalence: ");
683 print_generic_expr (dump_file, curr->op, 0);
684 fprintf (dump_file, " [&|minmax] ");
685 print_generic_expr (dump_file, last->op, 0);
686 fprintf (dump_file, " -> ");
687 print_generic_stmt (dump_file, last->op, 0);
690 ops->ordered_remove (i);
691 reassociate_stats.ops_eliminated ++;
693 return true;
695 case BIT_XOR_EXPR:
696 if (dump_file && (dump_flags & TDF_DETAILS))
698 fprintf (dump_file, "Equivalence: ");
699 print_generic_expr (dump_file, curr->op, 0);
700 fprintf (dump_file, " ^ ");
701 print_generic_expr (dump_file, last->op, 0);
702 fprintf (dump_file, " -> nothing\n");
705 reassociate_stats.ops_eliminated += 2;
707 if (ops->length () == 2)
709 ops->create (0);
710 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
711 *all_done = true;
713 else
715 ops->ordered_remove (i-1);
716 ops->ordered_remove (i-1);
719 return true;
721 default:
722 break;
725 return false;
728 static vec<tree> plus_negates;
730 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
731 expression, look in OPS for a corresponding positive operation to cancel
732 it out. If we find one, remove the other from OPS, replace
733 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
734 return false. */
736 static bool
737 eliminate_plus_minus_pair (enum tree_code opcode,
738 vec<operand_entry_t> *ops,
739 unsigned int currindex,
740 operand_entry_t curr)
742 tree negateop;
743 tree notop;
744 unsigned int i;
745 operand_entry_t oe;
747 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
748 return false;
750 negateop = get_unary_op (curr->op, NEGATE_EXPR);
751 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
752 if (negateop == NULL_TREE && notop == NULL_TREE)
753 return false;
755 /* Any non-negated version will have a rank that is one less than
756 the current rank. So once we hit those ranks, if we don't find
757 one, we can stop. */
759 for (i = currindex + 1;
760 ops->iterate (i, &oe)
761 && oe->rank >= curr->rank - 1 ;
762 i++)
764 if (oe->op == negateop)
767 if (dump_file && (dump_flags & TDF_DETAILS))
769 fprintf (dump_file, "Equivalence: ");
770 print_generic_expr (dump_file, negateop, 0);
771 fprintf (dump_file, " + -");
772 print_generic_expr (dump_file, oe->op, 0);
773 fprintf (dump_file, " -> 0\n");
776 ops->ordered_remove (i);
777 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
778 ops->ordered_remove (currindex);
779 reassociate_stats.ops_eliminated ++;
781 return true;
783 else if (oe->op == notop)
785 tree op_type = TREE_TYPE (oe->op);
787 if (dump_file && (dump_flags & TDF_DETAILS))
789 fprintf (dump_file, "Equivalence: ");
790 print_generic_expr (dump_file, notop, 0);
791 fprintf (dump_file, " + ~");
792 print_generic_expr (dump_file, oe->op, 0);
793 fprintf (dump_file, " -> -1\n");
796 ops->ordered_remove (i);
797 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
798 ops->ordered_remove (currindex);
799 reassociate_stats.ops_eliminated ++;
801 return true;
805 /* CURR->OP is a negate expr in a plus expr: save it for later
806 inspection in repropagate_negates(). */
807 if (negateop != NULL_TREE)
808 plus_negates.safe_push (curr->op);
810 return false;
813 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR->OP is really a
814 bitwise not expression, look in OPS for a corresponding operand to
815 cancel it out. If we find one, remove the other from OPS, replace
816 OPS[CURRINDEX] with 0, and return true. Otherwise, return
817 false. */
819 static bool
820 eliminate_not_pairs (enum tree_code opcode,
821 vec<operand_entry_t> *ops,
822 unsigned int currindex,
823 operand_entry_t curr)
825 tree notop;
826 unsigned int i;
827 operand_entry_t oe;
829 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
830 || TREE_CODE (curr->op) != SSA_NAME)
831 return false;
833 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
834 if (notop == NULL_TREE)
835 return false;
837 /* Any non-not version will have a rank that is one less than
838 the current rank. So once we hit those ranks, if we don't find
839 one, we can stop. */
841 for (i = currindex + 1;
842 ops->iterate (i, &oe)
843 && oe->rank >= curr->rank - 1;
844 i++)
846 if (oe->op == notop)
848 if (dump_file && (dump_flags & TDF_DETAILS))
850 fprintf (dump_file, "Equivalence: ");
851 print_generic_expr (dump_file, notop, 0);
852 if (opcode == BIT_AND_EXPR)
853 fprintf (dump_file, " & ~");
854 else if (opcode == BIT_IOR_EXPR)
855 fprintf (dump_file, " | ~");
856 print_generic_expr (dump_file, oe->op, 0);
857 if (opcode == BIT_AND_EXPR)
858 fprintf (dump_file, " -> 0\n");
859 else if (opcode == BIT_IOR_EXPR)
860 fprintf (dump_file, " -> -1\n");
863 if (opcode == BIT_AND_EXPR)
864 oe->op = build_zero_cst (TREE_TYPE (oe->op));
865 else if (opcode == BIT_IOR_EXPR)
866 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
868 reassociate_stats.ops_eliminated += ops->length () - 1;
869 ops->truncate (0);
870 ops->quick_push (oe);
871 return true;
875 return false;
878 /* Use constant value that may be present in OPS to try to eliminate
879 operands. Note that this function is only really used when we've
880 eliminated ops for other reasons, or merged constants. Across
881 single statements, fold already does all of this, plus more. There
882 is little point in duplicating logic, so I've only included the
883 identities that I could ever construct testcases to trigger. */
885 static void
886 eliminate_using_constants (enum tree_code opcode,
887 vec<operand_entry_t> *ops)
889 operand_entry_t oelast = ops->last ();
890 tree type = TREE_TYPE (oelast->op);
892 if (oelast->rank == 0
893 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
895 switch (opcode)
897 case BIT_AND_EXPR:
898 if (integer_zerop (oelast->op))
900 if (ops->length () != 1)
902 if (dump_file && (dump_flags & TDF_DETAILS))
903 fprintf (dump_file, "Found & 0, removing all other ops\n");
905 reassociate_stats.ops_eliminated += ops->length () - 1;
907 ops->truncate (0);
908 ops->quick_push (oelast);
909 return;
912 else if (integer_all_onesp (oelast->op))
914 if (ops->length () != 1)
916 if (dump_file && (dump_flags & TDF_DETAILS))
917 fprintf (dump_file, "Found & -1, removing\n");
918 ops->pop ();
919 reassociate_stats.ops_eliminated++;
922 break;
923 case BIT_IOR_EXPR:
924 if (integer_all_onesp (oelast->op))
926 if (ops->length () != 1)
928 if (dump_file && (dump_flags & TDF_DETAILS))
929 fprintf (dump_file, "Found | -1, removing all other ops\n");
931 reassociate_stats.ops_eliminated += ops->length () - 1;
933 ops->truncate (0);
934 ops->quick_push (oelast);
935 return;
938 else if (integer_zerop (oelast->op))
940 if (ops->length () != 1)
942 if (dump_file && (dump_flags & TDF_DETAILS))
943 fprintf (dump_file, "Found | 0, removing\n");
944 ops->pop ();
945 reassociate_stats.ops_eliminated++;
948 break;
949 case MULT_EXPR:
950 if (integer_zerop (oelast->op)
951 || (FLOAT_TYPE_P (type)
952 && !HONOR_NANS (TYPE_MODE (type))
953 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
954 && real_zerop (oelast->op)))
956 if (ops->length () != 1)
958 if (dump_file && (dump_flags & TDF_DETAILS))
959 fprintf (dump_file, "Found * 0, removing all other ops\n");
961 reassociate_stats.ops_eliminated += ops->length () - 1;
962 ops->truncate (1);
963 ops->quick_push (oelast);
964 return;
967 else if (integer_onep (oelast->op)
968 || (FLOAT_TYPE_P (type)
969 && !HONOR_SNANS (TYPE_MODE (type))
970 && real_onep (oelast->op)))
972 if (ops->length () != 1)
974 if (dump_file && (dump_flags & TDF_DETAILS))
975 fprintf (dump_file, "Found * 1, removing\n");
976 ops->pop ();
977 reassociate_stats.ops_eliminated++;
978 return;
981 break;
982 case BIT_XOR_EXPR:
983 case PLUS_EXPR:
984 case MINUS_EXPR:
985 if (integer_zerop (oelast->op)
986 || (FLOAT_TYPE_P (type)
987 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
988 && fold_real_zero_addition_p (type, oelast->op,
989 opcode == MINUS_EXPR)))
991 if (ops->length () != 1)
993 if (dump_file && (dump_flags & TDF_DETAILS))
994 fprintf (dump_file, "Found [|^+] 0, removing\n");
995 ops->pop ();
996 reassociate_stats.ops_eliminated++;
997 return;
1000 break;
1001 default:
1002 break;
1008 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
1009 bool, bool);
1011 /* Structure for tracking and counting operands. */
1012 typedef struct oecount_s {
1013 int cnt;
1014 int id;
1015 enum tree_code oecode;
1016 tree op;
1017 } oecount;
1020 /* The heap for the oecount hashtable and the sorted list of operands. */
1021 static vec<oecount> cvec;
1024 /* Oecount hashtable helpers. */
1026 struct oecount_hasher
1028 typedef int value_type;
1029 typedef int compare_type;
1030 typedef int store_values_directly;
1031 static inline hashval_t hash (const value_type &);
1032 static inline bool equal (const value_type &, const compare_type &);
1033 static bool is_deleted (int &v) { return v == 1; }
1034 static void mark_deleted (int &e) { e = 1; }
1035 static bool is_empty (int &v) { return v == 0; }
1036 static void mark_empty (int &e) { e = 0; }
1037 static void remove (int &) {}
1040 /* Hash function for oecount. */
1042 inline hashval_t
1043 oecount_hasher::hash (const value_type &p)
1045 const oecount *c = &cvec[p - 42];
1046 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1049 /* Comparison function for oecount. */
1051 inline bool
1052 oecount_hasher::equal (const value_type &p1, const compare_type &p2)
1054 const oecount *c1 = &cvec[p1 - 42];
1055 const oecount *c2 = &cvec[p2 - 42];
1056 return (c1->oecode == c2->oecode
1057 && c1->op == c2->op);
1060 /* Comparison function for qsort sorting oecount elements by count. */
1062 static int
1063 oecount_cmp (const void *p1, const void *p2)
1065 const oecount *c1 = (const oecount *)p1;
1066 const oecount *c2 = (const oecount *)p2;
1067 if (c1->cnt != c2->cnt)
1068 return c1->cnt - c2->cnt;
1069 else
1070 /* If counts are identical, use unique IDs to stabilize qsort. */
1071 return c1->id - c2->id;
1074 /* Return TRUE iff STMT represents a builtin call that raises OP
1075 to some exponent. */
1077 static bool
1078 stmt_is_power_of_op (gimple stmt, tree op)
1080 tree fndecl;
1082 if (!is_gimple_call (stmt))
1083 return false;
1085 fndecl = gimple_call_fndecl (stmt);
1087 if (!fndecl
1088 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1089 return false;
1091 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1093 CASE_FLT_FN (BUILT_IN_POW):
1094 CASE_FLT_FN (BUILT_IN_POWI):
1095 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1097 default:
1098 return false;
1102 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1103 in place and return the result. Assumes that stmt_is_power_of_op
1104 was previously called for STMT and returned TRUE. */
1106 static HOST_WIDE_INT
1107 decrement_power (gimple stmt)
1109 REAL_VALUE_TYPE c, cint;
1110 HOST_WIDE_INT power;
1111 tree arg1;
1113 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1115 CASE_FLT_FN (BUILT_IN_POW):
1116 arg1 = gimple_call_arg (stmt, 1);
1117 c = TREE_REAL_CST (arg1);
1118 power = real_to_integer (&c) - 1;
1119 real_from_integer (&cint, VOIDmode, power, SIGNED);
1120 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1121 return power;
1123 CASE_FLT_FN (BUILT_IN_POWI):
1124 arg1 = gimple_call_arg (stmt, 1);
1125 power = TREE_INT_CST_LOW (arg1) - 1;
1126 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1127 return power;
1129 default:
1130 gcc_unreachable ();
1134 /* Find the single immediate use of STMT's LHS, and replace it
1135 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1136 replace *DEF with OP as well. */
1138 static void
1139 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1141 tree lhs;
1142 gimple use_stmt;
1143 use_operand_p use;
1144 gimple_stmt_iterator gsi;
1146 if (is_gimple_call (stmt))
1147 lhs = gimple_call_lhs (stmt);
1148 else
1149 lhs = gimple_assign_lhs (stmt);
1151 gcc_assert (has_single_use (lhs));
1152 single_imm_use (lhs, &use, &use_stmt);
1153 if (lhs == *def)
1154 *def = op;
1155 SET_USE (use, op);
1156 if (TREE_CODE (op) != SSA_NAME)
1157 update_stmt (use_stmt);
1158 gsi = gsi_for_stmt (stmt);
1159 unlink_stmt_vdef (stmt);
1160 reassoc_remove_stmt (&gsi);
1161 release_defs (stmt);
1164 /* Walks the linear chain with result *DEF searching for an operation
1165 with operand OP and code OPCODE removing that from the chain. *DEF
1166 is updated if there is only one operand but no operation left. */
1168 static void
1169 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1171 gimple stmt = SSA_NAME_DEF_STMT (*def);
1175 tree name;
1177 if (opcode == MULT_EXPR
1178 && stmt_is_power_of_op (stmt, op))
1180 if (decrement_power (stmt) == 1)
1181 propagate_op_to_single_use (op, stmt, def);
1182 return;
1185 name = gimple_assign_rhs1 (stmt);
1187 /* If this is the operation we look for and one of the operands
1188 is ours simply propagate the other operand into the stmts
1189 single use. */
1190 if (gimple_assign_rhs_code (stmt) == opcode
1191 && (name == op
1192 || gimple_assign_rhs2 (stmt) == op))
1194 if (name == op)
1195 name = gimple_assign_rhs2 (stmt);
1196 propagate_op_to_single_use (name, stmt, def);
1197 return;
1200 /* We might have a multiply of two __builtin_pow* calls, and
1201 the operand might be hiding in the rightmost one. */
1202 if (opcode == MULT_EXPR
1203 && gimple_assign_rhs_code (stmt) == opcode
1204 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1205 && has_single_use (gimple_assign_rhs2 (stmt)))
1207 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1208 if (stmt_is_power_of_op (stmt2, op))
1210 if (decrement_power (stmt2) == 1)
1211 propagate_op_to_single_use (op, stmt2, def);
1212 return;
1216 /* Continue walking the chain. */
1217 gcc_assert (name != op
1218 && TREE_CODE (name) == SSA_NAME);
1219 stmt = SSA_NAME_DEF_STMT (name);
1221 while (1);
1224 /* Returns true if statement S1 dominates statement S2. Like
1225 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1227 static bool
1228 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1230 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1232 /* If bb1 is NULL, it should be the GIMPLE_NOP def stmt of a default
1233 definition (D) SSA_NAME. Assume it lives at the beginning of the
1234 function and thus dominates everything. */
1235 if (!bb1 || s1 == s2)
1236 return true;
1238 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1239 if (!bb2)
1240 return false;
1242 if (bb1 == bb2)
1244 /* PHIs in the same basic block are assumed to be
1245 executed all in parallel, if only one stmt is a PHI,
1246 it dominates the other stmt in the same basic block. */
1247 if (gimple_code (s1) == GIMPLE_PHI)
1248 return true;
1250 if (gimple_code (s2) == GIMPLE_PHI)
1251 return false;
1253 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1255 if (gimple_uid (s1) < gimple_uid (s2))
1256 return true;
1258 if (gimple_uid (s1) > gimple_uid (s2))
1259 return false;
1261 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1262 unsigned int uid = gimple_uid (s1);
1263 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1265 gimple s = gsi_stmt (gsi);
1266 if (gimple_uid (s) != uid)
1267 break;
1268 if (s == s2)
1269 return true;
1272 return false;
1275 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1278 /* Insert STMT after INSERT_POINT. */
1280 static void
1281 insert_stmt_after (gimple stmt, gimple insert_point)
1283 gimple_stmt_iterator gsi;
1284 basic_block bb;
1286 if (gimple_code (insert_point) == GIMPLE_PHI)
1287 bb = gimple_bb (insert_point);
1288 else if (!stmt_ends_bb_p (insert_point))
1290 gsi = gsi_for_stmt (insert_point);
1291 gimple_set_uid (stmt, gimple_uid (insert_point));
1292 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1293 return;
1295 else
1296 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1297 thus if it must end a basic block, it should be a call that can
1298 throw, or some assignment that can throw. If it throws, the LHS
1299 of it will not be initialized though, so only valid places using
1300 the SSA_NAME should be dominated by the fallthru edge. */
1301 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1302 gsi = gsi_after_labels (bb);
1303 if (gsi_end_p (gsi))
1305 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1306 gimple_set_uid (stmt,
1307 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1309 else
1310 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1311 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1314 /* Builds one statement performing OP1 OPCODE OP2, using a new SSA name
1315 of TYPE for the result. Places the statement after the definition of
1316 either OP1 or OP2. Returns the new statement. */
1318 static gimple
1319 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1321 gimple op1def = NULL, op2def = NULL;
1322 gimple_stmt_iterator gsi;
1323 tree op;
1324 gimple sum;
1326 /* Create the addition statement. */
1327 op = make_ssa_name (type, NULL);
1328 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1330 /* Find an insertion place and insert. */
1331 if (TREE_CODE (op1) == SSA_NAME)
1332 op1def = SSA_NAME_DEF_STMT (op1);
1333 if (TREE_CODE (op2) == SSA_NAME)
1334 op2def = SSA_NAME_DEF_STMT (op2);
1335 if ((!op1def || gimple_nop_p (op1def))
1336 && (!op2def || gimple_nop_p (op2def)))
1338 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1339 if (gsi_end_p (gsi))
1341 gimple_stmt_iterator gsi2
1342 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1343 gimple_set_uid (sum,
1344 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1346 else
1347 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1348 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1350 else
1352 gimple insert_point;
1353 if ((!op1def || gimple_nop_p (op1def))
1354 || (op2def && !gimple_nop_p (op2def)
1355 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1356 insert_point = op2def;
1357 else
1358 insert_point = op1def;
1359 insert_stmt_after (sum, insert_point);
1361 update_stmt (sum);
1363 return sum;
1366 /* Perform un-distribution of divisions and multiplications.
1367 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1368 to (A + B) / X for real X.
1370 The algorithm is organized as follows.
1372 - First we walk the addition chain *OPS looking for summands that
1373 are defined by a multiplication or a real division. This results
1374 in the candidates bitmap with relevant indices into *OPS.
1376 - Second we build the chains of multiplications or divisions for
1377 these candidates, counting the number of occurrences of (operand, code)
1378 pairs in all of the candidates chains.
1380 - Third we sort the (operand, code) pairs by number of occurrence and
1381 process them starting with the pair with the most uses.
1383 * For each such pair we walk the candidates again to build a
1384 second candidate bitmap noting all multiplication/division chains
1385 that have at least one occurrence of (operand, code).
1387 * We build an alternate addition chain only covering these
1388 candidates with one (operand, code) operation removed from their
1389 multiplication/division chain.
1391 * The first candidate gets replaced by the alternate addition chain
1392 multiplied/divided by the operand.
1394 * All candidate chains get disabled for further processing and
1395 processing of (operand, code) pairs continues.
1397 The alternate addition chains built are re-processed by the main
1398 reassociation algorithm which allows optimizing a * x * y + b * y * x
1399 to (a + b ) * x * y in one invocation of the reassociation pass. */
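/* A small worked sketch of the bookkeeping above (the names are invented
   for the example).  Given the addition chain

     { a*x, b*x, c*y }        with opcode PLUS_EXPR

   the first walk marks all three summands as candidates, and the counting
   table ends up with (x, MULT_EXPR) -> 2 and a count of 1 for each of
   (y, MULT_EXPR), (a, MULT_EXPR), (b, MULT_EXPR) and (c, MULT_EXPR).
   Processing the most frequent pair (x, MULT_EXPR) selects the chains for
   a*x and b*x, strips one x from each, builds the alternate chain a + b,
   and replaces the first candidate with (a + b) * x, leaving

     { (a + b) * x, 0, c*y }

   for the main reassociation algorithm to clean up and rewrite.  */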
1401 static bool
1402 undistribute_ops_list (enum tree_code opcode,
1403 vec<operand_entry_t> *ops, struct loop *loop)
1405 unsigned int length = ops->length ();
1406 operand_entry_t oe1;
1407 unsigned i, j;
1408 sbitmap candidates, candidates2;
1409 unsigned nr_candidates, nr_candidates2;
1410 sbitmap_iterator sbi0;
1411 vec<operand_entry_t> *subops;
1412 bool changed = false;
1413 int next_oecount_id = 0;
1415 if (length <= 1
1416 || opcode != PLUS_EXPR)
1417 return false;
1419 /* Build a list of candidates to process. */
1420 candidates = sbitmap_alloc (length);
1421 bitmap_clear (candidates);
1422 nr_candidates = 0;
1423 FOR_EACH_VEC_ELT (*ops, i, oe1)
1425 enum tree_code dcode;
1426 gimple oe1def;
1428 if (TREE_CODE (oe1->op) != SSA_NAME)
1429 continue;
1430 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1431 if (!is_gimple_assign (oe1def))
1432 continue;
1433 dcode = gimple_assign_rhs_code (oe1def);
1434 if ((dcode != MULT_EXPR
1435 && dcode != RDIV_EXPR)
1436 || !is_reassociable_op (oe1def, dcode, loop))
1437 continue;
1439 bitmap_set_bit (candidates, i);
1440 nr_candidates++;
1443 if (nr_candidates < 2)
1445 sbitmap_free (candidates);
1446 return false;
1449 if (dump_file && (dump_flags & TDF_DETAILS))
1451 fprintf (dump_file, "searching for un-distribute opportunities ");
1452 print_generic_expr (dump_file,
1453 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1454 fprintf (dump_file, " %d\n", nr_candidates);
1457 /* Build linearized sub-operand lists and the counting table. */
1458 cvec.create (0);
1460 hash_table<oecount_hasher> ctable (15);
1462 /* ??? Macro arguments cannot have multi-argument template types in
1463 them. This typedef is needed to work around that limitation. */
1464 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1465 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1466 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1468 gimple oedef;
1469 enum tree_code oecode;
1470 unsigned j;
1472 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1473 oecode = gimple_assign_rhs_code (oedef);
1474 linearize_expr_tree (&subops[i], oedef,
1475 associative_tree_code (oecode), false);
1477 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1479 oecount c;
1480 int *slot;
1481 int idx;
1482 c.oecode = oecode;
1483 c.cnt = 1;
1484 c.id = next_oecount_id++;
1485 c.op = oe1->op;
1486 cvec.safe_push (c);
1487 idx = cvec.length () + 41;
1488 slot = ctable.find_slot (idx, INSERT);
1489 if (!*slot)
1491 *slot = idx;
1493 else
1495 cvec.pop ();
1496 cvec[*slot - 42].cnt++;
1501 /* Sort the counting table. */
1502 cvec.qsort (oecount_cmp);
1504 if (dump_file && (dump_flags & TDF_DETAILS))
1506 oecount *c;
1507 fprintf (dump_file, "Candidates:\n");
1508 FOR_EACH_VEC_ELT (cvec, j, c)
1510 fprintf (dump_file, " %u %s: ", c->cnt,
1511 c->oecode == MULT_EXPR
1512 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1513 print_generic_expr (dump_file, c->op, 0);
1514 fprintf (dump_file, "\n");
1518 /* Process the (operand, code) pairs in order of most occurrence. */
1519 candidates2 = sbitmap_alloc (length);
1520 while (!cvec.is_empty ())
1522 oecount *c = &cvec.last ();
1523 if (c->cnt < 2)
1524 break;
1526 /* Now collect the operands in the outer chain that contain
1527 the common operand in their inner chain. */
1528 bitmap_clear (candidates2);
1529 nr_candidates2 = 0;
1530 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1532 gimple oedef;
1533 enum tree_code oecode;
1534 unsigned j;
1535 tree op = (*ops)[i]->op;
1537 /* If we undistributed in this chain already this may be
1538 a constant. */
1539 if (TREE_CODE (op) != SSA_NAME)
1540 continue;
1542 oedef = SSA_NAME_DEF_STMT (op);
1543 oecode = gimple_assign_rhs_code (oedef);
1544 if (oecode != c->oecode)
1545 continue;
1547 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1549 if (oe1->op == c->op)
1551 bitmap_set_bit (candidates2, i);
1552 ++nr_candidates2;
1553 break;
1558 if (nr_candidates2 >= 2)
1560 operand_entry_t oe1, oe2;
1561 gimple prod;
1562 int first = bitmap_first_set_bit (candidates2);
1564 /* Build the new addition chain. */
1565 oe1 = (*ops)[first];
1566 if (dump_file && (dump_flags & TDF_DETAILS))
1568 fprintf (dump_file, "Building (");
1569 print_generic_expr (dump_file, oe1->op, 0);
1571 zero_one_operation (&oe1->op, c->oecode, c->op);
1572 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1574 gimple sum;
1575 oe2 = (*ops)[i];
1576 if (dump_file && (dump_flags & TDF_DETAILS))
1578 fprintf (dump_file, " + ");
1579 print_generic_expr (dump_file, oe2->op, 0);
1581 zero_one_operation (&oe2->op, c->oecode, c->op);
1582 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1583 oe1->op, oe2->op, opcode);
1584 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1585 oe2->rank = 0;
1586 oe1->op = gimple_get_lhs (sum);
1589 /* Apply the multiplication/division. */
1590 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1591 oe1->op, c->op, c->oecode);
1592 if (dump_file && (dump_flags & TDF_DETAILS))
1594 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1595 print_generic_expr (dump_file, c->op, 0);
1596 fprintf (dump_file, "\n");
1599 /* Record it in the addition chain and disable further
1600 undistribution with this op. */
1601 oe1->op = gimple_assign_lhs (prod);
1602 oe1->rank = get_rank (oe1->op);
1603 subops[first].release ();
1605 changed = true;
1608 cvec.pop ();
1611 for (i = 0; i < ops->length (); ++i)
1612 subops[i].release ();
1613 free (subops);
1614 cvec.release ();
1615 sbitmap_free (candidates);
1616 sbitmap_free (candidates2);
1618 return changed;
1621 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1622 expression, examine the other OPS to see if any of them are comparisons
1623 of the same values, which we may be able to combine or eliminate.
1624 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1626 static bool
1627 eliminate_redundant_comparison (enum tree_code opcode,
1628 vec<operand_entry_t> *ops,
1629 unsigned int currindex,
1630 operand_entry_t curr)
1632 tree op1, op2;
1633 enum tree_code lcode, rcode;
1634 gimple def1, def2;
1635 int i;
1636 operand_entry_t oe;
1638 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1639 return false;
1641 /* Check that CURR is a comparison. */
1642 if (TREE_CODE (curr->op) != SSA_NAME)
1643 return false;
1644 def1 = SSA_NAME_DEF_STMT (curr->op);
1645 if (!is_gimple_assign (def1))
1646 return false;
1647 lcode = gimple_assign_rhs_code (def1);
1648 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1649 return false;
1650 op1 = gimple_assign_rhs1 (def1);
1651 op2 = gimple_assign_rhs2 (def1);
1653 /* Now look for a similar comparison in the remaining OPS. */
1654 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1656 tree t;
1658 if (TREE_CODE (oe->op) != SSA_NAME)
1659 continue;
1660 def2 = SSA_NAME_DEF_STMT (oe->op);
1661 if (!is_gimple_assign (def2))
1662 continue;
1663 rcode = gimple_assign_rhs_code (def2);
1664 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1665 continue;
1667 /* If we got here, we have a match. See if we can combine the
1668 two comparisons. */
1669 if (opcode == BIT_IOR_EXPR)
1670 t = maybe_fold_or_comparisons (lcode, op1, op2,
1671 rcode, gimple_assign_rhs1 (def2),
1672 gimple_assign_rhs2 (def2));
1673 else
1674 t = maybe_fold_and_comparisons (lcode, op1, op2,
1675 rcode, gimple_assign_rhs1 (def2),
1676 gimple_assign_rhs2 (def2));
1677 if (!t)
1678 continue;
1680 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1681 always give us a boolean_type_node value back. If the original
1682 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1683 we need to convert. */
1684 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1685 t = fold_convert (TREE_TYPE (curr->op), t);
1687 if (TREE_CODE (t) != INTEGER_CST
1688 && !operand_equal_p (t, curr->op, 0))
1690 enum tree_code subcode;
1691 tree newop1, newop2;
1692 if (!COMPARISON_CLASS_P (t))
1693 continue;
1694 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1695 STRIP_USELESS_TYPE_CONVERSION (newop1);
1696 STRIP_USELESS_TYPE_CONVERSION (newop2);
1697 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1698 continue;
1701 if (dump_file && (dump_flags & TDF_DETAILS))
1703 fprintf (dump_file, "Equivalence: ");
1704 print_generic_expr (dump_file, curr->op, 0);
1705 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1706 print_generic_expr (dump_file, oe->op, 0);
1707 fprintf (dump_file, " -> ");
1708 print_generic_expr (dump_file, t, 0);
1709 fprintf (dump_file, "\n");
1712 /* Now we can delete oe, as it has been subsumed by the new combined
1713 expression t. */
1714 ops->ordered_remove (i);
1715 reassociate_stats.ops_eliminated ++;
1717 /* If t is the same as curr->op, we're done. Otherwise we must
1718 replace curr->op with t. Special case is if we got a constant
1719 back, in which case we add it to the end instead of in place of
1720 the current entry. */
1721 if (TREE_CODE (t) == INTEGER_CST)
1723 ops->ordered_remove (currindex);
1724 add_to_ops_vec (ops, t);
1726 else if (!operand_equal_p (t, curr->op, 0))
1728 gimple sum;
1729 enum tree_code subcode;
1730 tree newop1;
1731 tree newop2;
1732 gcc_assert (COMPARISON_CLASS_P (t));
1733 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1734 STRIP_USELESS_TYPE_CONVERSION (newop1);
1735 STRIP_USELESS_TYPE_CONVERSION (newop2);
1736 gcc_checking_assert (is_gimple_val (newop1)
1737 && is_gimple_val (newop2));
1738 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1739 curr->op = gimple_get_lhs (sum);
1741 return true;
1744 return false;
1747 /* Perform various identities and other optimizations on the list of
1748 operand entries, stored in OPS. The tree code for the binary
1749 operation between all the operands is OPCODE. */
1751 static void
1752 optimize_ops_list (enum tree_code opcode,
1753 vec<operand_entry_t> *ops)
1755 unsigned int length = ops->length ();
1756 unsigned int i;
1757 operand_entry_t oe;
1758 operand_entry_t oelast = NULL;
1759 bool iterate = false;
1761 if (length == 1)
1762 return;
1764 oelast = ops->last ();
1766 /* If the last two are constants, pop the constants off, merge them
1767 and try the next two. */
1768 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1770 operand_entry_t oelm1 = (*ops)[length - 2];
1772 if (oelm1->rank == 0
1773 && is_gimple_min_invariant (oelm1->op)
1774 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1775 TREE_TYPE (oelast->op)))
1777 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1778 oelm1->op, oelast->op);
1780 if (folded && is_gimple_min_invariant (folded))
1782 if (dump_file && (dump_flags & TDF_DETAILS))
1783 fprintf (dump_file, "Merging constants\n");
1785 ops->pop ();
1786 ops->pop ();
1788 add_to_ops_vec (ops, folded);
1789 reassociate_stats.constants_eliminated++;
1791 optimize_ops_list (opcode, ops);
1792 return;
1797 eliminate_using_constants (opcode, ops);
1798 oelast = NULL;
1800 for (i = 0; ops->iterate (i, &oe);)
1802 bool done = false;
1804 if (eliminate_not_pairs (opcode, ops, i, oe))
1805 return;
1806 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1807 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1808 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1810 if (done)
1811 return;
1812 iterate = true;
1813 oelast = NULL;
1814 continue;
1816 oelast = oe;
1817 i++;
1820 length = ops->length ();
1821 oelast = ops->last ();
1823 if (iterate)
1824 optimize_ops_list (opcode, ops);
1827 /* The following functions are subroutines to optimize_range_tests and allow
1828 it to try to change a logical combination of comparisons into a range
1829 test.
1831 For example, both
1832 X == 2 || X == 5 || X == 3 || X == 4
1833 and
1834 X >= 2 && X <= 5
1835 are converted to
1836 (unsigned) (X - 2) <= 3
1838 For more information see comments above fold_range_test in fold-const.c,
1839 this implementation is for GIMPLE. */
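/* The arithmetic behind that conversion: X == 2, 3, 4 or 5 is the same as
   2 <= X && X <= 5, and subtracting the low bound while viewing the result
   as unsigned folds the two-sided test into one comparison, because any X
   below 2 wraps around to a value far above 3:

     (unsigned) (X - 2) <= 3   <=>   2 <= X && X <= 5

   so the whole chain of comparisons collapses into a single subtraction
   and compare.  */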
1841 struct range_entry
1843 tree exp;
1844 tree low;
1845 tree high;
1846 bool in_p;
1847 bool strict_overflow_p;
1848 unsigned int idx, next;
1851 /* This is similar to make_range in fold-const.c, but on top of
1852 GIMPLE instead of trees. If EXP is non-NULL, it should be
1853 an SSA_NAME and STMT argument is ignored, otherwise STMT
1854 argument should be a GIMPLE_COND. */
1856 static void
1857 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1859 int in_p;
1860 tree low, high;
1861 bool is_bool, strict_overflow_p;
1863 r->exp = NULL_TREE;
1864 r->in_p = false;
1865 r->strict_overflow_p = false;
1866 r->low = NULL_TREE;
1867 r->high = NULL_TREE;
1868 if (exp != NULL_TREE
1869 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1870 return;
1872 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1873 and see if we can refine the range. Some of the cases below may not
1874 happen, but it doesn't seem worth worrying about this. We "continue"
1875 the outer loop when we've changed something; otherwise we "break"
1876 the switch, which will "break" the while. */
1877 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1878 high = low;
1879 in_p = 0;
1880 strict_overflow_p = false;
1881 is_bool = false;
1882 if (exp == NULL_TREE)
1883 is_bool = true;
1884 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1886 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1887 is_bool = true;
1888 else
1889 return;
1891 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1892 is_bool = true;
1894 while (1)
1896 enum tree_code code;
1897 tree arg0, arg1, exp_type;
1898 tree nexp;
1899 location_t loc;
1901 if (exp != NULL_TREE)
1903 if (TREE_CODE (exp) != SSA_NAME
1904 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1905 break;
1907 stmt = SSA_NAME_DEF_STMT (exp);
1908 if (!is_gimple_assign (stmt))
1909 break;
1911 code = gimple_assign_rhs_code (stmt);
1912 arg0 = gimple_assign_rhs1 (stmt);
1913 arg1 = gimple_assign_rhs2 (stmt);
1914 exp_type = TREE_TYPE (exp);
1916 else
1918 code = gimple_cond_code (stmt);
1919 arg0 = gimple_cond_lhs (stmt);
1920 arg1 = gimple_cond_rhs (stmt);
1921 exp_type = boolean_type_node;
1924 if (TREE_CODE (arg0) != SSA_NAME)
1925 break;
1926 loc = gimple_location (stmt);
1927 switch (code)
1929 case BIT_NOT_EXPR:
1930 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1931 /* Ensure the range is either +[-,0], +[0,0],
1932 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1933 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1934 or similar expression of unconditional true or
1935 false, it should not be negated. */
1936 && ((high && integer_zerop (high))
1937 || (low && integer_onep (low))))
1939 in_p = !in_p;
1940 exp = arg0;
1941 continue;
1943 break;
1944 case SSA_NAME:
1945 exp = arg0;
1946 continue;
1947 CASE_CONVERT:
1948 if (is_bool)
1949 goto do_default;
1950 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1952 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1953 is_bool = true;
1954 else
1955 return;
1957 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1958 is_bool = true;
1959 goto do_default;
1960 case EQ_EXPR:
1961 case NE_EXPR:
1962 case LT_EXPR:
1963 case LE_EXPR:
1964 case GE_EXPR:
1965 case GT_EXPR:
1966 is_bool = true;
1967 /* FALLTHRU */
1968 default:
1969 if (!is_bool)
1970 return;
1971 do_default:
1972 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1973 &low, &high, &in_p,
1974 &strict_overflow_p);
1975 if (nexp != NULL_TREE)
1977 exp = nexp;
1978 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1979 continue;
1981 break;
1983 break;
1985 if (is_bool)
1987 r->exp = exp;
1988 r->in_p = in_p;
1989 r->low = low;
1990 r->high = high;
1991 r->strict_overflow_p = strict_overflow_p;
1995 /* Comparison function for qsort. Sort entries
1996 without SSA_NAME exp first, then with SSA_NAMEs sorted
1997 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1998 by increasing ->low and if ->low is the same, by increasing
1999 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2000 maximum. */
2002 static int
2003 range_entry_cmp (const void *a, const void *b)
2005 const struct range_entry *p = (const struct range_entry *) a;
2006 const struct range_entry *q = (const struct range_entry *) b;
2008 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2010 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2012 /* Group range_entries for the same SSA_NAME together. */
2013 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2014 return -1;
2015 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2016 return 1;
2017 /* If ->low is different, NULL low goes first, then by
2018 ascending low. */
2019 if (p->low != NULL_TREE)
2021 if (q->low != NULL_TREE)
2023 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2024 p->low, q->low);
2025 if (tem && integer_onep (tem))
2026 return -1;
2027 tem = fold_binary (GT_EXPR, boolean_type_node,
2028 p->low, q->low);
2029 if (tem && integer_onep (tem))
2030 return 1;
2032 else
2033 return 1;
2035 else if (q->low != NULL_TREE)
2036 return -1;
2037 /* If ->high is different, NULL high goes last, before that by
2038 ascending high. */
2039 if (p->high != NULL_TREE)
2041 if (q->high != NULL_TREE)
2043 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2044 p->high, q->high);
2045 if (tem && integer_onep (tem))
2046 return -1;
2047 tem = fold_binary (GT_EXPR, boolean_type_node,
2048 p->high, q->high);
2049 if (tem && integer_onep (tem))
2050 return 1;
2052 else
2053 return -1;
2055 else if (p->high != NULL_TREE)
2056 return 1;
2057 /* If both ranges are the same, sort below by ascending idx. */
2059 else
2060 return 1;
2062 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2063 return -1;
2065 if (p->idx < q->idx)
2066 return -1;
2067 else
2069 gcc_checking_assert (p->idx > q->idx);
2070 return 1;
2074 /* Helper routine of optimize_range_test.
2075 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2076 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2077 OPCODE and OPS are arguments of optimize_range_tests. Return
2078 true if the range merge has been successful.
2079 If OPCODE is ERROR_MARK, this is called from within
2080 maybe_optimize_range_tests and is performing inter-bb range optimization.
2081 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2082 oe->rank. */
2084 static bool
2085 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2086 unsigned int count, enum tree_code opcode,
2087 vec<operand_entry_t> *ops, tree exp, bool in_p,
2088 tree low, tree high, bool strict_overflow_p)
2090 operand_entry_t oe = (*ops)[range->idx];
2091 tree op = oe->op;
2092 gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2093 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2094 location_t loc = gimple_location (stmt);
2095 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2096 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
2097 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2098 gimple_stmt_iterator gsi;
2100 if (tem == NULL_TREE)
2101 return false;
2103 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2104 warning_at (loc, OPT_Wstrict_overflow,
2105 "assuming signed overflow does not occur "
2106 "when simplifying range test");
2108 if (dump_file && (dump_flags & TDF_DETAILS))
2110 struct range_entry *r;
2111 fprintf (dump_file, "Optimizing range tests ");
2112 print_generic_expr (dump_file, range->exp, 0);
2113 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2114 print_generic_expr (dump_file, range->low, 0);
2115 fprintf (dump_file, ", ");
2116 print_generic_expr (dump_file, range->high, 0);
2117 fprintf (dump_file, "]");
2118 for (r = otherrange; r < otherrange + count; r++)
2120 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2121 print_generic_expr (dump_file, r->low, 0);
2122 fprintf (dump_file, ", ");
2123 print_generic_expr (dump_file, r->high, 0);
2124 fprintf (dump_file, "]");
2126 fprintf (dump_file, "\n into ");
2127 print_generic_expr (dump_file, tem, 0);
2128 fprintf (dump_file, "\n");
2131 if (opcode == BIT_IOR_EXPR
2132 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2133 tem = invert_truthvalue_loc (loc, tem);
2135 tem = fold_convert_loc (loc, optype, tem);
2136 gsi = gsi_for_stmt (stmt);
2137 /* In rare cases range->exp can be equal to lhs of stmt.
2138 In that case we have to insert after the stmt rather than before
2139 it. */
2140 if (op == range->exp)
2141 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2142 GSI_CONTINUE_LINKING);
2143 else
2145 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2146 GSI_SAME_STMT);
2147 gsi_prev (&gsi);
2149 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2150 if (gimple_uid (gsi_stmt (gsi)))
2151 break;
2152 else
2153 gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2155 oe->op = tem;
2156 range->exp = exp;
2157 range->low = low;
2158 range->high = high;
2159 range->in_p = in_p;
2160 range->strict_overflow_p = false;
2162 for (range = otherrange; range < otherrange + count; range++)
2164 oe = (*ops)[range->idx];
2165 /* Now change all the other range test immediate uses, so that
2166 those tests will be optimized away. */
2167 if (opcode == ERROR_MARK)
2169 if (oe->op)
2170 oe->op = build_int_cst (TREE_TYPE (oe->op),
2171 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2172 else
2173 oe->op = (oe->rank == BIT_IOR_EXPR
2174 ? boolean_false_node : boolean_true_node);
2176 else
2177 oe->op = error_mark_node;
2178 range->exp = NULL_TREE;
2180 return true;
2183 /* Optimize X == CST1 || X == CST2
2184 if popcount (CST1 ^ CST2) == 1 into
2185 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2186 Similarly for ranges. E.g.
2187 X != 2 && X != 3 && X != 10 && X != 11
2188 will be transformed by the previous optimization into
2189 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2190 and this loop can transform that into
2191 !(((X & ~8) - 2U) <= 1U). */
2193 static bool
2194 optimize_range_tests_xor (enum tree_code opcode, tree type,
2195 tree lowi, tree lowj, tree highi, tree highj,
2196 vec<operand_entry_t> *ops,
2197 struct range_entry *rangei,
2198 struct range_entry *rangej)
2200 tree lowxor, highxor, tem, exp;
2201 /* Check lowi ^ lowj == highi ^ highj and
2202 popcount (lowi ^ lowj) == 1. */
2203 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2204 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2205 return false;
2206 if (tree_log2 (lowxor) < 0)
2207 return false;
2208 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2209 if (!tree_int_cst_equal (lowxor, highxor))
2210 return false;
2212 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2213 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2214 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2215 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2216 if (update_range_test (rangei, rangej, 1, opcode, ops, exp,
2217 rangei->in_p, lowj, highj,
2218 rangei->strict_overflow_p
2219 || rangej->strict_overflow_p))
2220 return true;
2221 return false;
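/* A worked instance of the transformation above (the values are purely
   illustrative): merging X == 4 with X == 6 gives lowxor = highxor
   = 4 ^ 6 = 2, which has a single bit set, so the two tests collapse
   into (X & ~2) == 4, i.e. exp = X & ~2 with low = high = 4.  */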
2224 /* Optimize X == CST1 || X == CST2
2225 if popcount (CST2 - CST1) == 1 into
2226 ((X - CST1) & ~(CST2 - CST1)) == 0.
2227 Similarly for ranges. E.g.
2228 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2229 || X == 75 || X == 45
2230 will be transformed by the previous optimization into
2231 (X - 43U) <= 3U || (X - 75U) <= 3U
2232 and this loop can transform that into
2233 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2234 static bool
2235 optimize_range_tests_diff (enum tree_code opcode, tree type,
2236 tree lowi, tree lowj, tree highi, tree highj,
2237 vec<operand_entry_t> *ops,
2238 struct range_entry *rangei,
2239 struct range_entry *rangej)
2241 tree tem1, tem2, mask;
2242 /* Check highi - lowi == highj - lowj. */
2243 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2244 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2245 return false;
2246 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2247 if (!tree_int_cst_equal (tem1, tem2))
2248 return false;
2249 /* Check popcount (lowj - lowi) == 1. */
2250 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2251 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2252 return false;
2253 if (tree_log2 (tem1) < 0)
2254 return false;
2256 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2257 tem1 = fold_binary (MINUS_EXPR, type, rangei->exp, lowi);
2258 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2259 lowj = build_int_cst (type, 0);
2260 if (update_range_test (rangei, rangej, 1, opcode, ops, tem1,
2261 rangei->in_p, lowj, tem2,
2262 rangei->strict_overflow_p
2263 || rangej->strict_overflow_p))
2264 return true;
2265 return false;
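/* A worked instance of the transformation above (illustrative values,
   tracing the example from the comment before the function): for
   (X - 43U) <= 3U || (X - 75U) <= 3U we have lowi = 43, highi = 46,
   lowj = 75, highj = 78, so highi - lowi == highj - lowj == 3 and
   lowj - lowi == 32 has a single bit set; the merged test becomes
   ((X - 43U) & ~32U) <= 3U, i.e. exp = (X - 43) & ~32, low = 0, high = 3.  */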
2268 /* Helper of optimize_range_tests. It performs the checks common to
2269 optimize_range_tests_xor and optimize_range_tests_diff.
2270 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor,
2271 otherwise it calls optimize_range_tests_diff. */
2273 static bool
2274 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2275 bool optimize_xor, vec<operand_entry_t> *ops,
2276 struct range_entry *ranges)
2278 int i, j;
2279 bool any_changes = false;
2280 for (i = first; i < length; i++)
2282 tree lowi, highi, lowj, highj, type, tem;
2284 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2285 continue;
2286 type = TREE_TYPE (ranges[i].exp);
2287 if (!INTEGRAL_TYPE_P (type))
2288 continue;
2289 lowi = ranges[i].low;
2290 if (lowi == NULL_TREE)
2291 lowi = TYPE_MIN_VALUE (type);
2292 highi = ranges[i].high;
2293 if (highi == NULL_TREE)
2294 continue;
2295 for (j = i + 1; j < length && j < i + 64; j++)
2297 bool changes;
2298 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2299 continue;
2300 lowj = ranges[j].low;
2301 if (lowj == NULL_TREE)
2302 continue;
2303 highj = ranges[j].high;
2304 if (highj == NULL_TREE)
2305 highj = TYPE_MAX_VALUE (type);
2306 /* Check lowj > highi. */
2307 tem = fold_binary (GT_EXPR, boolean_type_node,
2308 lowj, highi);
2309 if (tem == NULL_TREE || !integer_onep (tem))
2310 continue;
2311 if (optimize_xor)
2312 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2313 highi, highj, ops,
2314 ranges + i, ranges + j);
2315 else
2316 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2317 highi, highj, ops,
2318 ranges + i, ranges + j);
2319 if (changes)
2321 any_changes = true;
2322 break;
2326 return any_changes;
2329 /* Optimize range tests, similarly to how fold_range_test optimizes
2330 them on trees. The tree code for the binary
2331 operation between all the operands is OPCODE.
2332 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2333 maybe_optimize_range_tests for inter-bb range optimization.
2334 In that case, if oe->op is NULL, oe->id is the index of the bb whose
2335 GIMPLE_COND is &&ed or ||ed into the test, and oe->rank says
2336 the actual opcode. */
2338 static bool
2339 optimize_range_tests (enum tree_code opcode,
2340 vec<operand_entry_t> *ops)
2342 unsigned int length = ops->length (), i, j, first;
2343 operand_entry_t oe;
2344 struct range_entry *ranges;
2345 bool any_changes = false;
2347 if (length == 1)
2348 return false;
2350 ranges = XNEWVEC (struct range_entry, length);
2351 for (i = 0; i < length; i++)
2353 oe = (*ops)[i];
2354 ranges[i].idx = i;
2355 init_range_entry (ranges + i, oe->op,
2356 oe->op ? NULL :
2357 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2358 /* For | invert it now, we will invert it again before emitting
2359 the optimized expression. */
2360 if (opcode == BIT_IOR_EXPR
2361 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2362 ranges[i].in_p = !ranges[i].in_p;
2365 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2366 for (i = 0; i < length; i++)
2367 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2368 break;
2370 /* Try to merge ranges. */
2371 for (first = i; i < length; i++)
2373 tree low = ranges[i].low;
2374 tree high = ranges[i].high;
2375 int in_p = ranges[i].in_p;
2376 bool strict_overflow_p = ranges[i].strict_overflow_p;
2377 int update_fail_count = 0;
2379 for (j = i + 1; j < length; j++)
2381 if (ranges[i].exp != ranges[j].exp)
2382 break;
2383 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2384 ranges[j].in_p, ranges[j].low, ranges[j].high))
2385 break;
2386 strict_overflow_p |= ranges[j].strict_overflow_p;
2389 if (j == i + 1)
2390 continue;
2392 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2393 ops, ranges[i].exp, in_p, low, high,
2394 strict_overflow_p))
2396 i = j - 1;
2397 any_changes = true;
2399 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2400 while update_range_test would fail. */
2401 else if (update_fail_count == 64)
2402 i = j - 1;
2403 else
2404 ++update_fail_count;
2407 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2408 ops, ranges);
2410 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2411 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2412 ops, ranges);
2414 if (any_changes && opcode != ERROR_MARK)
2416 j = 0;
2417 FOR_EACH_VEC_ELT (*ops, i, oe)
2419 if (oe->op == error_mark_node)
2420 continue;
2421 else if (i != j)
2422 (*ops)[j] = oe;
2423 j++;
2425 ops->truncate (j);
2428 XDELETEVEC (ranges);
2429 return any_changes;
2432 /* Return true if STMT is a cast like:
2433 <bb N>:
2435 _123 = (int) _234;
2437 <bb M>:
2438 # _345 = PHI <_123(N), 1(...), 1(...)>
2439 where _234 has bool type, _123 has a single use and
2440 bb N has a single successor M. This is commonly used in
2441 the last block of a range test. */
2443 static bool
2444 final_range_test_p (gimple stmt)
2446 basic_block bb, rhs_bb;
2447 edge e;
2448 tree lhs, rhs;
2449 use_operand_p use_p;
2450 gimple use_stmt;
2452 if (!gimple_assign_cast_p (stmt))
2453 return false;
2454 bb = gimple_bb (stmt);
2455 if (!single_succ_p (bb))
2456 return false;
2457 e = single_succ_edge (bb);
2458 if (e->flags & EDGE_COMPLEX)
2459 return false;
2461 lhs = gimple_assign_lhs (stmt);
2462 rhs = gimple_assign_rhs1 (stmt);
2463 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2464 || TREE_CODE (rhs) != SSA_NAME
2465 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2466 return false;
2468 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2469 if (!single_imm_use (lhs, &use_p, &use_stmt))
2470 return false;
2472 if (gimple_code (use_stmt) != GIMPLE_PHI
2473 || gimple_bb (use_stmt) != e->dest)
2474 return false;
2476 /* And that the rhs is defined in the same loop. */
2477 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2478 if (rhs_bb == NULL
2479 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2480 return false;
2482 return true;
2485 /* Return true if BB is suitable basic block for inter-bb range test
2486 optimization. If BACKWARD is true, BB should be the only predecessor
2487 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2488 or compared against to find the common basic block to which all the
2489 conditions branch if true resp. false. If BACKWARD is false, TEST_BB should
2490 be the only predecessor of BB. */
2492 static bool
2493 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2494 bool backward)
2496 edge_iterator ei, ei2;
2497 edge e, e2;
2498 gimple stmt;
2499 gimple_stmt_iterator gsi;
2500 bool other_edge_seen = false;
2501 bool is_cond;
2503 if (test_bb == bb)
2504 return false;
2505 /* Check last stmt first. */
2506 stmt = last_stmt (bb);
2507 if (stmt == NULL
2508 || (gimple_code (stmt) != GIMPLE_COND
2509 && (backward || !final_range_test_p (stmt)))
2510 || gimple_visited_p (stmt)
2511 || stmt_could_throw_p (stmt)
2512 || *other_bb == bb)
2513 return false;
2514 is_cond = gimple_code (stmt) == GIMPLE_COND;
2515 if (is_cond)
2517 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2518 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2519 to *OTHER_BB (if not set yet, try to find it out). */
2520 if (EDGE_COUNT (bb->succs) != 2)
2521 return false;
2522 FOR_EACH_EDGE (e, ei, bb->succs)
2524 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2525 return false;
2526 if (e->dest == test_bb)
2528 if (backward)
2529 continue;
2530 else
2531 return false;
2533 if (e->dest == bb)
2534 return false;
2535 if (*other_bb == NULL)
2537 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2538 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2539 return false;
2540 else if (e->dest == e2->dest)
2541 *other_bb = e->dest;
2542 if (*other_bb == NULL)
2543 return false;
2545 if (e->dest == *other_bb)
2546 other_edge_seen = true;
2547 else if (backward)
2548 return false;
2550 if (*other_bb == NULL || !other_edge_seen)
2551 return false;
2553 else if (single_succ (bb) != *other_bb)
2554 return false;
2556 /* Now check all PHIs of *OTHER_BB. */
2557 e = find_edge (bb, *other_bb);
2558 e2 = find_edge (test_bb, *other_bb);
2559 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2561 gimple phi = gsi_stmt (gsi);
2562 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2563 corresponding to BB and TEST_BB predecessor must be the same. */
2564 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2565 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2567 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2568 one of the PHIs should have the lhs of the last stmt in
2569 that block as PHI arg and that PHI should have 0 or 1
2570 corresponding to it in all other range test basic blocks
2571 considered. */
2572 if (!is_cond)
2574 if (gimple_phi_arg_def (phi, e->dest_idx)
2575 == gimple_assign_lhs (stmt)
2576 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2577 || integer_onep (gimple_phi_arg_def (phi,
2578 e2->dest_idx))))
2579 continue;
2581 else
2583 gimple test_last = last_stmt (test_bb);
2584 if (gimple_code (test_last) != GIMPLE_COND
2585 && gimple_phi_arg_def (phi, e2->dest_idx)
2586 == gimple_assign_lhs (test_last)
2587 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2588 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2589 continue;
2592 return false;
2595 return true;
2598 /* Return true if BB doesn't have side-effects that would disallow
2599 range test optimization, all SSA_NAMEs set in the bb are consumed
2600 in the bb and there are no PHIs. */
2602 static bool
2603 no_side_effect_bb (basic_block bb)
2605 gimple_stmt_iterator gsi;
2606 gimple last;
2608 if (!gimple_seq_empty_p (phi_nodes (bb)))
2609 return false;
2610 last = last_stmt (bb);
2611 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2613 gimple stmt = gsi_stmt (gsi);
2614 tree lhs;
2615 imm_use_iterator imm_iter;
2616 use_operand_p use_p;
2618 if (is_gimple_debug (stmt))
2619 continue;
2620 if (gimple_has_side_effects (stmt))
2621 return false;
2622 if (stmt == last)
2623 return true;
2624 if (!is_gimple_assign (stmt))
2625 return false;
2626 lhs = gimple_assign_lhs (stmt);
2627 if (TREE_CODE (lhs) != SSA_NAME)
2628 return false;
2629 if (gimple_assign_rhs_could_trap_p (stmt))
2630 return false;
2631 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2633 gimple use_stmt = USE_STMT (use_p);
2634 if (is_gimple_debug (use_stmt))
2635 continue;
2636 if (gimple_bb (use_stmt) != bb)
2637 return false;
2640 return false;
2643 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2644 return true and fill in *OPS recursively. */
2646 static bool
2647 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2648 struct loop *loop)
2650 gimple stmt = SSA_NAME_DEF_STMT (var);
2651 tree rhs[2];
2652 int i;
2654 if (!is_reassociable_op (stmt, code, loop))
2655 return false;
2657 rhs[0] = gimple_assign_rhs1 (stmt);
2658 rhs[1] = gimple_assign_rhs2 (stmt);
2659 gimple_set_visited (stmt, true);
2660 for (i = 0; i < 2; i++)
2661 if (TREE_CODE (rhs[i]) == SSA_NAME
2662 && !get_ops (rhs[i], code, ops, loop)
2663 && has_single_use (rhs[i]))
2665 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2667 oe->op = rhs[i];
2668 oe->rank = code;
2669 oe->id = 0;
2670 oe->count = 1;
2671 ops->safe_push (oe);
2673 return true;
2676 /* Find the ops that were added by get_ops starting from VAR, see if
2677 they were changed during update_range_test and if yes, create new
2678 stmts. */
2680 static tree
2681 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2682 unsigned int *pidx, struct loop *loop)
2684 gimple stmt = SSA_NAME_DEF_STMT (var);
2685 tree rhs[4];
2686 int i;
2688 if (!is_reassociable_op (stmt, code, loop))
2689 return NULL;
2691 rhs[0] = gimple_assign_rhs1 (stmt);
2692 rhs[1] = gimple_assign_rhs2 (stmt);
2693 rhs[2] = rhs[0];
2694 rhs[3] = rhs[1];
2695 for (i = 0; i < 2; i++)
2696 if (TREE_CODE (rhs[i]) == SSA_NAME)
2698 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2699 if (rhs[2 + i] == NULL_TREE)
2701 if (has_single_use (rhs[i]))
2702 rhs[2 + i] = ops[(*pidx)++]->op;
2703 else
2704 rhs[2 + i] = rhs[i];
2707 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2708 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2710 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2711 var = make_ssa_name (TREE_TYPE (var), NULL);
2712 gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
2713 var, rhs[2], rhs[3]);
2714 gimple_set_uid (g, gimple_uid (stmt));
2715 gimple_set_visited (g, true);
2716 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2718 return var;
2721 /* Structure to track the initial value passed to get_ops and
2722 the range in the ops vector for each basic block. */
2724 struct inter_bb_range_test_entry
2726 tree op;
2727 unsigned int first_idx, last_idx;
2730 /* Inter-bb range test optimization. */
2732 static void
2733 maybe_optimize_range_tests (gimple stmt)
2735 basic_block first_bb = gimple_bb (stmt);
2736 basic_block last_bb = first_bb;
2737 basic_block other_bb = NULL;
2738 basic_block bb;
2739 edge_iterator ei;
2740 edge e;
2741 auto_vec<operand_entry_t> ops;
2742 auto_vec<inter_bb_range_test_entry> bbinfo;
2743 bool any_changes = false;
2745 /* Consider only basic blocks that end with GIMPLE_COND or
2746 a cast statement satisfying final_range_test_p. All
2747 but the last bb in the first_bb .. last_bb range
2748 should end with GIMPLE_COND. */
2749 if (gimple_code (stmt) == GIMPLE_COND)
2751 if (EDGE_COUNT (first_bb->succs) != 2)
2752 return;
2754 else if (final_range_test_p (stmt))
2755 other_bb = single_succ (first_bb);
2756 else
2757 return;
2759 if (stmt_could_throw_p (stmt))
2760 return;
2762 /* As relative ordering of post-dominator sons isn't fixed,
2763 maybe_optimize_range_tests can be called first on any
2764 bb in the range we want to optimize. So, start searching
2765 backwards, if first_bb can be set to a predecessor. */
2766 while (single_pred_p (first_bb))
2768 basic_block pred_bb = single_pred (first_bb);
2769 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2770 break;
2771 if (!no_side_effect_bb (first_bb))
2772 break;
2773 first_bb = pred_bb;
2775 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2776 Before starting forward search in last_bb successors, find
2777 out the other_bb. */
2778 if (first_bb == last_bb)
2780 other_bb = NULL;
2781 /* As non-GIMPLE_COND last stmt always terminates the range,
2782 if forward search didn't discover anything, just give up. */
2783 if (gimple_code (stmt) != GIMPLE_COND)
2784 return;
2785 /* Look at both successors. Either it ends with a GIMPLE_COND
2786 and satisfies suitable_cond_bb, or ends with a cast and
2787 other_bb is that cast's successor. */
2788 FOR_EACH_EDGE (e, ei, first_bb->succs)
2789 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2790 || e->dest == first_bb)
2791 return;
2792 else if (single_pred_p (e->dest))
2794 stmt = last_stmt (e->dest);
2795 if (stmt
2796 && gimple_code (stmt) == GIMPLE_COND
2797 && EDGE_COUNT (e->dest->succs) == 2)
2799 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2800 break;
2801 else
2802 other_bb = NULL;
2804 else if (stmt
2805 && final_range_test_p (stmt)
2806 && find_edge (first_bb, single_succ (e->dest)))
2808 other_bb = single_succ (e->dest);
2809 if (other_bb == first_bb)
2810 other_bb = NULL;
2813 if (other_bb == NULL)
2814 return;
2816 /* Now do the forward search, moving last_bb to successor bbs
2817 that aren't other_bb. */
2818 while (EDGE_COUNT (last_bb->succs) == 2)
2820 FOR_EACH_EDGE (e, ei, last_bb->succs)
2821 if (e->dest != other_bb)
2822 break;
2823 if (e == NULL)
2824 break;
2825 if (!single_pred_p (e->dest))
2826 break;
2827 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2828 break;
2829 if (!no_side_effect_bb (e->dest))
2830 break;
2831 last_bb = e->dest;
2833 if (first_bb == last_bb)
2834 return;
2835 /* Here basic blocks first_bb through last_bb's predecessor
2836 end with GIMPLE_COND, all of them have one of the edges to
2837 other_bb and another to another block in the range,
2838 all blocks except first_bb have no side-effects, and
2839 last_bb ends with either a GIMPLE_COND or a cast satisfying
2840 final_range_test_p. */
2841 for (bb = last_bb; ; bb = single_pred (bb))
2843 enum tree_code code;
2844 tree lhs, rhs;
2845 inter_bb_range_test_entry bb_ent;
2847 bb_ent.op = NULL_TREE;
2848 bb_ent.first_idx = ops.length ();
2849 bb_ent.last_idx = bb_ent.first_idx;
2850 e = find_edge (bb, other_bb);
2851 stmt = last_stmt (bb);
2852 gimple_set_visited (stmt, true);
2853 if (gimple_code (stmt) != GIMPLE_COND)
2855 use_operand_p use_p;
2856 gimple phi;
2857 edge e2;
2858 unsigned int d;
2860 lhs = gimple_assign_lhs (stmt);
2861 rhs = gimple_assign_rhs1 (stmt);
2862 gcc_assert (bb == last_bb);
2864 /* stmt is
2865 _123 = (int) _234;
2867 followed by:
2868 <bb M>:
2869 # _345 = PHI <_123(N), 1(...), 1(...)>
2871 or 0 instead of 1. If it is 0, the _234
2872 range test is anded together with all the
2873 other range tests, if it is 1, it is ored with
2874 them. */
2875 single_imm_use (lhs, &use_p, &phi);
2876 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2877 e2 = find_edge (first_bb, other_bb);
2878 d = e2->dest_idx;
2879 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2880 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2881 code = BIT_AND_EXPR;
2882 else
2884 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2885 code = BIT_IOR_EXPR;
2888 /* If _234 SSA_NAME_DEF_STMT is
2889 _234 = _567 | _789;
2890 (or &, corresponding to 1/0 in the phi arguments),
2891 push into ops the individual range test arguments
2892 of the bitwise or resp. and, recursively. */
2893 if (!get_ops (rhs, code, &ops,
2894 loop_containing_stmt (stmt))
2895 && has_single_use (rhs))
2897 /* Otherwise, push the _234 range test itself. */
2898 operand_entry_t oe
2899 = (operand_entry_t) pool_alloc (operand_entry_pool);
2901 oe->op = rhs;
2902 oe->rank = code;
2903 oe->id = 0;
2904 oe->count = 1;
2905 ops.safe_push (oe);
2906 bb_ent.last_idx++;
2908 else
2909 bb_ent.last_idx = ops.length ();
2910 bb_ent.op = rhs;
2911 bbinfo.safe_push (bb_ent);
2912 continue;
2914 /* Otherwise stmt is GIMPLE_COND. */
2915 code = gimple_cond_code (stmt);
2916 lhs = gimple_cond_lhs (stmt);
2917 rhs = gimple_cond_rhs (stmt);
2918 if (TREE_CODE (lhs) == SSA_NAME
2919 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2920 && ((code != EQ_EXPR && code != NE_EXPR)
2921 || rhs != boolean_false_node
2922 /* Either push into ops the individual bitwise
2923 or resp. and operands, depending on which
2924 edge is other_bb. */
2925 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2926 ^ (code == EQ_EXPR))
2927 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2928 loop_containing_stmt (stmt))))
2930 /* Or push the GIMPLE_COND stmt itself. */
2931 operand_entry_t oe
2932 = (operand_entry_t) pool_alloc (operand_entry_pool);
2934 oe->op = NULL;
2935 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2936 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2937 /* oe->op = NULL signals that there is no SSA_NAME
2938 for the range test, and oe->id instead is the
2939 number of the basic block at whose end the GIMPLE_COND
2940 is. */
2941 oe->id = bb->index;
2942 oe->count = 1;
2943 ops.safe_push (oe);
2944 bb_ent.op = NULL;
2945 bb_ent.last_idx++;
2947 else if (ops.length () > bb_ent.first_idx)
2949 bb_ent.op = lhs;
2950 bb_ent.last_idx = ops.length ();
2952 bbinfo.safe_push (bb_ent);
2953 if (bb == first_bb)
2954 break;
2956 if (ops.length () > 1)
2957 any_changes = optimize_range_tests (ERROR_MARK, &ops);
2958 if (any_changes)
2960 unsigned int idx;
2961 /* update_ops relies on has_single_use predicates returning the
2962 same values as they did during get_ops earlier. Additionally it
2963 never removes statements, only adds new ones and it should walk
2964 from the single imm use and check the predicate already before
2965 making those changes.
2966 On the other hand, the handling of GIMPLE_COND directly can turn
2967 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
2968 it needs to be done in a separate loop afterwards. */
2969 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2971 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2972 && bbinfo[idx].op != NULL_TREE)
2974 tree new_op;
2976 stmt = last_stmt (bb);
2977 new_op = update_ops (bbinfo[idx].op,
2978 (enum tree_code)
2979 ops[bbinfo[idx].first_idx]->rank,
2980 ops, &bbinfo[idx].first_idx,
2981 loop_containing_stmt (stmt));
2982 if (new_op == NULL_TREE)
2984 gcc_assert (bb == last_bb);
2985 new_op = ops[bbinfo[idx].first_idx++]->op;
2987 if (bbinfo[idx].op != new_op)
2989 imm_use_iterator iter;
2990 use_operand_p use_p;
2991 gimple use_stmt, cast_stmt = NULL;
2993 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
2994 if (is_gimple_debug (use_stmt))
2995 continue;
2996 else if (gimple_code (use_stmt) == GIMPLE_COND
2997 || gimple_code (use_stmt) == GIMPLE_PHI)
2998 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2999 SET_USE (use_p, new_op);
3000 else if (gimple_assign_cast_p (use_stmt))
3001 cast_stmt = use_stmt;
3002 else
3003 gcc_unreachable ();
3004 if (cast_stmt)
3006 gcc_assert (bb == last_bb);
3007 tree lhs = gimple_assign_lhs (cast_stmt);
3008 tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3009 enum tree_code rhs_code
3010 = gimple_assign_rhs_code (cast_stmt);
3011 gimple g;
3012 if (is_gimple_min_invariant (new_op))
3014 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3015 g = gimple_build_assign (new_lhs, new_op);
3017 else
3018 g = gimple_build_assign_with_ops (rhs_code, new_lhs,
3019 new_op, NULL_TREE);
3020 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3021 gimple_set_uid (g, gimple_uid (cast_stmt));
3022 gimple_set_visited (g, true);
3023 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3024 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3025 if (is_gimple_debug (use_stmt))
3026 continue;
3027 else if (gimple_code (use_stmt) == GIMPLE_COND
3028 || gimple_code (use_stmt) == GIMPLE_PHI)
3029 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3030 SET_USE (use_p, new_lhs);
3031 else
3032 gcc_unreachable ();
3036 if (bb == first_bb)
3037 break;
3039 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3041 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3042 && bbinfo[idx].op == NULL_TREE
3043 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3045 stmt = last_stmt (bb);
3046 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3047 gimple_cond_make_false (stmt);
3048 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3049 gimple_cond_make_true (stmt);
3050 else
3052 gimple_cond_set_code (stmt, NE_EXPR);
3053 gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
3054 gimple_cond_set_rhs (stmt, boolean_false_node);
3056 update_stmt (stmt);
3058 if (bb == first_bb)
3059 break;
3064 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3065 of STMT in its operands. This is also known as a "destructive
3066 update" operation. */
3068 static bool
3069 is_phi_for_stmt (gimple stmt, tree operand)
3071 gimple def_stmt;
3072 tree lhs;
3073 use_operand_p arg_p;
3074 ssa_op_iter i;
3076 if (TREE_CODE (operand) != SSA_NAME)
3077 return false;
3079 lhs = gimple_assign_lhs (stmt);
3081 def_stmt = SSA_NAME_DEF_STMT (operand);
3082 if (gimple_code (def_stmt) != GIMPLE_PHI)
3083 return false;
3085 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
3086 if (lhs == USE_FROM_PTR (arg_p))
3087 return true;
3088 return false;
3091 /* Remove def stmt of VAR if VAR has zero uses and recurse
3092 on rhs1 operand if so. */
3094 static void
3095 remove_visited_stmt_chain (tree var)
3097 gimple stmt;
3098 gimple_stmt_iterator gsi;
3100 while (1)
3102 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3103 return;
3104 stmt = SSA_NAME_DEF_STMT (var);
3105 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3107 var = gimple_assign_rhs1 (stmt);
3108 gsi = gsi_for_stmt (stmt);
3109 reassoc_remove_stmt (&gsi);
3110 release_defs (stmt);
3112 else
3113 return;
3117 /* This function checks three consecutive operands in
3118 the passed operands vector OPS starting from OPINDEX and
3119 swaps two operands if it is profitable for the binary operation
3120 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3122 We pair ops with the same rank if possible.
3124 The alternative we try is to see if STMT is a destructive
3125 update style statement, which is like:
3126 b = phi (a, ...)
3127 a = c + b;
3128 In that case, we want to use the destructive update form to
3129 expose the possible vectorizer sum reduction opportunity.
3130 If so, the third operand will be the phi node. This
3131 check is not performed if STMT is null.
3133 We could, of course, try to be better as noted above, and do a
3134 lot of work to try to find these opportunities in >3 operand
3135 cases, but it is unlikely to be worth it. */
3137 static void
3138 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3139 unsigned int opindex, gimple stmt)
3141 operand_entry_t oe1, oe2, oe3;
3143 oe1 = ops[opindex];
3144 oe2 = ops[opindex + 1];
3145 oe3 = ops[opindex + 2];
3147 if ((oe1->rank == oe2->rank
3148 && oe2->rank != oe3->rank)
3149 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3150 && !is_phi_for_stmt (stmt, oe1->op)
3151 && !is_phi_for_stmt (stmt, oe2->op)))
3153 struct operand_entry temp = *oe3;
3154 oe3->op = oe1->op;
3155 oe3->rank = oe1->rank;
3156 oe1->op = temp.op;
3157 oe1->rank= temp.rank;
3159 else if ((oe1->rank == oe3->rank
3160 && oe2->rank != oe3->rank)
3161 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3162 && !is_phi_for_stmt (stmt, oe1->op)
3163 && !is_phi_for_stmt (stmt, oe3->op)))
3165 struct operand_entry temp = *oe2;
3166 oe2->op = oe1->op;
3167 oe2->rank = oe1->rank;
3168 oe1->op = temp.op;
3169 oe1->rank = temp.rank;
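/* For illustration (hypothetical ranks): with operand ranks {3, 3, 5} at
   OPINDEX the first and third entries are swapped, with {3, 5, 3} the
   first and second, so that in both cases the two rank-3 operands end up
   at OPINDEX + 1 and OPINDEX + 2 and are paired by the consuming
   statement.  */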
3173 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3174 two definitions, otherwise return STMT. */
3176 static inline gimple
3177 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3179 if (TREE_CODE (rhs1) == SSA_NAME
3180 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3181 stmt = SSA_NAME_DEF_STMT (rhs1);
3182 if (TREE_CODE (rhs2) == SSA_NAME
3183 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3184 stmt = SSA_NAME_DEF_STMT (rhs2);
3185 return stmt;
3188 /* Recursively rewrite our linearized statements so that the operators
3189 match those in OPS[OPINDEX], putting the computation in rank
3190 order. Return new lhs. */
3192 static tree
3193 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3194 vec<operand_entry_t> ops, bool changed)
3196 tree rhs1 = gimple_assign_rhs1 (stmt);
3197 tree rhs2 = gimple_assign_rhs2 (stmt);
3198 tree lhs = gimple_assign_lhs (stmt);
3199 operand_entry_t oe;
3201 /* The final recursion case for this function is that you have
3202 exactly two operations left.
3203 If we had exactly one op in the entire list to start with, we
3204 would have never called this function, and the tail recursion
3205 rewrites them one at a time. */
3206 if (opindex + 2 == ops.length ())
3208 operand_entry_t oe1, oe2;
3210 oe1 = ops[opindex];
3211 oe2 = ops[opindex + 1];
3213 if (rhs1 != oe1->op || rhs2 != oe2->op)
3215 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3216 unsigned int uid = gimple_uid (stmt);
3218 if (dump_file && (dump_flags & TDF_DETAILS))
3220 fprintf (dump_file, "Transforming ");
3221 print_gimple_stmt (dump_file, stmt, 0, 0);
3224 if (changed)
3226 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3227 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3228 stmt
3229 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3230 lhs, oe1->op, oe2->op);
3231 gimple_set_uid (stmt, uid);
3232 gimple_set_visited (stmt, true);
3233 if (insert_point == gsi_stmt (gsi))
3234 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3235 else
3236 insert_stmt_after (stmt, insert_point);
3238 else
3240 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3241 == stmt);
3242 gimple_assign_set_rhs1 (stmt, oe1->op);
3243 gimple_assign_set_rhs2 (stmt, oe2->op);
3244 update_stmt (stmt);
3247 if (rhs1 != oe1->op && rhs1 != oe2->op)
3248 remove_visited_stmt_chain (rhs1);
3250 if (dump_file && (dump_flags & TDF_DETAILS))
3252 fprintf (dump_file, " into ");
3253 print_gimple_stmt (dump_file, stmt, 0, 0);
3256 return lhs;
3259 /* If we hit here, we should have 3 or more ops left. */
3260 gcc_assert (opindex + 2 < ops.length ());
3262 /* Rewrite the next operator. */
3263 oe = ops[opindex];
3265 /* Recurse on the LHS of the binary operator, which is guaranteed to
3266 be the non-leaf side. */
3267 tree new_rhs1
3268 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3269 changed || oe->op != rhs2);
3271 if (oe->op != rhs2 || new_rhs1 != rhs1)
3273 if (dump_file && (dump_flags & TDF_DETAILS))
3275 fprintf (dump_file, "Transforming ");
3276 print_gimple_stmt (dump_file, stmt, 0, 0);
3279 /* If changed is false, this is either opindex == 0
3280 or all outer rhs2's were equal to corresponding oe->op,
3281 and powi_result is NULL.
3282 That means lhs is equivalent before and after reassociation.
3283 Otherwise ensure the old lhs SSA_NAME is not reused and
3284 create a new stmt as well, so that any debug stmts will be
3285 properly adjusted. */
3286 if (changed)
3288 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3289 unsigned int uid = gimple_uid (stmt);
3290 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3292 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3293 stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3294 lhs, new_rhs1, oe->op);
3295 gimple_set_uid (stmt, uid);
3296 gimple_set_visited (stmt, true);
3297 if (insert_point == gsi_stmt (gsi))
3298 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3299 else
3300 insert_stmt_after (stmt, insert_point);
3302 else
3304 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3305 == stmt);
3306 gimple_assign_set_rhs1 (stmt, new_rhs1);
3307 gimple_assign_set_rhs2 (stmt, oe->op);
3308 update_stmt (stmt);
3311 if (dump_file && (dump_flags & TDF_DETAILS))
3313 fprintf (dump_file, " into ");
3314 print_gimple_stmt (dump_file, stmt, 0, 0);
3317 return lhs;
3320 /* Find out how many cycles we need to compute a statement chain.
3321 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
3322 maximum number of independent statements we may execute per cycle. */
3324 static int
3325 get_required_cycles (int ops_num, int cpu_width)
3327 int res;
3328 int elog;
3329 unsigned int rest;
3331 /* While we have more than 2 * cpu_width operands
3332 we may reduce the number of operands by cpu_width
3333 per cycle. */
3334 res = ops_num / (2 * cpu_width);
3336 /* The remaining operand count may be halved each cycle
3337 until we have only one operand. */
3338 rest = (unsigned)(ops_num - res * cpu_width);
3339 elog = exact_log2 (rest);
3340 if (elog >= 0)
3341 res += elog;
3342 else
3343 res += floor_log2 (rest) + 1;
3345 return res;
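/* A worked example (arbitrary numbers): with ops_num == 10 and
   cpu_width == 2, res = 10 / 4 = 2 and rest = 10 - 2 * 2 = 6; 6 is not a
   power of two, so res += floor_log2 (6) + 1 = 3, giving 5 cycles.  With
   ops_num == 8 and cpu_width == 2, res = 2, rest = 4, res += exact_log2 (4)
   = 2, giving 4 cycles.  */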
3348 /* Returns an optimal number of registers to use for computation of
3349 given statements. */
3351 static int
3352 get_reassociation_width (int ops_num, enum tree_code opc,
3353 enum machine_mode mode)
3355 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3356 int width;
3357 int width_min;
3358 int cycles_best;
3360 if (param_width > 0)
3361 width = param_width;
3362 else
3363 width = targetm.sched.reassociation_width (opc, mode);
3365 if (width == 1)
3366 return width;
3368 /* Get the minimal time required for sequence computation. */
3369 cycles_best = get_required_cycles (ops_num, width);
3371 /* Check if we may use less width and still compute the sequence in
3372 the same time. This allows us to reduce register usage.
3373 get_required_cycles is monotonically increasing with lower width
3374 so we can perform a binary search for the minimal width that still
3375 results in the optimal cycle count. */
3376 width_min = 1;
3377 while (width > width_min)
3379 int width_mid = (width + width_min) / 2;
3381 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3382 width = width_mid;
3383 else if (width_min < width_mid)
3384 width_min = width_mid;
3385 else
3386 break;
3389 return width;
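/* A worked example (the target width below is hypothetical): if the target
   reports a reassociation width of 4 and ops_num == 10, then cycles_best
   == get_required_cycles (10, 4) == 4.  The binary search finds that width
   3 still needs only 4 cycles while width 2 would need 5, so 3 is returned
   and register pressure is reduced at no cost in cycles.  */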
3392 /* Recursively rewrite our linearized statements so that the operators
3393 match those in OPS[OPINDEX], putting the computation in rank
3394 order and trying to allow operations to be executed in
3395 parallel. */
3397 static void
3398 rewrite_expr_tree_parallel (gimple stmt, int width,
3399 vec<operand_entry_t> ops)
3401 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3402 int op_num = ops.length ();
3403 int stmt_num = op_num - 1;
3404 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3405 int op_index = op_num - 1;
3406 int stmt_index = 0;
3407 int ready_stmts_end = 0;
3408 int i = 0;
3409 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3411 /* We start expression rewriting from the top statements.
3412 So, in this loop we create a full list of statements
3413 we will work with. */
3414 stmts[stmt_num - 1] = stmt;
3415 for (i = stmt_num - 2; i >= 0; i--)
3416 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3418 for (i = 0; i < stmt_num; i++)
3420 tree op1, op2;
3422 /* Determine whether we should use results of
3423 already handled statements or not. */
3424 if (ready_stmts_end == 0
3425 && (i - stmt_index >= width || op_index < 1))
3426 ready_stmts_end = i;
3428 /* Now we choose operands for the next statement. A nonzero
3429 value in ready_stmts_end here means that we should use
3430 the result of already generated statements as a new operand. */
3431 if (ready_stmts_end > 0)
3433 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3434 if (ready_stmts_end > stmt_index)
3435 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3436 else if (op_index >= 0)
3437 op2 = ops[op_index--]->op;
3438 else
3440 gcc_assert (stmt_index < i);
3441 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3444 if (stmt_index >= ready_stmts_end)
3445 ready_stmts_end = 0;
3447 else
3449 if (op_index > 1)
3450 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3451 op2 = ops[op_index--]->op;
3452 op1 = ops[op_index--]->op;
3455 /* If we emit the last statement then we should put
3456 operands into the last statement. It will also
3457 break the loop. */
3458 if (op_index < 0 && stmt_index == i)
3459 i = stmt_num - 1;
3461 if (dump_file && (dump_flags & TDF_DETAILS))
3463 fprintf (dump_file, "Transforming ");
3464 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3467 /* We keep original statement only for the last one. All
3468 others are recreated. */
3469 if (i == stmt_num - 1)
3471 gimple_assign_set_rhs1 (stmts[i], op1);
3472 gimple_assign_set_rhs2 (stmts[i], op2);
3473 update_stmt (stmts[i]);
3475 else
3476 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3478 if (dump_file && (dump_flags & TDF_DETAILS))
3480 fprintf (dump_file, " into ");
3481 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3485 remove_visited_stmt_chain (last_rhs1);
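/* For illustration of the parallel rewrite above (hypothetical operands and
   SSA names): with ops == {a, b, c, d} and width == 2 the chain is emitted
   roughly as _1 = c + d; _2 = a + b; lhs = _1 + _2; i.e. two independent
   partial sums combined at the end, instead of the serial
   ((a + b) + c) + d.  */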
3488 /* Transform STMT, which is really (A + B) + (C + D), into the left
3489 linear form, ((A+B)+C)+D.
3490 Recurse on D if necessary. */
3492 static void
3493 linearize_expr (gimple stmt)
3495 gimple_stmt_iterator gsi;
3496 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3497 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3498 gimple oldbinrhs = binrhs;
3499 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3500 gimple newbinrhs = NULL;
3501 struct loop *loop = loop_containing_stmt (stmt);
3502 tree lhs = gimple_assign_lhs (stmt);
3504 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3505 && is_reassociable_op (binrhs, rhscode, loop));
3507 gsi = gsi_for_stmt (stmt);
3509 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3510 binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
3511 make_ssa_name (TREE_TYPE (lhs), NULL),
3512 gimple_assign_lhs (binlhs),
3513 gimple_assign_rhs2 (binrhs));
3514 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3515 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3516 gimple_set_uid (binrhs, gimple_uid (stmt));
3518 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3519 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3521 if (dump_file && (dump_flags & TDF_DETAILS))
3523 fprintf (dump_file, "Linearized: ");
3524 print_gimple_stmt (dump_file, stmt, 0, 0);
3527 reassociate_stats.linearized++;
3528 update_stmt (stmt);
3530 gsi = gsi_for_stmt (oldbinrhs);
3531 reassoc_remove_stmt (&gsi);
3532 release_defs (oldbinrhs);
3534 gimple_set_visited (stmt, true);
3535 gimple_set_visited (binlhs, true);
3536 gimple_set_visited (binrhs, true);
3538 /* Tail recurse on the new rhs if it still needs reassociation. */
3539 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3540 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3541 want to change the algorithm while converting to tuples. */
3542 linearize_expr (stmt);
3545 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3546 it. Otherwise, return NULL. */
3548 static gimple
3549 get_single_immediate_use (tree lhs)
3551 use_operand_p immuse;
3552 gimple immusestmt;
3554 if (TREE_CODE (lhs) == SSA_NAME
3555 && single_imm_use (lhs, &immuse, &immusestmt)
3556 && is_gimple_assign (immusestmt))
3557 return immusestmt;
3559 return NULL;
3562 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3563 representing the negated value. Insertions of any necessary
3564 instructions go before GSI.
3565 This function is recursive in that, if you hand it "a_5" as the
3566 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3567 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3569 static tree
3570 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3572 gimple negatedefstmt = NULL;
3573 tree resultofnegate;
3574 gimple_stmt_iterator gsi;
3575 unsigned int uid;
3577 /* If we are trying to negate a name, defined by an add, negate the
3578 add operands instead. */
3579 if (TREE_CODE (tonegate) == SSA_NAME)
3580 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3581 if (TREE_CODE (tonegate) == SSA_NAME
3582 && is_gimple_assign (negatedefstmt)
3583 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3584 && has_single_use (gimple_assign_lhs (negatedefstmt))
3585 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3587 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3588 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3589 tree lhs = gimple_assign_lhs (negatedefstmt);
3590 gimple g;
3592 gsi = gsi_for_stmt (negatedefstmt);
3593 rhs1 = negate_value (rhs1, &gsi);
3595 gsi = gsi_for_stmt (negatedefstmt);
3596 rhs2 = negate_value (rhs2, &gsi);
3598 gsi = gsi_for_stmt (negatedefstmt);
3599 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3600 gimple_set_visited (negatedefstmt, true);
3601 g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
3602 gimple_set_uid (g, gimple_uid (negatedefstmt));
3603 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3604 return lhs;
3607 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3608 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3609 NULL_TREE, true, GSI_SAME_STMT);
3610 gsi = *gsip;
3611 uid = gimple_uid (gsi_stmt (gsi));
3612 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3614 gimple stmt = gsi_stmt (gsi);
3615 if (gimple_uid (stmt) != 0)
3616 break;
3617 gimple_set_uid (stmt, uid);
3619 return resultofnegate;
3622 /* Return true if we should break up the subtract in STMT into an add
3623 with negate. This is true when the subtract operands are really
3624 adds, or the subtract itself is used in an add expression. In
3625 either case, breaking up the subtract into an add with negate
3626 exposes the adds to reassociation. */
3628 static bool
3629 should_break_up_subtract (gimple stmt)
3631 tree lhs = gimple_assign_lhs (stmt);
3632 tree binlhs = gimple_assign_rhs1 (stmt);
3633 tree binrhs = gimple_assign_rhs2 (stmt);
3634 gimple immusestmt;
3635 struct loop *loop = loop_containing_stmt (stmt);
3637 if (TREE_CODE (binlhs) == SSA_NAME
3638 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3639 return true;
3641 if (TREE_CODE (binrhs) == SSA_NAME
3642 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3643 return true;
3645 if (TREE_CODE (lhs) == SSA_NAME
3646 && (immusestmt = get_single_immediate_use (lhs))
3647 && is_gimple_assign (immusestmt)
3648 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3649 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3650 return true;
3651 return false;
3654 /* Transform STMT from A - B into A + -B. */
3656 static void
3657 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3659 tree rhs1 = gimple_assign_rhs1 (stmt);
3660 tree rhs2 = gimple_assign_rhs2 (stmt);
3662 if (dump_file && (dump_flags & TDF_DETAILS))
3664 fprintf (dump_file, "Breaking up subtract ");
3665 print_gimple_stmt (dump_file, stmt, 0, 0);
3668 rhs2 = negate_value (rhs2, gsip);
3669 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3670 update_stmt (stmt);
3673 /* Determine whether STMT is a builtin call that raises an SSA name
3674 to an integer power and has only one use. If so, and this is early
3675 reassociation and unsafe math optimizations are permitted, place
3676 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3677 If any of these conditions does not hold, return FALSE. */
3679 static bool
3680 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3682 tree fndecl, arg1;
3683 REAL_VALUE_TYPE c, cint;
3685 if (!first_pass_instance
3686 || !flag_unsafe_math_optimizations
3687 || !is_gimple_call (stmt)
3688 || !has_single_use (gimple_call_lhs (stmt)))
3689 return false;
3691 fndecl = gimple_call_fndecl (stmt);
3693 if (!fndecl
3694 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3695 return false;
3697 switch (DECL_FUNCTION_CODE (fndecl))
3699 CASE_FLT_FN (BUILT_IN_POW):
3700 *base = gimple_call_arg (stmt, 0);
3701 arg1 = gimple_call_arg (stmt, 1);
3703 if (TREE_CODE (arg1) != REAL_CST)
3704 return false;
3706 c = TREE_REAL_CST (arg1);
3708 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3709 return false;
3711 *exponent = real_to_integer (&c);
3712 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
3713 if (!real_identical (&c, &cint))
3714 return false;
3716 break;
3718 CASE_FLT_FN (BUILT_IN_POWI):
3719 *base = gimple_call_arg (stmt, 0);
3720 arg1 = gimple_call_arg (stmt, 1);
3722 if (!tree_fits_shwi_p (arg1))
3723 return false;
3725 *exponent = tree_to_shwi (arg1);
3726 break;
3728 default:
3729 return false;
3732 /* Expanding negative exponents is generally unproductive, so we don't
3733 complicate matters with those. Exponents of zero and one should
3734 have been handled by expression folding. */
3735 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3736 return false;
3738 return true;
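/* For example (illustrative calls only): __builtin_pow (x_3, 4.0) with a
   single-use lhs yields *base = x_3 and *exponent = 4;
   __builtin_pow (x_3, 2.5) is rejected because the exponent is not an
   exact integer, and __builtin_powi (x_3, 1) is rejected because exponents
   below 2 are not worth expanding.  */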
3741 /* Recursively linearize a binary expression that is the RHS of STMT.
3742 Place the operands of the expression tree in the vector named OPS. */
3744 static void
3745 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3746 bool is_associative, bool set_visited)
3748 tree binlhs = gimple_assign_rhs1 (stmt);
3749 tree binrhs = gimple_assign_rhs2 (stmt);
3750 gimple binlhsdef = NULL, binrhsdef = NULL;
3751 bool binlhsisreassoc = false;
3752 bool binrhsisreassoc = false;
3753 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3754 struct loop *loop = loop_containing_stmt (stmt);
3755 tree base = NULL_TREE;
3756 HOST_WIDE_INT exponent = 0;
3758 if (set_visited)
3759 gimple_set_visited (stmt, true);
3761 if (TREE_CODE (binlhs) == SSA_NAME)
3763 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3764 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3765 && !stmt_could_throw_p (binlhsdef));
3768 if (TREE_CODE (binrhs) == SSA_NAME)
3770 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3771 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3772 && !stmt_could_throw_p (binrhsdef));
3775 /* If the LHS is not reassociable, but the RHS is, we need to swap
3776 them. If neither is reassociable, there is nothing we can do, so
3777 just put them in the ops vector. If the LHS is reassociable,
3778 linearize it. If both are reassociable, then linearize the RHS
3779 and the LHS. */
3781 if (!binlhsisreassoc)
3783 tree temp;
3785 /* If this is not an associative operation, like division, give up. */
3786 if (!is_associative)
3788 add_to_ops_vec (ops, binrhs);
3789 return;
3792 if (!binrhsisreassoc)
3794 if (rhscode == MULT_EXPR
3795 && TREE_CODE (binrhs) == SSA_NAME
3796 && acceptable_pow_call (binrhsdef, &base, &exponent))
3798 add_repeat_to_ops_vec (ops, base, exponent);
3799 gimple_set_visited (binrhsdef, true);
3801 else
3802 add_to_ops_vec (ops, binrhs);
3804 if (rhscode == MULT_EXPR
3805 && TREE_CODE (binlhs) == SSA_NAME
3806 && acceptable_pow_call (binlhsdef, &base, &exponent))
3808 add_repeat_to_ops_vec (ops, base, exponent);
3809 gimple_set_visited (binlhsdef, true);
3811 else
3812 add_to_ops_vec (ops, binlhs);
3814 return;
3817 if (dump_file && (dump_flags & TDF_DETAILS))
3819 fprintf (dump_file, "swapping operands of ");
3820 print_gimple_stmt (dump_file, stmt, 0, 0);
3823 swap_ssa_operands (stmt,
3824 gimple_assign_rhs1_ptr (stmt),
3825 gimple_assign_rhs2_ptr (stmt));
3826 update_stmt (stmt);
3828 if (dump_file && (dump_flags & TDF_DETAILS))
3830 fprintf (dump_file, " is now ");
3831 print_gimple_stmt (dump_file, stmt, 0, 0);
3834 /* We want to make it so the lhs is always the reassociative op,
3835 so swap. */
3836 temp = binlhs;
3837 binlhs = binrhs;
3838 binrhs = temp;
3840 else if (binrhsisreassoc)
3842 linearize_expr (stmt);
3843 binlhs = gimple_assign_rhs1 (stmt);
3844 binrhs = gimple_assign_rhs2 (stmt);
3847 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3848 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3849 rhscode, loop));
3850 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3851 is_associative, set_visited);
3853 if (rhscode == MULT_EXPR
3854 && TREE_CODE (binrhs) == SSA_NAME
3855 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3857 add_repeat_to_ops_vec (ops, base, exponent);
3858 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3860 else
3861 add_to_ops_vec (ops, binrhs);
3864 /* Repropagate the negates back into subtracts, since no other pass
3865 currently does it. */
3867 static void
3868 repropagate_negates (void)
3870 unsigned int i = 0;
3871 tree negate;
3873 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3875 gimple user = get_single_immediate_use (negate);
3877 if (!user || !is_gimple_assign (user))
3878 continue;
3880 /* The negate operand can be either operand of a PLUS_EXPR
3881 (it can be the LHS if the RHS is a constant for example).
3883 Force the negate operand to the RHS of the PLUS_EXPR, then
3884 transform the PLUS_EXPR into a MINUS_EXPR. */
3885 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3887 /* If the negated operand appears on the LHS of the
3888 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3889 to force the negated operand to the RHS of the PLUS_EXPR. */
3890 if (gimple_assign_rhs1 (user) == negate)
3892 swap_ssa_operands (user,
3893 gimple_assign_rhs1_ptr (user),
3894 gimple_assign_rhs2_ptr (user));
3897 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3898 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3899 if (gimple_assign_rhs2 (user) == negate)
3901 tree rhs1 = gimple_assign_rhs1 (user);
3902 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3903 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3904 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3905 update_stmt (user);
3908 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3910 if (gimple_assign_rhs1 (user) == negate)
3912 /* We have
3913 x = -a
3914 y = x - b
3915 which we transform into
3916 x = a + b
3917 y = -x .
3918 This pushes down the negate which we possibly can merge
3919 into some other operation, hence insert it into the
3920 plus_negates vector. */
3921 gimple feed = SSA_NAME_DEF_STMT (negate);
3922 tree a = gimple_assign_rhs1 (feed);
3923 tree b = gimple_assign_rhs2 (user);
3924 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
3925 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
3926 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
3927 gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
3928 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3929 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
3930 user = gsi_stmt (gsi2);
3931 update_stmt (user);
3932 reassoc_remove_stmt (&gsi);
3933 release_defs (feed);
3934 plus_negates.safe_push (gimple_assign_lhs (user));
3936 else
3938 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3939 rid of one operation. */
3940 gimple feed = SSA_NAME_DEF_STMT (negate);
3941 tree a = gimple_assign_rhs1 (feed);
3942 tree rhs1 = gimple_assign_rhs1 (user);
3943 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3944 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3945 update_stmt (gsi_stmt (gsi));
3951 /* Returns true if OP is of a type for which we can do reassociation:
3952 integral types with wrapping overflow, non-saturating fixed-point
3953 types, and floating-point types when -fassociative-math is enabled. */
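/* For example (illustrative, not exhaustive): unsigned int and
   non-saturating fixed-point operands qualify; a signed int without
   -fwrapv fails TYPE_OVERFLOW_WRAPS and is rejected; double is
   accepted only when -fassociative-math is in effect.  */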
3955 static bool
3956 can_reassociate_p (tree op)
3958 tree type = TREE_TYPE (op);
3959 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3960 || NON_SAT_FIXED_POINT_TYPE_P (type)
3961 || (flag_associative_math && FLOAT_TYPE_P (type)))
3962 return true;
3963 return false;
3966 /* Break up subtract operations in block BB.
3968 We do this top down because we don't know whether the subtract is
3969 part of a possible chain of reassociation except at the top.
3971 IE given
3972 d = f + g
3973 c = a + e
3974 b = c - d
3975 q = b - r
3976 k = t - q
3978 we want to break up k = t - q, but we won't until we've transformed q
3979 = b - r, which won't be broken up until we transform b = c - d.
3981 En passant, clear the GIMPLE visited flag on every statement
3982 and set UIDs within each basic block. */
3984 static void
3985 break_up_subtract_bb (basic_block bb)
3987 gimple_stmt_iterator gsi;
3988 basic_block son;
3989 unsigned int uid = 1;
3991 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3993 gimple stmt = gsi_stmt (gsi);
3994 gimple_set_visited (stmt, false);
3995 gimple_set_uid (stmt, uid++);
3997 if (!is_gimple_assign (stmt)
3998 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3999 continue;
4001 /* Look for simple gimple subtract operations. */
4002 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4004 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4005 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4006 continue;
4008 /* Check for a subtract used only in an addition. If this
4009 is the case, transform it into add of a negate for better
4010 reassociation. IE transform C = A-B into C = A + -B if C
4011 is only used in an addition. */
4012 if (should_break_up_subtract (stmt))
4013 break_up_subtract (stmt, &gsi);
4015 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4016 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4017 plus_negates.safe_push (gimple_assign_lhs (stmt));
4019 for (son = first_dom_son (CDI_DOMINATORS, bb);
4020 son;
4021 son = next_dom_son (CDI_DOMINATORS, son))
4022 break_up_subtract_bb (son);
4025 /* Used for repeated factor analysis. */
4026 struct repeat_factor_d
4028 /* An SSA name that occurs in a multiply chain. */
4029 tree factor;
4031 /* Cached rank of the factor. */
4032 unsigned rank;
4034 /* Number of occurrences of the factor in the chain. */
4035 HOST_WIDE_INT count;
4037 /* An SSA name representing the product of this factor and
4038 all factors appearing later in the repeated factor vector. */
4039 tree repr;
4042 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4043 typedef const struct repeat_factor_d *const_repeat_factor_t;
4046 static vec<repeat_factor> repeat_factor_vec;
4048 /* Used for sorting the repeat factor vector. Sort primarily by
4049 ascending occurrence count, secondarily by descending rank. */
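/* E.g. (made-up data): factors {x: count 3, rank 5},
   {y: count 1, rank 9} and {z: count 3, rank 2} sort as y, x, z --
   y first on the lower count, then x before z on the higher rank.  */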
4051 static int
4052 compare_repeat_factors (const void *x1, const void *x2)
4054 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4055 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4057 if (rf1->count != rf2->count)
4058 return rf1->count < rf2->count ? -1 : 1;  /* Avoid truncating the difference to int.  */
4060 return rf2->rank < rf1->rank ? -1 : (rf2->rank > rf1->rank ? 1 : 0);
4063 /* Look for repeated operands in OPS in the multiply tree rooted at
4064 STMT. Replace them with an optimal sequence of multiplies and powi
4065 builtin calls, and remove the used operands from OPS. Return an
4066 SSA name representing the value of the replacement sequence. */
4068 static tree
4069 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4071 unsigned i, j, vec_len;
4072 int ii;
4073 operand_entry_t oe;
4074 repeat_factor_t rf1, rf2;
4075 repeat_factor rfnew;
4076 tree result = NULL_TREE;
4077 tree target_ssa, iter_result;
4078 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4079 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4080 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4081 gimple mul_stmt, pow_stmt;
4083 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4084 target. */
4085 if (!powi_fndecl)
4086 return NULL_TREE;
4088 /* Allocate the repeated factor vector. */
4089 repeat_factor_vec.create (10);
4091 /* Scan the OPS vector for all SSA names in the product and build
4092 up a vector of occurrence counts for each factor. */
4093 FOR_EACH_VEC_ELT (*ops, i, oe)
4095 if (TREE_CODE (oe->op) == SSA_NAME)
4097 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4099 if (rf1->factor == oe->op)
4101 rf1->count += oe->count;
4102 break;
4106 if (j >= repeat_factor_vec.length ())
4108 rfnew.factor = oe->op;
4109 rfnew.rank = oe->rank;
4110 rfnew.count = oe->count;
4111 rfnew.repr = NULL_TREE;
4112 repeat_factor_vec.safe_push (rfnew);
4117 /* Sort the repeated factor vector by (a) increasing occurrence count,
4118 and (b) decreasing rank. */
4119 repeat_factor_vec.qsort (compare_repeat_factors);
4121 /* It is generally best to combine as many base factors as possible
4122 into a product before applying __builtin_powi to the result.
4123 However, the sort order chosen for the repeated factor vector
4124 allows us to cache partial results for the product of the base
4125 factors for subsequent use. When we already have a cached partial
4126 result from a previous iteration, it is best to make use of it
4127 before looking for another __builtin_powi opportunity.
4129 As an example, consider x * x * y * y * y * z * z * z * z.
4130 We want to first compose the product x * y * z, raise it to the
4131 second power, then multiply this by y * z, and finally multiply
4132 by z. This can be done in 5 multiplies provided we cache y * z
4133 for use in both expressions:
4135 t1 = y * z
4136 t2 = t1 * x
4137 t3 = t2 * t2
4138 t4 = t1 * t3
4139 result = t4 * z
4141 If we instead ignored the cached y * z and first multiplied by
4142 the __builtin_powi opportunity z * z, we would get this inferior sequence:
4144 t1 = y * z
4145 t2 = t1 * x
4146 t3 = t2 * t2
4147 t4 = z * z
4148 t5 = t3 * t4
4149 result = t5 * y */
4151 vec_len = repeat_factor_vec.length ();
4153 /* Repeatedly look for opportunities to create a builtin_powi call. */
4154 while (true)
4156 HOST_WIDE_INT power;
4158 /* First look for the largest cached product of factors from
4159 preceding iterations. If found, create a builtin_powi for
4160 it if the minimum occurrence count for its factors is at
4161 least 2, or just use this cached product as our next
4162 multiplicand if the minimum occurrence count is 1. */
4163 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4165 if (rf1->repr && rf1->count > 0)
4166 break;
4169 if (j < vec_len)
4171 power = rf1->count;
4173 if (power == 1)
4175 iter_result = rf1->repr;
4177 if (dump_file && (dump_flags & TDF_DETAILS))
4179 unsigned elt;
4180 repeat_factor_t rf;
4181 fputs ("Multiplying by cached product ", dump_file);
4182 for (elt = j; elt < vec_len; elt++)
4184 rf = &repeat_factor_vec[elt];
4185 print_generic_expr (dump_file, rf->factor, 0);
4186 if (elt < vec_len - 1)
4187 fputs (" * ", dump_file);
4189 fputs ("\n", dump_file);
4192 else
4194 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4195 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4196 build_int_cst (integer_type_node,
4197 power));
4198 gimple_call_set_lhs (pow_stmt, iter_result);
4199 gimple_set_location (pow_stmt, gimple_location (stmt));
4200 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4202 if (dump_file && (dump_flags & TDF_DETAILS))
4204 unsigned elt;
4205 repeat_factor_t rf;
4206 fputs ("Building __builtin_pow call for cached product (",
4207 dump_file);
4208 for (elt = j; elt < vec_len; elt++)
4210 rf = &repeat_factor_vec[elt];
4211 print_generic_expr (dump_file, rf->factor, 0);
4212 if (elt < vec_len - 1)
4213 fputs (" * ", dump_file);
4215 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4216 power);
4220 else
4222 /* Otherwise, find the first factor in the repeated factor
4223 vector whose occurrence count is at least 2. If no such
4224 factor exists, there are no builtin_powi opportunities
4225 remaining. */
4226 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4228 if (rf1->count >= 2)
4229 break;
4232 if (j >= vec_len)
4233 break;
4235 power = rf1->count;
4237 if (dump_file && (dump_flags & TDF_DETAILS))
4239 unsigned elt;
4240 repeat_factor_t rf;
4241 fputs ("Building __builtin_pow call for (", dump_file);
4242 for (elt = j; elt < vec_len; elt++)
4244 rf = &repeat_factor_vec[elt];
4245 print_generic_expr (dump_file, rf->factor, 0);
4246 if (elt < vec_len - 1)
4247 fputs (" * ", dump_file);
4249 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4252 reassociate_stats.pows_created++;
4254 /* Visit each element of the vector in reverse order (so that
4255 high-occurrence elements are visited first, and within the
4256 same occurrence count, lower-ranked elements are visited
4257 first). Form a linear product of all elements in this order
4258 whose occurrence count is at least that of element J.
4259 Record the SSA name representing the product of each element
4260 with all subsequent elements in the vector. */
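/* Sketch of the repr chaining the loop below produces, for
   made-up sorted factors x, y, z with x at index J:
     repr(z) = z
     repr(y) = y * z
     repr(x) = x * y * z
   i.e. each repr is the product of that factor with everything
   following it in the vector.  */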
4261 if (j == vec_len - 1)
4262 rf1->repr = rf1->factor;
4263 else
4265 for (ii = vec_len - 2; ii >= (int)j; ii--)
4267 tree op1, op2;
4269 rf1 = &repeat_factor_vec[ii];
4270 rf2 = &repeat_factor_vec[ii + 1];
4272 /* Init the last factor's representative to be itself. */
4273 if (!rf2->repr)
4274 rf2->repr = rf2->factor;
4276 op1 = rf1->factor;
4277 op2 = rf2->repr;
4279 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4280 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4281 target_ssa,
4282 op1, op2);
4283 gimple_set_location (mul_stmt, gimple_location (stmt));
4284 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4285 rf1->repr = target_ssa;
4287 /* Don't reprocess the multiply we just introduced. */
4288 gimple_set_visited (mul_stmt, true);
4292 /* Form a call to __builtin_powi for the maximum product
4293 just formed, raised to the power obtained earlier. */
4294 rf1 = &repeat_factor_vec[j];
4295 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4296 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4297 build_int_cst (integer_type_node,
4298 power));
4299 gimple_call_set_lhs (pow_stmt, iter_result);
4300 gimple_set_location (pow_stmt, gimple_location (stmt));
4301 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4304 /* If we previously formed at least one other builtin_powi call,
4305 form the product of this one and those others. */
4306 if (result)
4308 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4309 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4310 result, iter_result);
4311 gimple_set_location (mul_stmt, gimple_location (stmt));
4312 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4313 gimple_set_visited (mul_stmt, true);
4314 result = new_result;
4316 else
4317 result = iter_result;
4319 /* Decrement the occurrence count of each element in the product
4320 by the count found above, and remove this many copies of each
4321 factor from OPS. */
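/* E.g. (illustrative): if POWER is 2 and OPS holds an entry for
   factor x with count 3, two copies are consumed and the entry is
   left in OPS with count 1.  */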
4322 for (i = j; i < vec_len; i++)
4324 unsigned k = power;
4325 unsigned n;
4327 rf1 = &repeat_factor_vec[i];
4328 rf1->count -= power;
4330 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4332 if (oe->op == rf1->factor)
4334 if (oe->count <= k)
4336 ops->ordered_remove (n);
4337 k -= oe->count;
4339 if (k == 0)
4340 break;
4342 else
4344 oe->count -= k;
4345 break;
4352 /* At this point all elements in the repeated factor vector have a
4353 remaining occurrence count of 0 or 1, and those with a count of 1
4354 don't have cached representatives. Re-sort the ops vector and
4355 clean up. */
4356 ops->qsort (sort_by_operand_rank);
4357 repeat_factor_vec.release ();
4359 /* Return the final product computed herein. Note that there may
4360 still be some elements with single occurrence count left in OPS;
4361 those will be handled by the normal reassociation logic. */
4362 return result;
4365 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4367 static void
4368 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4370 tree rhs1;
4372 if (dump_file && (dump_flags & TDF_DETAILS))
4374 fprintf (dump_file, "Transforming ");
4375 print_gimple_stmt (dump_file, stmt, 0, 0);
4378 rhs1 = gimple_assign_rhs1 (stmt);
4379 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4380 update_stmt (stmt);
4381 remove_visited_stmt_chain (rhs1);
4383 if (dump_file && (dump_flags & TDF_DETAILS))
4385 fprintf (dump_file, " into ");
4386 print_gimple_stmt (dump_file, stmt, 0, 0);
4390 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4392 static void
4393 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4394 tree rhs1, tree rhs2)
4396 if (dump_file && (dump_flags & TDF_DETAILS))
4398 fprintf (dump_file, "Transforming ");
4399 print_gimple_stmt (dump_file, stmt, 0, 0);
4402 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4403 update_stmt (gsi_stmt (*gsi));
4404 remove_visited_stmt_chain (rhs1);
4406 if (dump_file && (dump_flags & TDF_DETAILS))
4408 fprintf (dump_file, " into ");
4409 print_gimple_stmt (dump_file, stmt, 0, 0);
4413 /* Reassociate expressions in basic block BB and its post-dominator
4414 tree children. */
4416 static void
4417 reassociate_bb (basic_block bb)
4419 gimple_stmt_iterator gsi;
4420 basic_block son;
4421 gimple stmt = last_stmt (bb);
4423 if (stmt && !gimple_visited_p (stmt))
4424 maybe_optimize_range_tests (stmt);
4426 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4428 stmt = gsi_stmt (gsi);
4430 if (is_gimple_assign (stmt)
4431 && !stmt_could_throw_p (stmt))
4433 tree lhs, rhs1, rhs2;
4434 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4436 /* If this is not a gimple binary expression, there is
4437 nothing for us to do with it. */
4438 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4439 continue;
4441 /* If this was part of an already processed statement,
4442 we don't need to touch it again. */
4443 if (gimple_visited_p (stmt))
4445 /* This statement might have become dead because of previous
4446 reassociations. */
4447 if (has_zero_uses (gimple_get_lhs (stmt)))
4449 reassoc_remove_stmt (&gsi);
4450 release_defs (stmt);
4451 /* We might end up removing the last stmt above, which
4452 leaves the iterator at the end of the sequence.
4453 Reset it to the last stmt of the block in that case;
4454 if the block has become empty as well, there is
4455 nothing left to process and we bail out. */
4457 if (gsi_end_p (gsi))
4459 gsi = gsi_last_bb (bb);
4460 if (gsi_end_p (gsi))
4461 break;
4464 continue;
4467 lhs = gimple_assign_lhs (stmt);
4468 rhs1 = gimple_assign_rhs1 (stmt);
4469 rhs2 = gimple_assign_rhs2 (stmt);
4471 /* For operations other than bitwise and min/max we cannot
4472 associate all types. Verify that here. */
4473 if (rhs_code != BIT_IOR_EXPR
4474 && rhs_code != BIT_AND_EXPR
4475 && rhs_code != BIT_XOR_EXPR
4476 && rhs_code != MIN_EXPR
4477 && rhs_code != MAX_EXPR
4478 && (!can_reassociate_p (lhs)
4479 || !can_reassociate_p (rhs1)
4480 || !can_reassociate_p (rhs2)))
4481 continue;
4483 if (associative_tree_code (rhs_code))
4485 auto_vec<operand_entry_t> ops;
4486 tree powi_result = NULL_TREE;
4488 /* There may be no immediate uses left by the time we
4489 get here because we may have eliminated them all. */
4490 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4491 continue;
4493 gimple_set_visited (stmt, true);
4494 linearize_expr_tree (&ops, stmt, true, true);
4495 ops.qsort (sort_by_operand_rank);
4496 optimize_ops_list (rhs_code, &ops);
4497 if (undistribute_ops_list (rhs_code, &ops,
4498 loop_containing_stmt (stmt)))
4500 ops.qsort (sort_by_operand_rank);
4501 optimize_ops_list (rhs_code, &ops);
4504 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4505 optimize_range_tests (rhs_code, &ops);
4507 if (first_pass_instance
4508 && rhs_code == MULT_EXPR
4509 && flag_unsafe_math_optimizations)
4510 powi_result = attempt_builtin_powi (stmt, &ops);
4512 /* If the operand vector is now empty, all operands were
4513 consumed by the __builtin_powi optimization. */
4514 if (ops.length () == 0)
4515 transform_stmt_to_copy (&gsi, stmt, powi_result);
4516 else if (ops.length () == 1)
4518 tree last_op = ops.last ()->op;
4520 if (powi_result)
4521 transform_stmt_to_multiply (&gsi, stmt, last_op,
4522 powi_result);
4523 else
4524 transform_stmt_to_copy (&gsi, stmt, last_op);
4526 else
4528 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4529 int ops_num = ops.length ();
4530 int width = get_reassociation_width (ops_num, rhs_code, mode);
4531 tree new_lhs = lhs;
4533 if (dump_file && (dump_flags & TDF_DETAILS))
4534 fprintf (dump_file,
4535 "Width = %d was chosen for reassociation\n", width);
4537 if (width > 1
4538 && ops.length () > 3)
4539 rewrite_expr_tree_parallel (stmt, width, ops);
4540 else
4542 /* When there are three operands left, we want
4543 to make sure the ones that get the double
4544 binary op are chosen wisely. */
4545 int len = ops.length ();
4546 if (len >= 3)
4547 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4549 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4550 powi_result != NULL);
4553 /* If we combined some repeated factors into a
4554 __builtin_powi call, multiply that result by the
4555 reassociated operands. */
4556 if (powi_result)
4558 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4559 tree type = TREE_TYPE (lhs);
4560 tree target_ssa = make_temp_ssa_name (type, NULL,
4561 "reassocpow");
4562 gimple_set_lhs (lhs_stmt, target_ssa);
4563 update_stmt (lhs_stmt);
4564 if (lhs != new_lhs)
4565 target_ssa = new_lhs;
4566 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4567 powi_result,
4568 target_ssa);
4569 gimple_set_location (mul_stmt, gimple_location (stmt));
4570 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4576 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4577 son;
4578 son = next_dom_son (CDI_POST_DOMINATORS, son))
4579 reassociate_bb (son);
4582 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4583 void debug_ops_vector (vec<operand_entry_t> ops);
4585 /* Dump the operand entry vector OPS to FILE. */
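/* Output looks roughly like this (illustrative ranks and names):
     Op 0 -> rank: 196610, tree: a_5
     Op 1 -> rank: 196611, tree: b_7  */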
4587 void
4588 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4590 operand_entry_t oe;
4591 unsigned int i;
4593 FOR_EACH_VEC_ELT (ops, i, oe)
4595 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4596 print_generic_expr (file, oe->op, 0);
4600 /* Dump the operand entry vector OPS to STDERR. */
4602 DEBUG_FUNCTION void
4603 debug_ops_vector (vec<operand_entry_t> ops)
4605 dump_ops_vector (stderr, ops);
4608 static void
4609 do_reassoc (void)
4611 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4612 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4615 /* Initialize the reassociation pass. */
4617 static void
4618 init_reassoc (void)
4620 int i;
4621 long rank = 2;
4622 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4624 /* Find the loops, so that we can prevent moving calculations in
4625 them. */
4626 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4628 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4630 operand_entry_pool = create_alloc_pool ("operand entry pool",
4631 sizeof (struct operand_entry), 30);
4632 next_operand_entry_id = 0;
4634 /* Reverse post order (RPO) will give us an ordering in which
4635 deeper loops come later. */
4636 pre_and_rev_post_order_compute (NULL, bbs, false);
4637 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
4638 operand_rank = pointer_map_create ();
4640 /* Give each default definition a distinct rank. This includes
4641 parameters and the static chain. Walk backwards over all
4642 SSA names so that we get proper rank ordering according
4643 to tree_swap_operands_p. */
4644 for (i = num_ssa_names - 1; i > 0; --i)
4646 tree name = ssa_name (i);
4647 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4648 insert_operand_rank (name, ++rank);
4651 /* Set up rank for each BB */
4652 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
4653 bb_rank[bbs[i]] = ++rank << 16;
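/* Illustrative effect of the shift above: successive blocks in the
   RPO walk get base ranks like 3<<16, 4<<16, ..., presumably leaving
   the low-order bits free for the ranks of expressions computed
   within each block.  */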
4655 free (bbs);
4656 calculate_dominance_info (CDI_POST_DOMINATORS);
4657 plus_negates = vNULL;
4660 /* Cleanup after the reassociation pass, and print stats if
4661 requested. */
4663 static void
4664 fini_reassoc (void)
4666 statistics_counter_event (cfun, "Linearized",
4667 reassociate_stats.linearized);
4668 statistics_counter_event (cfun, "Constants eliminated",
4669 reassociate_stats.constants_eliminated);
4670 statistics_counter_event (cfun, "Ops eliminated",
4671 reassociate_stats.ops_eliminated);
4672 statistics_counter_event (cfun, "Statements rewritten",
4673 reassociate_stats.rewritten);
4674 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4675 reassociate_stats.pows_encountered);
4676 statistics_counter_event (cfun, "Built-in powi calls created",
4677 reassociate_stats.pows_created);
4679 pointer_map_destroy (operand_rank);
4680 free_alloc_pool (operand_entry_pool);
4681 free (bb_rank);
4682 plus_negates.release ();
4683 free_dominance_info (CDI_POST_DOMINATORS);
4684 loop_optimizer_finalize ();
4687 /* Gate and execute functions for Reassociation. */
4689 static unsigned int
4690 execute_reassoc (void)
4692 init_reassoc ();
4694 do_reassoc ();
4695 repropagate_negates ();
4697 fini_reassoc ();
4698 return 0;
4701 namespace {
4703 const pass_data pass_data_reassoc =
4705 GIMPLE_PASS, /* type */
4706 "reassoc", /* name */
4707 OPTGROUP_NONE, /* optinfo_flags */
4708 TV_TREE_REASSOC, /* tv_id */
4709 ( PROP_cfg | PROP_ssa ), /* properties_required */
4710 0, /* properties_provided */
4711 0, /* properties_destroyed */
4712 0, /* todo_flags_start */
4713 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
4716 class pass_reassoc : public gimple_opt_pass
4718 public:
4719 pass_reassoc (gcc::context *ctxt)
4720 : gimple_opt_pass (pass_data_reassoc, ctxt)
4723 /* opt_pass methods: */
4724 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
4725 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
4726 virtual unsigned int execute (function *) { return execute_reassoc (); }
4728 }; // class pass_reassoc
4730 } // anon namespace
4732 gimple_opt_pass *
4733 make_pass_reassoc (gcc::context *ctxt)
4735 return new pass_reassoc (ctxt);