gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2014 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "tree.h"
29 #include "stor-layout.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-inline.h"
33 #include "pointer-set.h"
34 #include "tree-ssa-alias.h"
35 #include "internal-fn.h"
36 #include "gimple-fold.h"
37 #include "tree-eh.h"
38 #include "gimple-expr.h"
39 #include "is-a.h"
40 #include "gimple.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "gimple-ssa.h"
44 #include "tree-cfg.h"
45 #include "tree-phinodes.h"
46 #include "ssa-iterators.h"
47 #include "stringpool.h"
48 #include "tree-ssanames.h"
49 #include "tree-ssa-loop-niter.h"
50 #include "tree-ssa-loop.h"
51 #include "expr.h"
52 #include "tree-dfa.h"
53 #include "tree-ssa.h"
54 #include "tree-iterator.h"
55 #include "tree-pass.h"
56 #include "alloc-pool.h"
57 #include "langhooks.h"
58 #include "cfgloop.h"
59 #include "flags.h"
60 #include "target.h"
61 #include "params.h"
62 #include "diagnostic-core.h"
63 #include "builtins.h"
65 /* This is a simple global reassociation pass. It is, in part, based
66 on the LLVM pass of the same name (They do some things more/less
67 than we do, in different orders, etc).
69 It consists of five steps:
71 1. Breaking up subtract operations into addition + negate, where
72 it would promote the reassociation of adds.
74 2. Left linearization of the expression trees, so that (A+B)+(C+D)
75 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
76 During linearization, we place the operands of the binary
77 expressions into a vector of operand_entry_t
79 3. Optimization of the operand lists, eliminating things like a +
80 -a, a & a, etc.
82 3a. Combine repeated factors with the same occurrence counts
83 into a __builtin_powi call that will later be optimized into
84 an optimal number of multiplies.
86 4. Rewrite the expression trees we linearized and optimized so
87 they are in proper rank order.
 89    5. Repropagate negates, as nothing else will clean them up ATM.
91 A bit of theory on #4, since nobody seems to write anything down
92 about why it makes sense to do it the way they do it:
94 We could do this much nicer theoretically, but don't (for reasons
95 explained after how to do it theoretically nice :P).
97 In order to promote the most redundancy elimination, you want
98 binary expressions whose operands are the same rank (or
99 preferably, the same value) exposed to the redundancy eliminator,
100 for possible elimination.
102 So the way to do this if we really cared, is to build the new op
103 tree from the leaves to the roots, merging as you go, and putting the
104 new op on the end of the worklist, until you are left with one
105 thing on the worklist.
 107    I.e. if you have to rewrite the following set of operands (listed with
108 rank in parentheses), with opcode PLUS_EXPR:
110 a (1), b (1), c (1), d (2), e (2)
113 We start with our merge worklist empty, and the ops list with all of
114 those on it.
116 You want to first merge all leaves of the same rank, as much as
117 possible.
119 So first build a binary op of
121 mergetmp = a + b, and put "mergetmp" on the merge worklist.
123 Because there is no three operand form of PLUS_EXPR, c is not going to
124 be exposed to redundancy elimination as a rank 1 operand.
126 So you might as well throw it on the merge worklist (you could also
127 consider it to now be a rank two operand, and merge it with d and e,
128 but in this case, you then have evicted e from a binary op. So at
129 least in this situation, you can't win.)
131 Then build a binary op of d + e
132 mergetmp2 = d + e
134 and put mergetmp2 on the merge worklist.
136 so merge worklist = {mergetmp, c, mergetmp2}
138 Continue building binary ops of these operations until you have only
139 one operation left on the worklist.
141 So we have
143 build binary op
144 mergetmp3 = mergetmp + c
146 worklist = {mergetmp2, mergetmp3}
148 mergetmp4 = mergetmp2 + mergetmp3
150 worklist = {mergetmp4}
 152    Because we have one operation left, we can now just set the original
153 statement equal to the result of that operation.
155 This will at least expose a + b and d + e to redundancy elimination
156 as binary operations.
158 For extra points, you can reuse the old statements to build the
159 mergetmps, since you shouldn't run out.
161 So why don't we do this?
163 Because it's expensive, and rarely will help. Most trees we are
 164    reassociating have 3 or fewer ops.  If they have 2 ops, they already
165 will be written into a nice single binary op. If you have 3 ops, a
166 single simple check suffices to tell you whether the first two are of the
167 same rank. If so, you know to order it
169 mergetmp = op1 + op2
170 newstmt = mergetmp + op3
172 instead of
173 mergetmp = op2 + op3
174 newstmt = mergetmp + op1
176 If all three are of the same rank, you can't expose them all in a
177 single binary operator anyway, so the above is *still* the best you
178 can do.
180 Thus, this is what we do. When we have three ops left, we check to see
181 what order to put them in, and call it a day. As a nod to vector sum
182 reduction, we check if any of the ops are really a phi node that is a
183 destructive update for the associating op, and keep the destructive
184 update together for vector sum reduction recognition. */
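
/* As a small end-to-end illustration of the steps above (a sketch, not
   taken from any particular dump):

     t1 = a - b;
     t2 = c + t1;
     t3 = t2 + b;

   Step 1 rewrites the subtract as t1 = a + (-b).  Step 2 linearizes the
   plus chain feeding t3 into the operand list {a, -b, c, b}.  Step 3
   cancels the pair b + (-b), leaving {a, c} plus a zero constant that is
   in turn eliminated.  Step 4 rewrites the remaining chain in rank
   order, and step 5 repropagates any negates that survived, so t3 ends
   up computed simply as a + c.  */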
187 /* Statistics */
188 static struct
190 int linearized;
191 int constants_eliminated;
192 int ops_eliminated;
193 int rewritten;
194 int pows_encountered;
195 int pows_created;
196 } reassociate_stats;
198 /* Operator, rank pair. */
199 typedef struct operand_entry
201 unsigned int rank;
202 int id;
203 tree op;
204 unsigned int count;
205 } *operand_entry_t;
207 static alloc_pool operand_entry_pool;
209 /* This is used to assign a unique ID to each struct operand_entry
210 so that qsort results are identical on different hosts. */
211 static int next_operand_entry_id;
213 /* Starting rank number for a given basic block, so that we can rank
214 operations using unmovable instructions in that BB based on the bb
215 depth. */
216 static long *bb_rank;
218 /* Operand->rank hashtable. */
219 static struct pointer_map_t *operand_rank;
221 /* Forward decls. */
222 static long get_rank (tree);
223 static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
225 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
226 possibly added by gsi_remove. */
228 bool
229 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
231 gimple stmt = gsi_stmt (*gsi);
233 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
234 return gsi_remove (gsi, true);
236 gimple_stmt_iterator prev = *gsi;
237 gsi_prev (&prev);
238 unsigned uid = gimple_uid (stmt);
239 basic_block bb = gimple_bb (stmt);
240 bool ret = gsi_remove (gsi, true);
241 if (!gsi_end_p (prev))
242 gsi_next (&prev);
243 else
244 prev = gsi_start_bb (bb);
245 gimple end_stmt = gsi_stmt (*gsi);
246 while ((stmt = gsi_stmt (prev)) != end_stmt)
248 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
249 gimple_set_uid (stmt, uid);
250 gsi_next (&prev);
252 return ret;
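
/* The fixup loop above matters because gsi_remove with
   remove_permanently set may emit debug temp bind statements for the
   defs of the removed statement, and those start out with gimple_uid 0.
   Giving them the uid of the removed statement keeps the uid-based
   ordering used by reassoc_stmt_dominates_stmt_p (which asserts nonzero
   uids) consistent within the block.  */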
255 /* Bias amount for loop-carried phis. We want this to be larger than
256 the depth of any reassociation tree we can see, but not larger than
257 the rank difference between two blocks. */
258 #define PHI_LOOP_BIAS (1 << 15)
260 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
261 an innermost loop, and the phi has only a single use which is inside
262 the loop, then the rank is the block rank of the loop latch plus an
263 extra bias for the loop-carried dependence. This causes expressions
264 calculated into an accumulator variable to be independent for each
265 iteration of the loop. If STMT is some other phi, the rank is the
266 block rank of its containing block. */
267 static long
268 phi_rank (gimple stmt)
270 basic_block bb = gimple_bb (stmt);
271 struct loop *father = bb->loop_father;
272 tree res;
273 unsigned i;
274 use_operand_p use;
275 gimple use_stmt;
277 /* We only care about real loops (those with a latch). */
278 if (!father->latch)
279 return bb_rank[bb->index];
281 /* Interesting phis must be in headers of innermost loops. */
282 if (bb != father->header
283 || father->inner)
284 return bb_rank[bb->index];
286 /* Ignore virtual SSA_NAMEs. */
287 res = gimple_phi_result (stmt);
288 if (virtual_operand_p (res))
289 return bb_rank[bb->index];
291 /* The phi definition must have a single use, and that use must be
292 within the loop. Otherwise this isn't an accumulator pattern. */
293 if (!single_imm_use (res, &use, &use_stmt)
294 || gimple_bb (use_stmt)->loop_father != father)
295 return bb_rank[bb->index];
297 /* Look for phi arguments from within the loop. If found, bias this phi. */
298 for (i = 0; i < gimple_phi_num_args (stmt); i++)
300 tree arg = gimple_phi_arg_def (stmt, i);
301 if (TREE_CODE (arg) == SSA_NAME
302 && !SSA_NAME_IS_DEFAULT_DEF (arg))
304 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
305 if (gimple_bb (def_stmt)->loop_father == father)
306 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
310 /* Must be an uninteresting phi. */
311 return bb_rank[bb->index];
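
/* Concretely: for an innermost loop

     header:  x_1 = PHI <x_0 (preheader), x_2 (latch)>
              x_2 = x_1 + a_3;
              ...

   where x_1 has only the single use shown and it is inside the loop,
   the phi is treated as an accumulator and phi_rank returns
   bb_rank[latch->index] + PHI_LOOP_BIAS rather than the header's block
   rank.  */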
314 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
315 loop-carried dependence of an innermost loop, return TRUE; else
316 return FALSE. */
317 static bool
318 loop_carried_phi (tree exp)
320 gimple phi_stmt;
321 long block_rank;
323 if (TREE_CODE (exp) != SSA_NAME
324 || SSA_NAME_IS_DEFAULT_DEF (exp))
325 return false;
327 phi_stmt = SSA_NAME_DEF_STMT (exp);
329 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
330 return false;
332 /* Non-loop-carried phis have block rank. Loop-carried phis have
333 an additional bias added in. If this phi doesn't have block rank,
334 it's biased and should not be propagated. */
335 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
337 if (phi_rank (phi_stmt) != block_rank)
338 return true;
340 return false;
343 /* Return the maximum of RANK and the rank that should be propagated
344 from expression OP. For most operands, this is just the rank of OP.
345 For loop-carried phis, the value is zero to avoid undoing the bias
346 in favor of the phi. */
347 static long
348 propagate_rank (long rank, tree op)
350 long op_rank;
352 if (loop_carried_phi (op))
353 return rank;
355 op_rank = get_rank (op);
357 return MAX (rank, op_rank);
360 /* Look up the operand rank structure for expression E. */
362 static inline long
363 find_operand_rank (tree e)
365 void **slot = pointer_map_contains (operand_rank, e);
366 return slot ? (long) (intptr_t) *slot : -1;
369 /* Insert {E,RANK} into the operand rank hashtable. */
371 static inline void
372 insert_operand_rank (tree e, long rank)
374 void **slot;
375 gcc_assert (rank > 0);
376 slot = pointer_map_insert (operand_rank, e);
377 gcc_assert (!*slot);
378 *slot = (void *) (intptr_t) rank;
381 /* Given an expression E, return the rank of the expression. */
383 static long
384 get_rank (tree e)
386 /* Constants have rank 0. */
387 if (is_gimple_min_invariant (e))
388 return 0;
 390   /* SSA_NAME's have the rank of the expression they are the result of.
392 For globals and uninitialized values, the rank is 0.
393 For function arguments, use the pre-setup rank.
394 For PHI nodes, stores, asm statements, etc, we use the rank of
395 the BB.
396 For simple operations, the rank is the maximum rank of any of
397 its operands, or the bb_rank, whichever is less.
398 I make no claims that this is optimal, however, it gives good
399 results. */
401 /* We make an exception to the normal ranking system to break
402 dependences of accumulator variables in loops. Suppose we
403 have a simple one-block loop containing:
405 x_1 = phi(x_0, x_2)
406 b = a + x_1
407 c = b + d
408 x_2 = c + e
410 As shown, each iteration of the calculation into x is fully
411 dependent upon the iteration before it. We would prefer to
412 see this in the form:
414 x_1 = phi(x_0, x_2)
415 b = a + d
416 c = b + e
417 x_2 = c + x_1
419 If the loop is unrolled, the calculations of b and c from
420 different iterations can be interleaved.
422 To obtain this result during reassociation, we bias the rank
423 of the phi definition x_1 upward, when it is recognized as an
424 accumulator pattern. The artificial rank causes it to be
425 added last, providing the desired independence. */
427 if (TREE_CODE (e) == SSA_NAME)
429 gimple stmt;
430 long rank;
431 int i, n;
432 tree op;
434 if (SSA_NAME_IS_DEFAULT_DEF (e))
435 return find_operand_rank (e);
437 stmt = SSA_NAME_DEF_STMT (e);
438 if (gimple_code (stmt) == GIMPLE_PHI)
439 return phi_rank (stmt);
441 if (!is_gimple_assign (stmt)
442 || gimple_vdef (stmt))
443 return bb_rank[gimple_bb (stmt)->index];
445 /* If we already have a rank for this expression, use that. */
446 rank = find_operand_rank (e);
447 if (rank != -1)
448 return rank;
450 /* Otherwise, find the maximum rank for the operands. As an
451 exception, remove the bias from loop-carried phis when propagating
452 the rank so that dependent operations are not also biased. */
453 rank = 0;
454 if (gimple_assign_single_p (stmt))
456 tree rhs = gimple_assign_rhs1 (stmt);
457 n = TREE_OPERAND_LENGTH (rhs);
458 if (n == 0)
459 rank = propagate_rank (rank, rhs);
460 else
462 for (i = 0; i < n; i++)
464 op = TREE_OPERAND (rhs, i);
466 if (op != NULL_TREE)
467 rank = propagate_rank (rank, op);
471 else
473 n = gimple_num_ops (stmt);
474 for (i = 1; i < n; i++)
476 op = gimple_op (stmt, i);
477 gcc_assert (op);
478 rank = propagate_rank (rank, op);
482 if (dump_file && (dump_flags & TDF_DETAILS))
484 fprintf (dump_file, "Rank for ");
485 print_generic_expr (dump_file, e, 0);
486 fprintf (dump_file, " is %ld\n", (rank + 1));
489 /* Note the rank in the hashtable so we don't recompute it. */
490 insert_operand_rank (e, (rank + 1));
491 return (rank + 1);
494 /* Globals, etc, are rank 0 */
495 return 0;
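
/* A small worked example of the ranking (a sketch; concrete values
   depend on how bb_rank and the parameter ranks are initialized): with
   constants at rank 0 and parameters a_1 and b_2 assumed pre-assigned
   rank 1,

     c_3 = a_1 + b_2;    rank 2  (one more than the highest operand rank)
     d_4 = c_3 + 5;      rank 3
     e_5 = d_4 + a_1;    rank 4

   so when an operand list over these names is sorted below, lower-ranked
   names and the rank-0 constants end up toward the tail of the
   vector.  */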
499 /* We want integer ones to end up last no matter what, since they are
500 the ones we can do the most with. */
501 #define INTEGER_CONST_TYPE 1 << 3
502 #define FLOAT_CONST_TYPE 1 << 2
503 #define OTHER_CONST_TYPE 1 << 1
505 /* Classify an invariant tree into integer, float, or other, so that
506 we can sort them to be near other constants of the same type. */
507 static inline int
508 constant_type (tree t)
510 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
511 return INTEGER_CONST_TYPE;
512 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
513 return FLOAT_CONST_TYPE;
514 else
515 return OTHER_CONST_TYPE;
518 /* qsort comparison function to sort operand entries PA and PB by rank
519 so that the sorted array is ordered by rank in decreasing order. */
520 static int
521 sort_by_operand_rank (const void *pa, const void *pb)
523 const operand_entry_t oea = *(const operand_entry_t *)pa;
524 const operand_entry_t oeb = *(const operand_entry_t *)pb;
526 /* It's nicer for optimize_expression if constants that are likely
 527      to fold when added/multiplied/whatever are put next to each
528 other. Since all constants have rank 0, order them by type. */
529 if (oeb->rank == 0 && oea->rank == 0)
531 if (constant_type (oeb->op) != constant_type (oea->op))
532 return constant_type (oeb->op) - constant_type (oea->op);
533 else
534 /* To make sorting result stable, we use unique IDs to determine
535 order. */
536 return oeb->id - oea->id;
539 /* Lastly, make sure the versions that are the same go next to each
540 other. */
541 if ((oeb->rank - oea->rank == 0)
542 && TREE_CODE (oea->op) == SSA_NAME
543 && TREE_CODE (oeb->op) == SSA_NAME)
 545      /* As SSA_NAME_VERSION is assigned pretty randomly (we reuse
 546         versions of removed SSA_NAMEs), prefer, if possible, to sort
547 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
548 See PR60418. */
549 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
550 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
551 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
553 gimple stmta = SSA_NAME_DEF_STMT (oea->op);
554 gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
555 basic_block bba = gimple_bb (stmta);
556 basic_block bbb = gimple_bb (stmtb);
557 if (bbb != bba)
559 if (bb_rank[bbb->index] != bb_rank[bba->index])
560 return bb_rank[bbb->index] - bb_rank[bba->index];
562 else
564 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
565 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
566 if (da != db)
567 return da ? 1 : -1;
571 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
572 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
573 else
574 return oeb->id - oea->id;
577 if (oeb->rank != oea->rank)
578 return oeb->rank - oea->rank;
579 else
580 return oeb->id - oea->id;
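
/* For example, an already-sorted PLUS_EXPR operand vector looks like
   {c_3 (rank 2), a_1 (rank 1), 5, 7}: SSA names in decreasing rank
   order, then the rank-0 constants (here 5 and 7) grouped together at
   the tail, which is where optimize_ops_list looks when merging
   constants and where eliminate_using_constants inspects
   ops->last ().  */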
583 /* Add an operand entry to *OPS for the tree operand OP. */
585 static void
586 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
588 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
590 oe->op = op;
591 oe->rank = get_rank (op);
592 oe->id = next_operand_entry_id++;
593 oe->count = 1;
594 ops->safe_push (oe);
597 /* Add an operand entry to *OPS for the tree operand OP with repeat
598 count REPEAT. */
600 static void
601 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
602 HOST_WIDE_INT repeat)
604 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
606 oe->op = op;
607 oe->rank = get_rank (op);
608 oe->id = next_operand_entry_id++;
609 oe->count = repeat;
610 ops->safe_push (oe);
612 reassociate_stats.pows_encountered++;
 615   /* Return true if STMT is a reassociable operation containing a binary
616 operation with tree code CODE, and is inside LOOP. */
618 static bool
619 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
621 basic_block bb = gimple_bb (stmt);
623 if (gimple_bb (stmt) == NULL)
624 return false;
626 if (!flow_bb_inside_loop_p (loop, bb))
627 return false;
629 if (is_gimple_assign (stmt)
630 && gimple_assign_rhs_code (stmt) == code
631 && has_single_use (gimple_assign_lhs (stmt)))
632 return true;
634 return false;
638 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
639 operand of the negate operation. Otherwise, return NULL. */
641 static tree
642 get_unary_op (tree name, enum tree_code opcode)
644 gimple stmt = SSA_NAME_DEF_STMT (name);
646 if (!is_gimple_assign (stmt))
647 return NULL_TREE;
649 if (gimple_assign_rhs_code (stmt) == opcode)
650 return gimple_assign_rhs1 (stmt);
651 return NULL_TREE;
654 /* If CURR and LAST are a pair of ops that OPCODE allows us to
655 eliminate through equivalences, do so, remove them from OPS, and
656 return true. Otherwise, return false. */
658 static bool
659 eliminate_duplicate_pair (enum tree_code opcode,
660 vec<operand_entry_t> *ops,
661 bool *all_done,
662 unsigned int i,
663 operand_entry_t curr,
664 operand_entry_t last)
667 /* If we have two of the same op, and the opcode is & |, min, or max,
668 we can eliminate one of them.
669 If we have two of the same op, and the opcode is ^, we can
670 eliminate both of them. */
672 if (last && last->op == curr->op)
674 switch (opcode)
676 case MAX_EXPR:
677 case MIN_EXPR:
678 case BIT_IOR_EXPR:
679 case BIT_AND_EXPR:
680 if (dump_file && (dump_flags & TDF_DETAILS))
682 fprintf (dump_file, "Equivalence: ");
683 print_generic_expr (dump_file, curr->op, 0);
684 fprintf (dump_file, " [&|minmax] ");
685 print_generic_expr (dump_file, last->op, 0);
686 fprintf (dump_file, " -> ");
687 print_generic_stmt (dump_file, last->op, 0);
690 ops->ordered_remove (i);
691 reassociate_stats.ops_eliminated ++;
693 return true;
695 case BIT_XOR_EXPR:
696 if (dump_file && (dump_flags & TDF_DETAILS))
698 fprintf (dump_file, "Equivalence: ");
699 print_generic_expr (dump_file, curr->op, 0);
700 fprintf (dump_file, " ^ ");
701 print_generic_expr (dump_file, last->op, 0);
702 fprintf (dump_file, " -> nothing\n");
705 reassociate_stats.ops_eliminated += 2;
707 if (ops->length () == 2)
709 ops->create (0);
710 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
711 *all_done = true;
713 else
715 ops->ordered_remove (i-1);
716 ops->ordered_remove (i-1);
719 return true;
721 default:
722 break;
725 return false;
728 static vec<tree> plus_negates;
 730   /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
731 expression, look in OPS for a corresponding positive operation to cancel
732 it out. If we find one, remove the other from OPS, replace
733 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
734 return false. */
736 static bool
737 eliminate_plus_minus_pair (enum tree_code opcode,
738 vec<operand_entry_t> *ops,
739 unsigned int currindex,
740 operand_entry_t curr)
742 tree negateop;
743 tree notop;
744 unsigned int i;
745 operand_entry_t oe;
747 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
748 return false;
750 negateop = get_unary_op (curr->op, NEGATE_EXPR);
751 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
752 if (negateop == NULL_TREE && notop == NULL_TREE)
753 return false;
755 /* Any non-negated version will have a rank that is one less than
756 the current rank. So once we hit those ranks, if we don't find
757 one, we can stop. */
759 for (i = currindex + 1;
760 ops->iterate (i, &oe)
761 && oe->rank >= curr->rank - 1 ;
762 i++)
764 if (oe->op == negateop)
767 if (dump_file && (dump_flags & TDF_DETAILS))
769 fprintf (dump_file, "Equivalence: ");
770 print_generic_expr (dump_file, negateop, 0);
771 fprintf (dump_file, " + -");
772 print_generic_expr (dump_file, oe->op, 0);
773 fprintf (dump_file, " -> 0\n");
776 ops->ordered_remove (i);
777 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
778 ops->ordered_remove (currindex);
779 reassociate_stats.ops_eliminated ++;
781 return true;
783 else if (oe->op == notop)
785 tree op_type = TREE_TYPE (oe->op);
787 if (dump_file && (dump_flags & TDF_DETAILS))
789 fprintf (dump_file, "Equivalence: ");
790 print_generic_expr (dump_file, notop, 0);
791 fprintf (dump_file, " + ~");
792 print_generic_expr (dump_file, oe->op, 0);
793 fprintf (dump_file, " -> -1\n");
796 ops->ordered_remove (i);
797 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
798 ops->ordered_remove (currindex);
799 reassociate_stats.ops_eliminated ++;
801 return true;
805 /* CURR->OP is a negate expr in a plus expr: save it for later
806 inspection in repropagate_negates(). */
807 if (negateop != NULL_TREE)
808 plus_negates.safe_push (curr->op);
810 return false;
 813   /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
814 bitwise not expression, look in OPS for a corresponding operand to
815 cancel it out. If we find one, remove the other from OPS, replace
816 OPS[CURRINDEX] with 0, and return true. Otherwise, return
817 false. */
819 static bool
820 eliminate_not_pairs (enum tree_code opcode,
821 vec<operand_entry_t> *ops,
822 unsigned int currindex,
823 operand_entry_t curr)
825 tree notop;
826 unsigned int i;
827 operand_entry_t oe;
829 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
830 || TREE_CODE (curr->op) != SSA_NAME)
831 return false;
833 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
834 if (notop == NULL_TREE)
835 return false;
837 /* Any non-not version will have a rank that is one less than
838 the current rank. So once we hit those ranks, if we don't find
839 one, we can stop. */
841 for (i = currindex + 1;
842 ops->iterate (i, &oe)
843 && oe->rank >= curr->rank - 1;
844 i++)
846 if (oe->op == notop)
848 if (dump_file && (dump_flags & TDF_DETAILS))
850 fprintf (dump_file, "Equivalence: ");
851 print_generic_expr (dump_file, notop, 0);
852 if (opcode == BIT_AND_EXPR)
853 fprintf (dump_file, " & ~");
854 else if (opcode == BIT_IOR_EXPR)
855 fprintf (dump_file, " | ~");
856 print_generic_expr (dump_file, oe->op, 0);
857 if (opcode == BIT_AND_EXPR)
858 fprintf (dump_file, " -> 0\n");
859 else if (opcode == BIT_IOR_EXPR)
860 fprintf (dump_file, " -> -1\n");
863 if (opcode == BIT_AND_EXPR)
864 oe->op = build_zero_cst (TREE_TYPE (oe->op));
865 else if (opcode == BIT_IOR_EXPR)
866 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
868 reassociate_stats.ops_eliminated += ops->length () - 1;
869 ops->truncate (0);
870 ops->quick_push (oe);
871 return true;
875 return false;
878 /* Use constant value that may be present in OPS to try to eliminate
879 operands. Note that this function is only really used when we've
880 eliminated ops for other reasons, or merged constants. Across
881 single statements, fold already does all of this, plus more. There
882 is little point in duplicating logic, so I've only included the
883 identities that I could ever construct testcases to trigger. */
885 static void
886 eliminate_using_constants (enum tree_code opcode,
887 vec<operand_entry_t> *ops)
889 operand_entry_t oelast = ops->last ();
890 tree type = TREE_TYPE (oelast->op);
892 if (oelast->rank == 0
893 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
895 switch (opcode)
897 case BIT_AND_EXPR:
898 if (integer_zerop (oelast->op))
900 if (ops->length () != 1)
902 if (dump_file && (dump_flags & TDF_DETAILS))
903 fprintf (dump_file, "Found & 0, removing all other ops\n");
905 reassociate_stats.ops_eliminated += ops->length () - 1;
907 ops->truncate (0);
908 ops->quick_push (oelast);
909 return;
912 else if (integer_all_onesp (oelast->op))
914 if (ops->length () != 1)
916 if (dump_file && (dump_flags & TDF_DETAILS))
917 fprintf (dump_file, "Found & -1, removing\n");
918 ops->pop ();
919 reassociate_stats.ops_eliminated++;
922 break;
923 case BIT_IOR_EXPR:
924 if (integer_all_onesp (oelast->op))
926 if (ops->length () != 1)
928 if (dump_file && (dump_flags & TDF_DETAILS))
929 fprintf (dump_file, "Found | -1, removing all other ops\n");
931 reassociate_stats.ops_eliminated += ops->length () - 1;
933 ops->truncate (0);
934 ops->quick_push (oelast);
935 return;
938 else if (integer_zerop (oelast->op))
940 if (ops->length () != 1)
942 if (dump_file && (dump_flags & TDF_DETAILS))
943 fprintf (dump_file, "Found | 0, removing\n");
944 ops->pop ();
945 reassociate_stats.ops_eliminated++;
948 break;
949 case MULT_EXPR:
950 if (integer_zerop (oelast->op)
951 || (FLOAT_TYPE_P (type)
952 && !HONOR_NANS (TYPE_MODE (type))
953 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
954 && real_zerop (oelast->op)))
956 if (ops->length () != 1)
958 if (dump_file && (dump_flags & TDF_DETAILS))
959 fprintf (dump_file, "Found * 0, removing all other ops\n");
961 reassociate_stats.ops_eliminated += ops->length () - 1;
962 ops->truncate (1);
963 ops->quick_push (oelast);
964 return;
967 else if (integer_onep (oelast->op)
968 || (FLOAT_TYPE_P (type)
969 && !HONOR_SNANS (TYPE_MODE (type))
970 && real_onep (oelast->op)))
972 if (ops->length () != 1)
974 if (dump_file && (dump_flags & TDF_DETAILS))
975 fprintf (dump_file, "Found * 1, removing\n");
976 ops->pop ();
977 reassociate_stats.ops_eliminated++;
978 return;
981 break;
982 case BIT_XOR_EXPR:
983 case PLUS_EXPR:
984 case MINUS_EXPR:
985 if (integer_zerop (oelast->op)
986 || (FLOAT_TYPE_P (type)
987 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
988 && fold_real_zero_addition_p (type, oelast->op,
989 opcode == MINUS_EXPR)))
991 if (ops->length () != 1)
993 if (dump_file && (dump_flags & TDF_DETAILS))
994 fprintf (dump_file, "Found [|^+] 0, removing\n");
995 ops->pop ();
996 reassociate_stats.ops_eliminated++;
997 return;
1000 break;
1001 default:
1002 break;
1008 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
1009 bool, bool);
1011 /* Structure for tracking and counting operands. */
1012 typedef struct oecount_s {
1013 int cnt;
1014 int id;
1015 enum tree_code oecode;
1016 tree op;
1017 } oecount;
1020 /* The heap for the oecount hashtable and the sorted list of operands. */
1021 static vec<oecount> cvec;
1024 /* Oecount hashtable helpers. */
1026 struct oecount_hasher : typed_noop_remove <void>
1028 /* Note that this hash table stores integers, not pointers.
1029 So, observe the casting in the member functions. */
1030 typedef void value_type;
1031 typedef void compare_type;
1032 static inline hashval_t hash (const value_type *);
1033 static inline bool equal (const value_type *, const compare_type *);
1036 /* Hash function for oecount. */
1038 inline hashval_t
1039 oecount_hasher::hash (const value_type *p)
1041 const oecount *c = &cvec[(size_t)p - 42];
1042 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1045 /* Comparison function for oecount. */
1047 inline bool
1048 oecount_hasher::equal (const value_type *p1, const compare_type *p2)
1050 const oecount *c1 = &cvec[(size_t)p1 - 42];
1051 const oecount *c2 = &cvec[(size_t)p2 - 42];
1052 return (c1->oecode == c2->oecode
1053 && c1->op == c2->op);
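
/* The +41/-42 arithmetic above and in undistribute_ops_list encodes an
   index into CVEC as a pointer-sized key: undistribute_ops_list pushes
   an element and then inserts cvec.length () + 41, i.e. the element's
   index plus 42, and the hasher recovers the element with "- 42".  The
   offset keeps the stored keys away from 0, which the hash table treats
   as an empty slot.  */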
1056 /* Comparison function for qsort sorting oecount elements by count. */
1058 static int
1059 oecount_cmp (const void *p1, const void *p2)
1061 const oecount *c1 = (const oecount *)p1;
1062 const oecount *c2 = (const oecount *)p2;
1063 if (c1->cnt != c2->cnt)
1064 return c1->cnt - c2->cnt;
1065 else
1066 /* If counts are identical, use unique IDs to stabilize qsort. */
1067 return c1->id - c2->id;
1070 /* Return TRUE iff STMT represents a builtin call that raises OP
1071 to some exponent. */
1073 static bool
1074 stmt_is_power_of_op (gimple stmt, tree op)
1076 tree fndecl;
1078 if (!is_gimple_call (stmt))
1079 return false;
1081 fndecl = gimple_call_fndecl (stmt);
1083 if (!fndecl
1084 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1085 return false;
1087 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1089 CASE_FLT_FN (BUILT_IN_POW):
1090 CASE_FLT_FN (BUILT_IN_POWI):
1091 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1093 default:
1094 return false;
1098 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1099 in place and return the result. Assumes that stmt_is_power_of_op
1100 was previously called for STMT and returned TRUE. */
1102 static HOST_WIDE_INT
1103 decrement_power (gimple stmt)
1105 REAL_VALUE_TYPE c, cint;
1106 HOST_WIDE_INT power;
1107 tree arg1;
1109 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1111 CASE_FLT_FN (BUILT_IN_POW):
1112 arg1 = gimple_call_arg (stmt, 1);
1113 c = TREE_REAL_CST (arg1);
1114 power = real_to_integer (&c) - 1;
1115 real_from_integer (&cint, VOIDmode, power, SIGNED);
1116 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1117 return power;
1119 CASE_FLT_FN (BUILT_IN_POWI):
1120 arg1 = gimple_call_arg (stmt, 1);
1121 power = TREE_INT_CST_LOW (arg1) - 1;
1122 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1123 return power;
1125 default:
1126 gcc_unreachable ();
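
/* E.g. for a statement t_1 = __builtin_pow (x_2, 5.0e0), decrement_power
   rewrites it in place to __builtin_pow (x_2, 4.0e0) and returns 4;
   zero_one_operation uses the return value to know when the call has
   degenerated to pow (x, 1) and can be replaced by x itself.  */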
1130 /* Find the single immediate use of STMT's LHS, and replace it
1131 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1132 replace *DEF with OP as well. */
1134 static void
1135 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1137 tree lhs;
1138 gimple use_stmt;
1139 use_operand_p use;
1140 gimple_stmt_iterator gsi;
1142 if (is_gimple_call (stmt))
1143 lhs = gimple_call_lhs (stmt);
1144 else
1145 lhs = gimple_assign_lhs (stmt);
1147 gcc_assert (has_single_use (lhs));
1148 single_imm_use (lhs, &use, &use_stmt);
1149 if (lhs == *def)
1150 *def = op;
1151 SET_USE (use, op);
1152 if (TREE_CODE (op) != SSA_NAME)
1153 update_stmt (use_stmt);
1154 gsi = gsi_for_stmt (stmt);
1155 unlink_stmt_vdef (stmt);
1156 reassoc_remove_stmt (&gsi);
1157 release_defs (stmt);
1160 /* Walks the linear chain with result *DEF searching for an operation
 1161    with operand OP and code OPCODE, removing that from the chain.  *DEF
1162 is updated if there is only one operand but no operation left. */
1164 static void
1165 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1167 gimple stmt = SSA_NAME_DEF_STMT (*def);
1171 tree name;
1173 if (opcode == MULT_EXPR
1174 && stmt_is_power_of_op (stmt, op))
1176 if (decrement_power (stmt) == 1)
1177 propagate_op_to_single_use (op, stmt, def);
1178 return;
1181 name = gimple_assign_rhs1 (stmt);
1183 /* If this is the operation we look for and one of the operands
1184 is ours simply propagate the other operand into the stmts
1185 single use. */
1186 if (gimple_assign_rhs_code (stmt) == opcode
1187 && (name == op
1188 || gimple_assign_rhs2 (stmt) == op))
1190 if (name == op)
1191 name = gimple_assign_rhs2 (stmt);
1192 propagate_op_to_single_use (name, stmt, def);
1193 return;
1196 /* We might have a multiply of two __builtin_pow* calls, and
1197 the operand might be hiding in the rightmost one. */
1198 if (opcode == MULT_EXPR
1199 && gimple_assign_rhs_code (stmt) == opcode
1200 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1201 && has_single_use (gimple_assign_rhs2 (stmt)))
1203 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1204 if (stmt_is_power_of_op (stmt2, op))
1206 if (decrement_power (stmt2) == 1)
1207 propagate_op_to_single_use (op, stmt2, def);
1208 return;
1212 /* Continue walking the chain. */
1213 gcc_assert (name != op
1214 && TREE_CODE (name) == SSA_NAME);
1215 stmt = SSA_NAME_DEF_STMT (name);
1217 while (1);
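
/* For example, given DEF pointing at t_5 and the linear chain

     t_3 = x_1 * a_2;
     t_4 = t_3 * b_6;
     t_5 = t_4 * c_7;

   zero_one_operation (DEF, MULT_EXPR, a_2) walks t_5 -> t_4 -> t_3
   through the first operands, finds a_2 in t_3, and propagates the
   other operand x_1 into t_3's single use, so the chain now computes
   the old value with the factor a_2 stripped.  *DEF itself is only
   rewritten when the stripped operation was the one defining it.  */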
1220 /* Returns true if statement S1 dominates statement S2. Like
1221 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1223 static bool
1224 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1226 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
 1228   /* If bb1 is NULL, it should be the GIMPLE_NOP def stmt of a
 1229      default-definition ((D)) SSA_NAME.  Assume it lives at the beginning
 1230      of the function and thus dominates everything.  */
1231 if (!bb1 || s1 == s2)
1232 return true;
1234 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1235 if (!bb2)
1236 return false;
1238 if (bb1 == bb2)
1240 /* PHIs in the same basic block are assumed to be
1241 executed all in parallel, if only one stmt is a PHI,
1242 it dominates the other stmt in the same basic block. */
1243 if (gimple_code (s1) == GIMPLE_PHI)
1244 return true;
1246 if (gimple_code (s2) == GIMPLE_PHI)
1247 return false;
1249 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1251 if (gimple_uid (s1) < gimple_uid (s2))
1252 return true;
1254 if (gimple_uid (s1) > gimple_uid (s2))
1255 return false;
1257 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1258 unsigned int uid = gimple_uid (s1);
1259 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1261 gimple s = gsi_stmt (gsi);
1262 if (gimple_uid (s) != uid)
1263 break;
1264 if (s == s2)
1265 return true;
1268 return false;
1271 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1274 /* Insert STMT after INSERT_POINT. */
1276 static void
1277 insert_stmt_after (gimple stmt, gimple insert_point)
1279 gimple_stmt_iterator gsi;
1280 basic_block bb;
1282 if (gimple_code (insert_point) == GIMPLE_PHI)
1283 bb = gimple_bb (insert_point);
1284 else if (!stmt_ends_bb_p (insert_point))
1286 gsi = gsi_for_stmt (insert_point);
1287 gimple_set_uid (stmt, gimple_uid (insert_point));
1288 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1289 return;
1291 else
1292 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1293 thus if it must end a basic block, it should be a call that can
1294 throw, or some assignment that can throw. If it throws, the LHS
1295 of it will not be initialized though, so only valid places using
1296 the SSA_NAME should be dominated by the fallthru edge. */
1297 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1298 gsi = gsi_after_labels (bb);
1299 if (gsi_end_p (gsi))
1301 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1302 gimple_set_uid (stmt,
1303 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1305 else
1306 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1307 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1310 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1311 the result. Places the statement after the definition of either
1312 OP1 or OP2. Returns the new statement. */
1314 static gimple
1315 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1317 gimple op1def = NULL, op2def = NULL;
1318 gimple_stmt_iterator gsi;
1319 tree op;
1320 gimple sum;
1322 /* Create the addition statement. */
1323 op = make_ssa_name (type, NULL);
1324 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1326 /* Find an insertion place and insert. */
1327 if (TREE_CODE (op1) == SSA_NAME)
1328 op1def = SSA_NAME_DEF_STMT (op1);
1329 if (TREE_CODE (op2) == SSA_NAME)
1330 op2def = SSA_NAME_DEF_STMT (op2);
1331 if ((!op1def || gimple_nop_p (op1def))
1332 && (!op2def || gimple_nop_p (op2def)))
1334 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1335 if (gsi_end_p (gsi))
1337 gimple_stmt_iterator gsi2
1338 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1339 gimple_set_uid (sum,
1340 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1342 else
1343 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1344 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1346 else
1348 gimple insert_point;
1349 if ((!op1def || gimple_nop_p (op1def))
1350 || (op2def && !gimple_nop_p (op2def)
1351 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1352 insert_point = op2def;
1353 else
1354 insert_point = op1def;
1355 insert_stmt_after (sum, insert_point);
1357 update_stmt (sum);
1359 return sum;
1362 /* Perform un-distribution of divisions and multiplications.
1363 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1364 to (A + B) / X for real X.
1366 The algorithm is organized as follows.
1368 - First we walk the addition chain *OPS looking for summands that
1369 are defined by a multiplication or a real division. This results
1370 in the candidates bitmap with relevant indices into *OPS.
1372 - Second we build the chains of multiplications or divisions for
1373 these candidates, counting the number of occurrences of (operand, code)
1374 pairs in all of the candidates chains.
1376 - Third we sort the (operand, code) pairs by number of occurrence and
1377 process them starting with the pair with the most uses.
1379 * For each such pair we walk the candidates again to build a
1380 second candidate bitmap noting all multiplication/division chains
1381 that have at least one occurrence of (operand, code).
1383 * We build an alternate addition chain only covering these
1384 candidates with one (operand, code) operation removed from their
1385 multiplication/division chain.
1387 * The first candidate gets replaced by the alternate addition chain
1388 multiplied/divided by the operand.
1390 * All candidate chains get disabled for further processing and
1391 processing of (operand, code) pairs continues.
1393 The alternate addition chains built are re-processed by the main
1394 reassociation algorithm which allows optimizing a * x * y + b * y * x
1395 to (a + b ) * x * y in one invocation of the reassociation pass. */
1397 static bool
1398 undistribute_ops_list (enum tree_code opcode,
1399 vec<operand_entry_t> *ops, struct loop *loop)
1401 unsigned int length = ops->length ();
1402 operand_entry_t oe1;
1403 unsigned i, j;
1404 sbitmap candidates, candidates2;
1405 unsigned nr_candidates, nr_candidates2;
1406 sbitmap_iterator sbi0;
1407 vec<operand_entry_t> *subops;
1408 hash_table <oecount_hasher> ctable;
1409 bool changed = false;
1410 int next_oecount_id = 0;
1412 if (length <= 1
1413 || opcode != PLUS_EXPR)
1414 return false;
1416 /* Build a list of candidates to process. */
1417 candidates = sbitmap_alloc (length);
1418 bitmap_clear (candidates);
1419 nr_candidates = 0;
1420 FOR_EACH_VEC_ELT (*ops, i, oe1)
1422 enum tree_code dcode;
1423 gimple oe1def;
1425 if (TREE_CODE (oe1->op) != SSA_NAME)
1426 continue;
1427 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1428 if (!is_gimple_assign (oe1def))
1429 continue;
1430 dcode = gimple_assign_rhs_code (oe1def);
1431 if ((dcode != MULT_EXPR
1432 && dcode != RDIV_EXPR)
1433 || !is_reassociable_op (oe1def, dcode, loop))
1434 continue;
1436 bitmap_set_bit (candidates, i);
1437 nr_candidates++;
1440 if (nr_candidates < 2)
1442 sbitmap_free (candidates);
1443 return false;
1446 if (dump_file && (dump_flags & TDF_DETAILS))
1448 fprintf (dump_file, "searching for un-distribute opportunities ");
1449 print_generic_expr (dump_file,
1450 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1451 fprintf (dump_file, " %d\n", nr_candidates);
1454 /* Build linearized sub-operand lists and the counting table. */
1455 cvec.create (0);
1456 ctable.create (15);
1457 /* ??? Macro arguments cannot have multi-argument template types in
 1458      them.  This typedef is needed to work around that limitation.  */
1459 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1460 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1461 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1463 gimple oedef;
1464 enum tree_code oecode;
1465 unsigned j;
1467 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1468 oecode = gimple_assign_rhs_code (oedef);
1469 linearize_expr_tree (&subops[i], oedef,
1470 associative_tree_code (oecode), false);
1472 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1474 oecount c;
1475 void **slot;
1476 size_t idx;
1477 c.oecode = oecode;
1478 c.cnt = 1;
1479 c.id = next_oecount_id++;
1480 c.op = oe1->op;
1481 cvec.safe_push (c);
1482 idx = cvec.length () + 41;
1483 slot = ctable.find_slot ((void *)idx, INSERT);
1484 if (!*slot)
1486 *slot = (void *)idx;
1488 else
1490 cvec.pop ();
1491 cvec[(size_t)*slot - 42].cnt++;
1495 ctable.dispose ();
1497 /* Sort the counting table. */
1498 cvec.qsort (oecount_cmp);
1500 if (dump_file && (dump_flags & TDF_DETAILS))
1502 oecount *c;
1503 fprintf (dump_file, "Candidates:\n");
1504 FOR_EACH_VEC_ELT (cvec, j, c)
1506 fprintf (dump_file, " %u %s: ", c->cnt,
1507 c->oecode == MULT_EXPR
1508 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1509 print_generic_expr (dump_file, c->op, 0);
1510 fprintf (dump_file, "\n");
1514 /* Process the (operand, code) pairs in order of most occurrence. */
1515 candidates2 = sbitmap_alloc (length);
1516 while (!cvec.is_empty ())
1518 oecount *c = &cvec.last ();
1519 if (c->cnt < 2)
1520 break;
1522 /* Now collect the operands in the outer chain that contain
1523 the common operand in their inner chain. */
1524 bitmap_clear (candidates2);
1525 nr_candidates2 = 0;
1526 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1528 gimple oedef;
1529 enum tree_code oecode;
1530 unsigned j;
1531 tree op = (*ops)[i]->op;
1533 /* If we undistributed in this chain already this may be
1534 a constant. */
1535 if (TREE_CODE (op) != SSA_NAME)
1536 continue;
1538 oedef = SSA_NAME_DEF_STMT (op);
1539 oecode = gimple_assign_rhs_code (oedef);
1540 if (oecode != c->oecode)
1541 continue;
1543 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1545 if (oe1->op == c->op)
1547 bitmap_set_bit (candidates2, i);
1548 ++nr_candidates2;
1549 break;
1554 if (nr_candidates2 >= 2)
1556 operand_entry_t oe1, oe2;
1557 gimple prod;
1558 int first = bitmap_first_set_bit (candidates2);
1560 /* Build the new addition chain. */
1561 oe1 = (*ops)[first];
1562 if (dump_file && (dump_flags & TDF_DETAILS))
1564 fprintf (dump_file, "Building (");
1565 print_generic_expr (dump_file, oe1->op, 0);
1567 zero_one_operation (&oe1->op, c->oecode, c->op);
1568 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1570 gimple sum;
1571 oe2 = (*ops)[i];
1572 if (dump_file && (dump_flags & TDF_DETAILS))
1574 fprintf (dump_file, " + ");
1575 print_generic_expr (dump_file, oe2->op, 0);
1577 zero_one_operation (&oe2->op, c->oecode, c->op);
1578 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1579 oe1->op, oe2->op, opcode);
1580 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1581 oe2->rank = 0;
1582 oe1->op = gimple_get_lhs (sum);
1585 /* Apply the multiplication/division. */
1586 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1587 oe1->op, c->op, c->oecode);
1588 if (dump_file && (dump_flags & TDF_DETAILS))
1590 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1591 print_generic_expr (dump_file, c->op, 0);
1592 fprintf (dump_file, "\n");
1595 /* Record it in the addition chain and disable further
1596 undistribution with this op. */
1597 oe1->op = gimple_assign_lhs (prod);
1598 oe1->rank = get_rank (oe1->op);
1599 subops[first].release ();
1601 changed = true;
1604 cvec.pop ();
1607 for (i = 0; i < ops->length (); ++i)
1608 subops[i].release ();
1609 free (subops);
1610 cvec.release ();
1611 sbitmap_free (candidates);
1612 sbitmap_free (candidates2);
1614 return changed;
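
/* Tracing the above on a * x + b * x + c: the candidates are the two
   multiplications, and the counting table records (x, MULT_EXPR) with
   count 2 (a and b with count 1 each), so x is processed first.
   zero_one_operation strips x from both chains, build_and_add_sum emits
   t = a + b and then t2 = t * x, and the addition chain becomes
   t2 + 0 + c, which the caller then re-optimizes.  */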
1617 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1618 expression, examine the other OPS to see if any of them are comparisons
1619 of the same values, which we may be able to combine or eliminate.
1620 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1622 static bool
1623 eliminate_redundant_comparison (enum tree_code opcode,
1624 vec<operand_entry_t> *ops,
1625 unsigned int currindex,
1626 operand_entry_t curr)
1628 tree op1, op2;
1629 enum tree_code lcode, rcode;
1630 gimple def1, def2;
1631 int i;
1632 operand_entry_t oe;
1634 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1635 return false;
1637 /* Check that CURR is a comparison. */
1638 if (TREE_CODE (curr->op) != SSA_NAME)
1639 return false;
1640 def1 = SSA_NAME_DEF_STMT (curr->op);
1641 if (!is_gimple_assign (def1))
1642 return false;
1643 lcode = gimple_assign_rhs_code (def1);
1644 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1645 return false;
1646 op1 = gimple_assign_rhs1 (def1);
1647 op2 = gimple_assign_rhs2 (def1);
1649 /* Now look for a similar comparison in the remaining OPS. */
1650 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1652 tree t;
1654 if (TREE_CODE (oe->op) != SSA_NAME)
1655 continue;
1656 def2 = SSA_NAME_DEF_STMT (oe->op);
1657 if (!is_gimple_assign (def2))
1658 continue;
1659 rcode = gimple_assign_rhs_code (def2);
1660 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1661 continue;
1663 /* If we got here, we have a match. See if we can combine the
1664 two comparisons. */
1665 if (opcode == BIT_IOR_EXPR)
1666 t = maybe_fold_or_comparisons (lcode, op1, op2,
1667 rcode, gimple_assign_rhs1 (def2),
1668 gimple_assign_rhs2 (def2));
1669 else
1670 t = maybe_fold_and_comparisons (lcode, op1, op2,
1671 rcode, gimple_assign_rhs1 (def2),
1672 gimple_assign_rhs2 (def2));
1673 if (!t)
1674 continue;
1676 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1677 always give us a boolean_type_node value back. If the original
1678 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1679 we need to convert. */
1680 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1681 t = fold_convert (TREE_TYPE (curr->op), t);
1683 if (TREE_CODE (t) != INTEGER_CST
1684 && !operand_equal_p (t, curr->op, 0))
1686 enum tree_code subcode;
1687 tree newop1, newop2;
1688 if (!COMPARISON_CLASS_P (t))
1689 continue;
1690 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1691 STRIP_USELESS_TYPE_CONVERSION (newop1);
1692 STRIP_USELESS_TYPE_CONVERSION (newop2);
1693 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1694 continue;
1697 if (dump_file && (dump_flags & TDF_DETAILS))
1699 fprintf (dump_file, "Equivalence: ");
1700 print_generic_expr (dump_file, curr->op, 0);
1701 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1702 print_generic_expr (dump_file, oe->op, 0);
1703 fprintf (dump_file, " -> ");
1704 print_generic_expr (dump_file, t, 0);
1705 fprintf (dump_file, "\n");
1708 /* Now we can delete oe, as it has been subsumed by the new combined
1709 expression t. */
1710 ops->ordered_remove (i);
1711 reassociate_stats.ops_eliminated ++;
1713 /* If t is the same as curr->op, we're done. Otherwise we must
1714 replace curr->op with t. Special case is if we got a constant
1715 back, in which case we add it to the end instead of in place of
1716 the current entry. */
1717 if (TREE_CODE (t) == INTEGER_CST)
1719 ops->ordered_remove (currindex);
1720 add_to_ops_vec (ops, t);
1722 else if (!operand_equal_p (t, curr->op, 0))
1724 gimple sum;
1725 enum tree_code subcode;
1726 tree newop1;
1727 tree newop2;
1728 gcc_assert (COMPARISON_CLASS_P (t));
1729 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1730 STRIP_USELESS_TYPE_CONVERSION (newop1);
1731 STRIP_USELESS_TYPE_CONVERSION (newop2);
1732 gcc_checking_assert (is_gimple_val (newop1)
1733 && is_gimple_val (newop2));
1734 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1735 curr->op = gimple_get_lhs (sum);
1737 return true;
1740 return false;
1743 /* Perform various identities and other optimizations on the list of
1744 operand entries, stored in OPS. The tree code for the binary
1745 operation between all the operands is OPCODE. */
1747 static void
1748 optimize_ops_list (enum tree_code opcode,
1749 vec<operand_entry_t> *ops)
1751 unsigned int length = ops->length ();
1752 unsigned int i;
1753 operand_entry_t oe;
1754 operand_entry_t oelast = NULL;
1755 bool iterate = false;
1757 if (length == 1)
1758 return;
1760 oelast = ops->last ();
1762 /* If the last two are constants, pop the constants off, merge them
1763 and try the next two. */
1764 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1766 operand_entry_t oelm1 = (*ops)[length - 2];
1768 if (oelm1->rank == 0
1769 && is_gimple_min_invariant (oelm1->op)
1770 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1771 TREE_TYPE (oelast->op)))
1773 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1774 oelm1->op, oelast->op);
1776 if (folded && is_gimple_min_invariant (folded))
1778 if (dump_file && (dump_flags & TDF_DETAILS))
1779 fprintf (dump_file, "Merging constants\n");
1781 ops->pop ();
1782 ops->pop ();
1784 add_to_ops_vec (ops, folded);
1785 reassociate_stats.constants_eliminated++;
1787 optimize_ops_list (opcode, ops);
1788 return;
1793 eliminate_using_constants (opcode, ops);
1794 oelast = NULL;
1796 for (i = 0; ops->iterate (i, &oe);)
1798 bool done = false;
1800 if (eliminate_not_pairs (opcode, ops, i, oe))
1801 return;
1802 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1803 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1804 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1806 if (done)
1807 return;
1808 iterate = true;
1809 oelast = NULL;
1810 continue;
1812 oelast = oe;
1813 i++;
1816 length = ops->length ();
1817 oelast = ops->last ();
1819 if (iterate)
1820 optimize_ops_list (opcode, ops);
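
/* For instance, a PLUS_EXPR operand list {b_2, a_1, 7, 3} has its two
   trailing constants folded into 10 (counted as one constant
   eliminated), after which the pairwise eliminations above are retried
   on the shorter list {b_2, a_1, 10}.  */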
1823 /* The following functions are subroutines to optimize_range_tests and allow
1824 it to try to change a logical combination of comparisons into a range
1825 test.
1827 For example, both
 1828 	X == 2 || X == 5 || X == 3 || X == 4
 1829    and
1830 X >= 2 && X <= 5
1831 are converted to
1832 (unsigned) (X - 2) <= 3
1834 For more information see comments above fold_test_range in fold-const.c,
1835 this implementation is for GIMPLE. */
1837 struct range_entry
1839 tree exp;
1840 tree low;
1841 tree high;
1842 bool in_p;
1843 bool strict_overflow_p;
1844 unsigned int idx, next;
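
/* In terms of this structure, the example above is first recorded as
   four single-value ranges for the same SSA_NAME X,

     +[2, 2]  +[5, 5]  +[3, 3]  +[4, 4]

   (in_p true, low == high), which optimize_range_tests sorts and merges
   into the single range +[2, 5] before emitting the unsigned
   subtract-and-compare form shown above.  */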
1847 /* This is similar to make_range in fold-const.c, but on top of
1848 GIMPLE instead of trees. If EXP is non-NULL, it should be
1849 an SSA_NAME and STMT argument is ignored, otherwise STMT
1850 argument should be a GIMPLE_COND. */
1852 static void
1853 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1855 int in_p;
1856 tree low, high;
1857 bool is_bool, strict_overflow_p;
1859 r->exp = NULL_TREE;
1860 r->in_p = false;
1861 r->strict_overflow_p = false;
1862 r->low = NULL_TREE;
1863 r->high = NULL_TREE;
1864 if (exp != NULL_TREE
1865 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1866 return;
1868 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1869 and see if we can refine the range. Some of the cases below may not
1870 happen, but it doesn't seem worth worrying about this. We "continue"
1871 the outer loop when we've changed something; otherwise we "break"
1872 the switch, which will "break" the while. */
1873 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1874 high = low;
1875 in_p = 0;
1876 strict_overflow_p = false;
1877 is_bool = false;
1878 if (exp == NULL_TREE)
1879 is_bool = true;
1880 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1882 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1883 is_bool = true;
1884 else
1885 return;
1887 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1888 is_bool = true;
1890 while (1)
1892 enum tree_code code;
1893 tree arg0, arg1, exp_type;
1894 tree nexp;
1895 location_t loc;
1897 if (exp != NULL_TREE)
1899 if (TREE_CODE (exp) != SSA_NAME
1900 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1901 break;
1903 stmt = SSA_NAME_DEF_STMT (exp);
1904 if (!is_gimple_assign (stmt))
1905 break;
1907 code = gimple_assign_rhs_code (stmt);
1908 arg0 = gimple_assign_rhs1 (stmt);
1909 arg1 = gimple_assign_rhs2 (stmt);
1910 exp_type = TREE_TYPE (exp);
1912 else
1914 code = gimple_cond_code (stmt);
1915 arg0 = gimple_cond_lhs (stmt);
1916 arg1 = gimple_cond_rhs (stmt);
1917 exp_type = boolean_type_node;
1920 if (TREE_CODE (arg0) != SSA_NAME)
1921 break;
1922 loc = gimple_location (stmt);
1923 switch (code)
1925 case BIT_NOT_EXPR:
1926 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1927 /* Ensure the range is either +[-,0], +[0,0],
1928 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1929 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1930 or similar expression of unconditional true or
1931 false, it should not be negated. */
1932 && ((high && integer_zerop (high))
1933 || (low && integer_onep (low))))
1935 in_p = !in_p;
1936 exp = arg0;
1937 continue;
1939 break;
1940 case SSA_NAME:
1941 exp = arg0;
1942 continue;
1943 CASE_CONVERT:
1944 if (is_bool)
1945 goto do_default;
1946 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1948 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1949 is_bool = true;
1950 else
1951 return;
1953 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1954 is_bool = true;
1955 goto do_default;
1956 case EQ_EXPR:
1957 case NE_EXPR:
1958 case LT_EXPR:
1959 case LE_EXPR:
1960 case GE_EXPR:
1961 case GT_EXPR:
1962 is_bool = true;
1963 /* FALLTHRU */
1964 default:
1965 if (!is_bool)
1966 return;
1967 do_default:
1968 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1969 &low, &high, &in_p,
1970 &strict_overflow_p);
1971 if (nexp != NULL_TREE)
1973 exp = nexp;
1974 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1975 continue;
1977 break;
1979 break;
1981 if (is_bool)
1983 r->exp = exp;
1984 r->in_p = in_p;
1985 r->low = low;
1986 r->high = high;
1987 r->strict_overflow_p = strict_overflow_p;
1991 /* Comparison function for qsort. Sort entries
1992 without SSA_NAME exp first, then with SSA_NAMEs sorted
1993 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1994 by increasing ->low and if ->low is the same, by increasing
1995 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1996 maximum. */
1998 static int
1999 range_entry_cmp (const void *a, const void *b)
2001 const struct range_entry *p = (const struct range_entry *) a;
2002 const struct range_entry *q = (const struct range_entry *) b;
2004 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2006 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2008 /* Group range_entries for the same SSA_NAME together. */
2009 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2010 return -1;
2011 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2012 return 1;
2013 /* If ->low is different, NULL low goes first, then by
2014 ascending low. */
2015 if (p->low != NULL_TREE)
2017 if (q->low != NULL_TREE)
2019 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2020 p->low, q->low);
2021 if (tem && integer_onep (tem))
2022 return -1;
2023 tem = fold_binary (GT_EXPR, boolean_type_node,
2024 p->low, q->low);
2025 if (tem && integer_onep (tem))
2026 return 1;
2028 else
2029 return 1;
2031 else if (q->low != NULL_TREE)
2032 return -1;
2033 /* If ->high is different, NULL high goes last, before that by
2034 ascending high. */
2035 if (p->high != NULL_TREE)
2037 if (q->high != NULL_TREE)
2039 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2040 p->high, q->high);
2041 if (tem && integer_onep (tem))
2042 return -1;
2043 tem = fold_binary (GT_EXPR, boolean_type_node,
2044 p->high, q->high);
2045 if (tem && integer_onep (tem))
2046 return 1;
2048 else
2049 return -1;
2051 else if (p->high != NULL_TREE)
2052 return 1;
2053 /* If both ranges are the same, sort below by ascending idx. */
2055 else
2056 return 1;
2058 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2059 return -1;
2061 if (p->idx < q->idx)
2062 return -1;
2063 else
2065 gcc_checking_assert (p->idx > q->idx);
2066 return 1;
2070 /* Helper routine of optimize_range_test.
2071 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2072 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2073 OPCODE and OPS are arguments of optimize_range_tests. Return
2074 true if the range merge has been successful.
2075 If OPCODE is ERROR_MARK, this is called from within
2076 maybe_optimize_range_tests and is performing inter-bb range optimization.
2077 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2078 oe->rank. */
2080 static bool
2081 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2082 unsigned int count, enum tree_code opcode,
2083 vec<operand_entry_t> *ops, tree exp, bool in_p,
2084 tree low, tree high, bool strict_overflow_p)
2086 operand_entry_t oe = (*ops)[range->idx];
2087 tree op = oe->op;
2088 gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2089 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2090 location_t loc = gimple_location (stmt);
2091 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2092 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
2093 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2094 gimple_stmt_iterator gsi;
2096 if (tem == NULL_TREE)
2097 return false;
2099 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2100 warning_at (loc, OPT_Wstrict_overflow,
2101 "assuming signed overflow does not occur "
2102 "when simplifying range test");
2104 if (dump_file && (dump_flags & TDF_DETAILS))
2106 struct range_entry *r;
2107 fprintf (dump_file, "Optimizing range tests ");
2108 print_generic_expr (dump_file, range->exp, 0);
2109 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2110 print_generic_expr (dump_file, range->low, 0);
2111 fprintf (dump_file, ", ");
2112 print_generic_expr (dump_file, range->high, 0);
2113 fprintf (dump_file, "]");
2114 for (r = otherrange; r < otherrange + count; r++)
2116 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2117 print_generic_expr (dump_file, r->low, 0);
2118 fprintf (dump_file, ", ");
2119 print_generic_expr (dump_file, r->high, 0);
2120 fprintf (dump_file, "]");
2122 fprintf (dump_file, "\n into ");
2123 print_generic_expr (dump_file, tem, 0);
2124 fprintf (dump_file, "\n");
2127 if (opcode == BIT_IOR_EXPR
2128 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2129 tem = invert_truthvalue_loc (loc, tem);
2131 tem = fold_convert_loc (loc, optype, tem);
2132 gsi = gsi_for_stmt (stmt);
2133 /* In rare cases range->exp can be equal to lhs of stmt.
2134 In that case we have to insert after the stmt rather than before
2135 it. */
2136 if (op == range->exp)
2137 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2138 GSI_CONTINUE_LINKING);
2139 else
2141 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2142 GSI_SAME_STMT);
2143 gsi_prev (&gsi);
2145 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2146 if (gimple_uid (gsi_stmt (gsi)))
2147 break;
2148 else
2149 gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2151 oe->op = tem;
2152 range->exp = exp;
2153 range->low = low;
2154 range->high = high;
2155 range->in_p = in_p;
2156 range->strict_overflow_p = false;
2158 for (range = otherrange; range < otherrange + count; range++)
2160 oe = (*ops)[range->idx];
2161 /* Now change all the other range test immediate uses, so that
2162 those tests will be optimized away. */
2163 if (opcode == ERROR_MARK)
2165 if (oe->op)
2166 oe->op = build_int_cst (TREE_TYPE (oe->op),
2167 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2168 else
2169 oe->op = (oe->rank == BIT_IOR_EXPR
2170 ? boolean_false_node : boolean_true_node);
2172 else
2173 oe->op = error_mark_node;
2174 range->exp = NULL_TREE;
2176 return true;
2179 /* Optimize X == CST1 || X == CST2
2180 if popcount (CST1 ^ CST2) == 1 into
2181 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2182 Similarly for ranges. E.g.
2183 X != 2 && X != 3 && X != 10 && X != 11
2184 will be transformed by the previous optimization into
2185 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2186 and this loop can transform that into
2187 !(((X & ~8) - 2U) <= 1U). */
2189 static bool
2190 optimize_range_tests_xor (enum tree_code opcode, tree type,
2191 tree lowi, tree lowj, tree highi, tree highj,
2192 vec<operand_entry_t> *ops,
2193 struct range_entry *rangei,
2194 struct range_entry *rangej)
2196 tree lowxor, highxor, tem, exp;
2197 /* Check highi ^ lowi == highj ^ lowj and
2198 popcount (highi ^ lowi) == 1. */
2199 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2200 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2201 return false;
2202 if (tree_log2 (lowxor) < 0)
2203 return false;
2204 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2205 if (!tree_int_cst_equal (lowxor, highxor))
2206 return false;
2208 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2209 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2210 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2211 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2212 if (update_range_test (rangei, rangej, 1, opcode, ops, exp,
2213 rangei->in_p, lowj, highj,
2214 rangei->strict_overflow_p
2215 || rangej->strict_overflow_p))
2216 return true;
2217 return false;
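/* Illustrative sketch, not part of the original pass: the bit trick used
   above, shown on plain unsigned values for the single-value case
   X == CST1 || X == CST2.  When the two constants differ in exactly one
   bit, both equality tests collapse into one masked comparison.  Ranges
   are handled the same way under the extra disjointness checks done by
   the caller.  The helper name below is made up.  */

static int
sketch_xor_equality_merge (unsigned int x, unsigned int cst1, unsigned int cst2)
{
  unsigned int diff = cst1 ^ cst2;

  /* Precondition checked by optimize_range_tests_xor: exactly one bit set.  */
  if (diff == 0 || (diff & (diff - 1)) != 0)
    return x == cst1 || x == cst2;

  /* Transformed form: (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).  */
  return (x & ~diff) == (cst1 & ~diff);
}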
2220 /* Optimize X == CST1 || X == CST2
2221 if popcount (CST2 - CST1) == 1 into
2222 ((X - CST1) & ~(CST2 - CST1)) == 0.
2223 Similarly for ranges. E.g.
2224 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2225 || X == 75 || X == 45
2226 will be transformed by the previous optimization into
2227 (X - 43U) <= 3U || (X - 75U) <= 3U
2228 and this loop can transform that into
2229 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2230 static bool
2231 optimize_range_tests_diff (enum tree_code opcode, tree type,
2232 tree lowi, tree lowj, tree highi, tree highj,
2233 vec<operand_entry_t> *ops,
2234 struct range_entry *rangei,
2235 struct range_entry *rangej)
2237 tree tem1, tem2, mask;
2238 /* Check highi - lowi == highj - lowj. */
2239 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2240 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2241 return false;
2242 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2243 if (!tree_int_cst_equal (tem1, tem2))
2244 return false;
2245 /* Check popcount (lowj - lowi) == 1. */
2246 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2247 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2248 return false;
2249 if (tree_log2 (tem1) < 0)
2250 return false;
2252 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2253 tem1 = fold_binary (MINUS_EXPR, type, rangei->exp, lowi);
2254 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2255 lowj = build_int_cst (type, 0);
2256 if (update_range_test (rangei, rangej, 1, opcode, ops, tem1,
2257 rangei->in_p, lowj, tem2,
2258 rangei->strict_overflow_p
2259 || rangej->strict_overflow_p))
2260 return true;
2261 return false;
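/* Illustrative sketch, not part of the original pass: the arithmetic used
   above, shown on plain unsigned values for the single-value case
   X == CST1 || X == CST2 with CST1 < CST2.  When CST2 - CST1 has exactly
   one bit set, both equality tests collapse into a single comparison
   against zero; for ranges of equal size the caller compares the masked
   difference against the range size instead.  The helper name below is
   made up.  */

static int
sketch_diff_equality_merge (unsigned int x, unsigned int cst1, unsigned int cst2)
{
  unsigned int d = cst2 - cst1;

  /* Precondition checked by optimize_range_tests_diff: exactly one bit set.  */
  if (d == 0 || (d & (d - 1)) != 0)
    return x == cst1 || x == cst2;

  /* Transformed form: ((X - CST1) & ~(CST2 - CST1)) == 0.  */
  return ((x - cst1) & ~d) == 0;
}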
2264 /* Perform the checks common to optimize_range_tests_xor and
2265 optimize_range_tests_diff.
2266 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2267 Else it calls optimize_range_tests_diff. */
2269 static bool
2270 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2271 bool optimize_xor, vec<operand_entry_t> *ops,
2272 struct range_entry *ranges)
2274 int i, j;
2275 bool any_changes = false;
2276 for (i = first; i < length; i++)
2278 tree lowi, highi, lowj, highj, type, tem;
2280 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2281 continue;
2282 type = TREE_TYPE (ranges[i].exp);
2283 if (!INTEGRAL_TYPE_P (type))
2284 continue;
2285 lowi = ranges[i].low;
2286 if (lowi == NULL_TREE)
2287 lowi = TYPE_MIN_VALUE (type);
2288 highi = ranges[i].high;
2289 if (highi == NULL_TREE)
2290 continue;
2291 for (j = i + 1; j < length && j < i + 64; j++)
2293 bool changes;
2294 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2295 continue;
2296 lowj = ranges[j].low;
2297 if (lowj == NULL_TREE)
2298 continue;
2299 highj = ranges[j].high;
2300 if (highj == NULL_TREE)
2301 highj = TYPE_MAX_VALUE (type);
2302 /* Check lowj > highi. */
2303 tem = fold_binary (GT_EXPR, boolean_type_node,
2304 lowj, highi);
2305 if (tem == NULL_TREE || !integer_onep (tem))
2306 continue;
2307 if (optimize_xor)
2308 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2309 highi, highj, ops,
2310 ranges + i, ranges + j);
2311 else
2312 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2313 highi, highj, ops,
2314 ranges + i, ranges + j);
2315 if (changes)
2317 any_changes = true;
2318 break;
2322 return any_changes;
2325 /* Optimize range tests, similarly to how fold_range_test optimizes
2326 it on trees. The tree code for the binary
2327 operation between all the operands is OPCODE.
2328 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2329 maybe_optimize_range_tests for inter-bb range optimization.
2330 In that case if oe->op is NULL, oe->id is bb->index whose
2331 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2332 the actual opcode. */
2334 static bool
2335 optimize_range_tests (enum tree_code opcode,
2336 vec<operand_entry_t> *ops)
2338 unsigned int length = ops->length (), i, j, first;
2339 operand_entry_t oe;
2340 struct range_entry *ranges;
2341 bool any_changes = false;
2343 if (length == 1)
2344 return false;
2346 ranges = XNEWVEC (struct range_entry, length);
2347 for (i = 0; i < length; i++)
2349 oe = (*ops)[i];
2350 ranges[i].idx = i;
2351 init_range_entry (ranges + i, oe->op,
2352 oe->op ? NULL :
2353 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2354 /* For |, invert it now; we will invert it again before emitting
2355 the optimized expression. */
2356 if (opcode == BIT_IOR_EXPR
2357 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2358 ranges[i].in_p = !ranges[i].in_p;
2361 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2362 for (i = 0; i < length; i++)
2363 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2364 break;
2366 /* Try to merge ranges. */
2367 for (first = i; i < length; i++)
2369 tree low = ranges[i].low;
2370 tree high = ranges[i].high;
2371 int in_p = ranges[i].in_p;
2372 bool strict_overflow_p = ranges[i].strict_overflow_p;
2373 int update_fail_count = 0;
2375 for (j = i + 1; j < length; j++)
2377 if (ranges[i].exp != ranges[j].exp)
2378 break;
2379 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2380 ranges[j].in_p, ranges[j].low, ranges[j].high))
2381 break;
2382 strict_overflow_p |= ranges[j].strict_overflow_p;
2385 if (j == i + 1)
2386 continue;
2388 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2389 ops, ranges[i].exp, in_p, low, high,
2390 strict_overflow_p))
2392 i = j - 1;
2393 any_changes = true;
2395 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2396 while update_range_test would fail. */
2397 else if (update_fail_count == 64)
2398 i = j - 1;
2399 else
2400 ++update_fail_count;
2403 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2404 ops, ranges);
2406 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2407 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2408 ops, ranges);
2410 if (any_changes && opcode != ERROR_MARK)
2412 j = 0;
2413 FOR_EACH_VEC_ELT (*ops, i, oe)
2415 if (oe->op == error_mark_node)
2416 continue;
2417 else if (i != j)
2418 (*ops)[j] = oe;
2419 j++;
2421 ops->truncate (j);
2424 XDELETEVEC (ranges);
2425 return any_changes;
2428 /* Return true if STMT is a cast like:
2429 <bb N>:
2431 _123 = (int) _234;
2433 <bb M>:
2434 # _345 = PHI <_123(N), 1(...), 1(...)>
2435 where _234 has bool type, _123 has a single use and
2436 bb N has a single successor M. This is commonly used in
2437 the last block of a range test. */
2439 static bool
2440 final_range_test_p (gimple stmt)
2442 basic_block bb, rhs_bb;
2443 edge e;
2444 tree lhs, rhs;
2445 use_operand_p use_p;
2446 gimple use_stmt;
2448 if (!gimple_assign_cast_p (stmt))
2449 return false;
2450 bb = gimple_bb (stmt);
2451 if (!single_succ_p (bb))
2452 return false;
2453 e = single_succ_edge (bb);
2454 if (e->flags & EDGE_COMPLEX)
2455 return false;
2457 lhs = gimple_assign_lhs (stmt);
2458 rhs = gimple_assign_rhs1 (stmt);
2459 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2460 || TREE_CODE (rhs) != SSA_NAME
2461 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2462 return false;
2464 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2465 if (!single_imm_use (lhs, &use_p, &use_stmt))
2466 return false;
2468 if (gimple_code (use_stmt) != GIMPLE_PHI
2469 || gimple_bb (use_stmt) != e->dest)
2470 return false;
2472 /* And that the rhs is defined in the same loop. */
2473 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2474 if (rhs_bb == NULL
2475 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2476 return false;
2478 return true;
2481 /* Return true if BB is a suitable basic block for inter-bb range test
2482 optimization. If BACKWARD is true, BB should be the only predecessor
2483 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2484 or compared with it to find a common basic block to which all conditions
2485 branch if true resp. false. If BACKWARD is false, TEST_BB should
2486 be the only predecessor of BB. */
2488 static bool
2489 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2490 bool backward)
2492 edge_iterator ei, ei2;
2493 edge e, e2;
2494 gimple stmt;
2495 gimple_stmt_iterator gsi;
2496 bool other_edge_seen = false;
2497 bool is_cond;
2499 if (test_bb == bb)
2500 return false;
2501 /* Check last stmt first. */
2502 stmt = last_stmt (bb);
2503 if (stmt == NULL
2504 || (gimple_code (stmt) != GIMPLE_COND
2505 && (backward || !final_range_test_p (stmt)))
2506 || gimple_visited_p (stmt)
2507 || stmt_could_throw_p (stmt)
2508 || *other_bb == bb)
2509 return false;
2510 is_cond = gimple_code (stmt) == GIMPLE_COND;
2511 if (is_cond)
2513 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2514 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2515 to *OTHER_BB (if not set yet, try to find it out). */
2516 if (EDGE_COUNT (bb->succs) != 2)
2517 return false;
2518 FOR_EACH_EDGE (e, ei, bb->succs)
2520 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2521 return false;
2522 if (e->dest == test_bb)
2524 if (backward)
2525 continue;
2526 else
2527 return false;
2529 if (e->dest == bb)
2530 return false;
2531 if (*other_bb == NULL)
2533 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2534 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2535 return false;
2536 else if (e->dest == e2->dest)
2537 *other_bb = e->dest;
2538 if (*other_bb == NULL)
2539 return false;
2541 if (e->dest == *other_bb)
2542 other_edge_seen = true;
2543 else if (backward)
2544 return false;
2546 if (*other_bb == NULL || !other_edge_seen)
2547 return false;
2549 else if (single_succ (bb) != *other_bb)
2550 return false;
2552 /* Now check all PHIs of *OTHER_BB. */
2553 e = find_edge (bb, *other_bb);
2554 e2 = find_edge (test_bb, *other_bb);
2555 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2557 gimple phi = gsi_stmt (gsi);
2558 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2559 corresponding to BB and TEST_BB predecessor must be the same. */
2560 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2561 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2563 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2564 one of the PHIs should have the lhs of the last stmt in
2565 that block as PHI arg and that PHI should have 0 or 1
2566 corresponding to it in all other range test basic blocks
2567 considered. */
2568 if (!is_cond)
2570 if (gimple_phi_arg_def (phi, e->dest_idx)
2571 == gimple_assign_lhs (stmt)
2572 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2573 || integer_onep (gimple_phi_arg_def (phi,
2574 e2->dest_idx))))
2575 continue;
2577 else
2579 gimple test_last = last_stmt (test_bb);
2580 if (gimple_code (test_last) != GIMPLE_COND
2581 && gimple_phi_arg_def (phi, e2->dest_idx)
2582 == gimple_assign_lhs (test_last)
2583 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2584 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2585 continue;
2588 return false;
2591 return true;
2594 /* Return true if BB doesn't have side-effects that would disallow
2595 range test optimization, all SSA_NAMEs set in the bb are consumed
2596 in the bb and there are no PHIs. */
2598 static bool
2599 no_side_effect_bb (basic_block bb)
2601 gimple_stmt_iterator gsi;
2602 gimple last;
2604 if (!gimple_seq_empty_p (phi_nodes (bb)))
2605 return false;
2606 last = last_stmt (bb);
2607 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2609 gimple stmt = gsi_stmt (gsi);
2610 tree lhs;
2611 imm_use_iterator imm_iter;
2612 use_operand_p use_p;
2614 if (is_gimple_debug (stmt))
2615 continue;
2616 if (gimple_has_side_effects (stmt))
2617 return false;
2618 if (stmt == last)
2619 return true;
2620 if (!is_gimple_assign (stmt))
2621 return false;
2622 lhs = gimple_assign_lhs (stmt);
2623 if (TREE_CODE (lhs) != SSA_NAME)
2624 return false;
2625 if (gimple_assign_rhs_could_trap_p (stmt))
2626 return false;
2627 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2629 gimple use_stmt = USE_STMT (use_p);
2630 if (is_gimple_debug (use_stmt))
2631 continue;
2632 if (gimple_bb (use_stmt) != bb)
2633 return false;
2636 return false;
2639 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2640 return true and fill in *OPS recursively. */
2642 static bool
2643 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2644 struct loop *loop)
2646 gimple stmt = SSA_NAME_DEF_STMT (var);
2647 tree rhs[2];
2648 int i;
2650 if (!is_reassociable_op (stmt, code, loop))
2651 return false;
2653 rhs[0] = gimple_assign_rhs1 (stmt);
2654 rhs[1] = gimple_assign_rhs2 (stmt);
2655 gimple_set_visited (stmt, true);
2656 for (i = 0; i < 2; i++)
2657 if (TREE_CODE (rhs[i]) == SSA_NAME
2658 && !get_ops (rhs[i], code, ops, loop)
2659 && has_single_use (rhs[i]))
2661 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2663 oe->op = rhs[i];
2664 oe->rank = code;
2665 oe->id = 0;
2666 oe->count = 1;
2667 ops->safe_push (oe);
2669 return true;
2672 /* Find the ops that were added by get_ops starting from VAR, see if
2673 they were changed during update_range_test and if so, create new
2674 stmts. */
2676 static tree
2677 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2678 unsigned int *pidx, struct loop *loop)
2680 gimple stmt = SSA_NAME_DEF_STMT (var);
2681 tree rhs[4];
2682 int i;
2684 if (!is_reassociable_op (stmt, code, loop))
2685 return NULL;
2687 rhs[0] = gimple_assign_rhs1 (stmt);
2688 rhs[1] = gimple_assign_rhs2 (stmt);
2689 rhs[2] = rhs[0];
2690 rhs[3] = rhs[1];
2691 for (i = 0; i < 2; i++)
2692 if (TREE_CODE (rhs[i]) == SSA_NAME)
2694 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2695 if (rhs[2 + i] == NULL_TREE)
2697 if (has_single_use (rhs[i]))
2698 rhs[2 + i] = ops[(*pidx)++]->op;
2699 else
2700 rhs[2 + i] = rhs[i];
2703 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2704 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2706 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2707 var = make_ssa_name (TREE_TYPE (var), NULL);
2708 gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
2709 var, rhs[2], rhs[3]);
2710 gimple_set_uid (g, gimple_uid (stmt));
2711 gimple_set_visited (g, true);
2712 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2714 return var;
2717 /* Structure to track the initial value passed to get_ops and
2718 the range in the ops vector for each basic block. */
2720 struct inter_bb_range_test_entry
2722 tree op;
2723 unsigned int first_idx, last_idx;
2726 /* Inter-bb range test optimization. */
2728 static void
2729 maybe_optimize_range_tests (gimple stmt)
2731 basic_block first_bb = gimple_bb (stmt);
2732 basic_block last_bb = first_bb;
2733 basic_block other_bb = NULL;
2734 basic_block bb;
2735 edge_iterator ei;
2736 edge e;
2737 auto_vec<operand_entry_t> ops;
2738 auto_vec<inter_bb_range_test_entry> bbinfo;
2739 bool any_changes = false;
2741 /* Consider only basic blocks that end with GIMPLE_COND or
2742 a cast statement satisfying final_range_test_p. All
2743 but the last bb in the first_bb .. last_bb range
2744 should end with GIMPLE_COND. */
2745 if (gimple_code (stmt) == GIMPLE_COND)
2747 if (EDGE_COUNT (first_bb->succs) != 2)
2748 return;
2750 else if (final_range_test_p (stmt))
2751 other_bb = single_succ (first_bb);
2752 else
2753 return;
2755 if (stmt_could_throw_p (stmt))
2756 return;
2758 /* As relative ordering of post-dominator sons isn't fixed,
2759 maybe_optimize_range_tests can be called first on any
2760 bb in the range we want to optimize. So, start searching
2761 backwards, if first_bb can be set to a predecessor. */
2762 while (single_pred_p (first_bb))
2764 basic_block pred_bb = single_pred (first_bb);
2765 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2766 break;
2767 if (!no_side_effect_bb (first_bb))
2768 break;
2769 first_bb = pred_bb;
2771 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2772 Before starting forward search in last_bb successors, find
2773 out the other_bb. */
2774 if (first_bb == last_bb)
2776 other_bb = NULL;
2777 /* As non-GIMPLE_COND last stmt always terminates the range,
2778 if forward search didn't discover anything, just give up. */
2779 if (gimple_code (stmt) != GIMPLE_COND)
2780 return;
2781 /* Look at both successors. Either it ends with a GIMPLE_COND
2782 and satisfies suitable_cond_bb, or ends with a cast and
2783 other_bb is that cast's successor. */
2784 FOR_EACH_EDGE (e, ei, first_bb->succs)
2785 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2786 || e->dest == first_bb)
2787 return;
2788 else if (single_pred_p (e->dest))
2790 stmt = last_stmt (e->dest);
2791 if (stmt
2792 && gimple_code (stmt) == GIMPLE_COND
2793 && EDGE_COUNT (e->dest->succs) == 2)
2795 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2796 break;
2797 else
2798 other_bb = NULL;
2800 else if (stmt
2801 && final_range_test_p (stmt)
2802 && find_edge (first_bb, single_succ (e->dest)))
2804 other_bb = single_succ (e->dest);
2805 if (other_bb == first_bb)
2806 other_bb = NULL;
2809 if (other_bb == NULL)
2810 return;
2812 /* Now do the forward search, moving last_bb to successor bbs
2813 that aren't other_bb. */
2814 while (EDGE_COUNT (last_bb->succs) == 2)
2816 FOR_EACH_EDGE (e, ei, last_bb->succs)
2817 if (e->dest != other_bb)
2818 break;
2819 if (e == NULL)
2820 break;
2821 if (!single_pred_p (e->dest))
2822 break;
2823 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2824 break;
2825 if (!no_side_effect_bb (e->dest))
2826 break;
2827 last_bb = e->dest;
2829 if (first_bb == last_bb)
2830 return;
2831 /* Here basic blocks first_bb through last_bb's predecessor
2832 end with GIMPLE_COND, all of them have one of the edges to
2833 other_bb and another to another block in the range,
2834 all blocks except first_bb don't have side-effects and
2835 last_bb ends with either GIMPLE_COND, or cast satisfying
2836 final_range_test_p. */
2837 for (bb = last_bb; ; bb = single_pred (bb))
2839 enum tree_code code;
2840 tree lhs, rhs;
2841 inter_bb_range_test_entry bb_ent;
2843 bb_ent.op = NULL_TREE;
2844 bb_ent.first_idx = ops.length ();
2845 bb_ent.last_idx = bb_ent.first_idx;
2846 e = find_edge (bb, other_bb);
2847 stmt = last_stmt (bb);
2848 gimple_set_visited (stmt, true);
2849 if (gimple_code (stmt) != GIMPLE_COND)
2851 use_operand_p use_p;
2852 gimple phi;
2853 edge e2;
2854 unsigned int d;
2856 lhs = gimple_assign_lhs (stmt);
2857 rhs = gimple_assign_rhs1 (stmt);
2858 gcc_assert (bb == last_bb);
2860 /* stmt is
2861 _123 = (int) _234;
2863 followed by:
2864 <bb M>:
2865 # _345 = PHI <_123(N), 1(...), 1(...)>
2867 or 0 instead of 1. If it is 0, the _234
2868 range test is anded together with all the
2869 other range tests; if it is 1, it is ored with
2870 them. */
2871 single_imm_use (lhs, &use_p, &phi);
2872 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2873 e2 = find_edge (first_bb, other_bb);
2874 d = e2->dest_idx;
2875 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2876 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2877 code = BIT_AND_EXPR;
2878 else
2880 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2881 code = BIT_IOR_EXPR;
2884 /* If _234 SSA_NAME_DEF_STMT is
2885 _234 = _567 | _789;
2886 (or &, corresponding to 1/0 in the phi arguments,
2887 push into ops the individual range test arguments
2888 of the bitwise or resp. and, recursively. */
2889 if (!get_ops (rhs, code, &ops,
2890 loop_containing_stmt (stmt))
2891 && has_single_use (rhs))
2893 /* Otherwise, push the _234 range test itself. */
2894 operand_entry_t oe
2895 = (operand_entry_t) pool_alloc (operand_entry_pool);
2897 oe->op = rhs;
2898 oe->rank = code;
2899 oe->id = 0;
2900 oe->count = 1;
2901 ops.safe_push (oe);
2902 bb_ent.last_idx++;
2904 else
2905 bb_ent.last_idx = ops.length ();
2906 bb_ent.op = rhs;
2907 bbinfo.safe_push (bb_ent);
2908 continue;
2910 /* Otherwise stmt is GIMPLE_COND. */
2911 code = gimple_cond_code (stmt);
2912 lhs = gimple_cond_lhs (stmt);
2913 rhs = gimple_cond_rhs (stmt);
2914 if (TREE_CODE (lhs) == SSA_NAME
2915 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2916 && ((code != EQ_EXPR && code != NE_EXPR)
2917 || rhs != boolean_false_node
2918 /* Either push into ops the individual bitwise
2919 or resp. and operands, depending on which
2920 edge is other_bb. */
2921 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2922 ^ (code == EQ_EXPR))
2923 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2924 loop_containing_stmt (stmt))))
2926 /* Or push the GIMPLE_COND stmt itself. */
2927 operand_entry_t oe
2928 = (operand_entry_t) pool_alloc (operand_entry_pool);
2930 oe->op = NULL;
2931 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2932 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2933 /* oe->op = NULL signals that there is no SSA_NAME
2934 for the range test, and oe->id instead is the
2935 basic block number at whose end the GIMPLE_COND
2936 is. */
2937 oe->id = bb->index;
2938 oe->count = 1;
2939 ops.safe_push (oe);
2940 bb_ent.op = NULL;
2941 bb_ent.last_idx++;
2943 else if (ops.length () > bb_ent.first_idx)
2945 bb_ent.op = lhs;
2946 bb_ent.last_idx = ops.length ();
2948 bbinfo.safe_push (bb_ent);
2949 if (bb == first_bb)
2950 break;
2952 if (ops.length () > 1)
2953 any_changes = optimize_range_tests (ERROR_MARK, &ops);
2954 if (any_changes)
2956 unsigned int idx;
2957 /* update_ops relies on has_single_use predicates returning the
2958 same values as it did during get_ops earlier. Additionally it
2959 never removes statements, only adds new ones and it should walk
2960 from the single imm use and check the predicate already before
2961 making those changes.
2962 On the other side, the handling of GIMPLE_COND directly can turn
2963 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
2964 it needs to be done in a separate loop afterwards. */
2965 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2967 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2968 && bbinfo[idx].op != NULL_TREE)
2970 tree new_op;
2972 stmt = last_stmt (bb);
2973 new_op = update_ops (bbinfo[idx].op,
2974 (enum tree_code)
2975 ops[bbinfo[idx].first_idx]->rank,
2976 ops, &bbinfo[idx].first_idx,
2977 loop_containing_stmt (stmt));
2978 if (new_op == NULL_TREE)
2980 gcc_assert (bb == last_bb);
2981 new_op = ops[bbinfo[idx].first_idx++]->op;
2983 if (bbinfo[idx].op != new_op)
2985 imm_use_iterator iter;
2986 use_operand_p use_p;
2987 gimple use_stmt, cast_stmt = NULL;
2989 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
2990 if (is_gimple_debug (use_stmt))
2991 continue;
2992 else if (gimple_code (use_stmt) == GIMPLE_COND
2993 || gimple_code (use_stmt) == GIMPLE_PHI)
2994 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2995 SET_USE (use_p, new_op);
2996 else if (gimple_assign_cast_p (use_stmt))
2997 cast_stmt = use_stmt;
2998 else
2999 gcc_unreachable ();
3000 if (cast_stmt)
3002 gcc_assert (bb == last_bb);
3003 tree lhs = gimple_assign_lhs (cast_stmt);
3004 tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3005 enum tree_code rhs_code
3006 = gimple_assign_rhs_code (cast_stmt);
3007 gimple g;
3008 if (is_gimple_min_invariant (new_op))
3010 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3011 g = gimple_build_assign (new_lhs, new_op);
3013 else
3014 g = gimple_build_assign_with_ops (rhs_code, new_lhs,
3015 new_op, NULL_TREE);
3016 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3017 gimple_set_uid (g, gimple_uid (cast_stmt));
3018 gimple_set_visited (g, true);
3019 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3020 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3021 if (is_gimple_debug (use_stmt))
3022 continue;
3023 else if (gimple_code (use_stmt) == GIMPLE_COND
3024 || gimple_code (use_stmt) == GIMPLE_PHI)
3025 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3026 SET_USE (use_p, new_lhs);
3027 else
3028 gcc_unreachable ();
3032 if (bb == first_bb)
3033 break;
3035 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3037 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3038 && bbinfo[idx].op == NULL_TREE
3039 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3041 stmt = last_stmt (bb);
3042 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3043 gimple_cond_make_false (stmt);
3044 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3045 gimple_cond_make_true (stmt);
3046 else
3048 gimple_cond_set_code (stmt, NE_EXPR);
3049 gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
3050 gimple_cond_set_rhs (stmt, boolean_false_node);
3052 update_stmt (stmt);
3054 if (bb == first_bb)
3055 break;
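/* Illustrative example, not taken from the sources: a chain of conditions
   such as

     int f (int x)
     {
       if (x == 2) return 1;
       if (x == 3) return 1;
       if (x == 10) return 1;
       if (x == 11) return 1;
       return 0;
     }

   typically reaches this function as several basic blocks, each ending in
   a GIMPLE_COND branching to a common block.  maybe_optimize_range_tests
   collects those conditions into the ops vector and lets
   optimize_range_tests merge them, ideally into a single test roughly of
   the shape (((unsigned) x - 2) & ~8) <= 1.  */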
3060 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3061 of STMT in its operands. This is also known as a "destructive
3062 update" operation. */
3064 static bool
3065 is_phi_for_stmt (gimple stmt, tree operand)
3067 gimple def_stmt;
3068 tree lhs;
3069 use_operand_p arg_p;
3070 ssa_op_iter i;
3072 if (TREE_CODE (operand) != SSA_NAME)
3073 return false;
3075 lhs = gimple_assign_lhs (stmt);
3077 def_stmt = SSA_NAME_DEF_STMT (operand);
3078 if (gimple_code (def_stmt) != GIMPLE_PHI)
3079 return false;
3081 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
3082 if (lhs == USE_FROM_PTR (arg_p))
3083 return true;
3084 return false;
3087 /* Remove def stmt of VAR if VAR has zero uses and recurse
3088 on rhs1 operand if so. */
3090 static void
3091 remove_visited_stmt_chain (tree var)
3093 gimple stmt;
3094 gimple_stmt_iterator gsi;
3096 while (1)
3098 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3099 return;
3100 stmt = SSA_NAME_DEF_STMT (var);
3101 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3103 var = gimple_assign_rhs1 (stmt);
3104 gsi = gsi_for_stmt (stmt);
3105 reassoc_remove_stmt (&gsi);
3106 release_defs (stmt);
3108 else
3109 return;
3113 /* This function checks three consecutive operands in
3114 the passed operand vector OPS starting from OPINDEX and
3115 swaps two operands if it is profitable for the binary operation
3116 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3118 We pair ops with the same rank if possible.
3120 The alternative we try is to see if STMT is a destructive
3121 update style statement, which is like:
3122 b = phi (a, ...)
3123 a = c + b;
3124 In that case, we want to use the destructive update form to
3125 expose the possible vectorizer sum reduction opportunity.
3126 In that case, the third operand will be the phi node. This
3127 check is not performed if STMT is null.
3129 We could, of course, try to be better as noted above, and do a
3130 lot of work to try to find these opportunities in >3 operand
3131 cases, but it is unlikely to be worth it. */
3133 static void
3134 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3135 unsigned int opindex, gimple stmt)
3137 operand_entry_t oe1, oe2, oe3;
3139 oe1 = ops[opindex];
3140 oe2 = ops[opindex + 1];
3141 oe3 = ops[opindex + 2];
3143 if ((oe1->rank == oe2->rank
3144 && oe2->rank != oe3->rank)
3145 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3146 && !is_phi_for_stmt (stmt, oe1->op)
3147 && !is_phi_for_stmt (stmt, oe2->op)))
3149 struct operand_entry temp = *oe3;
3150 oe3->op = oe1->op;
3151 oe3->rank = oe1->rank;
3152 oe1->op = temp.op;
3153 oe1->rank= temp.rank;
3155 else if ((oe1->rank == oe3->rank
3156 && oe2->rank != oe3->rank)
3157 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3158 && !is_phi_for_stmt (stmt, oe1->op)
3159 && !is_phi_for_stmt (stmt, oe3->op)))
3161 struct operand_entry temp = *oe2;
3162 oe2->op = oe1->op;
3163 oe2->rank = oe1->rank;
3164 oe1->op = temp.op;
3165 oe1->rank = temp.rank;
3169 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3170 two definitions, otherwise return STMT. */
3172 static inline gimple
3173 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3175 if (TREE_CODE (rhs1) == SSA_NAME
3176 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3177 stmt = SSA_NAME_DEF_STMT (rhs1);
3178 if (TREE_CODE (rhs2) == SSA_NAME
3179 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3180 stmt = SSA_NAME_DEF_STMT (rhs2);
3181 return stmt;
3184 /* Recursively rewrite our linearized statements so that the operators
3185 match those in OPS[OPINDEX], putting the computation in rank
3186 order. Return new lhs. */
3188 static tree
3189 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3190 vec<operand_entry_t> ops, bool changed)
3192 tree rhs1 = gimple_assign_rhs1 (stmt);
3193 tree rhs2 = gimple_assign_rhs2 (stmt);
3194 tree lhs = gimple_assign_lhs (stmt);
3195 operand_entry_t oe;
3197 /* The final recursion case for this function is that you have
3198 exactly two operations left.
3199 If we had exactly one op in the entire list to start with, we
3200 would have never called this function, and the tail recursion
3201 rewrites them one at a time. */
3202 if (opindex + 2 == ops.length ())
3204 operand_entry_t oe1, oe2;
3206 oe1 = ops[opindex];
3207 oe2 = ops[opindex + 1];
3209 if (rhs1 != oe1->op || rhs2 != oe2->op)
3211 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3212 unsigned int uid = gimple_uid (stmt);
3214 if (dump_file && (dump_flags & TDF_DETAILS))
3216 fprintf (dump_file, "Transforming ");
3217 print_gimple_stmt (dump_file, stmt, 0, 0);
3220 if (changed)
3222 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3223 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3224 stmt
3225 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3226 lhs, oe1->op, oe2->op);
3227 gimple_set_uid (stmt, uid);
3228 gimple_set_visited (stmt, true);
3229 if (insert_point == gsi_stmt (gsi))
3230 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3231 else
3232 insert_stmt_after (stmt, insert_point);
3234 else
3236 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3237 == stmt);
3238 gimple_assign_set_rhs1 (stmt, oe1->op);
3239 gimple_assign_set_rhs2 (stmt, oe2->op);
3240 update_stmt (stmt);
3243 if (rhs1 != oe1->op && rhs1 != oe2->op)
3244 remove_visited_stmt_chain (rhs1);
3246 if (dump_file && (dump_flags & TDF_DETAILS))
3248 fprintf (dump_file, " into ");
3249 print_gimple_stmt (dump_file, stmt, 0, 0);
3252 return lhs;
3255 /* If we hit here, we should have 3 or more ops left. */
3256 gcc_assert (opindex + 2 < ops.length ());
3258 /* Rewrite the next operator. */
3259 oe = ops[opindex];
3261 /* Recurse on the LHS of the binary operator, which is guaranteed to
3262 be the non-leaf side. */
3263 tree new_rhs1
3264 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3265 changed || oe->op != rhs2);
3267 if (oe->op != rhs2 || new_rhs1 != rhs1)
3269 if (dump_file && (dump_flags & TDF_DETAILS))
3271 fprintf (dump_file, "Transforming ");
3272 print_gimple_stmt (dump_file, stmt, 0, 0);
3275 /* If changed is false, this is either opindex == 0
3276 or all outer rhs2's were equal to corresponding oe->op,
3277 and powi_result is NULL.
3278 That means lhs is equivalent before and after reassociation.
3279 Otherwise ensure the old lhs SSA_NAME is not reused and
3280 create a new stmt as well, so that any debug stmts will be
3281 properly adjusted. */
3282 if (changed)
3284 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3285 unsigned int uid = gimple_uid (stmt);
3286 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3288 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3289 stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3290 lhs, new_rhs1, oe->op);
3291 gimple_set_uid (stmt, uid);
3292 gimple_set_visited (stmt, true);
3293 if (insert_point == gsi_stmt (gsi))
3294 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3295 else
3296 insert_stmt_after (stmt, insert_point);
3298 else
3300 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3301 == stmt);
3302 gimple_assign_set_rhs1 (stmt, new_rhs1);
3303 gimple_assign_set_rhs2 (stmt, oe->op);
3304 update_stmt (stmt);
3307 if (dump_file && (dump_flags & TDF_DETAILS))
3309 fprintf (dump_file, " into ");
3310 print_gimple_stmt (dump_file, stmt, 0, 0);
3313 return lhs;
3316 /* Find out how many cycles we need to compute statements chain.
3317 OPS_NUM holds the number of statements in a chain. CPU_WIDTH is the
3318 maximum number of independent statements we may execute per cycle. */
3320 static int
3321 get_required_cycles (int ops_num, int cpu_width)
3323 int res;
3324 int elog;
3325 unsigned int rest;
3327 /* While we have more than 2 * cpu_width operands
3328 we may reduce number of operands by cpu_width
3329 per cycle. */
3330 res = ops_num / (2 * cpu_width);
3332 /* The remaining operand count may be reduced twice per cycle
3333 until we have only one operand. */
3334 rest = (unsigned)(ops_num - res * cpu_width);
3335 elog = exact_log2 (rest);
3336 if (elog >= 0)
3337 res += elog;
3338 else
3339 res += floor_log2 (rest) + 1;
3341 return res;
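/* Illustrative worked example, not part of the original source, tracing the
   code above: with ops_num == 12 and cpu_width == 2 it computes
   res = 12 / 4 = 3, rest = 12 - 3 * 2 = 6, and since 6 is not a power of
   two, res += floor_log2 (6) + 1 = 3, giving 6 cycles total.  With a
   power-of-two remainder, e.g. ops_num == 8 and cpu_width == 2, it gives
   res = 2, rest = 4, res += exact_log2 (4) = 2, i.e. 4 cycles.  */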
3344 /* Returns an optimal number of registers to use for computation of
3345 given statements. */
3347 static int
3348 get_reassociation_width (int ops_num, enum tree_code opc,
3349 enum machine_mode mode)
3351 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3352 int width;
3353 int width_min;
3354 int cycles_best;
3356 if (param_width > 0)
3357 width = param_width;
3358 else
3359 width = targetm.sched.reassociation_width (opc, mode);
3361 if (width == 1)
3362 return width;
3364 /* Get the minimal time required for sequence computation. */
3365 cycles_best = get_required_cycles (ops_num, width);
3367 /* Check if we may use less width and still compute sequence for
3368 the same time. It will allow us to reduce register usage.
3369 get_required_cycles is monotonically increasing with lower width
3370 so we can perform a binary search for the minimal width that still
3371 results in the optimal cycle count. */
3372 width_min = 1;
3373 while (width > width_min)
3375 int width_mid = (width + width_min) / 2;
3377 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3378 width = width_mid;
3379 else if (width_min < width_mid)
3380 width_min = width_mid;
3381 else
3382 break;
3385 return width;
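/* Illustrative worked example, not part of the original source: assume the
   target reports a reassociation width of 4 and ops_num == 4.
   get_required_cycles (4, 4) and get_required_cycles (4, 2) both return 2,
   while get_required_cycles (4, 1) returns 3, so the binary search above
   settles on width 2: the narrower width costs no extra cycles but keeps
   fewer partial sums live at once.  */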
3388 /* Recursively rewrite our linearized statements so that the operators
3389 match those in OPS[OPINDEX], putting the computation in rank
3390 order and trying to allow operations to be executed in
3391 parallel. */
3393 static void
3394 rewrite_expr_tree_parallel (gimple stmt, int width,
3395 vec<operand_entry_t> ops)
3397 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3398 int op_num = ops.length ();
3399 int stmt_num = op_num - 1;
3400 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3401 int op_index = op_num - 1;
3402 int stmt_index = 0;
3403 int ready_stmts_end = 0;
3404 int i = 0;
3405 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3407 /* We start expression rewriting from the top statements.
3408 So, in this loop we create a full list of statements
3409 we will work with. */
3410 stmts[stmt_num - 1] = stmt;
3411 for (i = stmt_num - 2; i >= 0; i--)
3412 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3414 for (i = 0; i < stmt_num; i++)
3416 tree op1, op2;
3418 /* Determine whether we should use results of
3419 already handled statements or not. */
3420 if (ready_stmts_end == 0
3421 && (i - stmt_index >= width || op_index < 1))
3422 ready_stmts_end = i;
3424 /* Now we choose operands for the next statement. Non-zero
3425 value in ready_stmts_end means here that we should use
3426 the result of already generated statements as new operand. */
3427 if (ready_stmts_end > 0)
3429 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3430 if (ready_stmts_end > stmt_index)
3431 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3432 else if (op_index >= 0)
3433 op2 = ops[op_index--]->op;
3434 else
3436 gcc_assert (stmt_index < i);
3437 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3440 if (stmt_index >= ready_stmts_end)
3441 ready_stmts_end = 0;
3443 else
3445 if (op_index > 1)
3446 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3447 op2 = ops[op_index--]->op;
3448 op1 = ops[op_index--]->op;
3451 /* If we emit the last statement then we should put
3452 operands into the last statement. It will also
3453 break the loop. */
3454 if (op_index < 0 && stmt_index == i)
3455 i = stmt_num - 1;
3457 if (dump_file && (dump_flags & TDF_DETAILS))
3459 fprintf (dump_file, "Transforming ");
3460 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3463 /* We keep original statement only for the last one. All
3464 others are recreated. */
3465 if (i == stmt_num - 1)
3467 gimple_assign_set_rhs1 (stmts[i], op1);
3468 gimple_assign_set_rhs2 (stmts[i], op2);
3469 update_stmt (stmts[i]);
3471 else
3472 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3474 if (dump_file && (dump_flags & TDF_DETAILS))
3476 fprintf (dump_file, " into ");
3477 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3481 remove_visited_stmt_chain (last_rhs1);
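/* Illustrative worked example, not part of the original source: for an
   operand vector {a, b, c, d} and width 2, the loop above first emits the
   two independent sums and only then combines them, roughly

     t1 = c + d;
     t2 = a + b;
     x  = t1 + t2;

   whereas the serial rewrite_expr_tree form would be ((a + b) + c) + d.  */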
3484 /* Transform STMT, which is really (A + B) + (C + D) into the left
3485 linear form, ((A+B)+C)+D.
3486 Recurse on D if necessary. */
3488 static void
3489 linearize_expr (gimple stmt)
3491 gimple_stmt_iterator gsi;
3492 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3493 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3494 gimple oldbinrhs = binrhs;
3495 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3496 gimple newbinrhs = NULL;
3497 struct loop *loop = loop_containing_stmt (stmt);
3498 tree lhs = gimple_assign_lhs (stmt);
3500 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3501 && is_reassociable_op (binrhs, rhscode, loop));
3503 gsi = gsi_for_stmt (stmt);
3505 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3506 binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
3507 make_ssa_name (TREE_TYPE (lhs), NULL),
3508 gimple_assign_lhs (binlhs),
3509 gimple_assign_rhs2 (binrhs));
3510 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3511 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3512 gimple_set_uid (binrhs, gimple_uid (stmt));
3514 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3515 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3517 if (dump_file && (dump_flags & TDF_DETAILS))
3519 fprintf (dump_file, "Linearized: ");
3520 print_gimple_stmt (dump_file, stmt, 0, 0);
3523 reassociate_stats.linearized++;
3524 update_stmt (stmt);
3526 gsi = gsi_for_stmt (oldbinrhs);
3527 reassoc_remove_stmt (&gsi);
3528 release_defs (oldbinrhs);
3530 gimple_set_visited (stmt, true);
3531 gimple_set_visited (binlhs, true);
3532 gimple_set_visited (binrhs, true);
3534 /* Tail recurse on the new rhs if it still needs reassociation. */
3535 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3536 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3537 want to change the algorithm while converting to tuples. */
3538 linearize_expr (stmt);
3541 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3542 it. Otherwise, return NULL. */
3544 static gimple
3545 get_single_immediate_use (tree lhs)
3547 use_operand_p immuse;
3548 gimple immusestmt;
3550 if (TREE_CODE (lhs) == SSA_NAME
3551 && single_imm_use (lhs, &immuse, &immusestmt)
3552 && is_gimple_assign (immusestmt))
3553 return immusestmt;
3555 return NULL;
3558 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3559 representing the negated value. Insertions of any necessary
3560 instructions go before GSI.
3561 This function is recursive in that, if you hand it "a_5" as the
3562 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3563 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3565 static tree
3566 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3568 gimple negatedefstmt = NULL;
3569 tree resultofnegate;
3570 gimple_stmt_iterator gsi;
3571 unsigned int uid;
3573 /* If we are trying to negate a name, defined by an add, negate the
3574 add operands instead. */
3575 if (TREE_CODE (tonegate) == SSA_NAME)
3576 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3577 if (TREE_CODE (tonegate) == SSA_NAME
3578 && is_gimple_assign (negatedefstmt)
3579 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3580 && has_single_use (gimple_assign_lhs (negatedefstmt))
3581 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3583 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3584 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3585 tree lhs = gimple_assign_lhs (negatedefstmt);
3586 gimple g;
3588 gsi = gsi_for_stmt (negatedefstmt);
3589 rhs1 = negate_value (rhs1, &gsi);
3591 gsi = gsi_for_stmt (negatedefstmt);
3592 rhs2 = negate_value (rhs2, &gsi);
3594 gsi = gsi_for_stmt (negatedefstmt);
3595 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3596 gimple_set_visited (negatedefstmt, true);
3597 g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
3598 gimple_set_uid (g, gimple_uid (negatedefstmt));
3599 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3600 return lhs;
3603 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3604 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3605 NULL_TREE, true, GSI_SAME_STMT);
3606 gsi = *gsip;
3607 uid = gimple_uid (gsi_stmt (gsi));
3608 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3610 gimple stmt = gsi_stmt (gsi);
3611 if (gimple_uid (stmt) != 0)
3612 break;
3613 gimple_set_uid (stmt, uid);
3615 return resultofnegate;
3618 /* Return true if we should break up the subtract in STMT into an add
3619 with negate. This is true when the subtract operands are really
3620 adds, or the subtract itself is used in an add expression. In
3621 either case, breaking up the subtract into an add with negate
3622 exposes the adds to reassociation. */
3624 static bool
3625 should_break_up_subtract (gimple stmt)
3627 tree lhs = gimple_assign_lhs (stmt);
3628 tree binlhs = gimple_assign_rhs1 (stmt);
3629 tree binrhs = gimple_assign_rhs2 (stmt);
3630 gimple immusestmt;
3631 struct loop *loop = loop_containing_stmt (stmt);
3633 if (TREE_CODE (binlhs) == SSA_NAME
3634 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3635 return true;
3637 if (TREE_CODE (binrhs) == SSA_NAME
3638 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3639 return true;
3641 if (TREE_CODE (lhs) == SSA_NAME
3642 && (immusestmt = get_single_immediate_use (lhs))
3643 && is_gimple_assign (immusestmt)
3644 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3645 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3646 return true;
3647 return false;
3650 /* Transform STMT from A - B into A + -B. */
3652 static void
3653 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3655 tree rhs1 = gimple_assign_rhs1 (stmt);
3656 tree rhs2 = gimple_assign_rhs2 (stmt);
3658 if (dump_file && (dump_flags & TDF_DETAILS))
3660 fprintf (dump_file, "Breaking up subtract ");
3661 print_gimple_stmt (dump_file, stmt, 0, 0);
3664 rhs2 = negate_value (rhs2, gsip);
3665 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3666 update_stmt (stmt);
3669 /* Determine whether STMT is a builtin call that raises an SSA name
3670 to an integer power and has only one use. If so, and this is early
3671 reassociation and unsafe math optimizations are permitted, place
3672 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3673 If any of these conditions does not hold, return FALSE. */
3675 static bool
3676 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3678 tree fndecl, arg1;
3679 REAL_VALUE_TYPE c, cint;
3681 if (!first_pass_instance
3682 || !flag_unsafe_math_optimizations
3683 || !is_gimple_call (stmt)
3684 || !has_single_use (gimple_call_lhs (stmt)))
3685 return false;
3687 fndecl = gimple_call_fndecl (stmt);
3689 if (!fndecl
3690 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3691 return false;
3693 switch (DECL_FUNCTION_CODE (fndecl))
3695 CASE_FLT_FN (BUILT_IN_POW):
3696 *base = gimple_call_arg (stmt, 0);
3697 arg1 = gimple_call_arg (stmt, 1);
3699 if (TREE_CODE (arg1) != REAL_CST)
3700 return false;
3702 c = TREE_REAL_CST (arg1);
3704 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3705 return false;
3707 *exponent = real_to_integer (&c);
3708 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
3709 if (!real_identical (&c, &cint))
3710 return false;
3712 break;
3714 CASE_FLT_FN (BUILT_IN_POWI):
3715 *base = gimple_call_arg (stmt, 0);
3716 arg1 = gimple_call_arg (stmt, 1);
3718 if (!tree_fits_shwi_p (arg1))
3719 return false;
3721 *exponent = tree_to_shwi (arg1);
3722 break;
3724 default:
3725 return false;
3728 /* Expanding negative exponents is generally unproductive, so we don't
3729 complicate matters with those. Exponents of zero and one should
3730 have been handled by expression folding. */
3731 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3732 return false;
3734 return true;
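/* Illustrative examples, not part of the original source, of the checks
   above: __builtin_pow (x_1, 4.0) yields *base = x_1, *exponent = 4, and
   __builtin_powi (x_1, 3) yields *exponent = 3, so both are accepted;
   pow (x_1, 2.5) is rejected because 2.5 is not an exact integer, and
   powi (x_1, -2) or powi (x_1, 1) are rejected by the final
   *exponent < 2 check.  */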
3737 /* Recursively linearize a binary expression that is the RHS of STMT.
3738 Place the operands of the expression tree in the vector named OPS. */
3740 static void
3741 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3742 bool is_associative, bool set_visited)
3744 tree binlhs = gimple_assign_rhs1 (stmt);
3745 tree binrhs = gimple_assign_rhs2 (stmt);
3746 gimple binlhsdef = NULL, binrhsdef = NULL;
3747 bool binlhsisreassoc = false;
3748 bool binrhsisreassoc = false;
3749 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3750 struct loop *loop = loop_containing_stmt (stmt);
3751 tree base = NULL_TREE;
3752 HOST_WIDE_INT exponent = 0;
3754 if (set_visited)
3755 gimple_set_visited (stmt, true);
3757 if (TREE_CODE (binlhs) == SSA_NAME)
3759 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3760 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3761 && !stmt_could_throw_p (binlhsdef));
3764 if (TREE_CODE (binrhs) == SSA_NAME)
3766 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3767 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3768 && !stmt_could_throw_p (binrhsdef));
3771 /* If the LHS is not reassociable, but the RHS is, we need to swap
3772 them. If neither is reassociable, there is nothing we can do, so
3773 just put them in the ops vector. If the LHS is reassociable,
3774 linearize it. If both are reassociable, then linearize the RHS
3775 and the LHS. */
3777 if (!binlhsisreassoc)
3779 tree temp;
3781 /* If this is not an associative operation like division, give up. */
3782 if (!is_associative)
3784 add_to_ops_vec (ops, binrhs);
3785 return;
3788 if (!binrhsisreassoc)
3790 if (rhscode == MULT_EXPR
3791 && TREE_CODE (binrhs) == SSA_NAME
3792 && acceptable_pow_call (binrhsdef, &base, &exponent))
3794 add_repeat_to_ops_vec (ops, base, exponent);
3795 gimple_set_visited (binrhsdef, true);
3797 else
3798 add_to_ops_vec (ops, binrhs);
3800 if (rhscode == MULT_EXPR
3801 && TREE_CODE (binlhs) == SSA_NAME
3802 && acceptable_pow_call (binlhsdef, &base, &exponent))
3804 add_repeat_to_ops_vec (ops, base, exponent);
3805 gimple_set_visited (binlhsdef, true);
3807 else
3808 add_to_ops_vec (ops, binlhs);
3810 return;
3813 if (dump_file && (dump_flags & TDF_DETAILS))
3815 fprintf (dump_file, "swapping operands of ");
3816 print_gimple_stmt (dump_file, stmt, 0, 0);
3819 swap_ssa_operands (stmt,
3820 gimple_assign_rhs1_ptr (stmt),
3821 gimple_assign_rhs2_ptr (stmt));
3822 update_stmt (stmt);
3824 if (dump_file && (dump_flags & TDF_DETAILS))
3826 fprintf (dump_file, " is now ");
3827 print_gimple_stmt (dump_file, stmt, 0, 0);
3830 /* We want to make it so the lhs is always the reassociative op,
3831 so swap. */
3832 temp = binlhs;
3833 binlhs = binrhs;
3834 binrhs = temp;
3836 else if (binrhsisreassoc)
3838 linearize_expr (stmt);
3839 binlhs = gimple_assign_rhs1 (stmt);
3840 binrhs = gimple_assign_rhs2 (stmt);
3843 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3844 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3845 rhscode, loop));
3846 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3847 is_associative, set_visited);
3849 if (rhscode == MULT_EXPR
3850 && TREE_CODE (binrhs) == SSA_NAME
3851 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3853 add_repeat_to_ops_vec (ops, base, exponent);
3854 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3856 else
3857 add_to_ops_vec (ops, binrhs);
3860 /* Repropagate the negates back into subtracts, since no other pass
3861 currently does it. */
3863 static void
3864 repropagate_negates (void)
3866 unsigned int i = 0;
3867 tree negate;
3869 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3871 gimple user = get_single_immediate_use (negate);
3873 if (!user || !is_gimple_assign (user))
3874 continue;
3876 /* The negate operand can be either operand of a PLUS_EXPR
3877 (it can be the LHS if the RHS is a constant for example).
3879 Force the negate operand to the RHS of the PLUS_EXPR, then
3880 transform the PLUS_EXPR into a MINUS_EXPR. */
3881 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3883 /* If the negated operand appears on the LHS of the
3884 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3885 to force the negated operand to the RHS of the PLUS_EXPR. */
3886 if (gimple_assign_rhs1 (user) == negate)
3888 swap_ssa_operands (user,
3889 gimple_assign_rhs1_ptr (user),
3890 gimple_assign_rhs2_ptr (user));
3893 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3894 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3895 if (gimple_assign_rhs2 (user) == negate)
3897 tree rhs1 = gimple_assign_rhs1 (user);
3898 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3899 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3900 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3901 update_stmt (user);
3904 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3906 if (gimple_assign_rhs1 (user) == negate)
3908 /* We have
3909 x = -a
3910 y = x - b
3911 which we transform into
3912 x = a + b
3913 y = -x .
3914 This pushes down the negate which we possibly can merge
3915 into some other operation, hence insert it into the
3916 plus_negates vector. */
3917 gimple feed = SSA_NAME_DEF_STMT (negate);
3918 tree a = gimple_assign_rhs1 (feed);
3919 tree b = gimple_assign_rhs2 (user);
3920 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
3921 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
3922 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
3923 gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
3924 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3925 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
3926 user = gsi_stmt (gsi2);
3927 update_stmt (user);
3928 reassoc_remove_stmt (&gsi);
3929 release_defs (feed);
3930 plus_negates.safe_push (gimple_assign_lhs (user));
3932 else
3934 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3935 rid of one operation. */
3936 gimple feed = SSA_NAME_DEF_STMT (negate);
3937 tree a = gimple_assign_rhs1 (feed);
3938 tree rhs1 = gimple_assign_rhs1 (user);
3939 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3940 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3941 update_stmt (gsi_stmt (gsi));
3947 /* Returns true if OP is of a type for which we can do reassociation.
3948 That is for wrapping integral types, non-saturating fixed-point types,
3949 and floating-point types when -fassociative-math is enabled. */
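/* For instance (an illustrative reading of the checks below), an
   unsigned integer operand qualifies because its overflow wraps, a
   double operand qualifies only under -fassociative-math, and a
   signed integer operand whose overflow is undefined does not.  */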
3951 static bool
3952 can_reassociate_p (tree op)
3954 tree type = TREE_TYPE (op);
3955 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3956 || NON_SAT_FIXED_POINT_TYPE_P (type)
3957 || (flag_associative_math && FLOAT_TYPE_P (type)))
3958 return true;
3959 return false;
3962 /* Break up subtract operations in block BB.
3964 We do this top down because we don't know whether the subtract is
3965 part of a possible chain of reassociation except at the top.
3967 IE given
3968 d = f + g
3969 c = a + e
3970 b = c - d
3971 q = b - r
3972 k = t - q
3974 we want to break up k = t - q, but we won't until we've transformed q
3975 = b - r, which won't be broken up until we transform b = c - d.
3977 En passant, clear the GIMPLE visited flag on every statement
3978 and set UIDs within each basic block. */
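/* Illustratively, the chain in the example above is eventually
   rewritten as
   b = c + -d
   q = b + -r
   k = t + -q
   exposing a single flat addition chain to the later reassociation
   steps.  */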
3980 static void
3981 break_up_subtract_bb (basic_block bb)
3983 gimple_stmt_iterator gsi;
3984 basic_block son;
3985 unsigned int uid = 1;
3987 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3989 gimple stmt = gsi_stmt (gsi);
3990 gimple_set_visited (stmt, false);
3991 gimple_set_uid (stmt, uid++);
3993 if (!is_gimple_assign (stmt)
3994 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3995 continue;
3997 /* Look for simple gimple subtract operations. */
3998 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4000 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4001 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4002 continue;
4004 /* Check for a subtract used only in an addition. If this
4005 is the case, transform it into an add of a negate for better
4006 reassociation. IE transform C = A-B into C = A + -B if C
4007 is only used in an addition. */
4008 if (should_break_up_subtract (stmt))
4009 break_up_subtract (stmt, &gsi);
4011 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4012 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4013 plus_negates.safe_push (gimple_assign_lhs (stmt));
4015 for (son = first_dom_son (CDI_DOMINATORS, bb);
4016 son;
4017 son = next_dom_son (CDI_DOMINATORS, son))
4018 break_up_subtract_bb (son);
4021 /* Used for repeated factor analysis. */
4022 struct repeat_factor_d
4024 /* An SSA name that occurs in a multiply chain. */
4025 tree factor;
4027 /* Cached rank of the factor. */
4028 unsigned rank;
4030 /* Number of occurrences of the factor in the chain. */
4031 HOST_WIDE_INT count;
4033 /* An SSA name representing the product of this factor and
4034 all factors appearing later in the repeated factor vector. */
4035 tree repr;
4038 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4039 typedef const struct repeat_factor_d *const_repeat_factor_t;
4042 static vec<repeat_factor> repeat_factor_vec;
4044 /* Used for sorting the repeat factor vector. Sort primarily by
4045 ascending occurrence count, secondarily by descending rank. */
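/* E.g. for the product x * x * y * y * y * z * z * z * z discussed
   below, the sorted vector is x (count 2), y (count 3), z (count 4);
   ties in the count are broken by placing higher-ranked factors
   first.  */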
4047 static int
4048 compare_repeat_factors (const void *x1, const void *x2)
4050 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4051 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4053 if (rf1->count != rf2->count)
4054 return rf1->count < rf2->count ? -1 : 1;
4056 return rf2->rank - rf1->rank;
4059 /* Look for repeated operands in OPS in the multiply tree rooted at
4060 STMT. Replace them with an optimal sequence of multiplies and powi
4061 builtin calls, and remove the used operands from OPS. Return an
4062 SSA name representing the value of the replacement sequence. */
4064 static tree
4065 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4067 unsigned i, j, vec_len;
4068 int ii;
4069 operand_entry_t oe;
4070 repeat_factor_t rf1, rf2;
4071 repeat_factor rfnew;
4072 tree result = NULL_TREE;
4073 tree target_ssa, iter_result;
4074 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4075 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4076 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4077 gimple mul_stmt, pow_stmt;
4079 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4080 target. */
4081 if (!powi_fndecl)
4082 return NULL_TREE;
4084 /* Allocate the repeated factor vector. */
4085 repeat_factor_vec.create (10);
4087 /* Scan the OPS vector for all SSA names in the product and build
4088 up a vector of occurrence counts for each factor. */
4089 FOR_EACH_VEC_ELT (*ops, i, oe)
4091 if (TREE_CODE (oe->op) == SSA_NAME)
4093 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4095 if (rf1->factor == oe->op)
4097 rf1->count += oe->count;
4098 break;
4102 if (j >= repeat_factor_vec.length ())
4104 rfnew.factor = oe->op;
4105 rfnew.rank = oe->rank;
4106 rfnew.count = oe->count;
4107 rfnew.repr = NULL_TREE;
4108 repeat_factor_vec.safe_push (rfnew);
4113 /* Sort the repeated factor vector by (a) increasing occurrence count,
4114 and (b) decreasing rank. */
4115 repeat_factor_vec.qsort (compare_repeat_factors);
4117 /* It is generally best to combine as many base factors as possible
4118 into a product before applying __builtin_powi to the result.
4119 However, the sort order chosen for the repeated factor vector
4120 allows us to cache partial results for the product of the base
4121 factors for subsequent use. When we already have a cached partial
4122 result from a previous iteration, it is best to make use of it
4123 before looking for another __builtin_powi opportunity.
4125 As an example, consider x * x * y * y * y * z * z * z * z.
4126 We want to first compose the product x * y * z, raise it to the
4127 second power, then multiply this by y * z, and finally multiply
4128 by z. This can be done in 5 multiplies provided we cache y * z
4129 for use in both expressions:
4131 t1 = y * z
4132 t2 = t1 * x
4133 t3 = t2 * t2
4134 t4 = t1 * t3
4135 result = t4 * z
4137 If we instead ignored the cached y * z and first multiplied by
4138 the __builtin_powi opportunity z * z, we would get the inferior:
4140 t1 = y * z
4141 t2 = t1 * x
4142 t3 = t2 * t2
4143 t4 = z * z
4144 t5 = t3 * t4
4145 result = t5 * y */
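/* (Illustrative trace of the loop below on that example, with the
   sorted vector x (2), y (3), z (4) and temporary names chosen
   freely: the first iteration builds the cached representatives z,
   t1 = y * z and t2 = t1 * x, emits t3 = __builtin_powi (t2, 2), and
   decrements every count by two; the following iterations then reuse
   the cached t1 (y still has count 1) and finally z itself, producing
   t4 = t3 * t1 and result = t4 * z.)  */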
4147 vec_len = repeat_factor_vec.length ();
4149 /* Repeatedly look for opportunities to create a builtin_powi call. */
4150 while (true)
4152 HOST_WIDE_INT power;
4154 /* First look for the largest cached product of factors from
4155 preceding iterations. If found, create a builtin_powi for
4156 it if the minimum occurrence count for its factors is at
4157 least 2, or just use this cached product as our next
4158 multiplicand if the minimum occurrence count is 1. */
4159 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4161 if (rf1->repr && rf1->count > 0)
4162 break;
4165 if (j < vec_len)
4167 power = rf1->count;
4169 if (power == 1)
4171 iter_result = rf1->repr;
4173 if (dump_file && (dump_flags & TDF_DETAILS))
4175 unsigned elt;
4176 repeat_factor_t rf;
4177 fputs ("Multiplying by cached product ", dump_file);
4178 for (elt = j; elt < vec_len; elt++)
4180 rf = &repeat_factor_vec[elt];
4181 print_generic_expr (dump_file, rf->factor, 0);
4182 if (elt < vec_len - 1)
4183 fputs (" * ", dump_file);
4185 fputs ("\n", dump_file);
4188 else
4190 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4191 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4192 build_int_cst (integer_type_node,
4193 power));
4194 gimple_call_set_lhs (pow_stmt, iter_result);
4195 gimple_set_location (pow_stmt, gimple_location (stmt));
4196 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4198 if (dump_file && (dump_flags & TDF_DETAILS))
4200 unsigned elt;
4201 repeat_factor_t rf;
4202 fputs ("Building __builtin_pow call for cached product (",
4203 dump_file);
4204 for (elt = j; elt < vec_len; elt++)
4206 rf = &repeat_factor_vec[elt];
4207 print_generic_expr (dump_file, rf->factor, 0);
4208 if (elt < vec_len - 1)
4209 fputs (" * ", dump_file);
4211 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4212 power);
4216 else
4218 /* Otherwise, find the first factor in the repeated factor
4219 vector whose occurrence count is at least 2. If no such
4220 factor exists, there are no builtin_powi opportunities
4221 remaining. */
4222 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4224 if (rf1->count >= 2)
4225 break;
4228 if (j >= vec_len)
4229 break;
4231 power = rf1->count;
4233 if (dump_file && (dump_flags & TDF_DETAILS))
4235 unsigned elt;
4236 repeat_factor_t rf;
4237 fputs ("Building __builtin_pow call for (", dump_file);
4238 for (elt = j; elt < vec_len; elt++)
4240 rf = &repeat_factor_vec[elt];
4241 print_generic_expr (dump_file, rf->factor, 0);
4242 if (elt < vec_len - 1)
4243 fputs (" * ", dump_file);
4245 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4248 reassociate_stats.pows_created++;
4250 /* Visit each element of the vector in reverse order (so that
4251 high-occurrence elements are visited first, and within the
4252 same occurrence count, lower-ranked elements are visited
4253 first). Form a linear product of all elements in this order
4254 whose occurrence count is at least that of element J.
4255 Record the SSA name representing the product of each element
4256 with all subsequent elements in the vector. */
4257 if (j == vec_len - 1)
4258 rf1->repr = rf1->factor;
4259 else
4261 for (ii = vec_len - 2; ii >= (int)j; ii--)
4263 tree op1, op2;
4265 rf1 = &repeat_factor_vec[ii];
4266 rf2 = &repeat_factor_vec[ii + 1];
4268 /* Init the last factor's representative to be itself. */
4269 if (!rf2->repr)
4270 rf2->repr = rf2->factor;
4272 op1 = rf1->factor;
4273 op2 = rf2->repr;
4275 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4276 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4277 target_ssa,
4278 op1, op2);
4279 gimple_set_location (mul_stmt, gimple_location (stmt));
4280 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4281 rf1->repr = target_ssa;
4283 /* Don't reprocess the multiply we just introduced. */
4284 gimple_set_visited (mul_stmt, true);
4288 /* Form a call to __builtin_powi for the maximum product
4289 just formed, raised to the power obtained earlier. */
4290 rf1 = &repeat_factor_vec[j];
4291 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4292 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4293 build_int_cst (integer_type_node,
4294 power));
4295 gimple_call_set_lhs (pow_stmt, iter_result);
4296 gimple_set_location (pow_stmt, gimple_location (stmt));
4297 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4300 /* If we previously formed at least one other builtin_powi call,
4301 form the product of this one and those others. */
4302 if (result)
4304 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4305 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4306 result, iter_result);
4307 gimple_set_location (mul_stmt, gimple_location (stmt));
4308 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4309 gimple_set_visited (mul_stmt, true);
4310 result = new_result;
4312 else
4313 result = iter_result;
4315 /* Decrement the occurrence count of each element in the product
4316 by the count found above, and remove this many copies of each
4317 factor from OPS. */
4318 for (i = j; i < vec_len; i++)
4320 unsigned k = power;
4321 unsigned n;
4323 rf1 = &repeat_factor_vec[i];
4324 rf1->count -= power;
4326 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4328 if (oe->op == rf1->factor)
4330 if (oe->count <= k)
4332 ops->ordered_remove (n);
4333 k -= oe->count;
4335 if (k == 0)
4336 break;
4338 else
4340 oe->count -= k;
4341 break;
4348 /* At this point all elements in the repeated factor vector have a
4349 remaining occurrence count of 0 or 1, and those with a count of 1
4350 don't have cached representatives. Re-sort the ops vector and
4351 clean up. */
4352 ops->qsort (sort_by_operand_rank);
4353 repeat_factor_vec.release ();
4355 /* Return the final product computed herein. Note that there may
4356 still be some elements with single occurrence count left in OPS;
4357 those will be handled by the normal reassociation logic. */
4358 return result;
4361 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4363 static void
4364 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4366 tree rhs1;
4368 if (dump_file && (dump_flags & TDF_DETAILS))
4370 fprintf (dump_file, "Transforming ");
4371 print_gimple_stmt (dump_file, stmt, 0, 0);
4374 rhs1 = gimple_assign_rhs1 (stmt);
4375 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4376 update_stmt (stmt);
4377 remove_visited_stmt_chain (rhs1);
4379 if (dump_file && (dump_flags & TDF_DETAILS))
4381 fprintf (dump_file, " into ");
4382 print_gimple_stmt (dump_file, stmt, 0, 0);
4386 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4388 static void
4389 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4390 tree rhs1, tree rhs2)
4392 if (dump_file && (dump_flags & TDF_DETAILS))
4394 fprintf (dump_file, "Transforming ");
4395 print_gimple_stmt (dump_file, stmt, 0, 0);
4398 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4399 update_stmt (gsi_stmt (*gsi));
4400 remove_visited_stmt_chain (rhs1);
4402 if (dump_file && (dump_flags & TDF_DETAILS))
4404 fprintf (dump_file, " into ");
4405 print_gimple_stmt (dump_file, stmt, 0, 0);
4409 /* Reassociate expressions in basic block BB and, recursively, in
4410 its children in the post-dominator tree. */
4412 static void
4413 reassociate_bb (basic_block bb)
4415 gimple_stmt_iterator gsi;
4416 basic_block son;
4417 gimple stmt = last_stmt (bb);
4419 if (stmt && !gimple_visited_p (stmt))
4420 maybe_optimize_range_tests (stmt);
4422 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4424 stmt = gsi_stmt (gsi);
4426 if (is_gimple_assign (stmt)
4427 && !stmt_could_throw_p (stmt))
4429 tree lhs, rhs1, rhs2;
4430 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4432 /* If this is not a gimple binary expression, there is
4433 nothing for us to do with it. */
4434 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4435 continue;
4437 /* If this was part of an already processed statement,
4438 we don't need to touch it again. */
4439 if (gimple_visited_p (stmt))
4441 /* This statement might have become dead because of previous
4442 reassociations. */
4443 if (has_zero_uses (gimple_get_lhs (stmt)))
4445 reassoc_remove_stmt (&gsi);
4446 release_defs (stmt);
4447 /* We might have removed the last stmt above, which
4448 leaves the iterator at the end of the sequence.
4449 Reset it to the last stmt of the block in that case;
4450 if the block has become empty as well, there is
4451 nothing left to iterate over and we need to
4452 bail out. */
4453 if (gsi_end_p (gsi))
4455 gsi = gsi_last_bb (bb);
4456 if (gsi_end_p (gsi))
4457 break;
4460 continue;
4463 lhs = gimple_assign_lhs (stmt);
4464 rhs1 = gimple_assign_rhs1 (stmt);
4465 rhs2 = gimple_assign_rhs2 (stmt);
4467 /* For operations other than bitwise ops and min/max we cannot
4468 reassociate all types; verify the operand types here. */
4469 if (rhs_code != BIT_IOR_EXPR
4470 && rhs_code != BIT_AND_EXPR
4471 && rhs_code != BIT_XOR_EXPR
4472 && rhs_code != MIN_EXPR
4473 && rhs_code != MAX_EXPR
4474 && (!can_reassociate_p (lhs)
4475 || !can_reassociate_p (rhs1)
4476 || !can_reassociate_p (rhs2)))
4477 continue;
4479 if (associative_tree_code (rhs_code))
4481 auto_vec<operand_entry_t> ops;
4482 tree powi_result = NULL_TREE;
4484 /* There may be no immediate uses left by the time we
4485 get here because we may have eliminated them all. */
4486 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4487 continue;
4489 gimple_set_visited (stmt, true);
4490 linearize_expr_tree (&ops, stmt, true, true);
4491 ops.qsort (sort_by_operand_rank);
4492 optimize_ops_list (rhs_code, &ops);
4493 if (undistribute_ops_list (rhs_code, &ops,
4494 loop_containing_stmt (stmt)))
4496 ops.qsort (sort_by_operand_rank);
4497 optimize_ops_list (rhs_code, &ops);
4500 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4501 optimize_range_tests (rhs_code, &ops);
4503 if (first_pass_instance
4504 && rhs_code == MULT_EXPR
4505 && flag_unsafe_math_optimizations)
4506 powi_result = attempt_builtin_powi (stmt, &ops);
4508 /* If the operand vector is now empty, all operands were
4509 consumed by the __builtin_powi optimization. */
4510 if (ops.length () == 0)
4511 transform_stmt_to_copy (&gsi, stmt, powi_result);
4512 else if (ops.length () == 1)
4514 tree last_op = ops.last ()->op;
4516 if (powi_result)
4517 transform_stmt_to_multiply (&gsi, stmt, last_op,
4518 powi_result);
4519 else
4520 transform_stmt_to_copy (&gsi, stmt, last_op);
4522 else
4524 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4525 int ops_num = ops.length ();
4526 int width = get_reassociation_width (ops_num, rhs_code, mode);
4527 tree new_lhs = lhs;
4529 if (dump_file && (dump_flags & TDF_DETAILS))
4530 fprintf (dump_file,
4531 "Width = %d was chosen for reassociation\n", width);
4533 if (width > 1
4534 && ops.length () > 3)
4535 rewrite_expr_tree_parallel (stmt, width, ops);
4536 else
4538 /* When there are three operands left, we want
4539 to make sure the ones that get the double
4540 binary op are chosen wisely. */
4541 int len = ops.length ();
4542 if (len >= 3)
4543 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4545 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4546 powi_result != NULL);
4549 /* If we combined some repeated factors into a
4550 __builtin_powi call, multiply that result by the
4551 reassociated operands. */
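/* E.g. (illustrative) if the original statement computed
   lhs = a * b * c * c, the repeated factor c was replaced above by
   powi_result = __builtin_powi (c, 2), the remaining operands were
   rewritten into a statement computing a * b, and we now append a
   multiply so that lhs = powi_result * (a * b).  */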
4552 if (powi_result)
4554 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4555 tree type = TREE_TYPE (lhs);
4556 tree target_ssa = make_temp_ssa_name (type, NULL,
4557 "reassocpow");
4558 gimple_set_lhs (lhs_stmt, target_ssa);
4559 update_stmt (lhs_stmt);
4560 if (lhs != new_lhs)
4561 target_ssa = new_lhs;
4562 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4563 powi_result,
4564 target_ssa);
4565 gimple_set_location (mul_stmt, gimple_location (stmt));
4566 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4572 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4573 son;
4574 son = next_dom_son (CDI_POST_DOMINATORS, son))
4575 reassociate_bb (son);
4578 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4579 void debug_ops_vector (vec<operand_entry_t> ops);
4581 /* Dump the operand entry vector OPS to FILE. */
4583 void
4584 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4586 operand_entry_t oe;
4587 unsigned int i;
4589 FOR_EACH_VEC_ELT (ops, i, oe)
4591 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4592 print_generic_expr (file, oe->op, 0);
4596 /* Dump the operand entry vector OPS to STDERR. */
4598 DEBUG_FUNCTION void
4599 debug_ops_vector (vec<operand_entry_t> ops)
4601 dump_ops_vector (stderr, ops);
4604 static void
4605 do_reassoc (void)
4607 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4608 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4611 /* Initialize the reassociation pass. */
4613 static void
4614 init_reassoc (void)
4616 int i;
4617 long rank = 2;
4618 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4620 /* Find the loops, so that we can prevent moving calculations in
4621 them. */
4622 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4624 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4626 operand_entry_pool = create_alloc_pool ("operand entry pool",
4627 sizeof (struct operand_entry), 30);
4628 next_operand_entry_id = 0;
4630 /* Reverse RPO (Reverse Post Order) will give us something where
4631 deeper loops come later. */
4632 pre_and_rev_post_order_compute (NULL, bbs, false);
4633 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
4634 operand_rank = pointer_map_create ();
4636 /* Give each default definition a distinct rank. This includes
4637 parameters and the static chain. Walk backwards over all
4638 SSA names so that we get proper rank ordering according
4639 to tree_swap_operands_p. */
4640 for (i = num_ssa_names - 1; i > 0; --i)
4642 tree name = ssa_name (i);
4643 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4644 insert_operand_rank (name, ++rank);
4647 /* Set up rank for each BB */
4648 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
4649 bb_rank[bbs[i]] = ++rank << 16;
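/* For instance (illustrative), with two default definitions the SSA
   names receive ranks 3 and 4 above, and the basic blocks then get
   base ranks 5 << 16, 6 << 16, ..., so any rank derived from a block
   is strictly larger than the rank of any default definition.  */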
4651 free (bbs);
4652 calculate_dominance_info (CDI_POST_DOMINATORS);
4653 plus_negates = vNULL;
4656 /* Cleanup after the reassociation pass, and print stats if
4657 requested. */
4659 static void
4660 fini_reassoc (void)
4662 statistics_counter_event (cfun, "Linearized",
4663 reassociate_stats.linearized);
4664 statistics_counter_event (cfun, "Constants eliminated",
4665 reassociate_stats.constants_eliminated);
4666 statistics_counter_event (cfun, "Ops eliminated",
4667 reassociate_stats.ops_eliminated);
4668 statistics_counter_event (cfun, "Statements rewritten",
4669 reassociate_stats.rewritten);
4670 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4671 reassociate_stats.pows_encountered);
4672 statistics_counter_event (cfun, "Built-in powi calls created",
4673 reassociate_stats.pows_created);
4675 pointer_map_destroy (operand_rank);
4676 free_alloc_pool (operand_entry_pool);
4677 free (bb_rank);
4678 plus_negates.release ();
4679 free_dominance_info (CDI_POST_DOMINATORS);
4680 loop_optimizer_finalize ();
4683 /* Gate and execute functions for Reassociation. */
4685 static unsigned int
4686 execute_reassoc (void)
4688 init_reassoc ();
4690 do_reassoc ();
4691 repropagate_negates ();
4693 fini_reassoc ();
4694 return 0;
4697 namespace {
4699 const pass_data pass_data_reassoc =
4701 GIMPLE_PASS, /* type */
4702 "reassoc", /* name */
4703 OPTGROUP_NONE, /* optinfo_flags */
4704 true, /* has_execute */
4705 TV_TREE_REASSOC, /* tv_id */
4706 ( PROP_cfg | PROP_ssa ), /* properties_required */
4707 0, /* properties_provided */
4708 0, /* properties_destroyed */
4709 0, /* todo_flags_start */
4710 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
4713 class pass_reassoc : public gimple_opt_pass
4715 public:
4716 pass_reassoc (gcc::context *ctxt)
4717 : gimple_opt_pass (pass_data_reassoc, ctxt)
4720 /* opt_pass methods: */
4721 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
4722 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
4723 virtual unsigned int execute (function *) { return execute_reassoc (); }
4725 }; // class pass_reassoc
4727 } // anon namespace
4729 gimple_opt_pass *
4730 make_pass_reassoc (gcc::context *ctxt)
4732 return new pass_reassoc (ctxt);