gcc/tree-ssa-reassoc.c

   1 /* Reassociation for trees.
   2    Copyright (C) 2005-2014 Free Software Foundation, Inc.
   3    Contributed by Daniel Berlin <dan@dberlin.org>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 3, or (at your option)
  10 any later version.
  11
  12 GCC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "hash-table.h"
  25 #include "tm.h"
  26 #include "rtl.h"
  27 #include "tm_p.h"
  28 #include "tree.h"
  29 #include "stor-layout.h"
  30 #include "basic-block.h"
  31 #include "gimple-pretty-print.h"
  32 #include "tree-inline.h"
  33 #include "pointer-set.h"
  34 #include "tree-ssa-alias.h"
  35 #include "internal-fn.h"
  36 #include "gimple-fold.h"
  37 #include "tree-eh.h"
  38 #include "gimple-expr.h"
  39 #include "is-a.h"
  40 #include "gimple.h"
  41 #include "gimple-iterator.h"
  42 #include "gimplify-me.h"
  43 #include "gimple-ssa.h"
  44 #include "tree-cfg.h"
  45 #include "tree-phinodes.h"
  46 #include "ssa-iterators.h"
  47 #include "stringpool.h"
  48 #include "tree-ssanames.h"
  49 #include "tree-ssa-loop-niter.h"
  50 #include "tree-ssa-loop.h"
  51 #include "expr.h"
  52 #include "tree-dfa.h"
  53 #include "tree-ssa.h"
  54 #include "tree-iterator.h"
  55 #include "tree-pass.h"
  56 #include "alloc-pool.h"
  57 #include "langhooks.h"
  58 #include "cfgloop.h"
  59 #include "flags.h"
  60 #include "target.h"
  61 #include "params.h"
  62 #include "diagnostic-core.h"
  63
  64 /*  This is a simple global reassociation pass.  It is, in part, based
  65     on the LLVM pass of the same name (They do some things more/less
  66     than we do, in different orders, etc).
  67
  68     It consists of five steps:
  69
  70     1. Breaking up subtract operations into addition + negate, where
  71     it would promote the reassociation of adds.
  72
  73     2. Left linearization of the expression trees, so that (A+B)+(C+D)
  74     becomes (((A+B)+C)+D), which is easier for us to rewrite later.
  75     During linearization, we place the operands of the binary
  76     expressions into a vector of operand_entry_t
  77
  78     3. Optimization of the operand lists, eliminating things like a +
  79     -a, a & a, etc.
  80
  81     3a. Combine repeated factors with the same occurrence counts
  82     into a __builtin_powi call that will later be optimized into
  83     an optimal number of multiplies.
  84
  85     4. Rewrite the expression trees we linearized and optimized so
  86     they are in proper rank order.
  87
  88     5. Repropagate negates, as nothing else will clean it up ATM.
  89
  90     A bit of theory on #4, since nobody seems to write anything down
  91     about why it makes sense to do it the way they do it:
  92
  93     We could do this much nicer theoretically, but don't (for reasons
  94     explained after how to do it theoretically nice :P).
  95
  96     In order to promote the most redundancy elimination, you want
  97     binary expressions whose operands are the same rank (or
  98     preferably, the same value) exposed to the redundancy eliminator,
  99     for possible elimination.
 100
 101     So the way to do this if we really cared, is to build the new op
 102     tree from the leaves to the roots, merging as you go, and putting the
 103     new op on the end of the worklist, until you are left with one
 104     thing on the worklist.
 105
 106     IE if you have to rewrite the following set of operands (listed with
 107     rank in parentheses), with opcode PLUS_EXPR:
 108
 109     a (1),  b (1),  c (1),  d (2), e (2)
 110
 111
 112     We start with our merge worklist empty, and the ops list with all of
 113     those on it.
 114
 115     You want to first merge all leaves of the same rank, as much as
 116     possible.
 117
 118     So first build a binary op of
 119
 120     mergetmp = a + b, and put "mergetmp" on the merge worklist.
 121
 122     Because there is no three operand form of PLUS_EXPR, c is not going to
 123     be exposed to redundancy elimination as a rank 1 operand.
 124
 125     So you might as well throw it on the merge worklist (you could also
 126     consider it to now be a rank two operand, and merge it with d and e,
 127     but in this case, you then have evicted e from a binary op. So at
 128     least in this situation, you can't win.)
 129
 130     Then build a binary op of d + e
 131     mergetmp2 = d + e
 132
 133     and put mergetmp2 on the merge worklist.
 134
 135     so merge worklist = {mergetmp, c, mergetmp2}
 136
 137     Continue building binary ops of these operations until you have only
 138     one operation left on the worklist.
 139
 140     So we have
 141
 142     build binary op
 143     mergetmp3 = mergetmp + c
 144
 145     worklist = {mergetmp2, mergetmp3}
 146
 147     mergetmp4 = mergetmp2 + mergetmp3
 148
 149     worklist = {mergetmp4}
 150
 151     because we have one operation left, we can now just set the original
 152     statement equal to the result of that operation.
 153
 154     This will at least expose a + b  and d + e to redundancy elimination
 155     as binary operations.
 156
 157     For extra points, you can reuse the old statements to build the
 158     mergetmps, since you shouldn't run out.
 159
 160     So why don't we do this?
 161
 162     Because it's expensive, and rarely will help.  Most trees we are
 163     reassociating have 3 or less ops.  If they have 2 ops, they already
 164     will be written into a nice single binary op.  If you have 3 ops, a
 165     single simple check suffices to tell you whether the first two are of the
 166     same rank.  If so, you know to order it
 167
 168     mergetmp = op1 + op2
 169     newstmt = mergetmp + op3
 170
 171     instead of
 172     mergetmp = op2 + op3
 173     newstmt = mergetmp + op1
 174
 175     If all three are of the same rank, you can't expose them all in a
 176     single binary operator anyway, so the above is *still* the best you
 177     can do.
 178
 179     Thus, this is what we do.  When we have three ops left, we check to see
 180     what order to put them in, and call it a day.  As a nod to vector sum
 181     reduction, we check if any of the ops are really a phi node that is a
 182     destructive update for the associating op, and keep the destructive
 183     update together for vector sum reduction recognition.  */
 184
 185
 186 /* Statistics */
 187 static struct
 188 {
 189   int linearized;
 190   int constants_eliminated;
 191   int ops_eliminated;
 192   int rewritten;
 193   int pows_encountered;
 194   int pows_created;
 195 } reassociate_stats;
 196
 197 /* Operator, rank pair.  */
 198 typedef struct operand_entry
 199 {
 200   unsigned int rank;
 201   int id;
 202   tree op;
 203   unsigned int count;
 204 } *operand_entry_t;
 205
 206 static alloc_pool operand_entry_pool;
 207
 208 /* This is used to assign a unique ID to each struct operand_entry
 209    so that qsort results are identical on different hosts.  */
 210 static int next_operand_entry_id;
 211
 212 /* Starting rank number for a given basic block, so that we can rank
 213    operations using unmovable instructions in that BB based on the bb
 214    depth.  */
 215 static long *bb_rank;
 216
 217 /* Operand->rank hashtable.  */
 218 static struct pointer_map_t *operand_rank;
 219
 220 /* Forward decls.  */
 221 static long get_rank (tree);
 222 static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
 223
 224 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
 225    possibly added by gsi_remove.  */
 226
 227 bool
 228 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
 229 {
 230   gimple stmt = gsi_stmt (*gsi);
 231
 232   if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
 233     return gsi_remove (gsi, true);
 234
 235   gimple_stmt_iterator prev = *gsi;
 236   gsi_prev (&prev);
 237   unsigned uid = gimple_uid (stmt);
 238   basic_block bb = gimple_bb (stmt);
 239   bool ret = gsi_remove (gsi, true);
 240   if (!gsi_end_p (prev))
 241     gsi_next (&prev);
 242   else
 243     prev = gsi_start_bb (bb);
 244   gimple end_stmt = gsi_stmt (*gsi);
 245   while ((stmt = gsi_stmt (prev)) != end_stmt)
 246     {
 247       gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
 248       gimple_set_uid (stmt, uid);
 249       gsi_next (&prev);
 250     }
 251   return ret;
 252 }
 253
 254 /* Bias amount for loop-carried phis.  We want this to be larger than
 255    the depth of any reassociation tree we can see, but not larger than
 256    the rank difference between two blocks.  */
 257 #define PHI_LOOP_BIAS (1 << 15)
 258
 259 /* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
 260    an innermost loop, and the phi has only a single use which is inside
 261    the loop, then the rank is the block rank of the loop latch plus an
 262    extra bias for the loop-carried dependence.  This causes expressions
 263    calculated into an accumulator variable to be independent for each
 264    iteration of the loop.  If STMT is some other phi, the rank is the
 265    block rank of its containing block.  */
 266 static long
 267 phi_rank (gimple stmt)
 268 {
 269   basic_block bb = gimple_bb (stmt);
 270   struct loop *father = bb->loop_father;
 271   tree res;
 272   unsigned i;
 273   use_operand_p use;
 274   gimple use_stmt;
 275
 276   /* We only care about real loops (those with a latch).  */
 277   if (!father->latch)
 278     return bb_rank[bb->index];
 279
 280   /* Interesting phis must be in headers of innermost loops.  */
 281   if (bb != father->header
 282       || father->inner)
 283     return bb_rank[bb->index];
 284
 285   /* Ignore virtual SSA_NAMEs.  */
 286   res = gimple_phi_result (stmt);
 287   if (virtual_operand_p (res))
 288     return bb_rank[bb->index];
 289
 290   /* The phi definition must have a single use, and that use must be
 291      within the loop.  Otherwise this isn't an accumulator pattern.  */
 292   if (!single_imm_use (res, &use, &use_stmt)
 293       || gimple_bb (use_stmt)->loop_father != father)
 294     return bb_rank[bb->index];
 295
 296   /* Look for phi arguments from within the loop.  If found, bias this phi.  */
 297   for (i = 0; i < gimple_phi_num_args (stmt); i++)
 298     {
 299       tree arg = gimple_phi_arg_def (stmt, i);
 300       if (TREE_CODE (arg) == SSA_NAME
 301           && !SSA_NAME_IS_DEFAULT_DEF (arg))
 302         {
 303           gimple def_stmt = SSA_NAME_DEF_STMT (arg);
 304           if (gimple_bb (def_stmt)->loop_father == father)
 305             return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
 306         }
 307     }
 308
 309   /* Must be an uninteresting phi.  */
 310   return bb_rank[bb->index];
 311 }
 312
 313 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
 314    loop-carried dependence of an innermost loop, return TRUE; else
 315    return FALSE.  */
 316 static bool
 317 loop_carried_phi (tree exp)
 318 {
 319   gimple phi_stmt;
 320   long block_rank;
 321
 322   if (TREE_CODE (exp) != SSA_NAME
 323       || SSA_NAME_IS_DEFAULT_DEF (exp))
 324     return false;
 325
 326   phi_stmt = SSA_NAME_DEF_STMT (exp);
 327
 328   if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
 329     return false;
 330
 331   /* Non-loop-carried phis have block rank.  Loop-carried phis have
 332      an additional bias added in.  If this phi doesn't have block rank,
 333      it's biased and should not be propagated.  */
 334   block_rank = bb_rank[gimple_bb (phi_stmt)->index];
 335
 336   if (phi_rank (phi_stmt) != block_rank)
 337     return true;
 338
 339   return false;
 340 }
 341
 342 /* Return the maximum of RANK and the rank that should be propagated
 343    from expression OP.  For most operands, this is just the rank of OP.
 344    For loop-carried phis, the value is zero to avoid undoing the bias
 345    in favor of the phi.  */
 346 static long
 347 propagate_rank (long rank, tree op)
 348 {
 349   long op_rank;
 350
 351   if (loop_carried_phi (op))
 352     return rank;
 353
 354   op_rank = get_rank (op);
 355
 356   return MAX (rank, op_rank);
 357 }
 358
 359 /* Look up the operand rank structure for expression E.  */
 360
 361 static inline long
 362 find_operand_rank (tree e)
 363 {
 364   void **slot = pointer_map_contains (operand_rank, e);
 365   return slot ? (long) (intptr_t) *slot : -1;
 366 }
 367
 368 /* Insert {E,RANK} into the operand rank hashtable.  */
 369
 370 static inline void
 371 insert_operand_rank (tree e, long rank)
 372 {
 373   void **slot;
 374   gcc_assert (rank > 0);
 375   slot = pointer_map_insert (operand_rank, e);
 376   gcc_assert (!*slot);
 377   *slot = (void *) (intptr_t) rank;
 378 }
 379
 380 /* Given an expression E, return the rank of the expression.  */
 381
 382 static long
 383 get_rank (tree e)
 384 {
 385   /* Constants have rank 0.  */
 386   if (is_gimple_min_invariant (e))
 387     return 0;
 388
 389   /* SSA_NAME's have the rank of the expression they are the result
 390      of.
 391      For globals and uninitialized values, the rank is 0.
 392      For function arguments, use the pre-setup rank.
 393      For PHI nodes, stores, asm statements, etc, we use the rank of
 394      the BB.
 395      For simple operations, the rank is the maximum rank of any of
 396      its operands, or the bb_rank, whichever is less.
 397      I make no claims that this is optimal, however, it gives good
 398      results.  */
 399
 400   /* We make an exception to the normal ranking system to break
 401      dependences of accumulator variables in loops.  Suppose we
 402      have a simple one-block loop containing:
 403
 404        x_1 = phi(x_0, x_2)
 405        b = a + x_1
 406        c = b + d
 407        x_2 = c + e
 408
 409      As shown, each iteration of the calculation into x is fully
 410      dependent upon the iteration before it.  We would prefer to
 411      see this in the form:
 412
 413        x_1 = phi(x_0, x_2)
 414        b = a + d
 415        c = b + e
 416        x_2 = c + x_1
 417
 418      If the loop is unrolled, the calculations of b and c from
 419      different iterations can be interleaved.
 420
 421      To obtain this result during reassociation, we bias the rank
 422      of the phi definition x_1 upward, when it is recognized as an
 423      accumulator pattern.  The artificial rank causes it to be
 424      added last, providing the desired independence.  */
 425
 426   if (TREE_CODE (e) == SSA_NAME)
 427     {
 428       gimple stmt;
 429       long rank;
 430       int i, n;
 431       tree op;
 432
 433       if (SSA_NAME_IS_DEFAULT_DEF (e))
 434         return find_operand_rank (e);
 435
 436       stmt = SSA_NAME_DEF_STMT (e);
 437       if (gimple_code (stmt) == GIMPLE_PHI)
 438         return phi_rank (stmt);
 439
 440       if (!is_gimple_assign (stmt)
 441           || gimple_vdef (stmt))
 442         return bb_rank[gimple_bb (stmt)->index];
 443
 444       /* If we already have a rank for this expression, use that.  */
 445       rank = find_operand_rank (e);
 446       if (rank != -1)
 447         return rank;
 448
 449       /* Otherwise, find the maximum rank for the operands.  As an
 450          exception, remove the bias from loop-carried phis when propagating
 451          the rank so that dependent operations are not also biased.  */
 452       rank = 0;
 453       if (gimple_assign_single_p (stmt))
 454         {
 455           tree rhs = gimple_assign_rhs1 (stmt);
 456           n = TREE_OPERAND_LENGTH (rhs);
 457           if (n == 0)
 458             rank = propagate_rank (rank, rhs);
 459           else
 460             {
 461               for (i = 0; i < n; i++)
 462                 {
 463                   op = TREE_OPERAND (rhs, i);
 464
 465                   if (op != NULL_TREE)
 466                     rank = propagate_rank (rank, op);
 467                 }
 468             }
 469         }
 470       else
 471         {
 472           n = gimple_num_ops (stmt);
 473           for (i = 1; i < n; i++)
 474             {
 475               op = gimple_op (stmt, i);
 476               gcc_assert (op);
 477               rank = propagate_rank (rank, op);
 478             }
 479         }
 480
 481       if (dump_file && (dump_flags & TDF_DETAILS))
 482         {
 483           fprintf (dump_file, "Rank for ");
 484           print_generic_expr (dump_file, e, 0);
 485           fprintf (dump_file, " is %ld\n", (rank + 1));
 486         }
 487
 488       /* Note the rank in the hashtable so we don't recompute it.  */
 489       insert_operand_rank (e, (rank + 1));
 490       return (rank + 1);
 491     }
 492
 493   /* Globals, etc,  are rank 0 */
 494   return 0;
 495 }
 496
 497
 498 /* We want integer ones to end up last no matter what, since they are
 499    the ones we can do the most with.  */
 500 #define INTEGER_CONST_TYPE 1 << 3
 501 #define FLOAT_CONST_TYPE 1 << 2
 502 #define OTHER_CONST_TYPE 1 << 1
 503
 504 /* Classify an invariant tree into integer, float, or other, so that
 505    we can sort them to be near other constants of the same type.  */
 506 static inline int
 507 constant_type (tree t)
 508 {
 509   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
 510     return INTEGER_CONST_TYPE;
 511   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
 512     return FLOAT_CONST_TYPE;
 513   else
 514     return OTHER_CONST_TYPE;
 515 }
 516
 517 /* qsort comparison function to sort operand entries PA and PB by rank
 518    so that the sorted array is ordered by rank in decreasing order.  */
 519 static int
 520 sort_by_operand_rank (const void *pa, const void *pb)
 521 {
 522   const operand_entry_t oea = *(const operand_entry_t *)pa;
 523   const operand_entry_t oeb = *(const operand_entry_t *)pb;
 524
 525   /* It's nicer for optimize_expression if constants that are likely
 526      to fold when added/multiplied//whatever are put next to each
 527      other.  Since all constants have rank 0, order them by type.  */
 528   if (oeb->rank == 0 && oea->rank == 0)
 529     {
 530       if (constant_type (oeb->op) != constant_type (oea->op))
 531         return constant_type (oeb->op) - constant_type (oea->op);
 532       else
 533         /* To make sorting result stable, we use unique IDs to determine
 534            order.  */
 535         return oeb->id - oea->id;
 536     }
 537
 538   /* Lastly, make sure the versions that are the same go next to each
 539      other.  */
 540   if ((oeb->rank - oea->rank == 0)
 541       && TREE_CODE (oea->op) == SSA_NAME
 542       && TREE_CODE (oeb->op) == SSA_NAME)
 543     {
 544       /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
 545          versions of removed SSA_NAMEs, so if possible, prefer to sort
 546          based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
 547          See PR60418.  */
 548       if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
 549           && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
 550           && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
 551         {
 552           gimple stmta = SSA_NAME_DEF_STMT (oea->op);
 553           gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
 554           basic_block bba = gimple_bb (stmta);
 555           basic_block bbb = gimple_bb (stmtb);
 556           if (bbb != bba)
 557             {
 558               if (bb_rank[bbb->index] != bb_rank[bba->index])
 559                 return bb_rank[bbb->index] - bb_rank[bba->index];
 560             }
 561           else
 562             {
 563               bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
 564               bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
 565               if (da != db)
 566                 return da ? 1 : -1;
 567             }
 568         }
 569
 570       if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
 571         return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
 572       else
 573         return oeb->id - oea->id;
 574     }
 575
 576   if (oeb->rank != oea->rank)
 577     return oeb->rank - oea->rank;
 578   else
 579     return oeb->id - oea->id;
 580 }
 581
 582 /* Add an operand entry to *OPS for the tree operand OP.  */
 583
 584 static void
 585 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
 586 {
 587   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
 588
 589   oe->op = op;
 590   oe->rank = get_rank (op);
 591   oe->id = next_operand_entry_id++;
 592   oe->count = 1;
 593   ops->safe_push (oe);
 594 }
 595
 596 /* Add an operand entry to *OPS for the tree operand OP with repeat
 597    count REPEAT.  */
 598
 599 static void
 600 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
 601                        HOST_WIDE_INT repeat)
 602 {
 603   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
 604
 605   oe->op = op;
 606   oe->rank = get_rank (op);
 607   oe->id = next_operand_entry_id++;
 608   oe->count = repeat;
 609   ops->safe_push (oe);
 610
 611   reassociate_stats.pows_encountered++;
 612 }
 613
 614 /* Return true if STMT is reassociable operation containing a binary
 615    operation with tree code CODE, and is inside LOOP.  */
 616
 617 static bool
 618 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
 619 {
 620   basic_block bb = gimple_bb (stmt);
 621
 622   if (gimple_bb (stmt) == NULL)
 623     return false;
 624
 625   if (!flow_bb_inside_loop_p (loop, bb))
 626     return false;
 627
 628   if (is_gimple_assign (stmt)
 629       && gimple_assign_rhs_code (stmt) == code
 630       && has_single_use (gimple_assign_lhs (stmt)))
 631     return true;
 632
 633   return false;
 634 }
 635
 636
 637 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
 638    operand of the negate operation.  Otherwise, return NULL.  */
 639
 640 static tree
 641 get_unary_op (tree name, enum tree_code opcode)
 642 {
 643   gimple stmt = SSA_NAME_DEF_STMT (name);
 644
 645   if (!is_gimple_assign (stmt))
 646     return NULL_TREE;
 647
 648   if (gimple_assign_rhs_code (stmt) == opcode)
 649     return gimple_assign_rhs1 (stmt);
 650   return NULL_TREE;
 651 }
 652
 653 /* If CURR and LAST are a pair of ops that OPCODE allows us to
 654    eliminate through equivalences, do so, remove them from OPS, and
 655    return true.  Otherwise, return false.  */
 656
 657 static bool
 658 eliminate_duplicate_pair (enum tree_code opcode,
 659                           vec<operand_entry_t> *ops,
 660                           bool *all_done,
 661                           unsigned int i,
 662                           operand_entry_t curr,
 663                           operand_entry_t last)
 664 {
 665
 666   /* If we have two of the same op, and the opcode is & |, min, or max,
 667      we can eliminate one of them.
 668      If we have two of the same op, and the opcode is ^, we can
 669      eliminate both of them.  */
 670
 671   if (last && last->op == curr->op)
 672     {
 673       switch (opcode)
 674         {
 675         case MAX_EXPR:
 676         case MIN_EXPR:
 677         case BIT_IOR_EXPR:
 678         case BIT_AND_EXPR:
 679           if (dump_file && (dump_flags & TDF_DETAILS))
 680             {
 681               fprintf (dump_file, "Equivalence: ");
 682               print_generic_expr (dump_file, curr->op, 0);
 683               fprintf (dump_file, " [&|minmax] ");
 684               print_generic_expr (dump_file, last->op, 0);
 685               fprintf (dump_file, " -> ");
 686               print_generic_stmt (dump_file, last->op, 0);
 687             }
 688
 689           ops->ordered_remove (i);
 690           reassociate_stats.ops_eliminated ++;
 691
 692           return true;
 693
 694         case BIT_XOR_EXPR:
 695           if (dump_file && (dump_flags & TDF_DETAILS))
 696             {
 697               fprintf (dump_file, "Equivalence: ");
 698               print_generic_expr (dump_file, curr->op, 0);
 699               fprintf (dump_file, " ^ ");
 700               print_generic_expr (dump_file, last->op, 0);
 701               fprintf (dump_file, " -> nothing\n");
 702             }
 703
 704           reassociate_stats.ops_eliminated += 2;
 705
 706           if (ops->length () == 2)
 707             {
 708               ops->create (0);
 709               add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
 710               *all_done = true;
 711             }
 712           else
 713             {
 714               ops->ordered_remove (i-1);
 715               ops->ordered_remove (i-1);
 716             }
 717
 718           return true;
 719
 720         default:
 721           break;
 722         }
 723     }
 724   return false;
 725 }
 726
 727 static vec<tree> plus_negates;
 728
 729 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
 730    expression, look in OPS for a corresponding positive operation to cancel
 731    it out.  If we find one, remove the other from OPS, replace
 732    OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
 733    return false. */
 734
 735 static bool
 736 eliminate_plus_minus_pair (enum tree_code opcode,
 737                            vec<operand_entry_t> *ops,
 738                            unsigned int currindex,
 739                            operand_entry_t curr)
 740 {
 741   tree negateop;
 742   tree notop;
 743   unsigned int i;
 744   operand_entry_t oe;
 745
 746   if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
 747     return false;
 748
 749   negateop = get_unary_op (curr->op, NEGATE_EXPR);
 750   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 751   if (negateop == NULL_TREE && notop == NULL_TREE)
 752     return false;
 753
 754   /* Any non-negated version will have a rank that is one less than
 755      the current rank.  So once we hit those ranks, if we don't find
 756      one, we can stop.  */
 757
 758   for (i = currindex + 1;
 759        ops->iterate (i, &oe)
 760        && oe->rank >= curr->rank - 1 ;
 761        i++)
 762     {
 763       if (oe->op == negateop)
 764         {
 765
 766           if (dump_file && (dump_flags & TDF_DETAILS))
 767             {
 768               fprintf (dump_file, "Equivalence: ");
 769               print_generic_expr (dump_file, negateop, 0);
 770               fprintf (dump_file, " + -");
 771               print_generic_expr (dump_file, oe->op, 0);
 772               fprintf (dump_file, " -> 0\n");
 773             }
 774
 775           ops->ordered_remove (i);
 776           add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
 777           ops->ordered_remove (currindex);
 778           reassociate_stats.ops_eliminated ++;
 779
 780           return true;
 781         }
 782       else if (oe->op == notop)
 783         {
 784           tree op_type = TREE_TYPE (oe->op);
 785
 786           if (dump_file && (dump_flags & TDF_DETAILS))
 787             {
 788               fprintf (dump_file, "Equivalence: ");
 789               print_generic_expr (dump_file, notop, 0);
 790               fprintf (dump_file, " + ~");
 791               print_generic_expr (dump_file, oe->op, 0);
 792               fprintf (dump_file, " -> -1\n");
 793             }
 794
 795           ops->ordered_remove (i);
 796           add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
 797           ops->ordered_remove (currindex);
 798           reassociate_stats.ops_eliminated ++;
 799
 800           return true;
 801         }
 802     }
 803
 804   /* CURR->OP is a negate expr in a plus expr: save it for later
 805      inspection in repropagate_negates().  */
 806   if (negateop != NULL_TREE)
 807     plus_negates.safe_push (curr->op);
 808
 809   return false;
 810 }
 811
 812 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
 813    bitwise not expression, look in OPS for a corresponding operand to
 814    cancel it out.  If we find one, remove the other from OPS, replace
 815    OPS[CURRINDEX] with 0, and return true.  Otherwise, return
 816    false. */
 817
 818 static bool
 819 eliminate_not_pairs (enum tree_code opcode,
 820                      vec<operand_entry_t> *ops,
 821                      unsigned int currindex,
 822                      operand_entry_t curr)
 823 {
 824   tree notop;
 825   unsigned int i;
 826   operand_entry_t oe;
 827
 828   if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
 829       || TREE_CODE (curr->op) != SSA_NAME)
 830     return false;
 831
 832   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 833   if (notop == NULL_TREE)
 834     return false;
 835
 836   /* Any non-not version will have a rank that is one less than
 837      the current rank.  So once we hit those ranks, if we don't find
 838      one, we can stop.  */
 839
 840   for (i = currindex + 1;
 841        ops->iterate (i, &oe)
 842        && oe->rank >= curr->rank - 1;
 843        i++)
 844     {
 845       if (oe->op == notop)
 846         {
 847           if (dump_file && (dump_flags & TDF_DETAILS))
 848             {
 849               fprintf (dump_file, "Equivalence: ");
 850               print_generic_expr (dump_file, notop, 0);
 851               if (opcode == BIT_AND_EXPR)
 852                 fprintf (dump_file, " & ~");
 853               else if (opcode == BIT_IOR_EXPR)
 854                 fprintf (dump_file, " | ~");
 855               print_generic_expr (dump_file, oe->op, 0);
 856               if (opcode == BIT_AND_EXPR)
 857                 fprintf (dump_file, " -> 0\n");
 858               else if (opcode == BIT_IOR_EXPR)
 859                 fprintf (dump_file, " -> -1\n");
 860             }
 861
 862           if (opcode == BIT_AND_EXPR)
 863             oe->op = build_zero_cst (TREE_TYPE (oe->op));
 864           else if (opcode == BIT_IOR_EXPR)
 865             oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
 866
 867           reassociate_stats.ops_eliminated += ops->length () - 1;
 868           ops->truncate (0);
 869           ops->quick_push (oe);
 870           return true;
 871         }
 872     }
 873
 874   return false;
 875 }
 876
 877 /* Use constant value that may be present in OPS to try to eliminate
 878    operands.  Note that this function is only really used when we've
 879    eliminated ops for other reasons, or merged constants.  Across
 880    single statements, fold already does all of this, plus more.  There
 881    is little point in duplicating logic, so I've only included the
 882    identities that I could ever construct testcases to trigger.  */
 883
 884 static void
 885 eliminate_using_constants (enum tree_code opcode,
 886                            vec<operand_entry_t> *ops)
 887 {
 888   operand_entry_t oelast = ops->last ();
 889   tree type = TREE_TYPE (oelast->op);
 890
 891   if (oelast->rank == 0
 892       && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
 893     {
 894       switch (opcode)
 895         {
 896         case BIT_AND_EXPR:
 897           if (integer_zerop (oelast->op))
 898             {
 899               if (ops->length () != 1)
 900                 {
 901                   if (dump_file && (dump_flags & TDF_DETAILS))
 902                     fprintf (dump_file, "Found & 0, removing all other ops\n");
 903
 904                   reassociate_stats.ops_eliminated += ops->length () - 1;
 905
 906                   ops->truncate (0);
 907                   ops->quick_push (oelast);
 908                   return;
 909                 }
 910             }
 911           else if (integer_all_onesp (oelast->op))
 912             {
 913               if (ops->length () != 1)
 914                 {
 915                   if (dump_file && (dump_flags & TDF_DETAILS))
 916                     fprintf (dump_file, "Found & -1, removing\n");
 917                   ops->pop ();
 918                   reassociate_stats.ops_eliminated++;
 919                 }
 920             }
 921           break;
 922         case BIT_IOR_EXPR:
 923           if (integer_all_onesp (oelast->op))
 924             {
 925               if (ops->length () != 1)
 926                 {
 927                   if (dump_file && (dump_flags & TDF_DETAILS))
 928                     fprintf (dump_file, "Found | -1, removing all other ops\n");
 929
 930                   reassociate_stats.ops_eliminated += ops->length () - 1;
 931
 932                   ops->truncate (0);
 933                   ops->quick_push (oelast);
 934                   return;
 935                 }
 936             }
 937           else if (integer_zerop (oelast->op))
 938             {
 939               if (ops->length () != 1)
 940                 {
 941                   if (dump_file && (dump_flags & TDF_DETAILS))
 942                     fprintf (dump_file, "Found | 0, removing\n");
 943                   ops->pop ();
 944                   reassociate_stats.ops_eliminated++;
 945                 }
 946             }
 947           break;
 948         case MULT_EXPR:
 949           if (integer_zerop (oelast->op)
 950               || (FLOAT_TYPE_P (type)
 951                   && !HONOR_NANS (TYPE_MODE (type))
 952                   && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
 953                   && real_zerop (oelast->op)))
 954             {
 955               if (ops->length () != 1)
 956                 {
 957                   if (dump_file && (dump_flags & TDF_DETAILS))
 958                     fprintf (dump_file, "Found * 0, removing all other ops\n");
 959
 960                   reassociate_stats.ops_eliminated += ops->length () - 1;
 961                   ops->truncate (1);
 962                   ops->quick_push (oelast);
 963                   return;
 964                 }
 965             }
 966           else if (integer_onep (oelast->op)
 967                    || (FLOAT_TYPE_P (type)
 968                        && !HONOR_SNANS (TYPE_MODE (type))
 969                        && real_onep (oelast->op)))
 970             {
 971               if (ops->length () != 1)
 972                 {
 973                   if (dump_file && (dump_flags & TDF_DETAILS))
 974                     fprintf (dump_file, "Found * 1, removing\n");
 975                   ops->pop ();
 976                   reassociate_stats.ops_eliminated++;
 977                   return;
 978                 }
 979             }
 980           break;
 981         case BIT_XOR_EXPR:
 982         case PLUS_EXPR:
 983         case MINUS_EXPR:
 984           if (integer_zerop (oelast->op)
 985               || (FLOAT_TYPE_P (type)
 986                   && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
 987                   && fold_real_zero_addition_p (type, oelast->op,
 988                                                 opcode == MINUS_EXPR)))
 989             {
 990               if (ops->length () != 1)
 991                 {
 992                   if (dump_file && (dump_flags & TDF_DETAILS))
 993                     fprintf (dump_file, "Found [|^+] 0, removing\n");
 994                   ops->pop ();
 995                   reassociate_stats.ops_eliminated++;
 996                   return;
 997                 }
 998             }
 999           break;
1000         default:
1001           break;
1002         }
1003     }
1004 }
1005
1006
1007 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
1008                                  bool, bool);
1009
1010 /* Structure for tracking and counting operands.  */
1011 typedef struct oecount_s {
1012   int cnt;
1013   int id;
1014   enum tree_code oecode;
1015   tree op;
1016 } oecount;
1017
1018
1019 /* The heap for the oecount hashtable and the sorted list of operands.  */
1020 static vec<oecount> cvec;
1021
1022
1023 /* Oecount hashtable helpers.  */
1024
1025 struct oecount_hasher : typed_noop_remove <void>
1026 {
1027   /* Note that this hash table stores integers, not pointers.
1028      So, observe the casting in the member functions.  */
1029   typedef void value_type;
1030   typedef void compare_type;
1031   static inline hashval_t hash (const value_type *);
1032   static inline bool equal (const value_type *, const compare_type *);
1033 };
1034
1035 /* Hash function for oecount.  */
1036
1037 inline hashval_t
1038 oecount_hasher::hash (const value_type *p)
1039 {
1040   const oecount *c = &cvec[(size_t)p - 42];
1041   return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1042 }
1043
1044 /* Comparison function for oecount.  */
1045
1046 inline bool
1047 oecount_hasher::equal (const value_type *p1, const compare_type *p2)
1048 {
1049   const oecount *c1 = &cvec[(size_t)p1 - 42];
1050   const oecount *c2 = &cvec[(size_t)p2 - 42];
1051   return (c1->oecode == c2->oecode
1052           && c1->op == c2->op);
1053 }
1054
1055 /* Comparison function for qsort sorting oecount elements by count.  */
1056
1057 static int
1058 oecount_cmp (const void *p1, const void *p2)
1059 {
1060   const oecount *c1 = (const oecount *)p1;
1061   const oecount *c2 = (const oecount *)p2;
1062   if (c1->cnt != c2->cnt)
1063     return c1->cnt - c2->cnt;
1064   else
1065     /* If counts are identical, use unique IDs to stabilize qsort.  */
1066     return c1->id - c2->id;
1067 }
1068
1069 /* Return TRUE iff STMT represents a builtin call that raises OP
1070    to some exponent.  */
1071
1072 static bool
1073 stmt_is_power_of_op (gimple stmt, tree op)
1074 {
1075   tree fndecl;
1076
1077   if (!is_gimple_call (stmt))
1078     return false;
1079
1080   fndecl = gimple_call_fndecl (stmt);
1081
1082   if (!fndecl
1083       || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1084     return false;
1085
1086   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1087     {
1088     CASE_FLT_FN (BUILT_IN_POW):
1089     CASE_FLT_FN (BUILT_IN_POWI):
1090       return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1091
1092     default:
1093       return false;
1094     }
1095 }
1096
1097 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1098    in place and return the result.  Assumes that stmt_is_power_of_op
1099    was previously called for STMT and returned TRUE.  */
1100
1101 static HOST_WIDE_INT
1102 decrement_power (gimple stmt)
1103 {
1104   REAL_VALUE_TYPE c, cint;
1105   HOST_WIDE_INT power;
1106   tree arg1;
1107
1108   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1109     {
1110     CASE_FLT_FN (BUILT_IN_POW):
1111       arg1 = gimple_call_arg (stmt, 1);
1112       c = TREE_REAL_CST (arg1);
1113       power = real_to_integer (&c) - 1;
1114       real_from_integer (&cint, VOIDmode, power, SIGNED);
1115       gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1116       return power;
1117
1118     CASE_FLT_FN (BUILT_IN_POWI):
1119       arg1 = gimple_call_arg (stmt, 1);
1120       power = TREE_INT_CST_LOW (arg1) - 1;
1121       gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1122       return power;
1123
1124     default:
1125       gcc_unreachable ();
1126     }
1127 }
1128
1129 /* Find the single immediate use of STMT's LHS, and replace it
1130    with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
1131    replace *DEF with OP as well.  */
1132
1133 static void
1134 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1135 {
1136   tree lhs;
1137   gimple use_stmt;
1138   use_operand_p use;
1139   gimple_stmt_iterator gsi;
1140
1141   if (is_gimple_call (stmt))
1142     lhs = gimple_call_lhs (stmt);
1143   else
1144     lhs = gimple_assign_lhs (stmt);
1145
1146   gcc_assert (has_single_use (lhs));
1147   single_imm_use (lhs, &use, &use_stmt);
1148   if (lhs == *def)
1149     *def = op;
1150   SET_USE (use, op);
1151   if (TREE_CODE (op) != SSA_NAME)
1152     update_stmt (use_stmt);
1153   gsi = gsi_for_stmt (stmt);
1154   unlink_stmt_vdef (stmt);
1155   reassoc_remove_stmt (&gsi);
1156   release_defs (stmt);
1157 }
1158
1159 /* Walks the linear chain with result *DEF searching for an operation
1160    with operand OP and code OPCODE removing that from the chain.  *DEF
1161    is updated if there is only one operand but no operation left.  */
1162
1163 static void
1164 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1165 {
1166   gimple stmt = SSA_NAME_DEF_STMT (*def);
1167
1168   do
1169     {
1170       tree name;
1171
1172       if (opcode == MULT_EXPR
1173           && stmt_is_power_of_op (stmt, op))
1174         {
1175           if (decrement_power (stmt) == 1)
1176             propagate_op_to_single_use (op, stmt, def);
1177           return;
1178         }
1179
1180       name = gimple_assign_rhs1 (stmt);
1181
1182       /* If this is the operation we look for and one of the operands
1183          is ours simply propagate the other operand into the stmts
1184          single use.  */
1185       if (gimple_assign_rhs_code (stmt) == opcode
1186           && (name == op
1187               || gimple_assign_rhs2 (stmt) == op))
1188         {
1189           if (name == op)
1190             name = gimple_assign_rhs2 (stmt);
1191           propagate_op_to_single_use (name, stmt, def);
1192           return;
1193         }
1194
1195       /* We might have a multiply of two __builtin_pow* calls, and
1196          the operand might be hiding in the rightmost one.  */
1197       if (opcode == MULT_EXPR
1198           && gimple_assign_rhs_code (stmt) == opcode
1199           && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1200           && has_single_use (gimple_assign_rhs2 (stmt)))
1201         {
1202           gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1203           if (stmt_is_power_of_op (stmt2, op))
1204             {
1205               if (decrement_power (stmt2) == 1)
1206                 propagate_op_to_single_use (op, stmt2, def);
1207               return;
1208             }
1209         }
1210
1211       /* Continue walking the chain.  */
1212       gcc_assert (name != op
1213                   && TREE_CODE (name) == SSA_NAME);
1214       stmt = SSA_NAME_DEF_STMT (name);
1215     }
1216   while (1);
1217 }
1218
1219 /* Returns true if statement S1 dominates statement S2.  Like
1220    stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */
1221
1222 static bool
1223 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1224 {
1225   basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1226
1227   /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1228      SSA_NAME.  Assume it lives at the beginning of function and
1229      thus dominates everything.  */
1230   if (!bb1 || s1 == s2)
1231     return true;
1232
1233   /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
1234   if (!bb2)
1235     return false;
1236
1237   if (bb1 == bb2)
1238     {
1239       /* PHIs in the same basic block are assumed to be
1240          executed all in parallel, if only one stmt is a PHI,
1241          it dominates the other stmt in the same basic block.  */
1242       if (gimple_code (s1) == GIMPLE_PHI)
1243         return true;
1244
1245       if (gimple_code (s2) == GIMPLE_PHI)
1246         return false;
1247
1248       gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1249
1250       if (gimple_uid (s1) < gimple_uid (s2))
1251         return true;
1252
1253       if (gimple_uid (s1) > gimple_uid (s2))
1254         return false;
1255
1256       gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1257       unsigned int uid = gimple_uid (s1);
1258       for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1259         {
1260           gimple s = gsi_stmt (gsi);
1261           if (gimple_uid (s) != uid)
1262             break;
1263           if (s == s2)
1264             return true;
1265         }
1266
1267       return false;
1268     }
1269
1270   return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1271 }
1272
1273 /* Insert STMT after INSERT_POINT.  */
1274
1275 static void
1276 insert_stmt_after (gimple stmt, gimple insert_point)
1277 {
1278   gimple_stmt_iterator gsi;
1279   basic_block bb;
1280
1281   if (gimple_code (insert_point) == GIMPLE_PHI)
1282     bb = gimple_bb (insert_point);
1283   else if (!stmt_ends_bb_p (insert_point))
1284     {
1285       gsi = gsi_for_stmt (insert_point);
1286       gimple_set_uid (stmt, gimple_uid (insert_point));
1287       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1288       return;
1289     }
1290   else
1291     /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1292        thus if it must end a basic block, it should be a call that can
1293        throw, or some assignment that can throw.  If it throws, the LHS
1294        of it will not be initialized though, so only valid places using
1295        the SSA_NAME should be dominated by the fallthru edge.  */
1296     bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1297   gsi = gsi_after_labels (bb);
1298   if (gsi_end_p (gsi))
1299     {
1300       gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1301       gimple_set_uid (stmt,
1302                       gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1303     }
1304   else
1305     gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1306   gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1307 }
1308
1309 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1310    the result.  Places the statement after the definition of either
1311    OP1 or OP2.  Returns the new statement.  */
1312
1313 static gimple
1314 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1315 {
1316   gimple op1def = NULL, op2def = NULL;
1317   gimple_stmt_iterator gsi;
1318   tree op;
1319   gimple sum;
1320
1321   /* Create the addition statement.  */
1322   op = make_ssa_name (type, NULL);
1323   sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1324
1325   /* Find an insertion place and insert.  */
1326   if (TREE_CODE (op1) == SSA_NAME)
1327     op1def = SSA_NAME_DEF_STMT (op1);
1328   if (TREE_CODE (op2) == SSA_NAME)
1329     op2def = SSA_NAME_DEF_STMT (op2);
1330   if ((!op1def || gimple_nop_p (op1def))
1331       && (!op2def || gimple_nop_p (op2def)))
1332     {
1333       gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1334       if (gsi_end_p (gsi))
1335         {
1336           gimple_stmt_iterator gsi2
1337             = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1338           gimple_set_uid (sum,
1339                           gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1340         }
1341       else
1342         gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1343       gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1344     }
1345   else
1346     {
1347       gimple insert_point;
1348       if ((!op1def || gimple_nop_p (op1def))
1349            || (op2def && !gimple_nop_p (op2def)
1350                && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1351         insert_point = op2def;
1352       else
1353         insert_point = op1def;
1354       insert_stmt_after (sum, insert_point);
1355     }
1356   update_stmt (sum);
1357
1358   return sum;
1359 }
1360
1361 /* Perform un-distribution of divisions and multiplications.
1362    A * X + B * X is transformed into (A + B) * X and A / X + B / X
1363    to (A + B) / X for real X.
1364
1365    The algorithm is organized as follows.
1366
1367     - First we walk the addition chain *OPS looking for summands that
1368       are defined by a multiplication or a real division.  This results
1369       in the candidates bitmap with relevant indices into *OPS.
1370
1371     - Second we build the chains of multiplications or divisions for
1372       these candidates, counting the number of occurrences of (operand, code)
1373       pairs in all of the candidates chains.
1374
1375     - Third we sort the (operand, code) pairs by number of occurrence and
1376       process them starting with the pair with the most uses.
1377
1378       * For each such pair we walk the candidates again to build a
1379         second candidate bitmap noting all multiplication/division chains
1380         that have at least one occurrence of (operand, code).
1381
1382       * We build an alternate addition chain only covering these
1383         candidates with one (operand, code) operation removed from their
1384         multiplication/division chain.
1385
1386       * The first candidate gets replaced by the alternate addition chain
1387         multiplied/divided by the operand.
1388
1389       * All candidate chains get disabled for further processing and
1390         processing of (operand, code) pairs continues.
1391
1392   The alternate addition chains built are re-processed by the main
1393   reassociation algorithm which allows optimizing a * x * y + b * y * x
1394   to (a + b ) * x * y in one invocation of the reassociation pass.  */
1395
1396 static bool
1397 undistribute_ops_list (enum tree_code opcode,
1398                        vec<operand_entry_t> *ops, struct loop *loop)
1399 {
1400   unsigned int length = ops->length ();
1401   operand_entry_t oe1;
1402   unsigned i, j;
1403   sbitmap candidates, candidates2;
1404   unsigned nr_candidates, nr_candidates2;
1405   sbitmap_iterator sbi0;
1406   vec<operand_entry_t> *subops;
1407   hash_table <oecount_hasher> ctable;
1408   bool changed = false;
1409   int next_oecount_id = 0;
1410
1411   if (length <= 1
1412       || opcode != PLUS_EXPR)
1413     return false;
1414
1415   /* Build a list of candidates to process.  */
1416   candidates = sbitmap_alloc (length);
1417   bitmap_clear (candidates);
1418   nr_candidates = 0;
1419   FOR_EACH_VEC_ELT (*ops, i, oe1)
1420     {
1421       enum tree_code dcode;
1422       gimple oe1def;
1423
1424       if (TREE_CODE (oe1->op) != SSA_NAME)
1425         continue;
1426       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1427       if (!is_gimple_assign (oe1def))
1428         continue;
1429       dcode = gimple_assign_rhs_code (oe1def);
1430       if ((dcode != MULT_EXPR
1431            && dcode != RDIV_EXPR)
1432           || !is_reassociable_op (oe1def, dcode, loop))
1433         continue;
1434
1435       bitmap_set_bit (candidates, i);
1436       nr_candidates++;
1437     }
1438
1439   if (nr_candidates < 2)
1440     {
1441       sbitmap_free (candidates);
1442       return false;
1443     }
1444
1445   if (dump_file && (dump_flags & TDF_DETAILS))
1446     {
1447       fprintf (dump_file, "searching for un-distribute opportunities ");
1448       print_generic_expr (dump_file,
1449         (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1450       fprintf (dump_file, " %d\n", nr_candidates);
1451     }
1452
1453   /* Build linearized sub-operand lists and the counting table.  */
1454   cvec.create (0);
1455   ctable.create (15);
1456   /* ??? Macro arguments cannot have multi-argument template types in
1457      them.  This typedef is needed to workaround that limitation.  */
1458   typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1459   subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1460   EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1461     {
1462       gimple oedef;
1463       enum tree_code oecode;
1464       unsigned j;
1465
1466       oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1467       oecode = gimple_assign_rhs_code (oedef);
1468       linearize_expr_tree (&subops[i], oedef,
1469                            associative_tree_code (oecode), false);
1470
1471       FOR_EACH_VEC_ELT (subops[i], j, oe1)
1472         {
1473           oecount c;
1474           void **slot;
1475           size_t idx;
1476           c.oecode = oecode;
1477           c.cnt = 1;
1478           c.id = next_oecount_id++;
1479           c.op = oe1->op;
1480           cvec.safe_push (c);
1481           idx = cvec.length () + 41;
1482           slot = ctable.find_slot ((void *)idx, INSERT);
1483           if (!*slot)
1484             {
1485               *slot = (void *)idx;
1486             }
1487           else
1488             {
1489               cvec.pop ();
1490               cvec[(size_t)*slot - 42].cnt++;
1491             }
1492         }
1493     }
1494   ctable.dispose ();
1495
1496   /* Sort the counting table.  */
1497   cvec.qsort (oecount_cmp);
1498
1499   if (dump_file && (dump_flags & TDF_DETAILS))
1500     {
1501       oecount *c;
1502       fprintf (dump_file, "Candidates:\n");
1503       FOR_EACH_VEC_ELT (cvec, j, c)
1504         {
1505           fprintf (dump_file, "  %u %s: ", c->cnt,
1506                    c->oecode == MULT_EXPR
1507                    ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1508           print_generic_expr (dump_file, c->op, 0);
1509           fprintf (dump_file, "\n");
1510         }
1511     }
1512
1513   /* Process the (operand, code) pairs in order of most occurrence.  */
1514   candidates2 = sbitmap_alloc (length);
1515   while (!cvec.is_empty ())
1516     {
1517       oecount *c = &cvec.last ();
1518       if (c->cnt < 2)
1519         break;
1520
1521       /* Now collect the operands in the outer chain that contain
1522          the common operand in their inner chain.  */
1523       bitmap_clear (candidates2);
1524       nr_candidates2 = 0;
1525       EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1526         {
1527           gimple oedef;
1528           enum tree_code oecode;
1529           unsigned j;
1530           tree op = (*ops)[i]->op;
1531
1532           /* If we undistributed in this chain already this may be
1533              a constant.  */
1534           if (TREE_CODE (op) != SSA_NAME)
1535             continue;
1536
1537           oedef = SSA_NAME_DEF_STMT (op);
1538           oecode = gimple_assign_rhs_code (oedef);
1539           if (oecode != c->oecode)
1540             continue;
1541
1542           FOR_EACH_VEC_ELT (subops[i], j, oe1)
1543             {
1544               if (oe1->op == c->op)
1545                 {
1546                   bitmap_set_bit (candidates2, i);
1547                   ++nr_candidates2;
1548                   break;
1549                 }
1550             }
1551         }
1552
1553       if (nr_candidates2 >= 2)
1554         {
1555           operand_entry_t oe1, oe2;
1556           gimple prod;
1557           int first = bitmap_first_set_bit (candidates2);
1558
1559           /* Build the new addition chain.  */
1560           oe1 = (*ops)[first];
1561           if (dump_file && (dump_flags & TDF_DETAILS))
1562             {
1563               fprintf (dump_file, "Building (");
1564               print_generic_expr (dump_file, oe1->op, 0);
1565             }
1566           zero_one_operation (&oe1->op, c->oecode, c->op);
1567           EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1568             {
1569               gimple sum;
1570               oe2 = (*ops)[i];
1571               if (dump_file && (dump_flags & TDF_DETAILS))
1572                 {
1573                   fprintf (dump_file, " + ");
1574                   print_generic_expr (dump_file, oe2->op, 0);
1575                 }
1576               zero_one_operation (&oe2->op, c->oecode, c->op);
1577               sum = build_and_add_sum (TREE_TYPE (oe1->op),
1578                                        oe1->op, oe2->op, opcode);
1579               oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1580               oe2->rank = 0;
1581               oe1->op = gimple_get_lhs (sum);
1582             }
1583
1584           /* Apply the multiplication/division.  */
1585           prod = build_and_add_sum (TREE_TYPE (oe1->op),
1586                                     oe1->op, c->op, c->oecode);
1587           if (dump_file && (dump_flags & TDF_DETAILS))
1588             {
1589               fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1590               print_generic_expr (dump_file, c->op, 0);
1591               fprintf (dump_file, "\n");
1592             }
1593
1594           /* Record it in the addition chain and disable further
1595              undistribution with this op.  */
1596           oe1->op = gimple_assign_lhs (prod);
1597           oe1->rank = get_rank (oe1->op);
1598           subops[first].release ();
1599
1600           changed = true;
1601         }
1602
1603       cvec.pop ();
1604     }
1605
1606   for (i = 0; i < ops->length (); ++i)
1607     subops[i].release ();
1608   free (subops);
1609   cvec.release ();
1610   sbitmap_free (candidates);
1611   sbitmap_free (candidates2);
1612
1613   return changed;
1614 }
1615
1616 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1617    expression, examine the other OPS to see if any of them are comparisons
1618    of the same values, which we may be able to combine or eliminate.
1619    For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
1620
1621 static bool
1622 eliminate_redundant_comparison (enum tree_code opcode,
1623                                 vec<operand_entry_t> *ops,
1624                                 unsigned int currindex,
1625                                 operand_entry_t curr)
1626 {
1627   tree op1, op2;
1628   enum tree_code lcode, rcode;
1629   gimple def1, def2;
1630   int i;
1631   operand_entry_t oe;
1632
1633   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1634     return false;
1635
1636   /* Check that CURR is a comparison.  */
1637   if (TREE_CODE (curr->op) != SSA_NAME)
1638     return false;
1639   def1 = SSA_NAME_DEF_STMT (curr->op);
1640   if (!is_gimple_assign (def1))
1641     return false;
1642   lcode = gimple_assign_rhs_code (def1);
1643   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1644     return false;
1645   op1 = gimple_assign_rhs1 (def1);
1646   op2 = gimple_assign_rhs2 (def1);
1647
1648   /* Now look for a similar comparison in the remaining OPS.  */
1649   for (i = currindex + 1; ops->iterate (i, &oe); i++)
1650     {
1651       tree t;
1652
1653       if (TREE_CODE (oe->op) != SSA_NAME)
1654         continue;
1655       def2 = SSA_NAME_DEF_STMT (oe->op);
1656       if (!is_gimple_assign (def2))
1657         continue;
1658       rcode = gimple_assign_rhs_code (def2);
1659       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1660         continue;
1661
1662       /* If we got here, we have a match.  See if we can combine the
1663          two comparisons.  */
1664       if (opcode == BIT_IOR_EXPR)
1665         t = maybe_fold_or_comparisons (lcode, op1, op2,
1666                                        rcode, gimple_assign_rhs1 (def2),
1667                                        gimple_assign_rhs2 (def2));
1668       else
1669         t = maybe_fold_and_comparisons (lcode, op1, op2,
1670                                         rcode, gimple_assign_rhs1 (def2),
1671                                         gimple_assign_rhs2 (def2));
1672       if (!t)
1673         continue;
1674
1675       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1676          always give us a boolean_type_node value back.  If the original
1677          BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1678          we need to convert.  */
1679       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1680         t = fold_convert (TREE_TYPE (curr->op), t);
1681
1682       if (TREE_CODE (t) != INTEGER_CST
1683           && !operand_equal_p (t, curr->op, 0))
1684         {
1685           enum tree_code subcode;
1686           tree newop1, newop2;
1687           if (!COMPARISON_CLASS_P (t))
1688             continue;
1689           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1690           STRIP_USELESS_TYPE_CONVERSION (newop1);
1691           STRIP_USELESS_TYPE_CONVERSION (newop2);
1692           if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1693             continue;
1694         }
1695
1696       if (dump_file && (dump_flags & TDF_DETAILS))
1697         {
1698           fprintf (dump_file, "Equivalence: ");
1699           print_generic_expr (dump_file, curr->op, 0);
1700           fprintf (dump_file, " %s ", op_symbol_code (opcode));
1701           print_generic_expr (dump_file, oe->op, 0);
1702           fprintf (dump_file, " -> ");
1703           print_generic_expr (dump_file, t, 0);
1704           fprintf (dump_file, "\n");
1705         }
1706
1707       /* Now we can delete oe, as it has been subsumed by the new combined
1708          expression t.  */
1709       ops->ordered_remove (i);
1710       reassociate_stats.ops_eliminated ++;
1711
1712       /* If t is the same as curr->op, we're done.  Otherwise we must
1713          replace curr->op with t.  Special case is if we got a constant
1714          back, in which case we add it to the end instead of in place of
1715          the current entry.  */
1716       if (TREE_CODE (t) == INTEGER_CST)
1717         {
1718           ops->ordered_remove (currindex);
1719           add_to_ops_vec (ops, t);
1720         }
1721       else if (!operand_equal_p (t, curr->op, 0))
1722         {
1723           gimple sum;
1724           enum tree_code subcode;
1725           tree newop1;
1726           tree newop2;
1727           gcc_assert (COMPARISON_CLASS_P (t));
1728           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1729           STRIP_USELESS_TYPE_CONVERSION (newop1);
1730           STRIP_USELESS_TYPE_CONVERSION (newop2);
1731           gcc_checking_assert (is_gimple_val (newop1)
1732                                && is_gimple_val (newop2));
1733           sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1734           curr->op = gimple_get_lhs (sum);
1735         }
1736       return true;
1737     }
1738
1739   return false;
1740 }
1741
1742 /* Perform various identities and other optimizations on the list of
1743    operand entries, stored in OPS.  The tree code for the binary
1744    operation between all the operands is OPCODE.  */
1745
1746 static void
1747 optimize_ops_list (enum tree_code opcode,
1748                    vec<operand_entry_t> *ops)
1749 {
1750   unsigned int length = ops->length ();
1751   unsigned int i;
1752   operand_entry_t oe;
1753   operand_entry_t oelast = NULL;
1754   bool iterate = false;
1755
1756   if (length == 1)
1757     return;
1758
1759   oelast = ops->last ();
1760
1761   /* If the last two are constants, pop the constants off, merge them
1762      and try the next two.  */
1763   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1764     {
1765       operand_entry_t oelm1 = (*ops)[length - 2];
1766
1767       if (oelm1->rank == 0
1768           && is_gimple_min_invariant (oelm1->op)
1769           && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1770                                        TREE_TYPE (oelast->op)))
1771         {
1772           tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1773                                      oelm1->op, oelast->op);
1774
1775           if (folded && is_gimple_min_invariant (folded))
1776             {
1777               if (dump_file && (dump_flags & TDF_DETAILS))
1778                 fprintf (dump_file, "Merging constants\n");
1779
1780               ops->pop ();
1781               ops->pop ();
1782
1783               add_to_ops_vec (ops, folded);
1784               reassociate_stats.constants_eliminated++;
1785
1786               optimize_ops_list (opcode, ops);
1787               return;
1788             }
1789         }
1790     }
1791
1792   eliminate_using_constants (opcode, ops);
1793   oelast = NULL;
1794
1795   for (i = 0; ops->iterate (i, &oe);)
1796     {
1797       bool done = false;
1798
1799       if (eliminate_not_pairs (opcode, ops, i, oe))
1800         return;
1801       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1802           || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1803           || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1804         {
1805           if (done)
1806             return;
1807           iterate = true;
1808           oelast = NULL;
1809           continue;
1810         }
1811       oelast = oe;
1812       i++;
1813     }
1814
1815   length = ops->length ();
1816   oelast = ops->last ();
1817
1818   if (iterate)
1819     optimize_ops_list (opcode, ops);
1820 }
1821
1822 /* The following functions are subroutines to optimize_range_tests and allow
1823    it to try to change a logical combination of comparisons into a range
1824    test.
1825
1826    For example, both
1827         X == 2 || X == 5 || X == 3 || X == 4
1828    and
1829         X >= 2 && X <= 5
1830    are converted to
1831         (unsigned) (X - 2) <= 3
1832
1833    For more information see comments above fold_test_range in fold-const.c,
1834    this implementation is for GIMPLE.  */
1835
1836 struct range_entry
1837 {
1838   tree exp;
1839   tree low;
1840   tree high;
1841   bool in_p;
1842   bool strict_overflow_p;
1843   unsigned int idx, next;
1844 };
1845
1846 /* This is similar to make_range in fold-const.c, but on top of
1847    GIMPLE instead of trees.  If EXP is non-NULL, it should be
1848    an SSA_NAME and STMT argument is ignored, otherwise STMT
1849    argument should be a GIMPLE_COND.  */
1850
1851 static void
1852 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1853 {
1854   int in_p;
1855   tree low, high;
1856   bool is_bool, strict_overflow_p;
1857
1858   r->exp = NULL_TREE;
1859   r->in_p = false;
1860   r->strict_overflow_p = false;
1861   r->low = NULL_TREE;
1862   r->high = NULL_TREE;
1863   if (exp != NULL_TREE
1864       && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1865     return;
1866
1867   /* Start with simply saying "EXP != 0" and then look at the code of EXP
1868      and see if we can refine the range.  Some of the cases below may not
1869      happen, but it doesn't seem worth worrying about this.  We "continue"
1870      the outer loop when we've changed something; otherwise we "break"
1871      the switch, which will "break" the while.  */
1872   low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1873   high = low;
1874   in_p = 0;
1875   strict_overflow_p = false;
1876   is_bool = false;
1877   if (exp == NULL_TREE)
1878     is_bool = true;
1879   else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1880     {
1881       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1882         is_bool = true;
1883       else
1884         return;
1885     }
1886   else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1887     is_bool = true;
1888
1889   while (1)
1890     {
1891       enum tree_code code;
1892       tree arg0, arg1, exp_type;
1893       tree nexp;
1894       location_t loc;
1895
1896       if (exp != NULL_TREE)
1897         {
1898           if (TREE_CODE (exp) != SSA_NAME
1899               || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1900             break;
1901
1902           stmt = SSA_NAME_DEF_STMT (exp);
1903           if (!is_gimple_assign (stmt))
1904             break;
1905
1906           code = gimple_assign_rhs_code (stmt);
1907           arg0 = gimple_assign_rhs1 (stmt);
1908           arg1 = gimple_assign_rhs2 (stmt);
1909           exp_type = TREE_TYPE (exp);
1910         }
1911       else
1912         {
1913           code = gimple_cond_code (stmt);
1914           arg0 = gimple_cond_lhs (stmt);
1915           arg1 = gimple_cond_rhs (stmt);
1916           exp_type = boolean_type_node;
1917         }
1918
1919       if (TREE_CODE (arg0) != SSA_NAME)
1920         break;
1921       loc = gimple_location (stmt);
1922       switch (code)
1923         {
1924         case BIT_NOT_EXPR:
1925           if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1926               /* Ensure the range is either +[-,0], +[0,0],
1927                  -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1928                  -[1,1].  If it is e.g. +[-,-] or -[-,-]
1929                  or similar expression of unconditional true or
1930                  false, it should not be negated.  */
1931               && ((high && integer_zerop (high))
1932                   || (low && integer_onep (low))))
1933             {
1934               in_p = !in_p;
1935               exp = arg0;
1936               continue;
1937             }
1938           break;
1939         case SSA_NAME:
1940           exp = arg0;
1941           continue;
1942         CASE_CONVERT:
1943           if (is_bool)
1944             goto do_default;
1945           if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1946             {
1947               if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1948                 is_bool = true;
1949               else
1950                 return;
1951             }
1952           else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1953             is_bool = true;
1954           goto do_default;
1955         case EQ_EXPR:
1956         case NE_EXPR:
1957         case LT_EXPR:
1958         case LE_EXPR:
1959         case GE_EXPR:
1960         case GT_EXPR:
1961           is_bool = true;
1962           /* FALLTHRU */
1963         default:
1964           if (!is_bool)
1965             return;
1966         do_default:
1967           nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1968                                   &low, &high, &in_p,
1969                                   &strict_overflow_p);
1970           if (nexp != NULL_TREE)
1971             {
1972               exp = nexp;
1973               gcc_assert (TREE_CODE (exp) == SSA_NAME);
1974               continue;
1975             }
1976           break;
1977         }
1978       break;
1979     }
1980   if (is_bool)
1981     {
1982       r->exp = exp;
1983       r->in_p = in_p;
1984       r->low = low;
1985       r->high = high;
1986       r->strict_overflow_p = strict_overflow_p;
1987     }
1988 }
1989
1990 /* Comparison function for qsort.  Sort entries
1991    without SSA_NAME exp first, then with SSA_NAMEs sorted
1992    by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1993    by increasing ->low and if ->low is the same, by increasing
1994    ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
1995    maximum.  */
1996
1997 static int
1998 range_entry_cmp (const void *a, const void *b)
1999 {
2000   const struct range_entry *p = (const struct range_entry *) a;
2001   const struct range_entry *q = (const struct range_entry *) b;
2002
2003   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2004     {
2005       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2006         {
2007           /* Group range_entries for the same SSA_NAME together.  */
2008           if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2009             return -1;
2010           else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2011             return 1;
2012           /* If ->low is different, NULL low goes first, then by
2013              ascending low.  */
2014           if (p->low != NULL_TREE)
2015             {
2016               if (q->low != NULL_TREE)
2017                 {
2018                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
2019                                           p->low, q->low);
2020                   if (tem && integer_onep (tem))
2021                     return -1;
2022                   tem = fold_binary (GT_EXPR, boolean_type_node,
2023                                      p->low, q->low);
2024                   if (tem && integer_onep (tem))
2025                     return 1;
2026                 }
2027               else
2028                 return 1;
2029             }
2030           else if (q->low != NULL_TREE)
2031             return -1;
2032           /* If ->high is different, NULL high goes last, before that by
2033              ascending high.  */
2034           if (p->high != NULL_TREE)
2035             {
2036               if (q->high != NULL_TREE)
2037                 {
2038                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
2039                                           p->high, q->high);
2040                   if (tem && integer_onep (tem))
2041                     return -1;
2042                   tem = fold_binary (GT_EXPR, boolean_type_node,
2043                                      p->high, q->high);
2044                   if (tem && integer_onep (tem))
2045                     return 1;
2046                 }
2047               else
2048                 return -1;
2049             }
2050           else if (p->high != NULL_TREE)
2051             return 1;
2052           /* If both ranges are the same, sort below by ascending idx.  */
2053         }
2054       else
2055         return 1;
2056     }
2057   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2058     return -1;
2059
2060   if (p->idx < q->idx)
2061     return -1;
2062   else
2063     {
2064       gcc_checking_assert (p->idx > q->idx);
2065       return 1;
2066     }
2067 }
2068
2069 /* Helper routine of optimize_range_test.
2070    [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2071    RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2072    OPCODE and OPS are arguments of optimize_range_tests.  Return
2073    true if the range merge has been successful.
2074    If OPCODE is ERROR_MARK, this is called from within
2075    maybe_optimize_range_tests and is performing inter-bb range optimization.
2076    In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2077    oe->rank.  */
2078
2079 static bool
2080 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2081                    unsigned int count, enum tree_code opcode,
2082                    vec<operand_entry_t> *ops, tree exp, bool in_p,
2083                    tree low, tree high, bool strict_overflow_p)
2084 {
2085   operand_entry_t oe = (*ops)[range->idx];
2086   tree op = oe->op;
2087   gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2088     last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2089   location_t loc = gimple_location (stmt);
2090   tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2091   tree tem = build_range_check (loc, optype, exp, in_p, low, high);
2092   enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2093   gimple_stmt_iterator gsi;
2094
2095   if (tem == NULL_TREE)
2096     return false;
2097
2098   if (strict_overflow_p && issue_strict_overflow_warning (wc))
2099     warning_at (loc, OPT_Wstrict_overflow,
2100                 "assuming signed overflow does not occur "
2101                 "when simplifying range test");
2102
2103   if (dump_file && (dump_flags & TDF_DETAILS))
2104     {
2105       struct range_entry *r;
2106       fprintf (dump_file, "Optimizing range tests ");
2107       print_generic_expr (dump_file, range->exp, 0);
2108       fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2109       print_generic_expr (dump_file, range->low, 0);
2110       fprintf (dump_file, ", ");
2111       print_generic_expr (dump_file, range->high, 0);
2112       fprintf (dump_file, "]");
2113       for (r = otherrange; r < otherrange + count; r++)
2114         {
2115           fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2116           print_generic_expr (dump_file, r->low, 0);
2117           fprintf (dump_file, ", ");
2118           print_generic_expr (dump_file, r->high, 0);
2119           fprintf (dump_file, "]");
2120         }
2121       fprintf (dump_file, "\n into ");
2122       print_generic_expr (dump_file, tem, 0);
2123       fprintf (dump_file, "\n");
2124     }
2125
2126   if (opcode == BIT_IOR_EXPR
2127       || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2128     tem = invert_truthvalue_loc (loc, tem);
2129
2130   tem = fold_convert_loc (loc, optype, tem);
2131   gsi = gsi_for_stmt (stmt);
2132   /* In rare cases range->exp can be equal to lhs of stmt.
2133      In that case we have to insert after the stmt rather then before
2134      it.  */
2135   if (op == range->exp)
2136     tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2137                                     GSI_CONTINUE_LINKING);
2138   else
2139     {
2140       tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2141                                       GSI_SAME_STMT);
2142       gsi_prev (&gsi);
2143     }
2144   for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2145     if (gimple_uid (gsi_stmt (gsi)))
2146       break;
2147     else
2148       gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2149
2150   oe->op = tem;
2151   range->exp = exp;
2152   range->low = low;
2153   range->high = high;
2154   range->in_p = in_p;
2155   range->strict_overflow_p = false;
2156
2157   for (range = otherrange; range < otherrange + count; range++)
2158     {
2159       oe = (*ops)[range->idx];
2160       /* Now change all the other range test immediate uses, so that
2161          those tests will be optimized away.  */
2162       if (opcode == ERROR_MARK)
2163         {
2164           if (oe->op)
2165             oe->op = build_int_cst (TREE_TYPE (oe->op),
2166                                     oe->rank == BIT_IOR_EXPR ? 0 : 1);
2167           else
2168             oe->op = (oe->rank == BIT_IOR_EXPR
2169                       ? boolean_false_node : boolean_true_node);
2170         }
2171       else
2172         oe->op = error_mark_node;
2173       range->exp = NULL_TREE;
2174     }
2175   return true;
2176 }
2177
2178 /* Optimize X == CST1 || X == CST2
2179    if popcount (CST1 ^ CST2) == 1 into
2180    (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2181    Similarly for ranges.  E.g.
2182    X != 2 && X != 3 && X != 10 && X != 11
2183    will be transformed by the previous optimization into
2184    !((X - 2U) <= 1U || (X - 10U) <= 1U)
2185    and this loop can transform that into
2186    !(((X & ~8) - 2U) <= 1U).  */
2187
2188 static bool
2189 optimize_range_tests_xor (enum tree_code opcode, tree type,
2190                           tree lowi, tree lowj, tree highi, tree highj,
2191                           vec<operand_entry_t> *ops,
2192                           struct range_entry *rangei,
2193                           struct range_entry *rangej)
2194 {
2195   tree lowxor, highxor, tem, exp;
2196   /* Check highi ^ lowi == highj ^ lowj and
2197      popcount (highi ^ lowi) == 1.  */
2198   lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2199   if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2200     return false;
2201   if (tree_log2 (lowxor) < 0)
2202     return false;
2203   highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2204   if (!tree_int_cst_equal (lowxor, highxor))
2205     return false;
2206
2207   tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2208   exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2209   lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2210   highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2211   if (update_range_test (rangei, rangej, 1, opcode, ops, exp,
2212                          rangei->in_p, lowj, highj,
2213                          rangei->strict_overflow_p
2214                          || rangej->strict_overflow_p))
2215     return true;
2216   return false;
2217 }
2218
2219 /* Optimize X == CST1 || X == CST2
2220    if popcount (CST2 - CST1) == 1 into
2221    ((X - CST1) & ~(CST2 - CST1)) == 0.
2222    Similarly for ranges.  E.g.
2223    X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2224    || X == 75 || X == 45
2225    will be transformed by the previous optimization into
2226    (X - 43U) <= 3U || (X - 75U) <= 3U
2227    and this loop can transform that into
2228    ((X - 43U) & ~(75U - 43U)) <= 3U.  */
2229 static bool
2230 optimize_range_tests_diff (enum tree_code opcode, tree type,
2231                             tree lowi, tree lowj, tree highi, tree highj,
2232                             vec<operand_entry_t> *ops,
2233                             struct range_entry *rangei,
2234                             struct range_entry *rangej)
2235 {
2236   tree tem1, tem2, mask;
2237   /* Check highi - lowi == highj - lowj.  */
2238   tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2239   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2240     return false;
2241   tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2242   if (!tree_int_cst_equal (tem1, tem2))
2243     return false;
2244   /* Check popcount (lowj - lowi) == 1.  */
2245   tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2246   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2247     return false;
2248   if (tree_log2 (tem1) < 0)
2249     return false;
2250
2251   mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2252   tem1 = fold_binary (MINUS_EXPR, type, rangei->exp, lowi);
2253   tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2254   lowj = build_int_cst (type, 0);
2255   if (update_range_test (rangei, rangej, 1, opcode, ops, tem1,
2256                          rangei->in_p, lowj, tem2,
2257                          rangei->strict_overflow_p
2258                          || rangej->strict_overflow_p))
2259     return true;
2260   return false;
2261 }
2262
2263 /* It does some common checks for function optimize_range_tests_xor and
2264    optimize_range_tests_diff.
2265    If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2266    Else it calls optimize_range_tests_diff.  */
2267
2268 static bool
2269 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2270                         bool optimize_xor, vec<operand_entry_t> *ops,
2271                         struct range_entry *ranges)
2272 {
2273   int i, j;
2274   bool any_changes = false;
2275   for (i = first; i < length; i++)
2276     {
2277       tree lowi, highi, lowj, highj, type, tem;
2278
2279       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2280         continue;
2281       type = TREE_TYPE (ranges[i].exp);
2282       if (!INTEGRAL_TYPE_P (type))
2283         continue;
2284       lowi = ranges[i].low;
2285       if (lowi == NULL_TREE)
2286         lowi = TYPE_MIN_VALUE (type);
2287       highi = ranges[i].high;
2288       if (highi == NULL_TREE)
2289         continue;
2290       for (j = i + 1; j < length && j < i + 64; j++)
2291         {
2292           bool changes;
2293           if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2294             continue;
2295           lowj = ranges[j].low;
2296           if (lowj == NULL_TREE)
2297             continue;
2298           highj = ranges[j].high;
2299           if (highj == NULL_TREE)
2300             highj = TYPE_MAX_VALUE (type);
2301           /* Check lowj > highi.  */
2302           tem = fold_binary (GT_EXPR, boolean_type_node,
2303                              lowj, highi);
2304           if (tem == NULL_TREE || !integer_onep (tem))
2305             continue;
2306           if (optimize_xor)
2307             changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2308                                                 highi, highj, ops,
2309                                                 ranges + i, ranges + j);
2310           else
2311             changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2312                                                  highi, highj, ops,
2313                                                  ranges + i, ranges + j);
2314           if (changes)
2315             {
2316               any_changes = true;
2317               break;
2318             }
2319         }
2320     }
2321   return any_changes;
2322 }
2323
2324 /* Optimize range tests, similarly how fold_range_test optimizes
2325    it on trees.  The tree code for the binary
2326    operation between all the operands is OPCODE.
2327    If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2328    maybe_optimize_range_tests for inter-bb range optimization.
2329    In that case if oe->op is NULL, oe->id is bb->index whose
2330    GIMPLE_COND is && or ||ed into the test, and oe->rank says
2331    the actual opcode.  */
2332
2333 static bool
2334 optimize_range_tests (enum tree_code opcode,
2335                       vec<operand_entry_t> *ops)
2336 {
2337   unsigned int length = ops->length (), i, j, first;
2338   operand_entry_t oe;
2339   struct range_entry *ranges;
2340   bool any_changes = false;
2341
2342   if (length == 1)
2343     return false;
2344
2345   ranges = XNEWVEC (struct range_entry, length);
2346   for (i = 0; i < length; i++)
2347     {
2348       oe = (*ops)[i];
2349       ranges[i].idx = i;
2350       init_range_entry (ranges + i, oe->op,
2351                         oe->op ? NULL :
2352                           last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2353       /* For | invert it now, we will invert it again before emitting
2354          the optimized expression.  */
2355       if (opcode == BIT_IOR_EXPR
2356           || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2357         ranges[i].in_p = !ranges[i].in_p;
2358     }
2359
2360   qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2361   for (i = 0; i < length; i++)
2362     if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2363       break;
2364
2365   /* Try to merge ranges.  */
2366   for (first = i; i < length; i++)
2367     {
2368       tree low = ranges[i].low;
2369       tree high = ranges[i].high;
2370       int in_p = ranges[i].in_p;
2371       bool strict_overflow_p = ranges[i].strict_overflow_p;
2372       int update_fail_count = 0;
2373
2374       for (j = i + 1; j < length; j++)
2375         {
2376           if (ranges[i].exp != ranges[j].exp)
2377             break;
2378           if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2379                              ranges[j].in_p, ranges[j].low, ranges[j].high))
2380             break;
2381           strict_overflow_p |= ranges[j].strict_overflow_p;
2382         }
2383
2384       if (j == i + 1)
2385         continue;
2386
2387       if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2388                              ops, ranges[i].exp, in_p, low, high,
2389                              strict_overflow_p))
2390         {
2391           i = j - 1;
2392           any_changes = true;
2393         }
2394       /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2395          while update_range_test would fail.  */
2396       else if (update_fail_count == 64)
2397         i = j - 1;
2398       else
2399         ++update_fail_count;
2400     }
2401
2402   any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2403                                          ops, ranges);
2404
2405   if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2406     any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2407                                            ops, ranges);
2408
2409   if (any_changes && opcode != ERROR_MARK)
2410     {
2411       j = 0;
2412       FOR_EACH_VEC_ELT (*ops, i, oe)
2413         {
2414           if (oe->op == error_mark_node)
2415             continue;
2416           else if (i != j)
2417             (*ops)[j] = oe;
2418           j++;
2419         }
2420       ops->truncate (j);
2421     }
2422
2423   XDELETEVEC (ranges);
2424   return any_changes;
2425 }
2426
2427 /* Return true if STMT is a cast like:
2428    <bb N>:
2429    ...
2430    _123 = (int) _234;
2431
2432    <bb M>:
2433    # _345 = PHI <_123(N), 1(...), 1(...)>
2434    where _234 has bool type, _123 has single use and
2435    bb N has a single successor M.  This is commonly used in
2436    the last block of a range test.  */
2437
2438 static bool
2439 final_range_test_p (gimple stmt)
2440 {
2441   basic_block bb, rhs_bb;
2442   edge e;
2443   tree lhs, rhs;
2444   use_operand_p use_p;
2445   gimple use_stmt;
2446
2447   if (!gimple_assign_cast_p (stmt))
2448     return false;
2449   bb = gimple_bb (stmt);
2450   if (!single_succ_p (bb))
2451     return false;
2452   e = single_succ_edge (bb);
2453   if (e->flags & EDGE_COMPLEX)
2454     return false;
2455
2456   lhs = gimple_assign_lhs (stmt);
2457   rhs = gimple_assign_rhs1 (stmt);
2458   if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2459       || TREE_CODE (rhs) != SSA_NAME
2460       || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2461     return false;
2462
2463   /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
2464   if (!single_imm_use (lhs, &use_p, &use_stmt))
2465     return false;
2466
2467   if (gimple_code (use_stmt) != GIMPLE_PHI
2468       || gimple_bb (use_stmt) != e->dest)
2469     return false;
2470
2471   /* And that the rhs is defined in the same loop.  */
2472   rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2473   if (rhs_bb == NULL
2474       || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2475     return false;
2476
2477   return true;
2478 }
2479
2480 /* Return true if BB is suitable basic block for inter-bb range test
2481    optimization.  If BACKWARD is true, BB should be the only predecessor
2482    of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2483    or compared with to find a common basic block to which all conditions
2484    branch to if true resp. false.  If BACKWARD is false, TEST_BB should
2485    be the only predecessor of BB.  */
2486
2487 static bool
2488 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2489                   bool backward)
2490 {
2491   edge_iterator ei, ei2;
2492   edge e, e2;
2493   gimple stmt;
2494   gimple_stmt_iterator gsi;
2495   bool other_edge_seen = false;
2496   bool is_cond;
2497
2498   if (test_bb == bb)
2499     return false;
2500   /* Check last stmt first.  */
2501   stmt = last_stmt (bb);
2502   if (stmt == NULL
2503       || (gimple_code (stmt) != GIMPLE_COND
2504           && (backward || !final_range_test_p (stmt)))
2505       || gimple_visited_p (stmt)
2506       || stmt_could_throw_p (stmt)
2507       || *other_bb == bb)
2508     return false;
2509   is_cond = gimple_code (stmt) == GIMPLE_COND;
2510   if (is_cond)
2511     {
2512       /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2513          goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2514          to *OTHER_BB (if not set yet, try to find it out).  */
2515       if (EDGE_COUNT (bb->succs) != 2)
2516         return false;
2517       FOR_EACH_EDGE (e, ei, bb->succs)
2518         {
2519           if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2520             return false;
2521           if (e->dest == test_bb)
2522             {
2523               if (backward)
2524                 continue;
2525               else
2526                 return false;
2527             }
2528           if (e->dest == bb)
2529             return false;
2530           if (*other_bb == NULL)
2531             {
2532               FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2533                 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2534                   return false;
2535                 else if (e->dest == e2->dest)
2536                   *other_bb = e->dest;
2537               if (*other_bb == NULL)
2538                 return false;
2539             }
2540           if (e->dest == *other_bb)
2541             other_edge_seen = true;
2542           else if (backward)
2543             return false;
2544         }
2545       if (*other_bb == NULL || !other_edge_seen)
2546         return false;
2547     }
2548   else if (single_succ (bb) != *other_bb)
2549     return false;
2550
2551   /* Now check all PHIs of *OTHER_BB.  */
2552   e = find_edge (bb, *other_bb);
2553   e2 = find_edge (test_bb, *other_bb);
2554   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2555     {
2556       gimple phi = gsi_stmt (gsi);
2557       /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2558          corresponding to BB and TEST_BB predecessor must be the same.  */
2559       if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2560                             gimple_phi_arg_def (phi, e2->dest_idx), 0))
2561         {
2562           /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2563              one of the PHIs should have the lhs of the last stmt in
2564              that block as PHI arg and that PHI should have 0 or 1
2565              corresponding to it in all other range test basic blocks
2566              considered.  */
2567           if (!is_cond)
2568             {
2569               if (gimple_phi_arg_def (phi, e->dest_idx)
2570                   == gimple_assign_lhs (stmt)
2571                   && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2572                       || integer_onep (gimple_phi_arg_def (phi,
2573                                                            e2->dest_idx))))
2574                 continue;
2575             }
2576           else
2577             {
2578               gimple test_last = last_stmt (test_bb);
2579               if (gimple_code (test_last) != GIMPLE_COND
2580                   && gimple_phi_arg_def (phi, e2->dest_idx)
2581                      == gimple_assign_lhs (test_last)
2582                   && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2583                       || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2584                 continue;
2585             }
2586
2587           return false;
2588         }
2589     }
2590   return true;
2591 }
2592
2593 /* Return true if BB doesn't have side-effects that would disallow
2594    range test optimization, all SSA_NAMEs set in the bb are consumed
2595    in the bb and there are no PHIs.  */
2596
2597 static bool
2598 no_side_effect_bb (basic_block bb)
2599 {
2600   gimple_stmt_iterator gsi;
2601   gimple last;
2602
2603   if (!gimple_seq_empty_p (phi_nodes (bb)))
2604     return false;
2605   last = last_stmt (bb);
2606   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2607     {
2608       gimple stmt = gsi_stmt (gsi);
2609       tree lhs;
2610       imm_use_iterator imm_iter;
2611       use_operand_p use_p;
2612
2613       if (is_gimple_debug (stmt))
2614         continue;
2615       if (gimple_has_side_effects (stmt))
2616         return false;
2617       if (stmt == last)
2618         return true;
2619       if (!is_gimple_assign (stmt))
2620         return false;
2621       lhs = gimple_assign_lhs (stmt);
2622       if (TREE_CODE (lhs) != SSA_NAME)
2623         return false;
2624       if (gimple_assign_rhs_could_trap_p (stmt))
2625         return false;
2626       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2627         {
2628           gimple use_stmt = USE_STMT (use_p);
2629           if (is_gimple_debug (use_stmt))
2630             continue;
2631           if (gimple_bb (use_stmt) != bb)
2632             return false;
2633         }
2634     }
2635   return false;
2636 }
2637
2638 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2639    return true and fill in *OPS recursively.  */
2640
2641 static bool
2642 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2643          struct loop *loop)
2644 {
2645   gimple stmt = SSA_NAME_DEF_STMT (var);
2646   tree rhs[2];
2647   int i;
2648
2649   if (!is_reassociable_op (stmt, code, loop))
2650     return false;
2651
2652   rhs[0] = gimple_assign_rhs1 (stmt);
2653   rhs[1] = gimple_assign_rhs2 (stmt);
2654   gimple_set_visited (stmt, true);
2655   for (i = 0; i < 2; i++)
2656     if (TREE_CODE (rhs[i]) == SSA_NAME
2657         && !get_ops (rhs[i], code, ops, loop)
2658         && has_single_use (rhs[i]))
2659       {
2660         operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2661
2662         oe->op = rhs[i];
2663         oe->rank = code;
2664         oe->id = 0;
2665         oe->count = 1;
2666         ops->safe_push (oe);
2667       }
2668   return true;
2669 }
2670
2671 /* Find the ops that were added by get_ops starting from VAR, see if
2672    they were changed during update_range_test and if yes, create new
2673    stmts.  */
2674
2675 static tree
2676 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2677             unsigned int *pidx, struct loop *loop)
2678 {
2679   gimple stmt = SSA_NAME_DEF_STMT (var);
2680   tree rhs[4];
2681   int i;
2682
2683   if (!is_reassociable_op (stmt, code, loop))
2684     return NULL;
2685
2686   rhs[0] = gimple_assign_rhs1 (stmt);
2687   rhs[1] = gimple_assign_rhs2 (stmt);
2688   rhs[2] = rhs[0];
2689   rhs[3] = rhs[1];
2690   for (i = 0; i < 2; i++)
2691     if (TREE_CODE (rhs[i]) == SSA_NAME)
2692       {
2693         rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2694         if (rhs[2 + i] == NULL_TREE)
2695           {
2696             if (has_single_use (rhs[i]))
2697               rhs[2 + i] = ops[(*pidx)++]->op;
2698             else
2699               rhs[2 + i] = rhs[i];
2700           }
2701       }
2702   if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2703       && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2704     {
2705       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2706       var = make_ssa_name (TREE_TYPE (var), NULL);
2707       gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
2708                                                var, rhs[2], rhs[3]);
2709       gimple_set_uid (g, gimple_uid (stmt));
2710       gimple_set_visited (g, true);
2711       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2712     }
2713   return var;
2714 }
2715
2716 /* Structure to track the initial value passed to get_ops and
2717    the range in the ops vector for each basic block.  */
2718
2719 struct inter_bb_range_test_entry
2720 {
2721   tree op;
2722   unsigned int first_idx, last_idx;
2723 };
2724
2725 /* Inter-bb range test optimization.  */
2726
2727 static void
2728 maybe_optimize_range_tests (gimple stmt)
2729 {
2730   basic_block first_bb = gimple_bb (stmt);
2731   basic_block last_bb = first_bb;
2732   basic_block other_bb = NULL;
2733   basic_block bb;
2734   edge_iterator ei;
2735   edge e;
2736   auto_vec<operand_entry_t> ops;
2737   auto_vec<inter_bb_range_test_entry> bbinfo;
2738   bool any_changes = false;
2739
2740   /* Consider only basic blocks that end with GIMPLE_COND or
2741      a cast statement satisfying final_range_test_p.  All
2742      but the last bb in the first_bb .. last_bb range
2743      should end with GIMPLE_COND.  */
2744   if (gimple_code (stmt) == GIMPLE_COND)
2745     {
2746       if (EDGE_COUNT (first_bb->succs) != 2)
2747         return;
2748     }
2749   else if (final_range_test_p (stmt))
2750     other_bb = single_succ (first_bb);
2751   else
2752     return;
2753
2754   if (stmt_could_throw_p (stmt))
2755     return;
2756
2757   /* As relative ordering of post-dominator sons isn't fixed,
2758      maybe_optimize_range_tests can be called first on any
2759      bb in the range we want to optimize.  So, start searching
2760      backwards, if first_bb can be set to a predecessor.  */
2761   while (single_pred_p (first_bb))
2762     {
2763       basic_block pred_bb = single_pred (first_bb);
2764       if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2765         break;
2766       if (!no_side_effect_bb (first_bb))
2767         break;
2768       first_bb = pred_bb;
2769     }
2770   /* If first_bb is last_bb, other_bb hasn't been computed yet.
2771      Before starting forward search in last_bb successors, find
2772      out the other_bb.  */
2773   if (first_bb == last_bb)
2774     {
2775       other_bb = NULL;
2776       /* As non-GIMPLE_COND last stmt always terminates the range,
2777          if forward search didn't discover anything, just give up.  */
2778       if (gimple_code (stmt) != GIMPLE_COND)
2779         return;
2780       /* Look at both successors.  Either it ends with a GIMPLE_COND
2781          and satisfies suitable_cond_bb, or ends with a cast and
2782          other_bb is that cast's successor.  */
2783       FOR_EACH_EDGE (e, ei, first_bb->succs)
2784         if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2785             || e->dest == first_bb)
2786           return;
2787         else if (single_pred_p (e->dest))
2788           {
2789             stmt = last_stmt (e->dest);
2790             if (stmt
2791                 && gimple_code (stmt) == GIMPLE_COND
2792                 && EDGE_COUNT (e->dest->succs) == 2)
2793               {
2794                 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2795                   break;
2796                 else
2797                   other_bb = NULL;
2798               }
2799             else if (stmt
2800                      && final_range_test_p (stmt)
2801                      && find_edge (first_bb, single_succ (e->dest)))
2802               {
2803                 other_bb = single_succ (e->dest);
2804                 if (other_bb == first_bb)
2805                   other_bb = NULL;
2806               }
2807           }
2808       if (other_bb == NULL)
2809         return;
2810     }
2811   /* Now do the forward search, moving last_bb to successor bbs
2812      that aren't other_bb.  */
2813   while (EDGE_COUNT (last_bb->succs) == 2)
2814     {
2815       FOR_EACH_EDGE (e, ei, last_bb->succs)
2816         if (e->dest != other_bb)
2817           break;
2818       if (e == NULL)
2819         break;
2820       if (!single_pred_p (e->dest))
2821         break;
2822       if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2823         break;
2824       if (!no_side_effect_bb (e->dest))
2825         break;
2826       last_bb = e->dest;
2827     }
2828   if (first_bb == last_bb)
2829     return;
2830   /* Here basic blocks first_bb through last_bb's predecessor
2831      end with GIMPLE_COND, all of them have one of the edges to
2832      other_bb and another to another block in the range,
2833      all blocks except first_bb don't have side-effects and
2834      last_bb ends with either GIMPLE_COND, or cast satisfying
2835      final_range_test_p.  */
2836   for (bb = last_bb; ; bb = single_pred (bb))
2837     {
2838       enum tree_code code;
2839       tree lhs, rhs;
2840       inter_bb_range_test_entry bb_ent;
2841
2842       bb_ent.op = NULL_TREE;
2843       bb_ent.first_idx = ops.length ();
2844       bb_ent.last_idx = bb_ent.first_idx;
2845       e = find_edge (bb, other_bb);
2846       stmt = last_stmt (bb);
2847       gimple_set_visited (stmt, true);
2848       if (gimple_code (stmt) != GIMPLE_COND)
2849         {
2850           use_operand_p use_p;
2851           gimple phi;
2852           edge e2;
2853           unsigned int d;
2854
2855           lhs = gimple_assign_lhs (stmt);
2856           rhs = gimple_assign_rhs1 (stmt);
2857           gcc_assert (bb == last_bb);
2858
2859           /* stmt is
2860              _123 = (int) _234;
2861
2862              followed by:
2863              <bb M>:
2864              # _345 = PHI <_123(N), 1(...), 1(...)>
2865
2866              or 0 instead of 1.  If it is 0, the _234
2867              range test is anded together with all the
2868              other range tests, if it is 1, it is ored with
2869              them.  */
2870           single_imm_use (lhs, &use_p, &phi);
2871           gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2872           e2 = find_edge (first_bb, other_bb);
2873           d = e2->dest_idx;
2874           gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2875           if (integer_zerop (gimple_phi_arg_def (phi, d)))
2876             code = BIT_AND_EXPR;
2877           else
2878             {
2879               gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2880               code = BIT_IOR_EXPR;
2881             }
2882
2883           /* If _234 SSA_NAME_DEF_STMT is
2884              _234 = _567 | _789;
2885              (or &, corresponding to 1/0 in the phi arguments,
2886              push into ops the individual range test arguments
2887              of the bitwise or resp. and, recursively.  */
2888           if (!get_ops (rhs, code, &ops,
2889                         loop_containing_stmt (stmt))
2890               && has_single_use (rhs))
2891             {
2892               /* Otherwise, push the _234 range test itself.  */
2893               operand_entry_t oe
2894                 = (operand_entry_t) pool_alloc (operand_entry_pool);
2895
2896               oe->op = rhs;
2897               oe->rank = code;
2898               oe->id = 0;
2899               oe->count = 1;
2900               ops.safe_push (oe);
2901               bb_ent.last_idx++;
2902             }
2903           else
2904             bb_ent.last_idx = ops.length ();
2905           bb_ent.op = rhs;
2906           bbinfo.safe_push (bb_ent);
2907           continue;
2908         }
2909       /* Otherwise stmt is GIMPLE_COND.  */
2910       code = gimple_cond_code (stmt);
2911       lhs = gimple_cond_lhs (stmt);
2912       rhs = gimple_cond_rhs (stmt);
2913       if (TREE_CODE (lhs) == SSA_NAME
2914           && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2915           && ((code != EQ_EXPR && code != NE_EXPR)
2916               || rhs != boolean_false_node
2917                  /* Either push into ops the individual bitwise
2918                     or resp. and operands, depending on which
2919                     edge is other_bb.  */
2920               || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2921                                  ^ (code == EQ_EXPR))
2922                                 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2923                            loop_containing_stmt (stmt))))
2924         {
2925           /* Or push the GIMPLE_COND stmt itself.  */
2926           operand_entry_t oe
2927             = (operand_entry_t) pool_alloc (operand_entry_pool);
2928
2929           oe->op = NULL;
2930           oe->rank = (e->flags & EDGE_TRUE_VALUE)
2931                      ? BIT_IOR_EXPR : BIT_AND_EXPR;
2932           /* oe->op = NULL signs that there is no SSA_NAME
2933              for the range test, and oe->id instead is the
2934              basic block number, at which's end the GIMPLE_COND
2935              is.  */
2936           oe->id = bb->index;
2937           oe->count = 1;
2938           ops.safe_push (oe);
2939           bb_ent.op = NULL;
2940           bb_ent.last_idx++;
2941         }
2942       else if (ops.length () > bb_ent.first_idx)
2943         {
2944           bb_ent.op = lhs;
2945           bb_ent.last_idx = ops.length ();
2946         }
2947       bbinfo.safe_push (bb_ent);
2948       if (bb == first_bb)
2949         break;
2950     }
2951   if (ops.length () > 1)
2952     any_changes = optimize_range_tests (ERROR_MARK, &ops);
2953   if (any_changes)
2954     {
2955       unsigned int idx;
2956       /* update_ops relies on has_single_use predicates returning the
2957          same values as it did during get_ops earlier.  Additionally it
2958          never removes statements, only adds new ones and it should walk
2959          from the single imm use and check the predicate already before
2960          making those changes.
2961          On the other side, the handling of GIMPLE_COND directly can turn
2962          previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
2963          it needs to be done in a separate loop afterwards.  */
2964       for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2965         {
2966           if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2967               && bbinfo[idx].op != NULL_TREE)
2968             {
2969               tree new_op;
2970
2971               stmt = last_stmt (bb);
2972               new_op = update_ops (bbinfo[idx].op,
2973                                    (enum tree_code)
2974                                    ops[bbinfo[idx].first_idx]->rank,
2975                                    ops, &bbinfo[idx].first_idx,
2976                                    loop_containing_stmt (stmt));
2977               if (new_op == NULL_TREE)
2978                 {
2979                   gcc_assert (bb == last_bb);
2980                   new_op = ops[bbinfo[idx].first_idx++]->op;
2981                 }
2982               if (bbinfo[idx].op != new_op)
2983                 {
2984                   imm_use_iterator iter;
2985                   use_operand_p use_p;
2986                   gimple use_stmt, cast_stmt = NULL;
2987
2988                   FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
2989                     if (is_gimple_debug (use_stmt))
2990                       continue;
2991                     else if (gimple_code (use_stmt) == GIMPLE_COND
2992                              || gimple_code (use_stmt) == GIMPLE_PHI)
2993                       FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2994                         SET_USE (use_p, new_op);
2995                     else if (gimple_assign_cast_p (use_stmt))
2996                       cast_stmt = use_stmt;
2997                     else
2998                       gcc_unreachable ();
2999                   if (cast_stmt)
3000                     {
3001                       gcc_assert (bb == last_bb);
3002                       tree lhs = gimple_assign_lhs (cast_stmt);
3003                       tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3004                       enum tree_code rhs_code
3005                         = gimple_assign_rhs_code (cast_stmt);
3006                       gimple g;
3007                       if (is_gimple_min_invariant (new_op))
3008                         {
3009                           new_op = fold_convert (TREE_TYPE (lhs), new_op);
3010                           g = gimple_build_assign (new_lhs, new_op);
3011                         }
3012                       else
3013                         g = gimple_build_assign_with_ops (rhs_code, new_lhs,
3014                                                           new_op, NULL_TREE);
3015                       gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3016                       gimple_set_uid (g, gimple_uid (cast_stmt));
3017                       gimple_set_visited (g, true);
3018                       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3019                       FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3020                         if (is_gimple_debug (use_stmt))
3021                           continue;
3022                         else if (gimple_code (use_stmt) == GIMPLE_COND
3023                                  || gimple_code (use_stmt) == GIMPLE_PHI)
3024                           FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3025                             SET_USE (use_p, new_lhs);
3026                         else
3027                           gcc_unreachable ();
3028                     }
3029                 }
3030             }
3031           if (bb == first_bb)
3032             break;
3033         }
3034       for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3035         {
3036           if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3037               && bbinfo[idx].op == NULL_TREE
3038               && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3039             {
3040               stmt = last_stmt (bb);
3041               if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3042                 gimple_cond_make_false (stmt);
3043               else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3044                 gimple_cond_make_true (stmt);
3045               else
3046                 {
3047                   gimple_cond_set_code (stmt, NE_EXPR);
3048                   gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
3049                   gimple_cond_set_rhs (stmt, boolean_false_node);
3050                 }
3051               update_stmt (stmt);
3052             }
3053           if (bb == first_bb)
3054             break;
3055         }
3056     }
3057 }
3058
3059 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3060    of STMT in it's operands.  This is also known as a "destructive
3061    update" operation.  */
3062
3063 static bool
3064 is_phi_for_stmt (gimple stmt, tree operand)
3065 {
3066   gimple def_stmt;
3067   tree lhs;
3068   use_operand_p arg_p;
3069   ssa_op_iter i;
3070
3071   if (TREE_CODE (operand) != SSA_NAME)
3072     return false;
3073
3074   lhs = gimple_assign_lhs (stmt);
3075
3076   def_stmt = SSA_NAME_DEF_STMT (operand);
3077   if (gimple_code (def_stmt) != GIMPLE_PHI)
3078     return false;
3079
3080   FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
3081     if (lhs == USE_FROM_PTR (arg_p))
3082       return true;
3083   return false;
3084 }
3085
3086 /* Remove def stmt of VAR if VAR has zero uses and recurse
3087    on rhs1 operand if so.  */
3088
3089 static void
3090 remove_visited_stmt_chain (tree var)
3091 {
3092   gimple stmt;
3093   gimple_stmt_iterator gsi;
3094
3095   while (1)
3096     {
3097       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3098         return;
3099       stmt = SSA_NAME_DEF_STMT (var);
3100       if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3101         {
3102           var = gimple_assign_rhs1 (stmt);
3103           gsi = gsi_for_stmt (stmt);
3104           reassoc_remove_stmt (&gsi);
3105           release_defs (stmt);
3106         }
3107       else
3108         return;
3109     }
3110 }
3111
3112 /* This function checks three consequtive operands in
3113    passed operands vector OPS starting from OPINDEX and
3114    swaps two operands if it is profitable for binary operation
3115    consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
3116
3117    We pair ops with the same rank if possible.
3118
3119    The alternative we try is to see if STMT is a destructive
3120    update style statement, which is like:
3121    b = phi (a, ...)
3122    a = c + b;
3123    In that case, we want to use the destructive update form to
3124    expose the possible vectorizer sum reduction opportunity.
3125    In that case, the third operand will be the phi node. This
3126    check is not performed if STMT is null.
3127
3128    We could, of course, try to be better as noted above, and do a
3129    lot of work to try to find these opportunities in >3 operand
3130    cases, but it is unlikely to be worth it.  */
3131
3132 static void
3133 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3134                           unsigned int opindex, gimple stmt)
3135 {
3136   operand_entry_t oe1, oe2, oe3;
3137
3138   oe1 = ops[opindex];
3139   oe2 = ops[opindex + 1];
3140   oe3 = ops[opindex + 2];
3141
3142   if ((oe1->rank == oe2->rank
3143        && oe2->rank != oe3->rank)
3144       || (stmt && is_phi_for_stmt (stmt, oe3->op)
3145           && !is_phi_for_stmt (stmt, oe1->op)
3146           && !is_phi_for_stmt (stmt, oe2->op)))
3147     {
3148       struct operand_entry temp = *oe3;
3149       oe3->op = oe1->op;
3150       oe3->rank = oe1->rank;
3151       oe1->op = temp.op;
3152       oe1->rank= temp.rank;
3153     }
3154   else if ((oe1->rank == oe3->rank
3155             && oe2->rank != oe3->rank)
3156            || (stmt && is_phi_for_stmt (stmt, oe2->op)
3157                && !is_phi_for_stmt (stmt, oe1->op)
3158                && !is_phi_for_stmt (stmt, oe3->op)))
3159     {
3160       struct operand_entry temp = *oe2;
3161       oe2->op = oe1->op;
3162       oe2->rank = oe1->rank;
3163       oe1->op = temp.op;
3164       oe1->rank = temp.rank;
3165     }
3166 }
3167
3168 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3169    two definitions, otherwise return STMT.  */
3170
3171 static inline gimple
3172 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3173 {
3174   if (TREE_CODE (rhs1) == SSA_NAME
3175       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3176     stmt = SSA_NAME_DEF_STMT (rhs1);
3177   if (TREE_CODE (rhs2) == SSA_NAME
3178       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3179     stmt = SSA_NAME_DEF_STMT (rhs2);
3180   return stmt;
3181 }
3182
3183 /* Recursively rewrite our linearized statements so that the operators
3184    match those in OPS[OPINDEX], putting the computation in rank
3185    order.  Return new lhs.  */
3186
3187 static tree
3188 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3189                    vec<operand_entry_t> ops, bool changed)
3190 {
3191   tree rhs1 = gimple_assign_rhs1 (stmt);
3192   tree rhs2 = gimple_assign_rhs2 (stmt);
3193   tree lhs = gimple_assign_lhs (stmt);
3194   operand_entry_t oe;
3195
3196   /* The final recursion case for this function is that you have
3197      exactly two operations left.
3198      If we had one exactly one op in the entire list to start with, we
3199      would have never called this function, and the tail recursion
3200      rewrites them one at a time.  */
3201   if (opindex + 2 == ops.length ())
3202     {
3203       operand_entry_t oe1, oe2;
3204
3205       oe1 = ops[opindex];
3206       oe2 = ops[opindex + 1];
3207
3208       if (rhs1 != oe1->op || rhs2 != oe2->op)
3209         {
3210           gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3211           unsigned int uid = gimple_uid (stmt);
3212
3213           if (dump_file && (dump_flags & TDF_DETAILS))
3214             {
3215               fprintf (dump_file, "Transforming ");
3216               print_gimple_stmt (dump_file, stmt, 0, 0);
3217             }
3218
3219           if (changed)
3220             {
3221               gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3222               lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3223               stmt
3224                 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3225                                                 lhs, oe1->op, oe2->op);
3226               gimple_set_uid (stmt, uid);
3227               gimple_set_visited (stmt, true);
3228               if (insert_point == gsi_stmt (gsi))
3229                 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3230               else
3231                 insert_stmt_after (stmt, insert_point);
3232             }
3233           else
3234             {
3235               gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3236                                    == stmt);
3237               gimple_assign_set_rhs1 (stmt, oe1->op);
3238               gimple_assign_set_rhs2 (stmt, oe2->op);
3239               update_stmt (stmt);
3240             }
3241
3242           if (rhs1 != oe1->op && rhs1 != oe2->op)
3243             remove_visited_stmt_chain (rhs1);
3244
3245           if (dump_file && (dump_flags & TDF_DETAILS))
3246             {
3247               fprintf (dump_file, " into ");
3248               print_gimple_stmt (dump_file, stmt, 0, 0);
3249             }
3250         }
3251       return lhs;
3252     }
3253
3254   /* If we hit here, we should have 3 or more ops left.  */
3255   gcc_assert (opindex + 2 < ops.length ());
3256
3257   /* Rewrite the next operator.  */
3258   oe = ops[opindex];
3259
3260   /* Recurse on the LHS of the binary operator, which is guaranteed to
3261      be the non-leaf side.  */
3262   tree new_rhs1
3263     = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3264                          changed || oe->op != rhs2);
3265
3266   if (oe->op != rhs2 || new_rhs1 != rhs1)
3267     {
3268       if (dump_file && (dump_flags & TDF_DETAILS))
3269         {
3270           fprintf (dump_file, "Transforming ");
3271           print_gimple_stmt (dump_file, stmt, 0, 0);
3272         }
3273
3274       /* If changed is false, this is either opindex == 0
3275          or all outer rhs2's were equal to corresponding oe->op,
3276          and powi_result is NULL.
3277          That means lhs is equivalent before and after reassociation.
3278          Otherwise ensure the old lhs SSA_NAME is not reused and
3279          create a new stmt as well, so that any debug stmts will be
3280          properly adjusted.  */
3281       if (changed)
3282         {
3283           gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3284           unsigned int uid = gimple_uid (stmt);
3285           gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3286
3287           lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3288           stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3289                                                lhs, new_rhs1, oe->op);
3290           gimple_set_uid (stmt, uid);
3291           gimple_set_visited (stmt, true);
3292           if (insert_point == gsi_stmt (gsi))
3293             gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3294           else
3295             insert_stmt_after (stmt, insert_point);
3296         }
3297       else
3298         {
3299           gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3300                                == stmt);
3301           gimple_assign_set_rhs1 (stmt, new_rhs1);
3302           gimple_assign_set_rhs2 (stmt, oe->op);
3303           update_stmt (stmt);
3304         }
3305
3306       if (dump_file && (dump_flags & TDF_DETAILS))
3307         {
3308           fprintf (dump_file, " into ");
3309           print_gimple_stmt (dump_file, stmt, 0, 0);
3310         }
3311     }
3312   return lhs;
3313 }
3314
3315 /* Find out how many cycles we need to compute statements chain.
3316    OPS_NUM holds number os statements in a chain.  CPU_WIDTH is a
3317    maximum number of independent statements we may execute per cycle.  */
3318
3319 static int
3320 get_required_cycles (int ops_num, int cpu_width)
3321 {
3322   int res;
3323   int elog;
3324   unsigned int rest;
3325
3326   /* While we have more than 2 * cpu_width operands
3327      we may reduce number of operands by cpu_width
3328      per cycle.  */
3329   res = ops_num / (2 * cpu_width);
3330
3331   /* Remained operands count may be reduced twice per cycle
3332      until we have only one operand.  */
3333   rest = (unsigned)(ops_num - res * cpu_width);
3334   elog = exact_log2 (rest);
3335   if (elog >= 0)
3336     res += elog;
3337   else
3338     res += floor_log2 (rest) + 1;
3339
3340   return res;
3341 }
3342
3343 /* Returns an optimal number of registers to use for computation of
3344    given statements.  */
3345
3346 static int
3347 get_reassociation_width (int ops_num, enum tree_code opc,
3348                          enum machine_mode mode)
3349 {
3350   int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3351   int width;
3352   int width_min;
3353   int cycles_best;
3354
3355   if (param_width > 0)
3356     width = param_width;
3357   else
3358     width = targetm.sched.reassociation_width (opc, mode);
3359
3360   if (width == 1)
3361     return width;
3362
3363   /* Get the minimal time required for sequence computation.  */
3364   cycles_best = get_required_cycles (ops_num, width);
3365
3366   /* Check if we may use less width and still compute sequence for
3367      the same time.  It will allow us to reduce registers usage.
3368      get_required_cycles is monotonically increasing with lower width
3369      so we can perform a binary search for the minimal width that still
3370      results in the optimal cycle count.  */
3371   width_min = 1;
3372   while (width > width_min)
3373     {
3374       int width_mid = (width + width_min) / 2;
3375
3376       if (get_required_cycles (ops_num, width_mid) == cycles_best)
3377         width = width_mid;
3378       else if (width_min < width_mid)
3379         width_min = width_mid;
3380       else
3381         break;
3382     }
3383
3384   return width;
3385 }
3386
3387 /* Recursively rewrite our linearized statements so that the operators
3388    match those in OPS[OPINDEX], putting the computation in rank
3389    order and trying to allow operations to be executed in
3390    parallel.  */
3391
3392 static void
3393 rewrite_expr_tree_parallel (gimple stmt, int width,
3394                             vec<operand_entry_t> ops)
3395 {
3396   enum tree_code opcode = gimple_assign_rhs_code (stmt);
3397   int op_num = ops.length ();
3398   int stmt_num = op_num - 1;
3399   gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3400   int op_index = op_num - 1;
3401   int stmt_index = 0;
3402   int ready_stmts_end = 0;
3403   int i = 0;
3404   tree last_rhs1 = gimple_assign_rhs1 (stmt);
3405
3406   /* We start expression rewriting from the top statements.
3407      So, in this loop we create a full list of statements
3408      we will work with.  */
3409   stmts[stmt_num - 1] = stmt;
3410   for (i = stmt_num - 2; i >= 0; i--)
3411     stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3412
3413   for (i = 0; i < stmt_num; i++)
3414     {
3415       tree op1, op2;
3416
3417       /* Determine whether we should use results of
3418          already handled statements or not.  */
3419       if (ready_stmts_end == 0
3420           && (i - stmt_index >= width || op_index < 1))
3421         ready_stmts_end = i;
3422
3423       /* Now we choose operands for the next statement.  Non zero
3424          value in ready_stmts_end means here that we should use
3425          the result of already generated statements as new operand.  */
3426       if (ready_stmts_end > 0)
3427         {
3428           op1 = gimple_assign_lhs (stmts[stmt_index++]);
3429           if (ready_stmts_end > stmt_index)
3430             op2 = gimple_assign_lhs (stmts[stmt_index++]);
3431           else if (op_index >= 0)
3432             op2 = ops[op_index--]->op;
3433           else
3434             {
3435               gcc_assert (stmt_index < i);
3436               op2 = gimple_assign_lhs (stmts[stmt_index++]);
3437             }
3438
3439           if (stmt_index >= ready_stmts_end)
3440             ready_stmts_end = 0;
3441         }
3442       else
3443         {
3444           if (op_index > 1)
3445             swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3446           op2 = ops[op_index--]->op;
3447           op1 = ops[op_index--]->op;
3448         }
3449
3450       /* If we emit the last statement then we should put
3451          operands into the last statement.  It will also
3452          break the loop.  */
3453       if (op_index < 0 && stmt_index == i)
3454         i = stmt_num - 1;
3455
3456       if (dump_file && (dump_flags & TDF_DETAILS))
3457         {
3458           fprintf (dump_file, "Transforming ");
3459           print_gimple_stmt (dump_file, stmts[i], 0, 0);
3460         }
3461
3462       /* We keep original statement only for the last one.  All
3463          others are recreated.  */
3464       if (i == stmt_num - 1)
3465         {
3466           gimple_assign_set_rhs1 (stmts[i], op1);
3467           gimple_assign_set_rhs2 (stmts[i], op2);
3468           update_stmt (stmts[i]);
3469         }
3470       else
3471         stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3472
3473       if (dump_file && (dump_flags & TDF_DETAILS))
3474         {
3475           fprintf (dump_file, " into ");
3476           print_gimple_stmt (dump_file, stmts[i], 0, 0);
3477         }
3478     }
3479
3480   remove_visited_stmt_chain (last_rhs1);
3481 }
3482
3483 /* Transform STMT, which is really (A +B) + (C + D) into the left
3484    linear form, ((A+B)+C)+D.
3485    Recurse on D if necessary.  */
3486
3487 static void
3488 linearize_expr (gimple stmt)
3489 {
3490   gimple_stmt_iterator gsi;
3491   gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3492   gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3493   gimple oldbinrhs = binrhs;
3494   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3495   gimple newbinrhs = NULL;
3496   struct loop *loop = loop_containing_stmt (stmt);
3497   tree lhs = gimple_assign_lhs (stmt);
3498
3499   gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3500               && is_reassociable_op (binrhs, rhscode, loop));
3501
3502   gsi = gsi_for_stmt (stmt);
3503
3504   gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3505   binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
3506                                          make_ssa_name (TREE_TYPE (lhs), NULL),
3507                                          gimple_assign_lhs (binlhs),
3508                                          gimple_assign_rhs2 (binrhs));
3509   gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3510   gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3511   gimple_set_uid (binrhs, gimple_uid (stmt));
3512
3513   if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3514     newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3515
3516   if (dump_file && (dump_flags & TDF_DETAILS))
3517     {
3518       fprintf (dump_file, "Linearized: ");
3519       print_gimple_stmt (dump_file, stmt, 0, 0);
3520     }
3521
3522   reassociate_stats.linearized++;
3523   update_stmt (stmt);
3524
3525   gsi = gsi_for_stmt (oldbinrhs);
3526   reassoc_remove_stmt (&gsi);
3527   release_defs (oldbinrhs);
3528
3529   gimple_set_visited (stmt, true);
3530   gimple_set_visited (binlhs, true);
3531   gimple_set_visited (binrhs, true);
3532
3533   /* Tail recurse on the new rhs if it still needs reassociation.  */
3534   if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3535     /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3536            want to change the algorithm while converting to tuples.  */
3537     linearize_expr (stmt);
3538 }
3539
3540 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3541    it.  Otherwise, return NULL.  */
3542
3543 static gimple
3544 get_single_immediate_use (tree lhs)
3545 {
3546   use_operand_p immuse;
3547   gimple immusestmt;
3548
3549   if (TREE_CODE (lhs) == SSA_NAME
3550       && single_imm_use (lhs, &immuse, &immusestmt)
3551       && is_gimple_assign (immusestmt))
3552     return immusestmt;
3553
3554   return NULL;
3555 }
3556
3557 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3558    representing the negated value.  Insertions of any necessary
3559    instructions go before GSI.
3560    This function is recursive in that, if you hand it "a_5" as the
3561    value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3562    transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
3563
3564 static tree
3565 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3566 {
3567   gimple negatedefstmt = NULL;
3568   tree resultofnegate;
3569   gimple_stmt_iterator gsi;
3570   unsigned int uid;
3571
3572   /* If we are trying to negate a name, defined by an add, negate the
3573      add operands instead.  */
3574   if (TREE_CODE (tonegate) == SSA_NAME)
3575     negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3576   if (TREE_CODE (tonegate) == SSA_NAME
3577       && is_gimple_assign (negatedefstmt)
3578       && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3579       && has_single_use (gimple_assign_lhs (negatedefstmt))
3580       && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3581     {
3582       tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3583       tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3584       tree lhs = gimple_assign_lhs (negatedefstmt);
3585       gimple g;
3586
3587       gsi = gsi_for_stmt (negatedefstmt);
3588       rhs1 = negate_value (rhs1, &gsi);
3589
3590       gsi = gsi_for_stmt (negatedefstmt);
3591       rhs2 = negate_value (rhs2, &gsi);
3592
3593       gsi = gsi_for_stmt (negatedefstmt);
3594       lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3595       gimple_set_visited (negatedefstmt, true);
3596       g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
3597       gimple_set_uid (g, gimple_uid (negatedefstmt));
3598       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3599       return lhs;
3600     }
3601
3602   tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3603   resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3604                                              NULL_TREE, true, GSI_SAME_STMT);
3605   gsi = *gsip;
3606   uid = gimple_uid (gsi_stmt (gsi));
3607   for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3608     {
3609       gimple stmt = gsi_stmt (gsi);
3610       if (gimple_uid (stmt) != 0)
3611         break;
3612       gimple_set_uid (stmt, uid);
3613     }
3614   return resultofnegate;
3615 }
3616
3617 /* Return true if we should break up the subtract in STMT into an add
3618    with negate.  This is true when we the subtract operands are really
3619    adds, or the subtract itself is used in an add expression.  In
3620    either case, breaking up the subtract into an add with negate
3621    exposes the adds to reassociation.  */
3622
3623 static bool
3624 should_break_up_subtract (gimple stmt)
3625 {
3626   tree lhs = gimple_assign_lhs (stmt);
3627   tree binlhs = gimple_assign_rhs1 (stmt);
3628   tree binrhs = gimple_assign_rhs2 (stmt);
3629   gimple immusestmt;
3630   struct loop *loop = loop_containing_stmt (stmt);
3631
3632   if (TREE_CODE (binlhs) == SSA_NAME
3633       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3634     return true;
3635
3636   if (TREE_CODE (binrhs) == SSA_NAME
3637       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3638     return true;
3639
3640   if (TREE_CODE (lhs) == SSA_NAME
3641       && (immusestmt = get_single_immediate_use (lhs))
3642       && is_gimple_assign (immusestmt)
3643       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3644           ||  gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3645     return true;
3646   return false;
3647 }
3648
3649 /* Transform STMT from A - B into A + -B.  */
3650
3651 static void
3652 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3653 {
3654   tree rhs1 = gimple_assign_rhs1 (stmt);
3655   tree rhs2 = gimple_assign_rhs2 (stmt);
3656
3657   if (dump_file && (dump_flags & TDF_DETAILS))
3658     {
3659       fprintf (dump_file, "Breaking up subtract ");
3660       print_gimple_stmt (dump_file, stmt, 0, 0);
3661     }
3662
3663   rhs2 = negate_value (rhs2, gsip);
3664   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3665   update_stmt (stmt);
3666 }
3667
3668 /* Determine whether STMT is a builtin call that raises an SSA name
3669    to an integer power and has only one use.  If so, and this is early
3670    reassociation and unsafe math optimizations are permitted, place
3671    the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3672    If any of these conditions does not hold, return FALSE.  */
3673
3674 static bool
3675 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3676 {
3677   tree fndecl, arg1;
3678   REAL_VALUE_TYPE c, cint;
3679
3680   if (!first_pass_instance
3681       || !flag_unsafe_math_optimizations
3682       || !is_gimple_call (stmt)
3683       || !has_single_use (gimple_call_lhs (stmt)))
3684     return false;
3685
3686   fndecl = gimple_call_fndecl (stmt);
3687
3688   if (!fndecl
3689       || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3690     return false;
3691
3692   switch (DECL_FUNCTION_CODE (fndecl))
3693     {
3694     CASE_FLT_FN (BUILT_IN_POW):
3695       *base = gimple_call_arg (stmt, 0);
3696       arg1 = gimple_call_arg (stmt, 1);
3697
3698       if (TREE_CODE (arg1) != REAL_CST)
3699         return false;
3700
3701       c = TREE_REAL_CST (arg1);
3702
3703       if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3704         return false;
3705
3706       *exponent = real_to_integer (&c);
3707       real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
3708       if (!real_identical (&c, &cint))
3709         return false;
3710
3711       break;
3712
3713     CASE_FLT_FN (BUILT_IN_POWI):
3714       *base = gimple_call_arg (stmt, 0);
3715       arg1 = gimple_call_arg (stmt, 1);
3716
3717       if (!tree_fits_shwi_p (arg1))
3718         return false;
3719
3720       *exponent = tree_to_shwi (arg1);
3721       break;
3722
3723     default:
3724       return false;
3725     }
3726
3727   /* Expanding negative exponents is generally unproductive, so we don't
3728      complicate matters with those.  Exponents of zero and one should
3729      have been handled by expression folding.  */
3730   if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3731     return false;
3732
3733   return true;
3734 }
3735
3736 /* Recursively linearize a binary expression that is the RHS of STMT.
3737    Place the operands of the expression tree in the vector named OPS.  */
3738
3739 static void
3740 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3741                      bool is_associative, bool set_visited)
3742 {
3743   tree binlhs = gimple_assign_rhs1 (stmt);
3744   tree binrhs = gimple_assign_rhs2 (stmt);
3745   gimple binlhsdef = NULL, binrhsdef = NULL;
3746   bool binlhsisreassoc = false;
3747   bool binrhsisreassoc = false;
3748   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3749   struct loop *loop = loop_containing_stmt (stmt);
3750   tree base = NULL_TREE;
3751   HOST_WIDE_INT exponent = 0;
3752
3753   if (set_visited)
3754     gimple_set_visited (stmt, true);
3755
3756   if (TREE_CODE (binlhs) == SSA_NAME)
3757     {
3758       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3759       binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3760                          && !stmt_could_throw_p (binlhsdef));
3761     }
3762
3763   if (TREE_CODE (binrhs) == SSA_NAME)
3764     {
3765       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3766       binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3767                          && !stmt_could_throw_p (binrhsdef));
3768     }
3769
3770   /* If the LHS is not reassociable, but the RHS is, we need to swap
3771      them.  If neither is reassociable, there is nothing we can do, so
3772      just put them in the ops vector.  If the LHS is reassociable,
3773      linearize it.  If both are reassociable, then linearize the RHS
3774      and the LHS.  */
3775
3776   if (!binlhsisreassoc)
3777     {
3778       tree temp;
3779
3780       /* If this is not a associative operation like division, give up.  */
3781       if (!is_associative)
3782         {
3783           add_to_ops_vec (ops, binrhs);
3784           return;
3785         }
3786
3787       if (!binrhsisreassoc)
3788         {
3789           if (rhscode == MULT_EXPR
3790               && TREE_CODE (binrhs) == SSA_NAME
3791               && acceptable_pow_call (binrhsdef, &base, &exponent))
3792             {
3793               add_repeat_to_ops_vec (ops, base, exponent);
3794               gimple_set_visited (binrhsdef, true);
3795             }
3796           else
3797             add_to_ops_vec (ops, binrhs);
3798
3799           if (rhscode == MULT_EXPR
3800               && TREE_CODE (binlhs) == SSA_NAME
3801               && acceptable_pow_call (binlhsdef, &base, &exponent))
3802             {
3803               add_repeat_to_ops_vec (ops, base, exponent);
3804               gimple_set_visited (binlhsdef, true);
3805             }
3806           else
3807             add_to_ops_vec (ops, binlhs);
3808
3809           return;
3810         }
3811
3812       if (dump_file && (dump_flags & TDF_DETAILS))
3813         {
3814           fprintf (dump_file, "swapping operands of ");
3815           print_gimple_stmt (dump_file, stmt, 0, 0);
3816         }
3817
3818       swap_ssa_operands (stmt,
3819                          gimple_assign_rhs1_ptr (stmt),
3820                          gimple_assign_rhs2_ptr (stmt));
3821       update_stmt (stmt);
3822
3823       if (dump_file && (dump_flags & TDF_DETAILS))
3824         {
3825           fprintf (dump_file, " is now ");
3826           print_gimple_stmt (dump_file, stmt, 0, 0);
3827         }
3828
3829       /* We want to make it so the lhs is always the reassociative op,
3830          so swap.  */
3831       temp = binlhs;
3832       binlhs = binrhs;
3833       binrhs = temp;
3834     }
3835   else if (binrhsisreassoc)
3836     {
3837       linearize_expr (stmt);
3838       binlhs = gimple_assign_rhs1 (stmt);
3839       binrhs = gimple_assign_rhs2 (stmt);
3840     }
3841
3842   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3843               || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3844                                       rhscode, loop));
3845   linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3846                        is_associative, set_visited);
3847
3848   if (rhscode == MULT_EXPR
3849       && TREE_CODE (binrhs) == SSA_NAME
3850       && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3851     {
3852       add_repeat_to_ops_vec (ops, base, exponent);
3853       gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3854     }
3855   else
3856     add_to_ops_vec (ops, binrhs);
3857 }
3858
3859 /* Repropagate the negates back into subtracts, since no other pass
3860    currently does it.  */
3861
3862 static void
3863 repropagate_negates (void)
3864 {
3865   unsigned int i = 0;
3866   tree negate;
3867
3868   FOR_EACH_VEC_ELT (plus_negates, i, negate)
3869     {
3870       gimple user = get_single_immediate_use (negate);
3871
3872       if (!user || !is_gimple_assign (user))
3873         continue;
3874
3875       /* The negate operand can be either operand of a PLUS_EXPR
3876          (it can be the LHS if the RHS is a constant for example).
3877
3878          Force the negate operand to the RHS of the PLUS_EXPR, then
3879          transform the PLUS_EXPR into a MINUS_EXPR.  */
3880       if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3881         {
3882           /* If the negated operand appears on the LHS of the
3883              PLUS_EXPR, exchange the operands of the PLUS_EXPR
3884              to force the negated operand to the RHS of the PLUS_EXPR.  */
3885           if (gimple_assign_rhs1 (user) == negate)
3886             {
3887               swap_ssa_operands (user,
3888                                  gimple_assign_rhs1_ptr (user),
3889                                  gimple_assign_rhs2_ptr (user));
3890             }
3891
3892           /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3893              the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
3894           if (gimple_assign_rhs2 (user) == negate)
3895             {
3896               tree rhs1 = gimple_assign_rhs1 (user);
3897               tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3898               gimple_stmt_iterator gsi = gsi_for_stmt (user);
3899               gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3900               update_stmt (user);
3901             }
3902         }
3903       else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3904         {
3905           if (gimple_assign_rhs1 (user) == negate)
3906             {
3907               /* We have
3908                    x = -a
3909                    y = x - b
3910                  which we transform into
3911                    x = a + b
3912                    y = -x .
3913                  This pushes down the negate which we possibly can merge
3914                  into some other operation, hence insert it into the
3915                  plus_negates vector.  */
3916               gimple feed = SSA_NAME_DEF_STMT (negate);
3917               tree a = gimple_assign_rhs1 (feed);
3918               tree b = gimple_assign_rhs2 (user);
3919               gimple_stmt_iterator gsi = gsi_for_stmt (feed);
3920               gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
3921               tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
3922               gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
3923               gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3924               gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
3925               user = gsi_stmt (gsi2);
3926               update_stmt (user);
3927               reassoc_remove_stmt (&gsi);
3928               release_defs (feed);
3929               plus_negates.safe_push (gimple_assign_lhs (user));
3930             }
3931           else
3932             {
3933               /* Transform "x = -a; y = b - x" into "y = b + a", getting
3934                  rid of one operation.  */
3935               gimple feed = SSA_NAME_DEF_STMT (negate);
3936               tree a = gimple_assign_rhs1 (feed);
3937               tree rhs1 = gimple_assign_rhs1 (user);
3938               gimple_stmt_iterator gsi = gsi_for_stmt (user);
3939               gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3940               update_stmt (gsi_stmt (gsi));
3941             }
3942         }
3943     }
3944 }
3945
3946 /* Returns true if OP is of a type for which we can do reassociation.
3947    That is for integral or non-saturating fixed-point types, and for
3948    floating point type when associative-math is enabled.  */
3949
3950 static bool
3951 can_reassociate_p (tree op)
3952 {
3953   tree type = TREE_TYPE (op);
3954   if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3955       || NON_SAT_FIXED_POINT_TYPE_P (type)
3956       || (flag_associative_math && FLOAT_TYPE_P (type)))
3957     return true;
3958   return false;
3959 }
3960
3961 /* Break up subtract operations in block BB.
3962
3963    We do this top down because we don't know whether the subtract is
3964    part of a possible chain of reassociation except at the top.
3965
3966    IE given
3967    d = f + g
3968    c = a + e
3969    b = c - d
3970    q = b - r
3971    k = t - q
3972
3973    we want to break up k = t - q, but we won't until we've transformed q
3974    = b - r, which won't be broken up until we transform b = c - d.
3975
3976    En passant, clear the GIMPLE visited flag on every statement
3977    and set UIDs within each basic block.  */
3978
3979 static void
3980 break_up_subtract_bb (basic_block bb)
3981 {
3982   gimple_stmt_iterator gsi;
3983   basic_block son;
3984   unsigned int uid = 1;
3985
3986   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3987     {
3988       gimple stmt = gsi_stmt (gsi);
3989       gimple_set_visited (stmt, false);
3990       gimple_set_uid (stmt, uid++);
3991
3992       if (!is_gimple_assign (stmt)
3993           || !can_reassociate_p (gimple_assign_lhs (stmt)))
3994         continue;
3995
3996       /* Look for simple gimple subtract operations.  */
3997       if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3998         {
3999           if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4000               || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4001             continue;
4002
4003           /* Check for a subtract used only in an addition.  If this
4004              is the case, transform it into add of a negate for better
4005              reassociation.  IE transform C = A-B into C = A + -B if C
4006              is only used in an addition.  */
4007           if (should_break_up_subtract (stmt))
4008             break_up_subtract (stmt, &gsi);
4009         }
4010       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4011                && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4012         plus_negates.safe_push (gimple_assign_lhs (stmt));
4013     }
4014   for (son = first_dom_son (CDI_DOMINATORS, bb);
4015        son;
4016        son = next_dom_son (CDI_DOMINATORS, son))
4017     break_up_subtract_bb (son);
4018 }
4019
4020 /* Used for repeated factor analysis.  */
4021 struct repeat_factor_d
4022 {
4023   /* An SSA name that occurs in a multiply chain.  */
4024   tree factor;
4025
4026   /* Cached rank of the factor.  */
4027   unsigned rank;
4028
4029   /* Number of occurrences of the factor in the chain.  */
4030   HOST_WIDE_INT count;
4031
4032   /* An SSA name representing the product of this factor and
4033      all factors appearing later in the repeated factor vector.  */
4034   tree repr;
4035 };
4036
4037 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4038 typedef const struct repeat_factor_d *const_repeat_factor_t;
4039
4040
4041 static vec<repeat_factor> repeat_factor_vec;
4042
4043 /* Used for sorting the repeat factor vector.  Sort primarily by
4044    ascending occurrence count, secondarily by descending rank.  */
4045
4046 static int
4047 compare_repeat_factors (const void *x1, const void *x2)
4048 {
4049   const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4050   const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4051
4052   if (rf1->count != rf2->count)
4053     return rf1->count - rf2->count;
4054
4055   return rf2->rank - rf1->rank;
4056 }
4057
4058 /* Look for repeated operands in OPS in the multiply tree rooted at
4059    STMT.  Replace them with an optimal sequence of multiplies and powi
4060    builtin calls, and remove the used operands from OPS.  Return an
4061    SSA name representing the value of the replacement sequence.  */
4062
4063 static tree
4064 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4065 {
4066   unsigned i, j, vec_len;
4067   int ii;
4068   operand_entry_t oe;
4069   repeat_factor_t rf1, rf2;
4070   repeat_factor rfnew;
4071   tree result = NULL_TREE;
4072   tree target_ssa, iter_result;
4073   tree type = TREE_TYPE (gimple_get_lhs (stmt));
4074   tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4075   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4076   gimple mul_stmt, pow_stmt;
4077
4078   /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4079      target.  */
4080   if (!powi_fndecl)
4081     return NULL_TREE;
4082
4083   /* Allocate the repeated factor vector.  */
4084   repeat_factor_vec.create (10);
4085
4086   /* Scan the OPS vector for all SSA names in the product and build
4087      up a vector of occurrence counts for each factor.  */
4088   FOR_EACH_VEC_ELT (*ops, i, oe)
4089     {
4090       if (TREE_CODE (oe->op) == SSA_NAME)
4091         {
4092           FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4093             {
4094               if (rf1->factor == oe->op)
4095                 {
4096                   rf1->count += oe->count;
4097                   break;
4098                 }
4099             }
4100
4101           if (j >= repeat_factor_vec.length ())
4102             {
4103               rfnew.factor = oe->op;
4104               rfnew.rank = oe->rank;
4105               rfnew.count = oe->count;
4106               rfnew.repr = NULL_TREE;
4107               repeat_factor_vec.safe_push (rfnew);
4108             }
4109         }
4110     }
4111
4112   /* Sort the repeated factor vector by (a) increasing occurrence count,
4113      and (b) decreasing rank.  */
4114   repeat_factor_vec.qsort (compare_repeat_factors);
4115
4116   /* It is generally best to combine as many base factors as possible
4117      into a product before applying __builtin_powi to the result.
4118      However, the sort order chosen for the repeated factor vector
4119      allows us to cache partial results for the product of the base
4120      factors for subsequent use.  When we already have a cached partial
4121      result from a previous iteration, it is best to make use of it
4122      before looking for another __builtin_pow opportunity.
4123
4124      As an example, consider x * x * y * y * y * z * z * z * z.
4125      We want to first compose the product x * y * z, raise it to the
4126      second power, then multiply this by y * z, and finally multiply
4127      by z.  This can be done in 5 multiplies provided we cache y * z
4128      for use in both expressions:
4129
4130         t1 = y * z
4131         t2 = t1 * x
4132         t3 = t2 * t2
4133         t4 = t1 * t3
4134         result = t4 * z
4135
4136      If we instead ignored the cached y * z and first multiplied by
4137      the __builtin_pow opportunity z * z, we would get the inferior:
4138
4139         t1 = y * z
4140         t2 = t1 * x
4141         t3 = t2 * t2
4142         t4 = z * z
4143         t5 = t3 * t4
4144         result = t5 * y  */
4145
4146   vec_len = repeat_factor_vec.length ();
4147
4148   /* Repeatedly look for opportunities to create a builtin_powi call.  */
4149   while (true)
4150     {
4151       HOST_WIDE_INT power;
4152
4153       /* First look for the largest cached product of factors from
4154          preceding iterations.  If found, create a builtin_powi for
4155          it if the minimum occurrence count for its factors is at
4156          least 2, or just use this cached product as our next
4157          multiplicand if the minimum occurrence count is 1.  */
4158       FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4159         {
4160           if (rf1->repr && rf1->count > 0)
4161             break;
4162         }
4163
4164       if (j < vec_len)
4165         {
4166           power = rf1->count;
4167
4168           if (power == 1)
4169             {
4170               iter_result = rf1->repr;
4171
4172               if (dump_file && (dump_flags & TDF_DETAILS))
4173                 {
4174                   unsigned elt;
4175                   repeat_factor_t rf;
4176                   fputs ("Multiplying by cached product ", dump_file);
4177                   for (elt = j; elt < vec_len; elt++)
4178                     {
4179                       rf = &repeat_factor_vec[elt];
4180                       print_generic_expr (dump_file, rf->factor, 0);
4181                       if (elt < vec_len - 1)
4182                         fputs (" * ", dump_file);
4183                     }
4184                   fputs ("\n", dump_file);
4185                 }
4186             }
4187           else
4188             {
4189               iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4190               pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4191                                             build_int_cst (integer_type_node,
4192                                                            power));
4193               gimple_call_set_lhs (pow_stmt, iter_result);
4194               gimple_set_location (pow_stmt, gimple_location (stmt));
4195               gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4196
4197               if (dump_file && (dump_flags & TDF_DETAILS))
4198                 {
4199                   unsigned elt;
4200                   repeat_factor_t rf;
4201                   fputs ("Building __builtin_pow call for cached product (",
4202                          dump_file);
4203                   for (elt = j; elt < vec_len; elt++)
4204                     {
4205                       rf = &repeat_factor_vec[elt];
4206                       print_generic_expr (dump_file, rf->factor, 0);
4207                       if (elt < vec_len - 1)
4208                         fputs (" * ", dump_file);
4209                     }
4210                   fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4211                            power);
4212                 }
4213             }
4214         }
4215       else
4216         {
4217           /* Otherwise, find the first factor in the repeated factor
4218              vector whose occurrence count is at least 2.  If no such
4219              factor exists, there are no builtin_powi opportunities
4220              remaining.  */
4221           FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4222             {
4223               if (rf1->count >= 2)
4224                 break;
4225             }
4226
4227           if (j >= vec_len)
4228             break;
4229
4230           power = rf1->count;
4231
4232           if (dump_file && (dump_flags & TDF_DETAILS))
4233             {
4234               unsigned elt;
4235               repeat_factor_t rf;
4236               fputs ("Building __builtin_pow call for (", dump_file);
4237               for (elt = j; elt < vec_len; elt++)
4238                 {
4239                   rf = &repeat_factor_vec[elt];
4240                   print_generic_expr (dump_file, rf->factor, 0);
4241                   if (elt < vec_len - 1)
4242                     fputs (" * ", dump_file);
4243                 }
4244               fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4245             }
4246
4247           reassociate_stats.pows_created++;
4248
4249           /* Visit each element of the vector in reverse order (so that
4250              high-occurrence elements are visited first, and within the
4251              same occurrence count, lower-ranked elements are visited
4252              first).  Form a linear product of all elements in this order
4253              whose occurrencce count is at least that of element J.
4254              Record the SSA name representing the product of each element
4255              with all subsequent elements in the vector.  */
4256           if (j == vec_len - 1)
4257             rf1->repr = rf1->factor;
4258           else
4259             {
4260               for (ii = vec_len - 2; ii >= (int)j; ii--)
4261                 {
4262                   tree op1, op2;
4263
4264                   rf1 = &repeat_factor_vec[ii];
4265                   rf2 = &repeat_factor_vec[ii + 1];
4266
4267                   /* Init the last factor's representative to be itself.  */
4268                   if (!rf2->repr)
4269                     rf2->repr = rf2->factor;
4270
4271                   op1 = rf1->factor;
4272                   op2 = rf2->repr;
4273
4274                   target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4275                   mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4276                                                            target_ssa,
4277                                                            op1, op2);
4278                   gimple_set_location (mul_stmt, gimple_location (stmt));
4279                   gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4280                   rf1->repr = target_ssa;
4281
4282                   /* Don't reprocess the multiply we just introduced.  */
4283                   gimple_set_visited (mul_stmt, true);
4284                 }
4285             }
4286
4287           /* Form a call to __builtin_powi for the maximum product
4288              just formed, raised to the power obtained earlier.  */
4289           rf1 = &repeat_factor_vec[j];
4290           iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4291           pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4292                                         build_int_cst (integer_type_node,
4293                                                        power));
4294           gimple_call_set_lhs (pow_stmt, iter_result);
4295           gimple_set_location (pow_stmt, gimple_location (stmt));
4296           gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4297         }
4298
4299       /* If we previously formed at least one other builtin_powi call,
4300          form the product of this one and those others.  */
4301       if (result)
4302         {
4303           tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4304           mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4305                                                    result, iter_result);
4306           gimple_set_location (mul_stmt, gimple_location (stmt));
4307           gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4308           gimple_set_visited (mul_stmt, true);
4309           result = new_result;
4310         }
4311       else
4312         result = iter_result;
4313
4314       /* Decrement the occurrence count of each element in the product
4315          by the count found above, and remove this many copies of each
4316          factor from OPS.  */
4317       for (i = j; i < vec_len; i++)
4318         {
4319           unsigned k = power;
4320           unsigned n;
4321
4322           rf1 = &repeat_factor_vec[i];
4323           rf1->count -= power;
4324
4325           FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4326             {
4327               if (oe->op == rf1->factor)
4328                 {
4329                   if (oe->count <= k)
4330                     {
4331                       ops->ordered_remove (n);
4332                       k -= oe->count;
4333
4334                       if (k == 0)
4335                         break;
4336                     }
4337                   else
4338                     {
4339                       oe->count -= k;
4340                       break;
4341                     }
4342                 }
4343             }
4344         }
4345     }
4346
4347   /* At this point all elements in the repeated factor vector have a
4348      remaining occurrence count of 0 or 1, and those with a count of 1
4349      don't have cached representatives.  Re-sort the ops vector and
4350      clean up.  */
4351   ops->qsort (sort_by_operand_rank);
4352   repeat_factor_vec.release ();
4353
4354   /* Return the final product computed herein.  Note that there may
4355      still be some elements with single occurrence count left in OPS;
4356      those will be handled by the normal reassociation logic.  */
4357   return result;
4358 }
4359
4360 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
4361
4362 static void
4363 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4364 {
4365   tree rhs1;
4366
4367   if (dump_file && (dump_flags & TDF_DETAILS))
4368     {
4369       fprintf (dump_file, "Transforming ");
4370       print_gimple_stmt (dump_file, stmt, 0, 0);
4371     }
4372
4373   rhs1 = gimple_assign_rhs1 (stmt);
4374   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4375   update_stmt (stmt);
4376   remove_visited_stmt_chain (rhs1);
4377
4378   if (dump_file && (dump_flags & TDF_DETAILS))
4379     {
4380       fprintf (dump_file, " into ");
4381       print_gimple_stmt (dump_file, stmt, 0, 0);
4382     }
4383 }
4384
4385 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */
4386
4387 static void
4388 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4389                             tree rhs1, tree rhs2)
4390 {
4391   if (dump_file && (dump_flags & TDF_DETAILS))
4392     {
4393       fprintf (dump_file, "Transforming ");
4394       print_gimple_stmt (dump_file, stmt, 0, 0);
4395     }
4396
4397   gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4398   update_stmt (gsi_stmt (*gsi));
4399   remove_visited_stmt_chain (rhs1);
4400
4401   if (dump_file && (dump_flags & TDF_DETAILS))
4402     {
4403       fprintf (dump_file, " into ");
4404       print_gimple_stmt (dump_file, stmt, 0, 0);
4405     }
4406 }
4407
4408 /* Reassociate expressions in basic block BB and its post-dominator as
4409    children.  */
4410
4411 static void
4412 reassociate_bb (basic_block bb)
4413 {
4414   gimple_stmt_iterator gsi;
4415   basic_block son;
4416   gimple stmt = last_stmt (bb);
4417
4418   if (stmt && !gimple_visited_p (stmt))
4419     maybe_optimize_range_tests (stmt);
4420
4421   for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4422     {
4423       stmt = gsi_stmt (gsi);
4424
4425       if (is_gimple_assign (stmt)
4426           && !stmt_could_throw_p (stmt))
4427         {
4428           tree lhs, rhs1, rhs2;
4429           enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4430
4431           /* If this is not a gimple binary expression, there is
4432              nothing for us to do with it.  */
4433           if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4434             continue;
4435
4436           /* If this was part of an already processed statement,
4437              we don't need to touch it again. */
4438           if (gimple_visited_p (stmt))
4439             {
4440               /* This statement might have become dead because of previous
4441                  reassociations.  */
4442               if (has_zero_uses (gimple_get_lhs (stmt)))
4443                 {
4444                   reassoc_remove_stmt (&gsi);
4445                   release_defs (stmt);
4446                   /* We might end up removing the last stmt above which
4447                      places the iterator to the end of the sequence.
4448                      Reset it to the last stmt in this case which might
4449                      be the end of the sequence as well if we removed
4450                      the last statement of the sequence.  In which case
4451                      we need to bail out.  */
4452                   if (gsi_end_p (gsi))
4453                     {
4454                       gsi = gsi_last_bb (bb);
4455                       if (gsi_end_p (gsi))
4456                         break;
4457                     }
4458                 }
4459               continue;
4460             }
4461
4462           lhs = gimple_assign_lhs (stmt);
4463           rhs1 = gimple_assign_rhs1 (stmt);
4464           rhs2 = gimple_assign_rhs2 (stmt);
4465
4466           /* For non-bit or min/max operations we can't associate
4467              all types.  Verify that here.  */
4468           if (rhs_code != BIT_IOR_EXPR
4469               && rhs_code != BIT_AND_EXPR
4470               && rhs_code != BIT_XOR_EXPR
4471               && rhs_code != MIN_EXPR
4472               && rhs_code != MAX_EXPR
4473               && (!can_reassociate_p (lhs)
4474                   || !can_reassociate_p (rhs1)
4475                   || !can_reassociate_p (rhs2)))
4476             continue;
4477
4478           if (associative_tree_code (rhs_code))
4479             {
4480               auto_vec<operand_entry_t> ops;
4481               tree powi_result = NULL_TREE;
4482
4483               /* There may be no immediate uses left by the time we
4484                  get here because we may have eliminated them all.  */
4485               if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4486                 continue;
4487
4488               gimple_set_visited (stmt, true);
4489               linearize_expr_tree (&ops, stmt, true, true);
4490               ops.qsort (sort_by_operand_rank);
4491               optimize_ops_list (rhs_code, &ops);
4492               if (undistribute_ops_list (rhs_code, &ops,
4493                                          loop_containing_stmt (stmt)))
4494                 {
4495                   ops.qsort (sort_by_operand_rank);
4496                   optimize_ops_list (rhs_code, &ops);
4497                 }
4498
4499               if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4500                 optimize_range_tests (rhs_code, &ops);
4501
4502               if (first_pass_instance
4503                   && rhs_code == MULT_EXPR
4504                   && flag_unsafe_math_optimizations)
4505                 powi_result = attempt_builtin_powi (stmt, &ops);
4506
4507               /* If the operand vector is now empty, all operands were
4508                  consumed by the __builtin_powi optimization.  */
4509               if (ops.length () == 0)
4510                 transform_stmt_to_copy (&gsi, stmt, powi_result);
4511               else if (ops.length () == 1)
4512                 {
4513                   tree last_op = ops.last ()->op;
4514
4515                   if (powi_result)
4516                     transform_stmt_to_multiply (&gsi, stmt, last_op,
4517                                                 powi_result);
4518                   else
4519                     transform_stmt_to_copy (&gsi, stmt, last_op);
4520                 }
4521               else
4522                 {
4523                   enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4524                   int ops_num = ops.length ();
4525                   int width = get_reassociation_width (ops_num, rhs_code, mode);
4526                   tree new_lhs = lhs;
4527
4528                   if (dump_file && (dump_flags & TDF_DETAILS))
4529                     fprintf (dump_file,
4530                              "Width = %d was chosen for reassociation\n", width);
4531
4532                   if (width > 1
4533                       && ops.length () > 3)
4534                     rewrite_expr_tree_parallel (stmt, width, ops);
4535                   else
4536                     {
4537                       /* When there are three operands left, we want
4538                          to make sure the ones that get the double
4539                          binary op are chosen wisely.  */
4540                       int len = ops.length ();
4541                       if (len >= 3)
4542                         swap_ops_for_binary_stmt (ops, len - 3, stmt);
4543
4544                       new_lhs = rewrite_expr_tree (stmt, 0, ops,
4545                                                    powi_result != NULL);
4546                     }
4547
4548                   /* If we combined some repeated factors into a
4549                      __builtin_powi call, multiply that result by the
4550                      reassociated operands.  */
4551                   if (powi_result)
4552                     {
4553                       gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4554                       tree type = TREE_TYPE (lhs);
4555                       tree target_ssa = make_temp_ssa_name (type, NULL,
4556                                                             "reassocpow");
4557                       gimple_set_lhs (lhs_stmt, target_ssa);
4558                       update_stmt (lhs_stmt);
4559                       if (lhs != new_lhs)
4560                         target_ssa = new_lhs;
4561                       mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4562                                                                powi_result,
4563                                                                target_ssa);
4564                       gimple_set_location (mul_stmt, gimple_location (stmt));
4565                       gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4566                     }
4567                 }
4568             }
4569         }
4570     }
4571   for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4572        son;
4573        son = next_dom_son (CDI_POST_DOMINATORS, son))
4574     reassociate_bb (son);
4575 }
4576
4577 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4578 void debug_ops_vector (vec<operand_entry_t> ops);
4579
4580 /* Dump the operand entry vector OPS to FILE.  */
4581
4582 void
4583 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4584 {
4585   operand_entry_t oe;
4586   unsigned int i;
4587
4588   FOR_EACH_VEC_ELT (ops, i, oe)
4589     {
4590       fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4591       print_generic_expr (file, oe->op, 0);
4592     }
4593 }
4594
4595 /* Dump the operand entry vector OPS to STDERR.  */
4596
4597 DEBUG_FUNCTION void
4598 debug_ops_vector (vec<operand_entry_t> ops)
4599 {
4600   dump_ops_vector (stderr, ops);
4601 }
4602
4603 static void
4604 do_reassoc (void)
4605 {
4606   break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4607   reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4608 }
4609
4610 /* Initialize the reassociation pass.  */
4611
4612 static void
4613 init_reassoc (void)
4614 {
4615   int i;
4616   long rank = 2;
4617   int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4618
4619   /* Find the loops, so that we can prevent moving calculations in
4620      them.  */
4621   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4622
4623   memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4624
4625   operand_entry_pool = create_alloc_pool ("operand entry pool",
4626                                           sizeof (struct operand_entry), 30);
4627   next_operand_entry_id = 0;
4628
4629   /* Reverse RPO (Reverse Post Order) will give us something where
4630      deeper loops come later.  */
4631   pre_and_rev_post_order_compute (NULL, bbs, false);
4632   bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
4633   operand_rank = pointer_map_create ();
4634
4635   /* Give each default definition a distinct rank.  This includes
4636      parameters and the static chain.  Walk backwards over all
4637      SSA names so that we get proper rank ordering according
4638      to tree_swap_operands_p.  */
4639   for (i = num_ssa_names - 1; i > 0; --i)
4640     {
4641       tree name = ssa_name (i);
4642       if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4643         insert_operand_rank (name, ++rank);
4644     }
4645
4646   /* Set up rank for each BB  */
4647   for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
4648     bb_rank[bbs[i]] = ++rank  << 16;
4649
4650   free (bbs);
4651   calculate_dominance_info (CDI_POST_DOMINATORS);
4652   plus_negates = vNULL;
4653 }
4654
4655 /* Cleanup after the reassociation pass, and print stats if
4656    requested.  */
4657
4658 static void
4659 fini_reassoc (void)
4660 {
4661   statistics_counter_event (cfun, "Linearized",
4662                             reassociate_stats.linearized);
4663   statistics_counter_event (cfun, "Constants eliminated",
4664                             reassociate_stats.constants_eliminated);
4665   statistics_counter_event (cfun, "Ops eliminated",
4666                             reassociate_stats.ops_eliminated);
4667   statistics_counter_event (cfun, "Statements rewritten",
4668                             reassociate_stats.rewritten);
4669   statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4670                             reassociate_stats.pows_encountered);
4671   statistics_counter_event (cfun, "Built-in powi calls created",
4672                             reassociate_stats.pows_created);
4673
4674   pointer_map_destroy (operand_rank);
4675   free_alloc_pool (operand_entry_pool);
4676   free (bb_rank);
4677   plus_negates.release ();
4678   free_dominance_info (CDI_POST_DOMINATORS);
4679   loop_optimizer_finalize ();
4680 }
4681
4682 /* Gate and execute functions for Reassociation.  */
4683
4684 static unsigned int
4685 execute_reassoc (void)
4686 {
4687   init_reassoc ();
4688
4689   do_reassoc ();
4690   repropagate_negates ();
4691
4692   fini_reassoc ();
4693   return 0;
4694 }
4695
4696 namespace {
4697
4698 const pass_data pass_data_reassoc =
4699 {
4700   GIMPLE_PASS, /* type */
4701   "reassoc", /* name */
4702   OPTGROUP_NONE, /* optinfo_flags */
4703   true, /* has_execute */
4704   TV_TREE_REASSOC, /* tv_id */
4705   ( PROP_cfg | PROP_ssa ), /* properties_required */
4706   0, /* properties_provided */
4707   0, /* properties_destroyed */
4708   0, /* todo_flags_start */
4709   TODO_update_ssa_only_virtuals, /* todo_flags_finish */
4710 };
4711
4712 class pass_reassoc : public gimple_opt_pass
4713 {
4714 public:
4715   pass_reassoc (gcc::context *ctxt)
4716     : gimple_opt_pass (pass_data_reassoc, ctxt)
4717   {}
4718
4719   /* opt_pass methods: */
4720   opt_pass * clone () { return new pass_reassoc (m_ctxt); }
4721   virtual bool gate (function *) { return flag_tree_reassoc != 0; }
4722   virtual unsigned int execute (function *) { return execute_reassoc (); }
4723
4724 }; // class pass_reassoc
4725
4726 } // anon namespace
4727
4728 gimple_opt_pass *
4729 make_pass_reassoc (gcc::context *ctxt)
4730 {
4731   return new pass_reassoc (ctxt);
4732 }