gcc/tree-ssa-reassoc.c

   1 /* Reassociation for trees.
   2    Copyright (C) 2005, 2007, 2008, 2009, 2010, 2011, 2012
   3    Free Software Foundation, Inc.
   4    Contributed by Daniel Berlin <dan@dberlin.org>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 3, or (at your option)
  11 any later version.
  12
  13 GCC is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "tree.h"
  27 #include "basic-block.h"
  28 #include "gimple-pretty-print.h"
  29 #include "tree-inline.h"
  30 #include "tree-flow.h"
  31 #include "gimple.h"
  32 #include "tree-iterator.h"
  33 #include "tree-pass.h"
  34 #include "alloc-pool.h"
  35 #include "vec.h"
  36 #include "langhooks.h"
  37 #include "pointer-set.h"
  38 #include "cfgloop.h"
  39 #include "flags.h"
  40 #include "target.h"
  41 #include "params.h"
  42 #include "diagnostic-core.h"
  43
  44 /*  This is a simple global reassociation pass.  It is, in part, based
  45     on the LLVM pass of the same name (They do some things more/less
  46     than we do, in different orders, etc).
  47
  48     It consists of five steps:
  49
  50     1. Breaking up subtract operations into addition + negate, where
  51     it would promote the reassociation of adds.
  52
  53     2. Left linearization of the expression trees, so that (A+B)+(C+D)
  54     becomes (((A+B)+C)+D), which is easier for us to rewrite later.
  55     During linearization, we place the operands of the binary
  56     expressions into a vector of operand_entry_t
  57
  58     3. Optimization of the operand lists, eliminating things like a +
  59     -a, a & a, etc.
  60
  61     3a. Combine repeated factors with the same occurrence counts
  62     into a __builtin_powi call that will later be optimized into
  63     an optimal number of multiplies.
  64
  65     4. Rewrite the expression trees we linearized and optimized so
  66     they are in proper rank order.
  67
  68     5. Repropagate negates, as nothing else will clean it up ATM.
  69
  70     A bit of theory on #4, since nobody seems to write anything down
  71     about why it makes sense to do it the way they do it:
  72
  73     We could do this much nicer theoretically, but don't (for reasons
  74     explained after how to do it theoretically nice :P).
  75
  76     In order to promote the most redundancy elimination, you want
  77     binary expressions whose operands are the same rank (or
  78     preferably, the same value) exposed to the redundancy eliminator,
  79     for possible elimination.
  80
  81     So the way to do this if we really cared, is to build the new op
  82     tree from the leaves to the roots, merging as you go, and putting the
  83     new op on the end of the worklist, until you are left with one
  84     thing on the worklist.
  85
  86     IE if you have to rewrite the following set of operands (listed with
  87     rank in parentheses), with opcode PLUS_EXPR:
  88
  89     a (1),  b (1),  c (1),  d (2), e (2)
  90
  91
  92     We start with our merge worklist empty, and the ops list with all of
  93     those on it.
  94
  95     You want to first merge all leaves of the same rank, as much as
  96     possible.
  97
  98     So first build a binary op of
  99
 100     mergetmp = a + b, and put "mergetmp" on the merge worklist.
 101
 102     Because there is no three operand form of PLUS_EXPR, c is not going to
 103     be exposed to redundancy elimination as a rank 1 operand.
 104
 105     So you might as well throw it on the merge worklist (you could also
 106     consider it to now be a rank two operand, and merge it with d and e,
 107     but in this case, you then have evicted e from a binary op. So at
 108     least in this situation, you can't win.)
 109
 110     Then build a binary op of d + e
 111     mergetmp2 = d + e
 112
 113     and put mergetmp2 on the merge worklist.
 114
 115     so merge worklist = {mergetmp, c, mergetmp2}
 116
 117     Continue building binary ops of these operations until you have only
 118     one operation left on the worklist.
 119
 120     So we have
 121
 122     build binary op
 123     mergetmp3 = mergetmp + c
 124
 125     worklist = {mergetmp2, mergetmp3}
 126
 127     mergetmp4 = mergetmp2 + mergetmp3
 128
 129     worklist = {mergetmp4}
 130
 131     because we have one operation left, we can now just set the original
 132     statement equal to the result of that operation.
 133
 134     This will at least expose a + b  and d + e to redundancy elimination
 135     as binary operations.
 136
 137     For extra points, you can reuse the old statements to build the
 138     mergetmps, since you shouldn't run out.
 139
 140     So why don't we do this?
 141
 142     Because it's expensive, and rarely will help.  Most trees we are
 143     reassociating have 3 or less ops.  If they have 2 ops, they already
 144     will be written into a nice single binary op.  If you have 3 ops, a
 145     single simple check suffices to tell you whether the first two are of the
 146     same rank.  If so, you know to order it
 147
 148     mergetmp = op1 + op2
 149     newstmt = mergetmp + op3
 150
 151     instead of
 152     mergetmp = op2 + op3
 153     newstmt = mergetmp + op1
 154
 155     If all three are of the same rank, you can't expose them all in a
 156     single binary operator anyway, so the above is *still* the best you
 157     can do.
 158
 159     Thus, this is what we do.  When we have three ops left, we check to see
 160     what order to put them in, and call it a day.  As a nod to vector sum
 161     reduction, we check if any of the ops are really a phi node that is a
 162     destructive update for the associating op, and keep the destructive
 163     update together for vector sum reduction recognition.  */
 164
 165
 166 /* Statistics */
 167 static struct
 168 {
 169   int linearized;
 170   int constants_eliminated;
 171   int ops_eliminated;
 172   int rewritten;
 173   int pows_encountered;
 174   int pows_created;
 175 } reassociate_stats;
 176
 177 /* Operator, rank pair.  */
 178 typedef struct operand_entry
 179 {
 180   unsigned int rank;
 181   int id;
 182   tree op;
 183   unsigned int count;
 184 } *operand_entry_t;
 185
 186 static alloc_pool operand_entry_pool;
 187
 188 /* This is used to assign a unique ID to each struct operand_entry
 189    so that qsort results are identical on different hosts.  */
 190 static int next_operand_entry_id;
 191
 192 /* Starting rank number for a given basic block, so that we can rank
 193    operations using unmovable instructions in that BB based on the bb
 194    depth.  */
 195 static long *bb_rank;
 196
 197 /* Operand->rank hashtable.  */
 198 static struct pointer_map_t *operand_rank;
 199
 200 /* Forward decls.  */
 201 static long get_rank (tree);
 202
 203
 204 /* Bias amount for loop-carried phis.  We want this to be larger than
 205    the depth of any reassociation tree we can see, but not larger than
 206    the rank difference between two blocks.  */
 207 #define PHI_LOOP_BIAS (1 << 15)
 208
 209 /* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
 210    an innermost loop, and the phi has only a single use which is inside
 211    the loop, then the rank is the block rank of the loop latch plus an
 212    extra bias for the loop-carried dependence.  This causes expressions
 213    calculated into an accumulator variable to be independent for each
 214    iteration of the loop.  If STMT is some other phi, the rank is the
 215    block rank of its containing block.  */
 216 static long
 217 phi_rank (gimple stmt)
 218 {
 219   basic_block bb = gimple_bb (stmt);
 220   struct loop *father = bb->loop_father;
 221   tree res;
 222   unsigned i;
 223   use_operand_p use;
 224   gimple use_stmt;
 225
 226   /* We only care about real loops (those with a latch).  */
 227   if (!father->latch)
 228     return bb_rank[bb->index];
 229
 230   /* Interesting phis must be in headers of innermost loops.  */
 231   if (bb != father->header
 232       || father->inner)
 233     return bb_rank[bb->index];
 234
 235   /* Ignore virtual SSA_NAMEs.  */
 236   res = gimple_phi_result (stmt);
 237   if (virtual_operand_p (res))
 238     return bb_rank[bb->index];
 239
 240   /* The phi definition must have a single use, and that use must be
 241      within the loop.  Otherwise this isn't an accumulator pattern.  */
 242   if (!single_imm_use (res, &use, &use_stmt)
 243       || gimple_bb (use_stmt)->loop_father != father)
 244     return bb_rank[bb->index];
 245
 246   /* Look for phi arguments from within the loop.  If found, bias this phi.  */
 247   for (i = 0; i < gimple_phi_num_args (stmt); i++)
 248     {
 249       tree arg = gimple_phi_arg_def (stmt, i);
 250       if (TREE_CODE (arg) == SSA_NAME
 251           && !SSA_NAME_IS_DEFAULT_DEF (arg))
 252         {
 253           gimple def_stmt = SSA_NAME_DEF_STMT (arg);
 254           if (gimple_bb (def_stmt)->loop_father == father)
 255             return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
 256         }
 257     }
 258
 259   /* Must be an uninteresting phi.  */
 260   return bb_rank[bb->index];
 261 }
 262
 263 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
 264    loop-carried dependence of an innermost loop, return TRUE; else
 265    return FALSE.  */
 266 static bool
 267 loop_carried_phi (tree exp)
 268 {
 269   gimple phi_stmt;
 270   long block_rank;
 271
 272   if (TREE_CODE (exp) != SSA_NAME
 273       || SSA_NAME_IS_DEFAULT_DEF (exp))
 274     return false;
 275
 276   phi_stmt = SSA_NAME_DEF_STMT (exp);
 277
 278   if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
 279     return false;
 280
 281   /* Non-loop-carried phis have block rank.  Loop-carried phis have
 282      an additional bias added in.  If this phi doesn't have block rank,
 283      it's biased and should not be propagated.  */
 284   block_rank = bb_rank[gimple_bb (phi_stmt)->index];
 285
 286   if (phi_rank (phi_stmt) != block_rank)
 287     return true;
 288
 289   return false;
 290 }
 291
 292 /* Return the maximum of RANK and the rank that should be propagated
 293    from expression OP.  For most operands, this is just the rank of OP.
 294    For loop-carried phis, the value is zero to avoid undoing the bias
 295    in favor of the phi.  */
 296 static long
 297 propagate_rank (long rank, tree op)
 298 {
 299   long op_rank;
 300
 301   if (loop_carried_phi (op))
 302     return rank;
 303
 304   op_rank = get_rank (op);
 305
 306   return MAX (rank, op_rank);
 307 }
 308
 309 /* Look up the operand rank structure for expression E.  */
 310
 311 static inline long
 312 find_operand_rank (tree e)
 313 {
 314   void **slot = pointer_map_contains (operand_rank, e);
 315   return slot ? (long) (intptr_t) *slot : -1;
 316 }
 317
 318 /* Insert {E,RANK} into the operand rank hashtable.  */
 319
 320 static inline void
 321 insert_operand_rank (tree e, long rank)
 322 {
 323   void **slot;
 324   gcc_assert (rank > 0);
 325   slot = pointer_map_insert (operand_rank, e);
 326   gcc_assert (!*slot);
 327   *slot = (void *) (intptr_t) rank;
 328 }
 329
 330 /* Given an expression E, return the rank of the expression.  */
 331
 332 static long
 333 get_rank (tree e)
 334 {
 335   /* Constants have rank 0.  */
 336   if (is_gimple_min_invariant (e))
 337     return 0;
 338
 339   /* SSA_NAME's have the rank of the expression they are the result
 340      of.
 341      For globals and uninitialized values, the rank is 0.
 342      For function arguments, use the pre-setup rank.
 343      For PHI nodes, stores, asm statements, etc, we use the rank of
 344      the BB.
 345      For simple operations, the rank is the maximum rank of any of
 346      its operands, or the bb_rank, whichever is less.
 347      I make no claims that this is optimal, however, it gives good
 348      results.  */
 349
 350   /* We make an exception to the normal ranking system to break
 351      dependences of accumulator variables in loops.  Suppose we
 352      have a simple one-block loop containing:
 353
 354        x_1 = phi(x_0, x_2)
 355        b = a + x_1
 356        c = b + d
 357        x_2 = c + e
 358
 359      As shown, each iteration of the calculation into x is fully
 360      dependent upon the iteration before it.  We would prefer to
 361      see this in the form:
 362
 363        x_1 = phi(x_0, x_2)
 364        b = a + d
 365        c = b + e
 366        x_2 = c + x_1
 367
 368      If the loop is unrolled, the calculations of b and c from
 369      different iterations can be interleaved.
 370
 371      To obtain this result during reassociation, we bias the rank
 372      of the phi definition x_1 upward, when it is recognized as an
 373      accumulator pattern.  The artificial rank causes it to be
 374      added last, providing the desired independence.  */
 375
 376   if (TREE_CODE (e) == SSA_NAME)
 377     {
 378       gimple stmt;
 379       long rank;
 380       int i, n;
 381       tree op;
 382
 383       if (SSA_NAME_IS_DEFAULT_DEF (e))
 384         return find_operand_rank (e);
 385
 386       stmt = SSA_NAME_DEF_STMT (e);
 387       if (gimple_code (stmt) == GIMPLE_PHI)
 388         return phi_rank (stmt);
 389
 390       if (!is_gimple_assign (stmt)
 391           || gimple_vdef (stmt))
 392         return bb_rank[gimple_bb (stmt)->index];
 393
 394       /* If we already have a rank for this expression, use that.  */
 395       rank = find_operand_rank (e);
 396       if (rank != -1)
 397         return rank;
 398
 399       /* Otherwise, find the maximum rank for the operands.  As an
 400          exception, remove the bias from loop-carried phis when propagating
 401          the rank so that dependent operations are not also biased.  */
 402       rank = 0;
 403       if (gimple_assign_single_p (stmt))
 404         {
 405           tree rhs = gimple_assign_rhs1 (stmt);
 406           n = TREE_OPERAND_LENGTH (rhs);
 407           if (n == 0)
 408             rank = propagate_rank (rank, rhs);
 409           else
 410             {
 411               for (i = 0; i < n; i++)
 412                 {
 413                   op = TREE_OPERAND (rhs, i);
 414
 415                   if (op != NULL_TREE)
 416                     rank = propagate_rank (rank, op);
 417                 }
 418             }
 419         }
 420       else
 421         {
 422           n = gimple_num_ops (stmt);
 423           for (i = 1; i < n; i++)
 424             {
 425               op = gimple_op (stmt, i);
 426               gcc_assert (op);
 427               rank = propagate_rank (rank, op);
 428             }
 429         }
 430
 431       if (dump_file && (dump_flags & TDF_DETAILS))
 432         {
 433           fprintf (dump_file, "Rank for ");
 434           print_generic_expr (dump_file, e, 0);
 435           fprintf (dump_file, " is %ld\n", (rank + 1));
 436         }
 437
 438       /* Note the rank in the hashtable so we don't recompute it.  */
 439       insert_operand_rank (e, (rank + 1));
 440       return (rank + 1);
 441     }
 442
 443   /* Globals, etc,  are rank 0 */
 444   return 0;
 445 }
 446
 447
 448 /* We want integer ones to end up last no matter what, since they are
 449    the ones we can do the most with.  */
 450 #define INTEGER_CONST_TYPE 1 << 3
 451 #define FLOAT_CONST_TYPE 1 << 2
 452 #define OTHER_CONST_TYPE 1 << 1
 453
 454 /* Classify an invariant tree into integer, float, or other, so that
 455    we can sort them to be near other constants of the same type.  */
 456 static inline int
 457 constant_type (tree t)
 458 {
 459   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
 460     return INTEGER_CONST_TYPE;
 461   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
 462     return FLOAT_CONST_TYPE;
 463   else
 464     return OTHER_CONST_TYPE;
 465 }
 466
 467 /* qsort comparison function to sort operand entries PA and PB by rank
 468    so that the sorted array is ordered by rank in decreasing order.  */
 469 static int
 470 sort_by_operand_rank (const void *pa, const void *pb)
 471 {
 472   const operand_entry_t oea = *(const operand_entry_t *)pa;
 473   const operand_entry_t oeb = *(const operand_entry_t *)pb;
 474
 475   /* It's nicer for optimize_expression if constants that are likely
 476      to fold when added/multiplied//whatever are put next to each
 477      other.  Since all constants have rank 0, order them by type.  */
 478   if (oeb->rank == 0 && oea->rank == 0)
 479     {
 480       if (constant_type (oeb->op) != constant_type (oea->op))
 481         return constant_type (oeb->op) - constant_type (oea->op);
 482       else
 483         /* To make sorting result stable, we use unique IDs to determine
 484            order.  */
 485         return oeb->id - oea->id;
 486     }
 487
 488   /* Lastly, make sure the versions that are the same go next to each
 489      other.  We use SSA_NAME_VERSION because it's stable.  */
 490   if ((oeb->rank - oea->rank == 0)
 491       && TREE_CODE (oea->op) == SSA_NAME
 492       && TREE_CODE (oeb->op) == SSA_NAME)
 493     {
 494       if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
 495         return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
 496       else
 497         return oeb->id - oea->id;
 498     }
 499
 500   if (oeb->rank != oea->rank)
 501     return oeb->rank - oea->rank;
 502   else
 503     return oeb->id - oea->id;
 504 }
 505
 506 /* Add an operand entry to *OPS for the tree operand OP.  */
 507
 508 static void
 509 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
 510 {
 511   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
 512
 513   oe->op = op;
 514   oe->rank = get_rank (op);
 515   oe->id = next_operand_entry_id++;
 516   oe->count = 1;
 517   ops->safe_push (oe);
 518 }
 519
 520 /* Add an operand entry to *OPS for the tree operand OP with repeat
 521    count REPEAT.  */
 522
 523 static void
 524 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
 525                        HOST_WIDE_INT repeat)
 526 {
 527   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
 528
 529   oe->op = op;
 530   oe->rank = get_rank (op);
 531   oe->id = next_operand_entry_id++;
 532   oe->count = repeat;
 533   ops->safe_push (oe);
 534
 535   reassociate_stats.pows_encountered++;
 536 }
 537
 538 /* Return true if STMT is reassociable operation containing a binary
 539    operation with tree code CODE, and is inside LOOP.  */
 540
 541 static bool
 542 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
 543 {
 544   basic_block bb = gimple_bb (stmt);
 545
 546   if (gimple_bb (stmt) == NULL)
 547     return false;
 548
 549   if (!flow_bb_inside_loop_p (loop, bb))
 550     return false;
 551
 552   if (is_gimple_assign (stmt)
 553       && gimple_assign_rhs_code (stmt) == code
 554       && has_single_use (gimple_assign_lhs (stmt)))
 555     return true;
 556
 557   return false;
 558 }
 559
 560
 561 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
 562    operand of the negate operation.  Otherwise, return NULL.  */
 563
 564 static tree
 565 get_unary_op (tree name, enum tree_code opcode)
 566 {
 567   gimple stmt = SSA_NAME_DEF_STMT (name);
 568
 569   if (!is_gimple_assign (stmt))
 570     return NULL_TREE;
 571
 572   if (gimple_assign_rhs_code (stmt) == opcode)
 573     return gimple_assign_rhs1 (stmt);
 574   return NULL_TREE;
 575 }
 576
 577 /* If CURR and LAST are a pair of ops that OPCODE allows us to
 578    eliminate through equivalences, do so, remove them from OPS, and
 579    return true.  Otherwise, return false.  */
 580
 581 static bool
 582 eliminate_duplicate_pair (enum tree_code opcode,
 583                           vec<operand_entry_t> *ops,
 584                           bool *all_done,
 585                           unsigned int i,
 586                           operand_entry_t curr,
 587                           operand_entry_t last)
 588 {
 589
 590   /* If we have two of the same op, and the opcode is & |, min, or max,
 591      we can eliminate one of them.
 592      If we have two of the same op, and the opcode is ^, we can
 593      eliminate both of them.  */
 594
 595   if (last && last->op == curr->op)
 596     {
 597       switch (opcode)
 598         {
 599         case MAX_EXPR:
 600         case MIN_EXPR:
 601         case BIT_IOR_EXPR:
 602         case BIT_AND_EXPR:
 603           if (dump_file && (dump_flags & TDF_DETAILS))
 604             {
 605               fprintf (dump_file, "Equivalence: ");
 606               print_generic_expr (dump_file, curr->op, 0);
 607               fprintf (dump_file, " [&|minmax] ");
 608               print_generic_expr (dump_file, last->op, 0);
 609               fprintf (dump_file, " -> ");
 610               print_generic_stmt (dump_file, last->op, 0);
 611             }
 612
 613           ops->ordered_remove (i);
 614           reassociate_stats.ops_eliminated ++;
 615
 616           return true;
 617
 618         case BIT_XOR_EXPR:
 619           if (dump_file && (dump_flags & TDF_DETAILS))
 620             {
 621               fprintf (dump_file, "Equivalence: ");
 622               print_generic_expr (dump_file, curr->op, 0);
 623               fprintf (dump_file, " ^ ");
 624               print_generic_expr (dump_file, last->op, 0);
 625               fprintf (dump_file, " -> nothing\n");
 626             }
 627
 628           reassociate_stats.ops_eliminated += 2;
 629
 630           if (ops->length () == 2)
 631             {
 632               ops->create (0);
 633               add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
 634               *all_done = true;
 635             }
 636           else
 637             {
 638               ops->ordered_remove (i-1);
 639               ops->ordered_remove (i-1);
 640             }
 641
 642           return true;
 643
 644         default:
 645           break;
 646         }
 647     }
 648   return false;
 649 }
 650
 651 static vec<tree> plus_negates;
 652
 653 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
 654    expression, look in OPS for a corresponding positive operation to cancel
 655    it out.  If we find one, remove the other from OPS, replace
 656    OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
 657    return false. */
 658
 659 static bool
 660 eliminate_plus_minus_pair (enum tree_code opcode,
 661                            vec<operand_entry_t> *ops,
 662                            unsigned int currindex,
 663                            operand_entry_t curr)
 664 {
 665   tree negateop;
 666   tree notop;
 667   unsigned int i;
 668   operand_entry_t oe;
 669
 670   if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
 671     return false;
 672
 673   negateop = get_unary_op (curr->op, NEGATE_EXPR);
 674   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 675   if (negateop == NULL_TREE && notop == NULL_TREE)
 676     return false;
 677
 678   /* Any non-negated version will have a rank that is one less than
 679      the current rank.  So once we hit those ranks, if we don't find
 680      one, we can stop.  */
 681
 682   for (i = currindex + 1;
 683        ops->iterate (i, &oe)
 684        && oe->rank >= curr->rank - 1 ;
 685        i++)
 686     {
 687       if (oe->op == negateop)
 688         {
 689
 690           if (dump_file && (dump_flags & TDF_DETAILS))
 691             {
 692               fprintf (dump_file, "Equivalence: ");
 693               print_generic_expr (dump_file, negateop, 0);
 694               fprintf (dump_file, " + -");
 695               print_generic_expr (dump_file, oe->op, 0);
 696               fprintf (dump_file, " -> 0\n");
 697             }
 698
 699           ops->ordered_remove (i);
 700           add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
 701           ops->ordered_remove (currindex);
 702           reassociate_stats.ops_eliminated ++;
 703
 704           return true;
 705         }
 706       else if (oe->op == notop)
 707         {
 708           tree op_type = TREE_TYPE (oe->op);
 709
 710           if (dump_file && (dump_flags & TDF_DETAILS))
 711             {
 712               fprintf (dump_file, "Equivalence: ");
 713               print_generic_expr (dump_file, notop, 0);
 714               fprintf (dump_file, " + ~");
 715               print_generic_expr (dump_file, oe->op, 0);
 716               fprintf (dump_file, " -> -1\n");
 717             }
 718
 719           ops->ordered_remove (i);
 720           add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
 721           ops->ordered_remove (currindex);
 722           reassociate_stats.ops_eliminated ++;
 723
 724           return true;
 725         }
 726     }
 727
 728   /* CURR->OP is a negate expr in a plus expr: save it for later
 729      inspection in repropagate_negates().  */
 730   if (negateop != NULL_TREE)
 731     plus_negates.safe_push (curr->op);
 732
 733   return false;
 734 }
 735
 736 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
 737    bitwise not expression, look in OPS for a corresponding operand to
 738    cancel it out.  If we find one, remove the other from OPS, replace
 739    OPS[CURRINDEX] with 0, and return true.  Otherwise, return
 740    false. */
 741
 742 static bool
 743 eliminate_not_pairs (enum tree_code opcode,
 744                      vec<operand_entry_t> *ops,
 745                      unsigned int currindex,
 746                      operand_entry_t curr)
 747 {
 748   tree notop;
 749   unsigned int i;
 750   operand_entry_t oe;
 751
 752   if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
 753       || TREE_CODE (curr->op) != SSA_NAME)
 754     return false;
 755
 756   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 757   if (notop == NULL_TREE)
 758     return false;
 759
 760   /* Any non-not version will have a rank that is one less than
 761      the current rank.  So once we hit those ranks, if we don't find
 762      one, we can stop.  */
 763
 764   for (i = currindex + 1;
 765        ops->iterate (i, &oe)
 766        && oe->rank >= curr->rank - 1;
 767        i++)
 768     {
 769       if (oe->op == notop)
 770         {
 771           if (dump_file && (dump_flags & TDF_DETAILS))
 772             {
 773               fprintf (dump_file, "Equivalence: ");
 774               print_generic_expr (dump_file, notop, 0);
 775               if (opcode == BIT_AND_EXPR)
 776                 fprintf (dump_file, " & ~");
 777               else if (opcode == BIT_IOR_EXPR)
 778                 fprintf (dump_file, " | ~");
 779               print_generic_expr (dump_file, oe->op, 0);
 780               if (opcode == BIT_AND_EXPR)
 781                 fprintf (dump_file, " -> 0\n");
 782               else if (opcode == BIT_IOR_EXPR)
 783                 fprintf (dump_file, " -> -1\n");
 784             }
 785
 786           if (opcode == BIT_AND_EXPR)
 787             oe->op = build_zero_cst (TREE_TYPE (oe->op));
 788           else if (opcode == BIT_IOR_EXPR)
 789             oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
 790                                           TYPE_PRECISION (TREE_TYPE (oe->op)));
 791
 792           reassociate_stats.ops_eliminated += ops->length () - 1;
 793           ops->truncate (0);
 794           ops->quick_push (oe);
 795           return true;
 796         }
 797     }
 798
 799   return false;
 800 }
 801
 802 /* Use constant value that may be present in OPS to try to eliminate
 803    operands.  Note that this function is only really used when we've
 804    eliminated ops for other reasons, or merged constants.  Across
 805    single statements, fold already does all of this, plus more.  There
 806    is little point in duplicating logic, so I've only included the
 807    identities that I could ever construct testcases to trigger.  */
 808
 809 static void
 810 eliminate_using_constants (enum tree_code opcode,
 811                            vec<operand_entry_t> *ops)
 812 {
 813   operand_entry_t oelast = ops->last ();
 814   tree type = TREE_TYPE (oelast->op);
 815
 816   if (oelast->rank == 0
 817       && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
 818     {
 819       switch (opcode)
 820         {
 821         case BIT_AND_EXPR:
 822           if (integer_zerop (oelast->op))
 823             {
 824               if (ops->length () != 1)
 825                 {
 826                   if (dump_file && (dump_flags & TDF_DETAILS))
 827                     fprintf (dump_file, "Found & 0, removing all other ops\n");
 828
 829                   reassociate_stats.ops_eliminated += ops->length () - 1;
 830
 831                   ops->truncate (0);
 832                   ops->quick_push (oelast);
 833                   return;
 834                 }
 835             }
 836           else if (integer_all_onesp (oelast->op))
 837             {
 838               if (ops->length () != 1)
 839                 {
 840                   if (dump_file && (dump_flags & TDF_DETAILS))
 841                     fprintf (dump_file, "Found & -1, removing\n");
 842                   ops->pop ();
 843                   reassociate_stats.ops_eliminated++;
 844                 }
 845             }
 846           break;
 847         case BIT_IOR_EXPR:
 848           if (integer_all_onesp (oelast->op))
 849             {
 850               if (ops->length () != 1)
 851                 {
 852                   if (dump_file && (dump_flags & TDF_DETAILS))
 853                     fprintf (dump_file, "Found | -1, removing all other ops\n");
 854
 855                   reassociate_stats.ops_eliminated += ops->length () - 1;
 856
 857                   ops->truncate (0);
 858                   ops->quick_push (oelast);
 859                   return;
 860                 }
 861             }
 862           else if (integer_zerop (oelast->op))
 863             {
 864               if (ops->length () != 1)
 865                 {
 866                   if (dump_file && (dump_flags & TDF_DETAILS))
 867                     fprintf (dump_file, "Found | 0, removing\n");
 868                   ops->pop ();
 869                   reassociate_stats.ops_eliminated++;
 870                 }
 871             }
 872           break;
 873         case MULT_EXPR:
 874           if (integer_zerop (oelast->op)
 875               || (FLOAT_TYPE_P (type)
 876                   && !HONOR_NANS (TYPE_MODE (type))
 877                   && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
 878                   && real_zerop (oelast->op)))
 879             {
 880               if (ops->length () != 1)
 881                 {
 882                   if (dump_file && (dump_flags & TDF_DETAILS))
 883                     fprintf (dump_file, "Found * 0, removing all other ops\n");
 884
 885                   reassociate_stats.ops_eliminated += ops->length () - 1;
 886                   ops->truncate (1);
 887                   ops->quick_push (oelast);
 888                   return;
 889                 }
 890             }
 891           else if (integer_onep (oelast->op)
 892                    || (FLOAT_TYPE_P (type)
 893                        && !HONOR_SNANS (TYPE_MODE (type))
 894                        && real_onep (oelast->op)))
 895             {
 896               if (ops->length () != 1)
 897                 {
 898                   if (dump_file && (dump_flags & TDF_DETAILS))
 899                     fprintf (dump_file, "Found * 1, removing\n");
 900                   ops->pop ();
 901                   reassociate_stats.ops_eliminated++;
 902                   return;
 903                 }
 904             }
 905           break;
 906         case BIT_XOR_EXPR:
 907         case PLUS_EXPR:
 908         case MINUS_EXPR:
 909           if (integer_zerop (oelast->op)
 910               || (FLOAT_TYPE_P (type)
 911                   && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
 912                   && fold_real_zero_addition_p (type, oelast->op,
 913                                                 opcode == MINUS_EXPR)))
 914             {
 915               if (ops->length () != 1)
 916                 {
 917                   if (dump_file && (dump_flags & TDF_DETAILS))
 918                     fprintf (dump_file, "Found [|^+] 0, removing\n");
 919                   ops->pop ();
 920                   reassociate_stats.ops_eliminated++;
 921                   return;
 922                 }
 923             }
 924           break;
 925         default:
 926           break;
 927         }
 928     }
 929 }
 930
 931
 932 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
 933                                  bool, bool);
 934
 935 /* Structure for tracking and counting operands.  */
 936 typedef struct oecount_s {
 937   int cnt;
 938   int id;
 939   enum tree_code oecode;
 940   tree op;
 941 } oecount;
 942
 943
 944 /* The heap for the oecount hashtable and the sorted list of operands.  */
 945 static vec<oecount> cvec;
 946
 947 /* Hash function for oecount.  */
 948
 949 static hashval_t
 950 oecount_hash (const void *p)
 951 {
 952   const oecount *c = &cvec[(size_t)p - 42];
 953   return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
 954 }
 955
 956 /* Comparison function for oecount.  */
 957
 958 static int
 959 oecount_eq (const void *p1, const void *p2)
 960 {
 961   const oecount *c1 = &cvec[(size_t)p1 - 42];
 962   const oecount *c2 = &cvec[(size_t)p2 - 42];
 963   return (c1->oecode == c2->oecode
 964           && c1->op == c2->op);
 965 }
 966
 967 /* Comparison function for qsort sorting oecount elements by count.  */
 968
 969 static int
 970 oecount_cmp (const void *p1, const void *p2)
 971 {
 972   const oecount *c1 = (const oecount *)p1;
 973   const oecount *c2 = (const oecount *)p2;
 974   if (c1->cnt != c2->cnt)
 975     return c1->cnt - c2->cnt;
 976   else
 977     /* If counts are identical, use unique IDs to stabilize qsort.  */
 978     return c1->id - c2->id;
 979 }
 980
 981 /* Return TRUE iff STMT represents a builtin call that raises OP
 982    to some exponent.  */
 983
 984 static bool
 985 stmt_is_power_of_op (gimple stmt, tree op)
 986 {
 987   tree fndecl;
 988
 989   if (!is_gimple_call (stmt))
 990     return false;
 991
 992   fndecl = gimple_call_fndecl (stmt);
 993
 994   if (!fndecl
 995       || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
 996     return false;
 997
 998   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
 999     {
1000     CASE_FLT_FN (BUILT_IN_POW):
1001     CASE_FLT_FN (BUILT_IN_POWI):
1002       return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1003
1004     default:
1005       return false;
1006     }
1007 }
1008
1009 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1010    in place and return the result.  Assumes that stmt_is_power_of_op
1011    was previously called for STMT and returned TRUE.  */
1012
1013 static HOST_WIDE_INT
1014 decrement_power (gimple stmt)
1015 {
1016   REAL_VALUE_TYPE c, cint;
1017   HOST_WIDE_INT power;
1018   tree arg1;
1019
1020   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1021     {
1022     CASE_FLT_FN (BUILT_IN_POW):
1023       arg1 = gimple_call_arg (stmt, 1);
1024       c = TREE_REAL_CST (arg1);
1025       power = real_to_integer (&c) - 1;
1026       real_from_integer (&cint, VOIDmode, power, 0, 0);
1027       gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1028       return power;
1029
1030     CASE_FLT_FN (BUILT_IN_POWI):
1031       arg1 = gimple_call_arg (stmt, 1);
1032       power = TREE_INT_CST_LOW (arg1) - 1;
1033       gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1034       return power;
1035
1036     default:
1037       gcc_unreachable ();
1038     }
1039 }
1040
1041 /* Find the single immediate use of STMT's LHS, and replace it
1042    with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
1043    replace *DEF with OP as well.  */
1044
1045 static void
1046 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1047 {
1048   tree lhs;
1049   gimple use_stmt;
1050   use_operand_p use;
1051   gimple_stmt_iterator gsi;
1052
1053   if (is_gimple_call (stmt))
1054     lhs = gimple_call_lhs (stmt);
1055   else
1056     lhs = gimple_assign_lhs (stmt);
1057
1058   gcc_assert (has_single_use (lhs));
1059   single_imm_use (lhs, &use, &use_stmt);
1060   if (lhs == *def)
1061     *def = op;
1062   SET_USE (use, op);
1063   if (TREE_CODE (op) != SSA_NAME)
1064     update_stmt (use_stmt);
1065   gsi = gsi_for_stmt (stmt);
1066   gsi_remove (&gsi, true);
1067   release_defs (stmt);
1068
1069   if (is_gimple_call (stmt))
1070     unlink_stmt_vdef (stmt);
1071 }
1072
1073 /* Walks the linear chain with result *DEF searching for an operation
1074    with operand OP and code OPCODE removing that from the chain.  *DEF
1075    is updated if there is only one operand but no operation left.  */
1076
1077 static void
1078 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1079 {
1080   gimple stmt = SSA_NAME_DEF_STMT (*def);
1081
1082   do
1083     {
1084       tree name;
1085
1086       if (opcode == MULT_EXPR
1087           && stmt_is_power_of_op (stmt, op))
1088         {
1089           if (decrement_power (stmt) == 1)
1090             propagate_op_to_single_use (op, stmt, def);
1091           return;
1092         }
1093
1094       name = gimple_assign_rhs1 (stmt);
1095
1096       /* If this is the operation we look for and one of the operands
1097          is ours simply propagate the other operand into the stmts
1098          single use.  */
1099       if (gimple_assign_rhs_code (stmt) == opcode
1100           && (name == op
1101               || gimple_assign_rhs2 (stmt) == op))
1102         {
1103           if (name == op)
1104             name = gimple_assign_rhs2 (stmt);
1105           propagate_op_to_single_use (name, stmt, def);
1106           return;
1107         }
1108
1109       /* We might have a multiply of two __builtin_pow* calls, and
1110          the operand might be hiding in the rightmost one.  */
1111       if (opcode == MULT_EXPR
1112           && gimple_assign_rhs_code (stmt) == opcode
1113           && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
1114         {
1115           gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1116           if (stmt_is_power_of_op (stmt2, op))
1117             {
1118               if (decrement_power (stmt2) == 1)
1119                 propagate_op_to_single_use (op, stmt2, def);
1120               return;
1121             }
1122         }
1123
1124       /* Continue walking the chain.  */
1125       gcc_assert (name != op
1126                   && TREE_CODE (name) == SSA_NAME);
1127       stmt = SSA_NAME_DEF_STMT (name);
1128     }
1129   while (1);
1130 }
1131
1132 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1133    the result.  Places the statement after the definition of either
1134    OP1 or OP2.  Returns the new statement.  */
1135
1136 static gimple
1137 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1138 {
1139   gimple op1def = NULL, op2def = NULL;
1140   gimple_stmt_iterator gsi;
1141   tree op;
1142   gimple sum;
1143
1144   /* Create the addition statement.  */
1145   op = make_ssa_name (type, NULL);
1146   sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1147
1148   /* Find an insertion place and insert.  */
1149   if (TREE_CODE (op1) == SSA_NAME)
1150     op1def = SSA_NAME_DEF_STMT (op1);
1151   if (TREE_CODE (op2) == SSA_NAME)
1152     op2def = SSA_NAME_DEF_STMT (op2);
1153   if ((!op1def || gimple_nop_p (op1def))
1154       && (!op2def || gimple_nop_p (op2def)))
1155     {
1156       gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1157       gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1158     }
1159   else if ((!op1def || gimple_nop_p (op1def))
1160            || (op2def && !gimple_nop_p (op2def)
1161                && stmt_dominates_stmt_p (op1def, op2def)))
1162     {
1163       if (gimple_code (op2def) == GIMPLE_PHI)
1164         {
1165           gsi = gsi_after_labels (gimple_bb (op2def));
1166           gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1167         }
1168       else
1169         {
1170           if (!stmt_ends_bb_p (op2def))
1171             {
1172               gsi = gsi_for_stmt (op2def);
1173               gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1174             }
1175           else
1176             {
1177               edge e;
1178               edge_iterator ei;
1179
1180               FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1181                 if (e->flags & EDGE_FALLTHRU)
1182                   gsi_insert_on_edge_immediate (e, sum);
1183             }
1184         }
1185     }
1186   else
1187     {
1188       if (gimple_code (op1def) == GIMPLE_PHI)
1189         {
1190           gsi = gsi_after_labels (gimple_bb (op1def));
1191           gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1192         }
1193       else
1194         {
1195           if (!stmt_ends_bb_p (op1def))
1196             {
1197               gsi = gsi_for_stmt (op1def);
1198               gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1199             }
1200           else
1201             {
1202               edge e;
1203               edge_iterator ei;
1204
1205               FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1206                 if (e->flags & EDGE_FALLTHRU)
1207                   gsi_insert_on_edge_immediate (e, sum);
1208             }
1209         }
1210     }
1211   update_stmt (sum);
1212
1213   return sum;
1214 }
1215
1216 /* Perform un-distribution of divisions and multiplications.
1217    A * X + B * X is transformed into (A + B) * X and A / X + B / X
1218    to (A + B) / X for real X.
1219
1220    The algorithm is organized as follows.
1221
1222     - First we walk the addition chain *OPS looking for summands that
1223       are defined by a multiplication or a real division.  This results
1224       in the candidates bitmap with relevant indices into *OPS.
1225
1226     - Second we build the chains of multiplications or divisions for
1227       these candidates, counting the number of occurrences of (operand, code)
1228       pairs in all of the candidates chains.
1229
1230     - Third we sort the (operand, code) pairs by number of occurrence and
1231       process them starting with the pair with the most uses.
1232
1233       * For each such pair we walk the candidates again to build a
1234         second candidate bitmap noting all multiplication/division chains
1235         that have at least one occurrence of (operand, code).
1236
1237       * We build an alternate addition chain only covering these
1238         candidates with one (operand, code) operation removed from their
1239         multiplication/division chain.
1240
1241       * The first candidate gets replaced by the alternate addition chain
1242         multiplied/divided by the operand.
1243
1244       * All candidate chains get disabled for further processing and
1245         processing of (operand, code) pairs continues.
1246
1247   The alternate addition chains built are re-processed by the main
1248   reassociation algorithm which allows optimizing a * x * y + b * y * x
1249   to (a + b ) * x * y in one invocation of the reassociation pass.  */
1250
1251 static bool
1252 undistribute_ops_list (enum tree_code opcode,
1253                        vec<operand_entry_t> *ops, struct loop *loop)
1254 {
1255   unsigned int length = ops->length ();
1256   operand_entry_t oe1;
1257   unsigned i, j;
1258   sbitmap candidates, candidates2;
1259   unsigned nr_candidates, nr_candidates2;
1260   sbitmap_iterator sbi0;
1261   vec<operand_entry_t> *subops;
1262   htab_t ctable;
1263   bool changed = false;
1264   int next_oecount_id = 0;
1265
1266   if (length <= 1
1267       || opcode != PLUS_EXPR)
1268     return false;
1269
1270   /* Build a list of candidates to process.  */
1271   candidates = sbitmap_alloc (length);
1272   bitmap_clear (candidates);
1273   nr_candidates = 0;
1274   FOR_EACH_VEC_ELT (*ops, i, oe1)
1275     {
1276       enum tree_code dcode;
1277       gimple oe1def;
1278
1279       if (TREE_CODE (oe1->op) != SSA_NAME)
1280         continue;
1281       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1282       if (!is_gimple_assign (oe1def))
1283         continue;
1284       dcode = gimple_assign_rhs_code (oe1def);
1285       if ((dcode != MULT_EXPR
1286            && dcode != RDIV_EXPR)
1287           || !is_reassociable_op (oe1def, dcode, loop))
1288         continue;
1289
1290       bitmap_set_bit (candidates, i);
1291       nr_candidates++;
1292     }
1293
1294   if (nr_candidates < 2)
1295     {
1296       sbitmap_free (candidates);
1297       return false;
1298     }
1299
1300   if (dump_file && (dump_flags & TDF_DETAILS))
1301     {
1302       fprintf (dump_file, "searching for un-distribute opportunities ");
1303       print_generic_expr (dump_file,
1304         (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1305       fprintf (dump_file, " %d\n", nr_candidates);
1306     }
1307
1308   /* Build linearized sub-operand lists and the counting table.  */
1309   cvec.create (0);
1310   ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
1311   /* ??? Macro arguments cannot have multi-argument template types in
1312      them.  This typedef is needed to workaround that limitation.  */
1313   typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1314   subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1315   EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1316     {
1317       gimple oedef;
1318       enum tree_code oecode;
1319       unsigned j;
1320
1321       oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1322       oecode = gimple_assign_rhs_code (oedef);
1323       linearize_expr_tree (&subops[i], oedef,
1324                            associative_tree_code (oecode), false);
1325
1326       FOR_EACH_VEC_ELT (subops[i], j, oe1)
1327         {
1328           oecount c;
1329           void **slot;
1330           size_t idx;
1331           c.oecode = oecode;
1332           c.cnt = 1;
1333           c.id = next_oecount_id++;
1334           c.op = oe1->op;
1335           cvec.safe_push (c);
1336           idx = cvec.length () + 41;
1337           slot = htab_find_slot (ctable, (void *)idx, INSERT);
1338           if (!*slot)
1339             {
1340               *slot = (void *)idx;
1341             }
1342           else
1343             {
1344               cvec.pop ();
1345               cvec[(size_t)*slot - 42].cnt++;
1346             }
1347         }
1348     }
1349   htab_delete (ctable);
1350
1351   /* Sort the counting table.  */
1352   cvec.qsort (oecount_cmp);
1353
1354   if (dump_file && (dump_flags & TDF_DETAILS))
1355     {
1356       oecount *c;
1357       fprintf (dump_file, "Candidates:\n");
1358       FOR_EACH_VEC_ELT (cvec, j, c)
1359         {
1360           fprintf (dump_file, "  %u %s: ", c->cnt,
1361                    c->oecode == MULT_EXPR
1362                    ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1363           print_generic_expr (dump_file, c->op, 0);
1364           fprintf (dump_file, "\n");
1365         }
1366     }
1367
1368   /* Process the (operand, code) pairs in order of most occurence.  */
1369   candidates2 = sbitmap_alloc (length);
1370   while (!cvec.is_empty ())
1371     {
1372       oecount *c = &cvec.last ();
1373       if (c->cnt < 2)
1374         break;
1375
1376       /* Now collect the operands in the outer chain that contain
1377          the common operand in their inner chain.  */
1378       bitmap_clear (candidates2);
1379       nr_candidates2 = 0;
1380       EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1381         {
1382           gimple oedef;
1383           enum tree_code oecode;
1384           unsigned j;
1385           tree op = (*ops)[i]->op;
1386
1387           /* If we undistributed in this chain already this may be
1388              a constant.  */
1389           if (TREE_CODE (op) != SSA_NAME)
1390             continue;
1391
1392           oedef = SSA_NAME_DEF_STMT (op);
1393           oecode = gimple_assign_rhs_code (oedef);
1394           if (oecode != c->oecode)
1395             continue;
1396
1397           FOR_EACH_VEC_ELT (subops[i], j, oe1)
1398             {
1399               if (oe1->op == c->op)
1400                 {
1401                   bitmap_set_bit (candidates2, i);
1402                   ++nr_candidates2;
1403                   break;
1404                 }
1405             }
1406         }
1407
1408       if (nr_candidates2 >= 2)
1409         {
1410           operand_entry_t oe1, oe2;
1411           gimple prod;
1412           int first = bitmap_first_set_bit (candidates2);
1413
1414           /* Build the new addition chain.  */
1415           oe1 = (*ops)[first];
1416           if (dump_file && (dump_flags & TDF_DETAILS))
1417             {
1418               fprintf (dump_file, "Building (");
1419               print_generic_expr (dump_file, oe1->op, 0);
1420             }
1421           zero_one_operation (&oe1->op, c->oecode, c->op);
1422           EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1423             {
1424               gimple sum;
1425               oe2 = (*ops)[i];
1426               if (dump_file && (dump_flags & TDF_DETAILS))
1427                 {
1428                   fprintf (dump_file, " + ");
1429                   print_generic_expr (dump_file, oe2->op, 0);
1430                 }
1431               zero_one_operation (&oe2->op, c->oecode, c->op);
1432               sum = build_and_add_sum (TREE_TYPE (oe1->op),
1433                                        oe1->op, oe2->op, opcode);
1434               oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1435               oe2->rank = 0;
1436               oe1->op = gimple_get_lhs (sum);
1437             }
1438
1439           /* Apply the multiplication/division.  */
1440           prod = build_and_add_sum (TREE_TYPE (oe1->op),
1441                                     oe1->op, c->op, c->oecode);
1442           if (dump_file && (dump_flags & TDF_DETAILS))
1443             {
1444               fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1445               print_generic_expr (dump_file, c->op, 0);
1446               fprintf (dump_file, "\n");
1447             }
1448
1449           /* Record it in the addition chain and disable further
1450              undistribution with this op.  */
1451           oe1->op = gimple_assign_lhs (prod);
1452           oe1->rank = get_rank (oe1->op);
1453           subops[first].release ();
1454
1455           changed = true;
1456         }
1457
1458       cvec.pop ();
1459     }
1460
1461   for (i = 0; i < ops->length (); ++i)
1462     subops[i].release ();
1463   free (subops);
1464   cvec.release ();
1465   sbitmap_free (candidates);
1466   sbitmap_free (candidates2);
1467
1468   return changed;
1469 }
1470
1471 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1472    expression, examine the other OPS to see if any of them are comparisons
1473    of the same values, which we may be able to combine or eliminate.
1474    For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
1475
1476 static bool
1477 eliminate_redundant_comparison (enum tree_code opcode,
1478                                 vec<operand_entry_t> *ops,
1479                                 unsigned int currindex,
1480                                 operand_entry_t curr)
1481 {
1482   tree op1, op2;
1483   enum tree_code lcode, rcode;
1484   gimple def1, def2;
1485   int i;
1486   operand_entry_t oe;
1487
1488   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1489     return false;
1490
1491   /* Check that CURR is a comparison.  */
1492   if (TREE_CODE (curr->op) != SSA_NAME)
1493     return false;
1494   def1 = SSA_NAME_DEF_STMT (curr->op);
1495   if (!is_gimple_assign (def1))
1496     return false;
1497   lcode = gimple_assign_rhs_code (def1);
1498   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1499     return false;
1500   op1 = gimple_assign_rhs1 (def1);
1501   op2 = gimple_assign_rhs2 (def1);
1502
1503   /* Now look for a similar comparison in the remaining OPS.  */
1504   for (i = currindex + 1; ops->iterate (i, &oe); i++)
1505     {
1506       tree t;
1507
1508       if (TREE_CODE (oe->op) != SSA_NAME)
1509         continue;
1510       def2 = SSA_NAME_DEF_STMT (oe->op);
1511       if (!is_gimple_assign (def2))
1512         continue;
1513       rcode = gimple_assign_rhs_code (def2);
1514       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1515         continue;
1516
1517       /* If we got here, we have a match.  See if we can combine the
1518          two comparisons.  */
1519       if (opcode == BIT_IOR_EXPR)
1520         t = maybe_fold_or_comparisons (lcode, op1, op2,
1521                                        rcode, gimple_assign_rhs1 (def2),
1522                                        gimple_assign_rhs2 (def2));
1523       else
1524         t = maybe_fold_and_comparisons (lcode, op1, op2,
1525                                         rcode, gimple_assign_rhs1 (def2),
1526                                         gimple_assign_rhs2 (def2));
1527       if (!t)
1528         continue;
1529
1530       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1531          always give us a boolean_type_node value back.  If the original
1532          BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1533          we need to convert.  */
1534       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1535         t = fold_convert (TREE_TYPE (curr->op), t);
1536
1537       if (TREE_CODE (t) != INTEGER_CST
1538           && !operand_equal_p (t, curr->op, 0))
1539         {
1540           enum tree_code subcode;
1541           tree newop1, newop2;
1542           if (!COMPARISON_CLASS_P (t))
1543             continue;
1544           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1545           STRIP_USELESS_TYPE_CONVERSION (newop1);
1546           STRIP_USELESS_TYPE_CONVERSION (newop2);
1547           if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1548             continue;
1549         }
1550
1551       if (dump_file && (dump_flags & TDF_DETAILS))
1552         {
1553           fprintf (dump_file, "Equivalence: ");
1554           print_generic_expr (dump_file, curr->op, 0);
1555           fprintf (dump_file, " %s ", op_symbol_code (opcode));
1556           print_generic_expr (dump_file, oe->op, 0);
1557           fprintf (dump_file, " -> ");
1558           print_generic_expr (dump_file, t, 0);
1559           fprintf (dump_file, "\n");
1560         }
1561
1562       /* Now we can delete oe, as it has been subsumed by the new combined
1563          expression t.  */
1564       ops->ordered_remove (i);
1565       reassociate_stats.ops_eliminated ++;
1566
1567       /* If t is the same as curr->op, we're done.  Otherwise we must
1568          replace curr->op with t.  Special case is if we got a constant
1569          back, in which case we add it to the end instead of in place of
1570          the current entry.  */
1571       if (TREE_CODE (t) == INTEGER_CST)
1572         {
1573           ops->ordered_remove (currindex);
1574           add_to_ops_vec (ops, t);
1575         }
1576       else if (!operand_equal_p (t, curr->op, 0))
1577         {
1578           gimple sum;
1579           enum tree_code subcode;
1580           tree newop1;
1581           tree newop2;
1582           gcc_assert (COMPARISON_CLASS_P (t));
1583           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1584           STRIP_USELESS_TYPE_CONVERSION (newop1);
1585           STRIP_USELESS_TYPE_CONVERSION (newop2);
1586           gcc_checking_assert (is_gimple_val (newop1)
1587                                && is_gimple_val (newop2));
1588           sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1589           curr->op = gimple_get_lhs (sum);
1590         }
1591       return true;
1592     }
1593
1594   return false;
1595 }
1596
1597 /* Perform various identities and other optimizations on the list of
1598    operand entries, stored in OPS.  The tree code for the binary
1599    operation between all the operands is OPCODE.  */
1600
1601 static void
1602 optimize_ops_list (enum tree_code opcode,
1603                    vec<operand_entry_t> *ops)
1604 {
1605   unsigned int length = ops->length ();
1606   unsigned int i;
1607   operand_entry_t oe;
1608   operand_entry_t oelast = NULL;
1609   bool iterate = false;
1610
1611   if (length == 1)
1612     return;
1613
1614   oelast = ops->last ();
1615
1616   /* If the last two are constants, pop the constants off, merge them
1617      and try the next two.  */
1618   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1619     {
1620       operand_entry_t oelm1 = (*ops)[length - 2];
1621
1622       if (oelm1->rank == 0
1623           && is_gimple_min_invariant (oelm1->op)
1624           && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1625                                        TREE_TYPE (oelast->op)))
1626         {
1627           tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1628                                      oelm1->op, oelast->op);
1629
1630           if (folded && is_gimple_min_invariant (folded))
1631             {
1632               if (dump_file && (dump_flags & TDF_DETAILS))
1633                 fprintf (dump_file, "Merging constants\n");
1634
1635               ops->pop ();
1636               ops->pop ();
1637
1638               add_to_ops_vec (ops, folded);
1639               reassociate_stats.constants_eliminated++;
1640
1641               optimize_ops_list (opcode, ops);
1642               return;
1643             }
1644         }
1645     }
1646
1647   eliminate_using_constants (opcode, ops);
1648   oelast = NULL;
1649
1650   for (i = 0; ops->iterate (i, &oe);)
1651     {
1652       bool done = false;
1653
1654       if (eliminate_not_pairs (opcode, ops, i, oe))
1655         return;
1656       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1657           || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1658           || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1659         {
1660           if (done)
1661             return;
1662           iterate = true;
1663           oelast = NULL;
1664           continue;
1665         }
1666       oelast = oe;
1667       i++;
1668     }
1669
1670   length = ops->length ();
1671   oelast = ops->last ();
1672
1673   if (iterate)
1674     optimize_ops_list (opcode, ops);
1675 }
1676
1677 /* The following functions are subroutines to optimize_range_tests and allow
1678    it to try to change a logical combination of comparisons into a range
1679    test.
1680
1681    For example, both
1682         X == 2 || X == 5 || X == 3 || X == 4
1683    and
1684         X >= 2 && X <= 5
1685    are converted to
1686         (unsigned) (X - 2) <= 3
1687
1688    For more information see comments above fold_test_range in fold-const.c,
1689    this implementation is for GIMPLE.  */
1690
1691 struct range_entry
1692 {
1693   tree exp;
1694   tree low;
1695   tree high;
1696   bool in_p;
1697   bool strict_overflow_p;
1698   unsigned int idx, next;
1699 };
1700
1701 /* This is similar to make_range in fold-const.c, but on top of
1702    GIMPLE instead of trees.  If EXP is non-NULL, it should be
1703    an SSA_NAME and STMT argument is ignored, otherwise STMT
1704    argument should be a GIMPLE_COND.  */
1705
1706 static void
1707 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1708 {
1709   int in_p;
1710   tree low, high;
1711   bool is_bool, strict_overflow_p;
1712
1713   r->exp = NULL_TREE;
1714   r->in_p = false;
1715   r->strict_overflow_p = false;
1716   r->low = NULL_TREE;
1717   r->high = NULL_TREE;
1718   if (exp != NULL_TREE
1719       && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1720     return;
1721
1722   /* Start with simply saying "EXP != 0" and then look at the code of EXP
1723      and see if we can refine the range.  Some of the cases below may not
1724      happen, but it doesn't seem worth worrying about this.  We "continue"
1725      the outer loop when we've changed something; otherwise we "break"
1726      the switch, which will "break" the while.  */
1727   low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1728   high = low;
1729   in_p = 0;
1730   strict_overflow_p = false;
1731   is_bool = false;
1732   if (exp == NULL_TREE)
1733     is_bool = true;
1734   else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1735     {
1736       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1737         is_bool = true;
1738       else
1739         return;
1740     }
1741   else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1742     is_bool = true;
1743
1744   while (1)
1745     {
1746       enum tree_code code;
1747       tree arg0, arg1, exp_type;
1748       tree nexp;
1749       location_t loc;
1750
1751       if (exp != NULL_TREE)
1752         {
1753           if (TREE_CODE (exp) != SSA_NAME)
1754             break;
1755
1756           stmt = SSA_NAME_DEF_STMT (exp);
1757           if (!is_gimple_assign (stmt))
1758             break;
1759
1760           code = gimple_assign_rhs_code (stmt);
1761           arg0 = gimple_assign_rhs1 (stmt);
1762           arg1 = gimple_assign_rhs2 (stmt);
1763           exp_type = TREE_TYPE (exp);
1764         }
1765       else
1766         {
1767           code = gimple_cond_code (stmt);
1768           arg0 = gimple_cond_lhs (stmt);
1769           arg1 = gimple_cond_rhs (stmt);
1770           exp_type = boolean_type_node;
1771         }
1772
1773       if (TREE_CODE (arg0) != SSA_NAME)
1774         break;
1775       loc = gimple_location (stmt);
1776       switch (code)
1777         {
1778         case BIT_NOT_EXPR:
1779           if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1780             {
1781               in_p = !in_p;
1782               exp = arg0;
1783               continue;
1784             }
1785           break;
1786         case SSA_NAME:
1787           exp = arg0;
1788           continue;
1789         CASE_CONVERT:
1790           if (is_bool)
1791             goto do_default;
1792           if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1793             {
1794               if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1795                 is_bool = true;
1796               else
1797                 return;
1798             }
1799           else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1800             is_bool = true;
1801           goto do_default;
1802         case EQ_EXPR:
1803         case NE_EXPR:
1804         case LT_EXPR:
1805         case LE_EXPR:
1806         case GE_EXPR:
1807         case GT_EXPR:
1808           is_bool = true;
1809           /* FALLTHRU */
1810         default:
1811           if (!is_bool)
1812             return;
1813         do_default:
1814           nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1815                                   &low, &high, &in_p,
1816                                   &strict_overflow_p);
1817           if (nexp != NULL_TREE)
1818             {
1819               exp = nexp;
1820               gcc_assert (TREE_CODE (exp) == SSA_NAME);
1821               continue;
1822             }
1823           break;
1824         }
1825       break;
1826     }
1827   if (is_bool)
1828     {
1829       r->exp = exp;
1830       r->in_p = in_p;
1831       r->low = low;
1832       r->high = high;
1833       r->strict_overflow_p = strict_overflow_p;
1834     }
1835 }
1836
1837 /* Comparison function for qsort.  Sort entries
1838    without SSA_NAME exp first, then with SSA_NAMEs sorted
1839    by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1840    by increasing ->low and if ->low is the same, by increasing
1841    ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
1842    maximum.  */
1843
1844 static int
1845 range_entry_cmp (const void *a, const void *b)
1846 {
1847   const struct range_entry *p = (const struct range_entry *) a;
1848   const struct range_entry *q = (const struct range_entry *) b;
1849
1850   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1851     {
1852       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1853         {
1854           /* Group range_entries for the same SSA_NAME together.  */
1855           if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1856             return -1;
1857           else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1858             return 1;
1859           /* If ->low is different, NULL low goes first, then by
1860              ascending low.  */
1861           if (p->low != NULL_TREE)
1862             {
1863               if (q->low != NULL_TREE)
1864                 {
1865                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
1866                                           p->low, q->low);
1867                   if (tem && integer_onep (tem))
1868                     return -1;
1869                   tem = fold_binary (GT_EXPR, boolean_type_node,
1870                                      p->low, q->low);
1871                   if (tem && integer_onep (tem))
1872                     return 1;
1873                 }
1874               else
1875                 return 1;
1876             }
1877           else if (q->low != NULL_TREE)
1878             return -1;
1879           /* If ->high is different, NULL high goes last, before that by
1880              ascending high.  */
1881           if (p->high != NULL_TREE)
1882             {
1883               if (q->high != NULL_TREE)
1884                 {
1885                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
1886                                           p->high, q->high);
1887                   if (tem && integer_onep (tem))
1888                     return -1;
1889                   tem = fold_binary (GT_EXPR, boolean_type_node,
1890                                      p->high, q->high);
1891                   if (tem && integer_onep (tem))
1892                     return 1;
1893                 }
1894               else
1895                 return -1;
1896             }
1897           else if (p->high != NULL_TREE)
1898             return 1;
1899           /* If both ranges are the same, sort below by ascending idx.  */
1900         }
1901       else
1902         return 1;
1903     }
1904   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1905     return -1;
1906
1907   if (p->idx < q->idx)
1908     return -1;
1909   else
1910     {
1911       gcc_checking_assert (p->idx > q->idx);
1912       return 1;
1913     }
1914 }
1915
1916 /* Helper routine of optimize_range_test.
1917    [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1918    RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1919    OPCODE and OPS are arguments of optimize_range_tests.  Return
1920    true if the range merge has been successful.
1921    If OPCODE is ERROR_MARK, this is called from within
1922    maybe_optimize_range_tests and is performing inter-bb range optimization.
1923    Changes should be then performed right away, and whether an op is
1924    BIT_AND_EXPR or BIT_IOR_EXPR is found in oe->rank.  */
1925
1926 static bool
1927 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1928                    unsigned int count, enum tree_code opcode,
1929                    vec<operand_entry_t> *ops, tree exp, bool in_p,
1930                    tree low, tree high, bool strict_overflow_p)
1931 {
1932   operand_entry_t oe = (*ops)[range->idx];
1933   tree op = oe->op;
1934   gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id));
1935   location_t loc = gimple_location (stmt);
1936   tree optype = op ? TREE_TYPE (op) : boolean_type_node;
1937   tree tem = build_range_check (loc, optype, exp, in_p, low, high);
1938   enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1939   gimple_stmt_iterator gsi;
1940
1941   if (tem == NULL_TREE)
1942     return false;
1943
1944   if (strict_overflow_p && issue_strict_overflow_warning (wc))
1945     warning_at (loc, OPT_Wstrict_overflow,
1946                 "assuming signed overflow does not occur "
1947                 "when simplifying range test");
1948
1949   if (dump_file && (dump_flags & TDF_DETAILS))
1950     {
1951       struct range_entry *r;
1952       fprintf (dump_file, "Optimizing range tests ");
1953       print_generic_expr (dump_file, range->exp, 0);
1954       fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1955       print_generic_expr (dump_file, range->low, 0);
1956       fprintf (dump_file, ", ");
1957       print_generic_expr (dump_file, range->high, 0);
1958       fprintf (dump_file, "]");
1959       for (r = otherrange; r < otherrange + count; r++)
1960         {
1961           fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
1962           print_generic_expr (dump_file, r->low, 0);
1963           fprintf (dump_file, ", ");
1964           print_generic_expr (dump_file, r->high, 0);
1965           fprintf (dump_file, "]");
1966         }
1967       fprintf (dump_file, "\n into ");
1968       print_generic_expr (dump_file, tem, 0);
1969       fprintf (dump_file, "\n");
1970     }
1971
1972   if (opcode == BIT_IOR_EXPR
1973       || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
1974     tem = invert_truthvalue_loc (loc, tem);
1975
1976   tem = fold_convert_loc (loc, optype, tem);
1977   gsi = gsi_for_stmt (stmt);
1978   tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
1979                                   GSI_SAME_STMT);
1980
1981   /* If doing inter-bb range test optimization, update the
1982      stmts immediately.  Start with changing the first range test
1983      immediate use to the new value (TEM), or, if the first range
1984      test is a GIMPLE_COND stmt, change that condition.  */
1985   if (opcode == ERROR_MARK)
1986     {
1987       if (op)
1988         {
1989           imm_use_iterator iter;
1990           use_operand_p use_p;
1991           gimple use_stmt;
1992
1993           FOR_EACH_IMM_USE_STMT (use_stmt, iter, op)
1994             {
1995               if (is_gimple_debug (use_stmt))
1996                 continue;
1997               FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
1998                 SET_USE (use_p, tem);
1999               update_stmt (use_stmt);
2000             }
2001         }
2002       else
2003         {
2004           gimple_cond_set_code (stmt, NE_EXPR);
2005           gimple_cond_set_lhs (stmt, tem);
2006           gimple_cond_set_rhs (stmt, boolean_false_node);
2007           update_stmt (stmt);
2008         }
2009     }
2010   oe->op = tem;
2011   range->exp = exp;
2012   range->low = low;
2013   range->high = high;
2014   range->in_p = in_p;
2015   range->strict_overflow_p = false;
2016
2017   for (range = otherrange; range < otherrange + count; range++)
2018     {
2019       oe = (*ops)[range->idx];
2020       /* Now change all the other range test immediate uses, so that
2021          those tests will be optimized away.  */
2022       if (opcode == ERROR_MARK)
2023         {
2024           if (oe->op)
2025             {
2026               imm_use_iterator iter;
2027               use_operand_p use_p;
2028               gimple use_stmt;
2029
2030               FOR_EACH_IMM_USE_STMT (use_stmt, iter, oe->op)
2031                 {
2032                   if (is_gimple_debug (use_stmt))
2033                     continue;
2034                   /* If imm use of _8 is a statement like _7 = _8 | _9;,
2035                      adjust it into _7 = _9;.  */
2036                   if (is_gimple_assign (use_stmt)
2037                       && gimple_assign_rhs_code (use_stmt) == oe->rank)
2038                     {
2039                       tree expr = NULL_TREE;
2040                       if (oe->op == gimple_assign_rhs1 (use_stmt))
2041                         expr = gimple_assign_rhs2 (use_stmt);
2042                       else if (oe->op == gimple_assign_rhs2 (use_stmt))
2043                         expr = gimple_assign_rhs1 (use_stmt);
2044                       if (expr
2045                           && expr != oe->op
2046                           && TREE_CODE (expr) == SSA_NAME)
2047                         {
2048                           gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt);
2049                           gimple_assign_set_rhs_with_ops (&gsi2, SSA_NAME,
2050                                                           expr, NULL_TREE);
2051                           update_stmt (use_stmt);
2052                           continue;
2053                         }
2054                     }
2055                   /* If imm use of _8 is a statement like _7 = (int) _8;,
2056                      adjust it into _7 = 0; or _7 = 1;.  */
2057                   if (gimple_assign_cast_p (use_stmt)
2058                       && oe->op == gimple_assign_rhs1 (use_stmt))
2059                     {
2060                       tree lhs = gimple_assign_lhs (use_stmt);
2061                       if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
2062                         {
2063                           gimple_stmt_iterator gsi2
2064                             = gsi_for_stmt (use_stmt);
2065                           tree expr = build_int_cst (TREE_TYPE (lhs),
2066                                                      oe->rank == BIT_IOR_EXPR
2067                                                      ? 0 : 1);
2068                           gimple_assign_set_rhs_with_ops (&gsi2,
2069                                                           INTEGER_CST,
2070                                                           expr, NULL_TREE);
2071                           update_stmt (use_stmt);
2072                           continue;
2073                         }
2074                     }
2075                   /* Otherwise replace the use with 0 or 1.  */
2076                   FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2077                     SET_USE (use_p,
2078                              build_int_cst (TREE_TYPE (oe->op),
2079                                             oe->rank == BIT_IOR_EXPR
2080                                             ? 0 : 1));
2081                   update_stmt (use_stmt);
2082                 }
2083             }
2084           else
2085             {
2086               /* If range test was a GIMPLE_COND, simply change it
2087                  into an always false or always true condition.  */
2088               stmt = last_stmt (BASIC_BLOCK (oe->id));
2089               if (oe->rank == BIT_IOR_EXPR)
2090                 gimple_cond_make_false (stmt);
2091               else
2092                 gimple_cond_make_true (stmt);
2093               update_stmt (stmt);
2094             }
2095         }
2096       oe->op = error_mark_node;
2097       range->exp = NULL_TREE;
2098     }
2099   return true;
2100 }
2101
2102 /* Optimize range tests, similarly how fold_range_test optimizes
2103    it on trees.  The tree code for the binary
2104    operation between all the operands is OPCODE.
2105    If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2106    maybe_optimize_range_tests for inter-bb range optimization.
2107    In that case if oe->op is NULL, oe->id is bb->index whose
2108    GIMPLE_COND is && or ||ed into the test, and oe->rank says
2109    the actual opcode.  */
2110
2111 static void
2112 optimize_range_tests (enum tree_code opcode,
2113                       vec<operand_entry_t> *ops)
2114 {
2115   unsigned int length = ops->length (), i, j, first;
2116   operand_entry_t oe;
2117   struct range_entry *ranges;
2118   bool any_changes = false;
2119
2120   if (length == 1)
2121     return;
2122
2123   ranges = XNEWVEC (struct range_entry, length);
2124   for (i = 0; i < length; i++)
2125     {
2126       oe = (*ops)[i];
2127       ranges[i].idx = i;
2128       init_range_entry (ranges + i, oe->op,
2129                         oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id)));
2130       /* For | invert it now, we will invert it again before emitting
2131          the optimized expression.  */
2132       if (opcode == BIT_IOR_EXPR
2133           || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2134         ranges[i].in_p = !ranges[i].in_p;
2135     }
2136
2137   qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2138   for (i = 0; i < length; i++)
2139     if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2140       break;
2141
2142   /* Try to merge ranges.  */
2143   for (first = i; i < length; i++)
2144     {
2145       tree low = ranges[i].low;
2146       tree high = ranges[i].high;
2147       int in_p = ranges[i].in_p;
2148       bool strict_overflow_p = ranges[i].strict_overflow_p;
2149       int update_fail_count = 0;
2150
2151       for (j = i + 1; j < length; j++)
2152         {
2153           if (ranges[i].exp != ranges[j].exp)
2154             break;
2155           if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2156                              ranges[j].in_p, ranges[j].low, ranges[j].high))
2157             break;
2158           strict_overflow_p |= ranges[j].strict_overflow_p;
2159         }
2160
2161       if (j == i + 1)
2162         continue;
2163
2164       if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2165                              ops, ranges[i].exp, in_p, low, high,
2166                              strict_overflow_p))
2167         {
2168           i = j - 1;
2169           any_changes = true;
2170         }
2171       /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2172          while update_range_test would fail.  */
2173       else if (update_fail_count == 64)
2174         i = j - 1;
2175       else
2176         ++update_fail_count;
2177     }
2178
2179   /* Optimize X == CST1 || X == CST2
2180      if popcount (CST1 ^ CST2) == 1 into
2181      (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2182      Similarly for ranges.  E.g.
2183      X != 2 && X != 3 && X != 10 && X != 11
2184      will be transformed by the above loop into
2185      (X - 2U) <= 1U && (X - 10U) <= 1U
2186      and this loop can transform that into
2187      ((X & ~8) - 2U) <= 1U.  */
2188   for (i = first; i < length; i++)
2189     {
2190       tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
2191
2192       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2193         continue;
2194       type = TREE_TYPE (ranges[i].exp);
2195       if (!INTEGRAL_TYPE_P (type))
2196         continue;
2197       lowi = ranges[i].low;
2198       if (lowi == NULL_TREE)
2199         lowi = TYPE_MIN_VALUE (type);
2200       highi = ranges[i].high;
2201       if (highi == NULL_TREE)
2202         continue;
2203       for (j = i + 1; j < length && j < i + 64; j++)
2204         {
2205           if (ranges[j].exp == NULL_TREE)
2206             continue;
2207           if (ranges[i].exp != ranges[j].exp)
2208             break;
2209           if (ranges[j].in_p)
2210             continue;
2211           lowj = ranges[j].low;
2212           if (lowj == NULL_TREE)
2213             continue;
2214           highj = ranges[j].high;
2215           if (highj == NULL_TREE)
2216             highj = TYPE_MAX_VALUE (type);
2217           tem = fold_binary (GT_EXPR, boolean_type_node,
2218                              lowj, highi);
2219           if (tem == NULL_TREE || !integer_onep (tem))
2220             continue;
2221           lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2222           if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2223             continue;
2224           gcc_checking_assert (!integer_zerop (lowxor));
2225           tem = fold_binary (MINUS_EXPR, type, lowxor,
2226                              build_int_cst (type, 1));
2227           if (tem == NULL_TREE)
2228             continue;
2229           tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
2230           if (tem == NULL_TREE || !integer_zerop (tem))
2231             continue;
2232           highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2233           if (!tree_int_cst_equal (lowxor, highxor))
2234             continue;
2235           tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2236           exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
2237           lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2238           highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2239           if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
2240                                  ranges[i].in_p, lowj, highj,
2241                                  ranges[i].strict_overflow_p
2242                                  || ranges[j].strict_overflow_p))
2243             {
2244               any_changes = true;
2245               break;
2246             }
2247         }
2248     }
2249
2250   if (any_changes && opcode != ERROR_MARK)
2251     {
2252       j = 0;
2253       FOR_EACH_VEC_ELT (*ops, i, oe)
2254         {
2255           if (oe->op == error_mark_node)
2256             continue;
2257           else if (i != j)
2258             (*ops)[j] = oe;
2259           j++;
2260         }
2261       ops->truncate (j);
2262     }
2263
2264   XDELETEVEC (ranges);
2265 }
2266
2267 /* Return true if STMT is a cast like:
2268    <bb N>:
2269    ...
2270    _123 = (int) _234;
2271
2272    <bb M>:
2273    # _345 = PHI <_123(N), 1(...), 1(...)>
2274    where _234 has bool type, _123 has single use and
2275    bb N has a single successor M.  This is commonly used in
2276    the last block of a range test.  */
2277
2278 static bool
2279 final_range_test_p (gimple stmt)
2280 {
2281   basic_block bb, rhs_bb;
2282   edge e;
2283   tree lhs, rhs;
2284   use_operand_p use_p;
2285   gimple use_stmt;
2286
2287   if (!gimple_assign_cast_p (stmt))
2288     return false;
2289   bb = gimple_bb (stmt);
2290   if (!single_succ_p (bb))
2291     return false;
2292   e = single_succ_edge (bb);
2293   if (e->flags & EDGE_COMPLEX)
2294     return false;
2295
2296   lhs = gimple_assign_lhs (stmt);
2297   rhs = gimple_assign_rhs1 (stmt);
2298   if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2299       || TREE_CODE (rhs) != SSA_NAME
2300       || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2301     return false;
2302
2303   /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
2304   if (!single_imm_use (lhs, &use_p, &use_stmt))
2305     return false;
2306
2307   if (gimple_code (use_stmt) != GIMPLE_PHI
2308       || gimple_bb (use_stmt) != e->dest)
2309     return false;
2310
2311   /* And that the rhs is defined in the same loop.  */
2312   rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2313   if (rhs_bb == NULL
2314       || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2315     return false;
2316
2317   return true;
2318 }
2319
2320 /* Return true if BB is suitable basic block for inter-bb range test
2321    optimization.  If BACKWARD is true, BB should be the only predecessor
2322    of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2323    or compared with to find a common basic block to which all conditions
2324    branch to if true resp. false.  If BACKWARD is false, TEST_BB should
2325    be the only predecessor of BB.  */
2326
2327 static bool
2328 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2329                   bool backward)
2330 {
2331   edge_iterator ei, ei2;
2332   edge e, e2;
2333   gimple stmt;
2334   gimple_stmt_iterator gsi;
2335   bool other_edge_seen = false;
2336   bool is_cond;
2337
2338   if (test_bb == bb)
2339     return false;
2340   /* Check last stmt first.  */
2341   stmt = last_stmt (bb);
2342   if (stmt == NULL
2343       || (gimple_code (stmt) != GIMPLE_COND
2344           && (backward || !final_range_test_p (stmt)))
2345       || gimple_visited_p (stmt)
2346       || stmt_could_throw_p (stmt)
2347       || *other_bb == bb)
2348     return false;
2349   is_cond = gimple_code (stmt) == GIMPLE_COND;
2350   if (is_cond)
2351     {
2352       /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2353          goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2354          to *OTHER_BB (if not set yet, try to find it out).  */
2355       if (EDGE_COUNT (bb->succs) != 2)
2356         return false;
2357       FOR_EACH_EDGE (e, ei, bb->succs)
2358         {
2359           if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2360             return false;
2361           if (e->dest == test_bb)
2362             {
2363               if (backward)
2364                 continue;
2365               else
2366                 return false;
2367             }
2368           if (e->dest == bb)
2369             return false;
2370           if (*other_bb == NULL)
2371             {
2372               FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2373                 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2374                   return false;
2375                 else if (e->dest == e2->dest)
2376                   *other_bb = e->dest;
2377               if (*other_bb == NULL)
2378                 return false;
2379             }
2380           if (e->dest == *other_bb)
2381             other_edge_seen = true;
2382           else if (backward)
2383             return false;
2384         }
2385       if (*other_bb == NULL || !other_edge_seen)
2386         return false;
2387     }
2388   else if (single_succ (bb) != *other_bb)
2389     return false;
2390
2391   /* Now check all PHIs of *OTHER_BB.  */
2392   e = find_edge (bb, *other_bb);
2393   e2 = find_edge (test_bb, *other_bb);
2394   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2395     {
2396       gimple phi = gsi_stmt (gsi);
2397       /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2398          corresponding to BB and TEST_BB predecessor must be the same.  */
2399       if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2400                             gimple_phi_arg_def (phi, e2->dest_idx), 0))
2401         {
2402           /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2403              one of the PHIs should have the lhs of the last stmt in
2404              that block as PHI arg and that PHI should have 0 or 1
2405              corresponding to it in all other range test basic blocks
2406              considered.  */
2407           if (!is_cond)
2408             {
2409               if (gimple_phi_arg_def (phi, e->dest_idx)
2410                   == gimple_assign_lhs (stmt)
2411                   && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2412                       || integer_onep (gimple_phi_arg_def (phi,
2413                                                            e2->dest_idx))))
2414                 continue;
2415             }
2416           else
2417             {
2418               gimple test_last = last_stmt (test_bb);
2419               if (gimple_code (test_last) != GIMPLE_COND
2420                   && gimple_phi_arg_def (phi, e2->dest_idx)
2421                      == gimple_assign_lhs (test_last)
2422                   && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2423                       || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2424                 continue;
2425             }
2426
2427           return false;
2428         }
2429     }
2430   return true;
2431 }
2432
2433 /* Return true if BB doesn't have side-effects that would disallow
2434    range test optimization, all SSA_NAMEs set in the bb are consumed
2435    in the bb and there are no PHIs.  */
2436
2437 static bool
2438 no_side_effect_bb (basic_block bb)
2439 {
2440   gimple_stmt_iterator gsi;
2441   gimple last;
2442
2443   if (!gimple_seq_empty_p (phi_nodes (bb)))
2444     return false;
2445   last = last_stmt (bb);
2446   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2447     {
2448       gimple stmt = gsi_stmt (gsi);
2449       tree lhs;
2450       imm_use_iterator imm_iter;
2451       use_operand_p use_p;
2452
2453       if (is_gimple_debug (stmt))
2454         continue;
2455       if (gimple_has_side_effects (stmt))
2456         return false;
2457       if (stmt == last)
2458         return true;
2459       if (!is_gimple_assign (stmt))
2460         return false;
2461       lhs = gimple_assign_lhs (stmt);
2462       if (TREE_CODE (lhs) != SSA_NAME)
2463         return false;
2464       if (gimple_assign_rhs_could_trap_p (stmt))
2465         return false;
2466       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2467         {
2468           gimple use_stmt = USE_STMT (use_p);
2469           if (is_gimple_debug (use_stmt))
2470             continue;
2471           if (gimple_bb (use_stmt) != bb)
2472             return false;
2473         }
2474     }
2475   return false;
2476 }
2477
2478 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2479    return true and fill in *OPS recursively.  */
2480
2481 static bool
2482 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2483          struct loop *loop)
2484 {
2485   gimple stmt = SSA_NAME_DEF_STMT (var);
2486   tree rhs[2];
2487   int i;
2488
2489   if (!is_reassociable_op (stmt, code, loop))
2490     return false;
2491
2492   rhs[0] = gimple_assign_rhs1 (stmt);
2493   rhs[1] = gimple_assign_rhs2 (stmt);
2494   gimple_set_visited (stmt, true);
2495   for (i = 0; i < 2; i++)
2496     if (TREE_CODE (rhs[i]) == SSA_NAME
2497         && !get_ops (rhs[i], code, ops, loop)
2498         && has_single_use (rhs[i]))
2499       {
2500         operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2501
2502         oe->op = rhs[i];
2503         oe->rank = code;
2504         oe->id = 0;
2505         oe->count = 1;
2506         ops->safe_push (oe);
2507       }
2508   return true;
2509 }
2510
2511 /* Inter-bb range test optimization.  */
2512
2513 static void
2514 maybe_optimize_range_tests (gimple stmt)
2515 {
2516   basic_block first_bb = gimple_bb (stmt);
2517   basic_block last_bb = first_bb;
2518   basic_block other_bb = NULL;
2519   basic_block bb;
2520   edge_iterator ei;
2521   edge e;
2522   vec<operand_entry_t> ops = vec<operand_entry_t>();
2523
2524   /* Consider only basic blocks that end with GIMPLE_COND or
2525      a cast statement satisfying final_range_test_p.  All
2526      but the last bb in the first_bb .. last_bb range
2527      should end with GIMPLE_COND.  */
2528   if (gimple_code (stmt) == GIMPLE_COND)
2529     {
2530       if (EDGE_COUNT (first_bb->succs) != 2)
2531         return;
2532     }
2533   else if (final_range_test_p (stmt))
2534     other_bb = single_succ (first_bb);
2535   else
2536     return;
2537
2538   if (stmt_could_throw_p (stmt))
2539     return;
2540
2541   /* As relative ordering of post-dominator sons isn't fixed,
2542      maybe_optimize_range_tests can be called first on any
2543      bb in the range we want to optimize.  So, start searching
2544      backwards, if first_bb can be set to a predecessor.  */
2545   while (single_pred_p (first_bb))
2546     {
2547       basic_block pred_bb = single_pred (first_bb);
2548       if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2549         break;
2550       if (!no_side_effect_bb (first_bb))
2551         break;
2552       first_bb = pred_bb;
2553     }
2554   /* If first_bb is last_bb, other_bb hasn't been computed yet.
2555      Before starting forward search in last_bb successors, find
2556      out the other_bb.  */
2557   if (first_bb == last_bb)
2558     {
2559       other_bb = NULL;
2560       /* As non-GIMPLE_COND last stmt always terminates the range,
2561          if forward search didn't discover anything, just give up.  */
2562       if (gimple_code (stmt) != GIMPLE_COND)
2563         return;
2564       /* Look at both successors.  Either it ends with a GIMPLE_COND
2565          and satisfies suitable_cond_bb, or ends with a cast and
2566          other_bb is that cast's successor.  */
2567       FOR_EACH_EDGE (e, ei, first_bb->succs)
2568         if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2569             || e->dest == first_bb)
2570           return;
2571         else if (single_pred_p (e->dest))
2572           {
2573             stmt = last_stmt (e->dest);
2574             if (stmt
2575                 && gimple_code (stmt) == GIMPLE_COND
2576                 && EDGE_COUNT (e->dest->succs) == 2)
2577               {
2578                 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2579                   break;
2580                 else
2581                   other_bb = NULL;
2582               }
2583             else if (stmt
2584                      && final_range_test_p (stmt)
2585                      && find_edge (first_bb, single_succ (e->dest)))
2586               {
2587                 other_bb = single_succ (e->dest);
2588                 if (other_bb == first_bb)
2589                   other_bb = NULL;
2590               }
2591           }
2592       if (other_bb == NULL)
2593         return;
2594     }
2595   /* Now do the forward search, moving last_bb to successor bbs
2596      that aren't other_bb.  */
2597   while (EDGE_COUNT (last_bb->succs) == 2)
2598     {
2599       FOR_EACH_EDGE (e, ei, last_bb->succs)
2600         if (e->dest != other_bb)
2601           break;
2602       if (e == NULL)
2603         break;
2604       if (!single_pred_p (e->dest))
2605         break;
2606       if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2607         break;
2608       if (!no_side_effect_bb (e->dest))
2609         break;
2610       last_bb = e->dest;
2611     }
2612   if (first_bb == last_bb)
2613     return;
2614   /* Here basic blocks first_bb through last_bb's predecessor
2615      end with GIMPLE_COND, all of them have one of the edges to
2616      other_bb and another to another block in the range,
2617      all blocks except first_bb don't have side-effects and
2618      last_bb ends with either GIMPLE_COND, or cast satisfying
2619      final_range_test_p.  */
2620   for (bb = last_bb; ; bb = single_pred (bb))
2621     {
2622       enum tree_code code;
2623       tree lhs, rhs;
2624
2625       e = find_edge (bb, other_bb);
2626       stmt = last_stmt (bb);
2627       gimple_set_visited (stmt, true);
2628       if (gimple_code (stmt) != GIMPLE_COND)
2629         {
2630           use_operand_p use_p;
2631           gimple phi;
2632           edge e2;
2633           unsigned int d;
2634
2635           lhs = gimple_assign_lhs (stmt);
2636           rhs = gimple_assign_rhs1 (stmt);
2637           gcc_assert (bb == last_bb);
2638
2639           /* stmt is
2640              _123 = (int) _234;
2641
2642              followed by:
2643              <bb M>:
2644              # _345 = PHI <_123(N), 1(...), 1(...)>
2645
2646              or 0 instead of 1.  If it is 0, the _234
2647              range test is anded together with all the
2648              other range tests, if it is 1, it is ored with
2649              them.  */
2650           single_imm_use (lhs, &use_p, &phi);
2651           gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2652           e2 = find_edge (first_bb, other_bb);
2653           d = e2->dest_idx;
2654           gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2655           if (integer_zerop (gimple_phi_arg_def (phi, d)))
2656             code = BIT_AND_EXPR;
2657           else
2658             {
2659               gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2660               code = BIT_IOR_EXPR;
2661             }
2662
2663           /* If _234 SSA_NAME_DEF_STMT is
2664              _234 = _567 | _789;
2665              (or &, corresponding to 1/0 in the phi arguments,
2666              push into ops the individual range test arguments
2667              of the bitwise or resp. and, recursively.  */
2668           if (!get_ops (rhs, code, &ops,
2669                         loop_containing_stmt (stmt))
2670               && has_single_use (rhs))
2671             {
2672               /* Otherwise, push the _234 range test itself.  */
2673               operand_entry_t oe
2674                 = (operand_entry_t) pool_alloc (operand_entry_pool);
2675
2676               oe->op = rhs;
2677               oe->rank = code;
2678               oe->id = 0;
2679               oe->count = 1;
2680               ops.safe_push (oe);
2681             }
2682           continue;
2683         }
2684       /* Otherwise stmt is GIMPLE_COND.  */
2685       code = gimple_cond_code (stmt);
2686       lhs = gimple_cond_lhs (stmt);
2687       rhs = gimple_cond_rhs (stmt);
2688       if (TREE_CODE (lhs) == SSA_NAME
2689           && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2690           && ((code != EQ_EXPR && code != NE_EXPR)
2691               || rhs != boolean_false_node
2692                  /* Either push into ops the individual bitwise
2693                     or resp. and operands, depending on which
2694                     edge is other_bb.  */
2695               || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2696                                  ^ (code == EQ_EXPR))
2697                                 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2698                            loop_containing_stmt (stmt))))
2699         {
2700           /* Or push the GIMPLE_COND stmt itself.  */
2701           operand_entry_t oe
2702             = (operand_entry_t) pool_alloc (operand_entry_pool);
2703
2704           oe->op = NULL;
2705           oe->rank = (e->flags & EDGE_TRUE_VALUE)
2706                      ? BIT_IOR_EXPR : BIT_AND_EXPR;
2707           /* oe->op = NULL signs that there is no SSA_NAME
2708              for the range test, and oe->id instead is the
2709              basic block number, at which's end the GIMPLE_COND
2710              is.  */
2711           oe->id = bb->index;
2712           oe->count = 1;
2713           ops.safe_push (oe);
2714         }
2715       if (bb == first_bb)
2716         break;
2717     }
2718   if (ops.length () > 1)
2719     optimize_range_tests (ERROR_MARK, &ops);
2720   ops.release ();
2721 }
2722
2723 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2724    of STMT in it's operands.  This is also known as a "destructive
2725    update" operation.  */
2726
2727 static bool
2728 is_phi_for_stmt (gimple stmt, tree operand)
2729 {
2730   gimple def_stmt;
2731   tree lhs;
2732   use_operand_p arg_p;
2733   ssa_op_iter i;
2734
2735   if (TREE_CODE (operand) != SSA_NAME)
2736     return false;
2737
2738   lhs = gimple_assign_lhs (stmt);
2739
2740   def_stmt = SSA_NAME_DEF_STMT (operand);
2741   if (gimple_code (def_stmt) != GIMPLE_PHI)
2742     return false;
2743
2744   FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2745     if (lhs == USE_FROM_PTR (arg_p))
2746       return true;
2747   return false;
2748 }
2749
2750 /* Remove def stmt of VAR if VAR has zero uses and recurse
2751    on rhs1 operand if so.  */
2752
2753 static void
2754 remove_visited_stmt_chain (tree var)
2755 {
2756   gimple stmt;
2757   gimple_stmt_iterator gsi;
2758
2759   while (1)
2760     {
2761       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2762         return;
2763       stmt = SSA_NAME_DEF_STMT (var);
2764       if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
2765         {
2766           var = gimple_assign_rhs1 (stmt);
2767           gsi = gsi_for_stmt (stmt);
2768           gsi_remove (&gsi, true);
2769           release_defs (stmt);
2770         }
2771       else
2772         return;
2773     }
2774 }
2775
2776 /* This function checks three consequtive operands in
2777    passed operands vector OPS starting from OPINDEX and
2778    swaps two operands if it is profitable for binary operation
2779    consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
2780
2781    We pair ops with the same rank if possible.
2782
2783    The alternative we try is to see if STMT is a destructive
2784    update style statement, which is like:
2785    b = phi (a, ...)
2786    a = c + b;
2787    In that case, we want to use the destructive update form to
2788    expose the possible vectorizer sum reduction opportunity.
2789    In that case, the third operand will be the phi node. This
2790    check is not performed if STMT is null.
2791
2792    We could, of course, try to be better as noted above, and do a
2793    lot of work to try to find these opportunities in >3 operand
2794    cases, but it is unlikely to be worth it.  */
2795
2796 static void
2797 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
2798                           unsigned int opindex, gimple stmt)
2799 {
2800   operand_entry_t oe1, oe2, oe3;
2801
2802   oe1 = ops[opindex];
2803   oe2 = ops[opindex + 1];
2804   oe3 = ops[opindex + 2];
2805
2806   if ((oe1->rank == oe2->rank
2807        && oe2->rank != oe3->rank)
2808       || (stmt && is_phi_for_stmt (stmt, oe3->op)
2809           && !is_phi_for_stmt (stmt, oe1->op)
2810           && !is_phi_for_stmt (stmt, oe2->op)))
2811     {
2812       struct operand_entry temp = *oe3;
2813       oe3->op = oe1->op;
2814       oe3->rank = oe1->rank;
2815       oe1->op = temp.op;
2816       oe1->rank= temp.rank;
2817     }
2818   else if ((oe1->rank == oe3->rank
2819             && oe2->rank != oe3->rank)
2820            || (stmt && is_phi_for_stmt (stmt, oe2->op)
2821                && !is_phi_for_stmt (stmt, oe1->op)
2822                && !is_phi_for_stmt (stmt, oe3->op)))
2823     {
2824       struct operand_entry temp = *oe2;
2825       oe2->op = oe1->op;
2826       oe2->rank = oe1->rank;
2827       oe1->op = temp.op;
2828       oe1->rank= temp.rank;
2829     }
2830 }
2831
2832 /* Recursively rewrite our linearized statements so that the operators
2833    match those in OPS[OPINDEX], putting the computation in rank
2834    order.  */
2835
2836 static void
2837 rewrite_expr_tree (gimple stmt, unsigned int opindex,
2838                    vec<operand_entry_t> ops, bool moved)
2839 {
2840   tree rhs1 = gimple_assign_rhs1 (stmt);
2841   tree rhs2 = gimple_assign_rhs2 (stmt);
2842   operand_entry_t oe;
2843
2844   /* If we have three operands left, then we want to make sure the ones
2845      that get the double binary op are chosen wisely.  */
2846   if (opindex + 3 == ops.length ())
2847     swap_ops_for_binary_stmt (ops, opindex, stmt);
2848
2849   /* The final recursion case for this function is that you have
2850      exactly two operations left.
2851      If we had one exactly one op in the entire list to start with, we
2852      would have never called this function, and the tail recursion
2853      rewrites them one at a time.  */
2854   if (opindex + 2 == ops.length ())
2855     {
2856       operand_entry_t oe1, oe2;
2857
2858       oe1 = ops[opindex];
2859       oe2 = ops[opindex + 1];
2860
2861       if (rhs1 != oe1->op || rhs2 != oe2->op)
2862         {
2863           if (dump_file && (dump_flags & TDF_DETAILS))
2864             {
2865               fprintf (dump_file, "Transforming ");
2866               print_gimple_stmt (dump_file, stmt, 0, 0);
2867             }
2868
2869           gimple_assign_set_rhs1 (stmt, oe1->op);
2870           gimple_assign_set_rhs2 (stmt, oe2->op);
2871           update_stmt (stmt);
2872           if (rhs1 != oe1->op && rhs1 != oe2->op)
2873             remove_visited_stmt_chain (rhs1);
2874
2875           if (dump_file && (dump_flags & TDF_DETAILS))
2876             {
2877               fprintf (dump_file, " into ");
2878               print_gimple_stmt (dump_file, stmt, 0, 0);
2879             }
2880         }
2881       return;
2882     }
2883
2884   /* If we hit here, we should have 3 or more ops left.  */
2885   gcc_assert (opindex + 2 < ops.length ());
2886
2887   /* Rewrite the next operator.  */
2888   oe = ops[opindex];
2889
2890   if (oe->op != rhs2)
2891     {
2892       if (!moved)
2893         {
2894           gimple_stmt_iterator gsinow, gsirhs1;
2895           gimple stmt1 = stmt, stmt2;
2896           unsigned int count;
2897
2898           gsinow = gsi_for_stmt (stmt);
2899           count = ops.length () - opindex - 2;
2900           while (count-- != 0)
2901             {
2902               stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
2903               gsirhs1 = gsi_for_stmt (stmt2);
2904               gsi_move_before (&gsirhs1, &gsinow);
2905               gsi_prev (&gsinow);
2906               stmt1 = stmt2;
2907             }
2908           moved = true;
2909         }
2910
2911       if (dump_file && (dump_flags & TDF_DETAILS))
2912         {
2913           fprintf (dump_file, "Transforming ");
2914           print_gimple_stmt (dump_file, stmt, 0, 0);
2915         }
2916
2917       gimple_assign_set_rhs2 (stmt, oe->op);
2918       update_stmt (stmt);
2919
2920       if (dump_file && (dump_flags & TDF_DETAILS))
2921         {
2922           fprintf (dump_file, " into ");
2923           print_gimple_stmt (dump_file, stmt, 0, 0);
2924         }
2925     }
2926   /* Recurse on the LHS of the binary operator, which is guaranteed to
2927      be the non-leaf side.  */
2928   rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
2929 }
2930
2931 /* Find out how many cycles we need to compute statements chain.
2932    OPS_NUM holds number os statements in a chain.  CPU_WIDTH is a
2933    maximum number of independent statements we may execute per cycle.  */
2934
2935 static int
2936 get_required_cycles (int ops_num, int cpu_width)
2937 {
2938   int res;
2939   int elog;
2940   unsigned int rest;
2941
2942   /* While we have more than 2 * cpu_width operands
2943      we may reduce number of operands by cpu_width
2944      per cycle.  */
2945   res = ops_num / (2 * cpu_width);
2946
2947   /* Remained operands count may be reduced twice per cycle
2948      until we have only one operand.  */
2949   rest = (unsigned)(ops_num - res * cpu_width);
2950   elog = exact_log2 (rest);
2951   if (elog >= 0)
2952     res += elog;
2953   else
2954     res += floor_log2 (rest) + 1;
2955
2956   return res;
2957 }
2958
2959 /* Returns an optimal number of registers to use for computation of
2960    given statements.  */
2961
2962 static int
2963 get_reassociation_width (int ops_num, enum tree_code opc,
2964                          enum machine_mode mode)
2965 {
2966   int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
2967   int width;
2968   int width_min;
2969   int cycles_best;
2970
2971   if (param_width > 0)
2972     width = param_width;
2973   else
2974     width = targetm.sched.reassociation_width (opc, mode);
2975
2976   if (width == 1)
2977     return width;
2978
2979   /* Get the minimal time required for sequence computation.  */
2980   cycles_best = get_required_cycles (ops_num, width);
2981
2982   /* Check if we may use less width and still compute sequence for
2983      the same time.  It will allow us to reduce registers usage.
2984      get_required_cycles is monotonically increasing with lower width
2985      so we can perform a binary search for the minimal width that still
2986      results in the optimal cycle count.  */
2987   width_min = 1;
2988   while (width > width_min)
2989     {
2990       int width_mid = (width + width_min) / 2;
2991
2992       if (get_required_cycles (ops_num, width_mid) == cycles_best)
2993         width = width_mid;
2994       else if (width_min < width_mid)
2995         width_min = width_mid;
2996       else
2997         break;
2998     }
2999
3000   return width;
3001 }
3002
3003 /* Recursively rewrite our linearized statements so that the operators
3004    match those in OPS[OPINDEX], putting the computation in rank
3005    order and trying to allow operations to be executed in
3006    parallel.  */
3007
3008 static void
3009 rewrite_expr_tree_parallel (gimple stmt, int width,
3010                             vec<operand_entry_t>  ops)
3011 {
3012   enum tree_code opcode = gimple_assign_rhs_code (stmt);
3013   int op_num = ops.length ();
3014   int stmt_num = op_num - 1;
3015   gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3016   int op_index = op_num - 1;
3017   int stmt_index = 0;
3018   int ready_stmts_end = 0;
3019   int i = 0;
3020   tree last_rhs1 = gimple_assign_rhs1 (stmt);
3021
3022   /* We start expression rewriting from the top statements.
3023      So, in this loop we create a full list of statements
3024      we will work with.  */
3025   stmts[stmt_num - 1] = stmt;
3026   for (i = stmt_num - 2; i >= 0; i--)
3027     stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3028
3029   for (i = 0; i < stmt_num; i++)
3030     {
3031       tree op1, op2;
3032
3033       /* Determine whether we should use results of
3034          already handled statements or not.  */
3035       if (ready_stmts_end == 0
3036           && (i - stmt_index >= width || op_index < 1))
3037         ready_stmts_end = i;
3038
3039       /* Now we choose operands for the next statement.  Non zero
3040          value in ready_stmts_end means here that we should use
3041          the result of already generated statements as new operand.  */
3042       if (ready_stmts_end > 0)
3043         {
3044           op1 = gimple_assign_lhs (stmts[stmt_index++]);
3045           if (ready_stmts_end > stmt_index)
3046             op2 = gimple_assign_lhs (stmts[stmt_index++]);
3047           else if (op_index >= 0)
3048             op2 = ops[op_index--]->op;
3049           else
3050             {
3051               gcc_assert (stmt_index < i);
3052               op2 = gimple_assign_lhs (stmts[stmt_index++]);
3053             }
3054
3055           if (stmt_index >= ready_stmts_end)
3056             ready_stmts_end = 0;
3057         }
3058       else
3059         {
3060           if (op_index > 1)
3061             swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3062           op2 = ops[op_index--]->op;
3063           op1 = ops[op_index--]->op;
3064         }
3065
3066       /* If we emit the last statement then we should put
3067          operands into the last statement.  It will also
3068          break the loop.  */
3069       if (op_index < 0 && stmt_index == i)
3070         i = stmt_num - 1;
3071
3072       if (dump_file && (dump_flags & TDF_DETAILS))
3073         {
3074           fprintf (dump_file, "Transforming ");
3075           print_gimple_stmt (dump_file, stmts[i], 0, 0);
3076         }
3077
3078       /* We keep original statement only for the last one.  All
3079          others are recreated.  */
3080       if (i == stmt_num - 1)
3081         {
3082           gimple_assign_set_rhs1 (stmts[i], op1);
3083           gimple_assign_set_rhs2 (stmts[i], op2);
3084           update_stmt (stmts[i]);
3085         }
3086       else
3087         stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3088
3089       if (dump_file && (dump_flags & TDF_DETAILS))
3090         {
3091           fprintf (dump_file, " into ");
3092           print_gimple_stmt (dump_file, stmts[i], 0, 0);
3093         }
3094     }
3095
3096   remove_visited_stmt_chain (last_rhs1);
3097 }
3098
3099 /* Transform STMT, which is really (A +B) + (C + D) into the left
3100    linear form, ((A+B)+C)+D.
3101    Recurse on D if necessary.  */
3102
3103 static void
3104 linearize_expr (gimple stmt)
3105 {
3106   gimple_stmt_iterator gsinow, gsirhs;
3107   gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3108   gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3109   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3110   gimple newbinrhs = NULL;
3111   struct loop *loop = loop_containing_stmt (stmt);
3112
3113   gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3114               && is_reassociable_op (binrhs, rhscode, loop));
3115
3116   gsinow = gsi_for_stmt (stmt);
3117   gsirhs = gsi_for_stmt (binrhs);
3118   gsi_move_before (&gsirhs, &gsinow);
3119
3120   gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3121   gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
3122   gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3123
3124   if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3125     newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3126
3127   if (dump_file && (dump_flags & TDF_DETAILS))
3128     {
3129       fprintf (dump_file, "Linearized: ");
3130       print_gimple_stmt (dump_file, stmt, 0, 0);
3131     }
3132
3133   reassociate_stats.linearized++;
3134   update_stmt (binrhs);
3135   update_stmt (binlhs);
3136   update_stmt (stmt);
3137
3138   gimple_set_visited (stmt, true);
3139   gimple_set_visited (binlhs, true);
3140   gimple_set_visited (binrhs, true);
3141
3142   /* Tail recurse on the new rhs if it still needs reassociation.  */
3143   if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3144     /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3145            want to change the algorithm while converting to tuples.  */
3146     linearize_expr (stmt);
3147 }
3148
3149 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3150    it.  Otherwise, return NULL.  */
3151
3152 static gimple
3153 get_single_immediate_use (tree lhs)
3154 {
3155   use_operand_p immuse;
3156   gimple immusestmt;
3157
3158   if (TREE_CODE (lhs) == SSA_NAME
3159       && single_imm_use (lhs, &immuse, &immusestmt)
3160       && is_gimple_assign (immusestmt))
3161     return immusestmt;
3162
3163   return NULL;
3164 }
3165
3166 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3167    representing the negated value.  Insertions of any necessary
3168    instructions go before GSI.
3169    This function is recursive in that, if you hand it "a_5" as the
3170    value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3171    transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
3172
3173 static tree
3174 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
3175 {
3176   gimple negatedefstmt= NULL;
3177   tree resultofnegate;
3178
3179   /* If we are trying to negate a name, defined by an add, negate the
3180      add operands instead.  */
3181   if (TREE_CODE (tonegate) == SSA_NAME)
3182     negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3183   if (TREE_CODE (tonegate) == SSA_NAME
3184       && is_gimple_assign (negatedefstmt)
3185       && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3186       && has_single_use (gimple_assign_lhs (negatedefstmt))
3187       && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3188     {
3189       gimple_stmt_iterator gsi;
3190       tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3191       tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3192
3193       gsi = gsi_for_stmt (negatedefstmt);
3194       rhs1 = negate_value (rhs1, &gsi);
3195       gimple_assign_set_rhs1 (negatedefstmt, rhs1);
3196
3197       gsi = gsi_for_stmt (negatedefstmt);
3198       rhs2 = negate_value (rhs2, &gsi);
3199       gimple_assign_set_rhs2 (negatedefstmt, rhs2);
3200
3201       update_stmt (negatedefstmt);
3202       return gimple_assign_lhs (negatedefstmt);
3203     }
3204
3205   tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3206   resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
3207                                              NULL_TREE, true, GSI_SAME_STMT);
3208   return resultofnegate;
3209 }
3210
3211 /* Return true if we should break up the subtract in STMT into an add
3212    with negate.  This is true when we the subtract operands are really
3213    adds, or the subtract itself is used in an add expression.  In
3214    either case, breaking up the subtract into an add with negate
3215    exposes the adds to reassociation.  */
3216
3217 static bool
3218 should_break_up_subtract (gimple stmt)
3219 {
3220   tree lhs = gimple_assign_lhs (stmt);
3221   tree binlhs = gimple_assign_rhs1 (stmt);
3222   tree binrhs = gimple_assign_rhs2 (stmt);
3223   gimple immusestmt;
3224   struct loop *loop = loop_containing_stmt (stmt);
3225
3226   if (TREE_CODE (binlhs) == SSA_NAME
3227       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3228     return true;
3229
3230   if (TREE_CODE (binrhs) == SSA_NAME
3231       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3232     return true;
3233
3234   if (TREE_CODE (lhs) == SSA_NAME
3235       && (immusestmt = get_single_immediate_use (lhs))
3236       && is_gimple_assign (immusestmt)
3237       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3238           ||  gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3239     return true;
3240   return false;
3241 }
3242
3243 /* Transform STMT from A - B into A + -B.  */
3244
3245 static void
3246 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3247 {
3248   tree rhs1 = gimple_assign_rhs1 (stmt);
3249   tree rhs2 = gimple_assign_rhs2 (stmt);
3250
3251   if (dump_file && (dump_flags & TDF_DETAILS))
3252     {
3253       fprintf (dump_file, "Breaking up subtract ");
3254       print_gimple_stmt (dump_file, stmt, 0, 0);
3255     }
3256
3257   rhs2 = negate_value (rhs2, gsip);
3258   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3259   update_stmt (stmt);
3260 }
3261
3262 /* Determine whether STMT is a builtin call that raises an SSA name
3263    to an integer power and has only one use.  If so, and this is early
3264    reassociation and unsafe math optimizations are permitted, place
3265    the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3266    If any of these conditions does not hold, return FALSE.  */
3267
3268 static bool
3269 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3270 {
3271   tree fndecl, arg1;
3272   REAL_VALUE_TYPE c, cint;
3273
3274   if (!first_pass_instance
3275       || !flag_unsafe_math_optimizations
3276       || !is_gimple_call (stmt)
3277       || !has_single_use (gimple_call_lhs (stmt)))
3278     return false;
3279
3280   fndecl = gimple_call_fndecl (stmt);
3281
3282   if (!fndecl
3283       || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3284     return false;
3285
3286   switch (DECL_FUNCTION_CODE (fndecl))
3287     {
3288     CASE_FLT_FN (BUILT_IN_POW):
3289       *base = gimple_call_arg (stmt, 0);
3290       arg1 = gimple_call_arg (stmt, 1);
3291
3292       if (TREE_CODE (arg1) != REAL_CST)
3293         return false;
3294
3295       c = TREE_REAL_CST (arg1);
3296
3297       if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3298         return false;
3299
3300       *exponent = real_to_integer (&c);
3301       real_from_integer (&cint, VOIDmode, *exponent,
3302                          *exponent < 0 ? -1 : 0, 0);
3303       if (!real_identical (&c, &cint))
3304         return false;
3305
3306       break;
3307
3308     CASE_FLT_FN (BUILT_IN_POWI):
3309       *base = gimple_call_arg (stmt, 0);
3310       arg1 = gimple_call_arg (stmt, 1);
3311
3312       if (!host_integerp (arg1, 0))
3313         return false;
3314
3315       *exponent = TREE_INT_CST_LOW (arg1);
3316       break;
3317
3318     default:
3319       return false;
3320     }
3321
3322   /* Expanding negative exponents is generally unproductive, so we don't
3323      complicate matters with those.  Exponents of zero and one should
3324      have been handled by expression folding.  */
3325   if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3326     return false;
3327
3328   return true;
3329 }
3330
3331 /* Recursively linearize a binary expression that is the RHS of STMT.
3332    Place the operands of the expression tree in the vector named OPS.  */
3333
3334 static void
3335 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3336                      bool is_associative, bool set_visited)
3337 {
3338   tree binlhs = gimple_assign_rhs1 (stmt);
3339   tree binrhs = gimple_assign_rhs2 (stmt);
3340   gimple binlhsdef = NULL, binrhsdef = NULL;
3341   bool binlhsisreassoc = false;
3342   bool binrhsisreassoc = false;
3343   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3344   struct loop *loop = loop_containing_stmt (stmt);
3345   tree base = NULL_TREE;
3346   HOST_WIDE_INT exponent = 0;
3347
3348   if (set_visited)
3349     gimple_set_visited (stmt, true);
3350
3351   if (TREE_CODE (binlhs) == SSA_NAME)
3352     {
3353       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3354       binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3355                          && !stmt_could_throw_p (binlhsdef));
3356     }
3357
3358   if (TREE_CODE (binrhs) == SSA_NAME)
3359     {
3360       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3361       binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3362                          && !stmt_could_throw_p (binrhsdef));
3363     }
3364
3365   /* If the LHS is not reassociable, but the RHS is, we need to swap
3366      them.  If neither is reassociable, there is nothing we can do, so
3367      just put them in the ops vector.  If the LHS is reassociable,
3368      linearize it.  If both are reassociable, then linearize the RHS
3369      and the LHS.  */
3370
3371   if (!binlhsisreassoc)
3372     {
3373       tree temp;
3374
3375       /* If this is not a associative operation like division, give up.  */
3376       if (!is_associative)
3377         {
3378           add_to_ops_vec (ops, binrhs);
3379           return;
3380         }
3381
3382       if (!binrhsisreassoc)
3383         {
3384           if (rhscode == MULT_EXPR
3385               && TREE_CODE (binrhs) == SSA_NAME
3386               && acceptable_pow_call (binrhsdef, &base, &exponent))
3387             {
3388               add_repeat_to_ops_vec (ops, base, exponent);
3389               gimple_set_visited (binrhsdef, true);
3390             }
3391           else
3392             add_to_ops_vec (ops, binrhs);
3393
3394           if (rhscode == MULT_EXPR
3395               && TREE_CODE (binlhs) == SSA_NAME
3396               && acceptable_pow_call (binlhsdef, &base, &exponent))
3397             {
3398               add_repeat_to_ops_vec (ops, base, exponent);
3399               gimple_set_visited (binlhsdef, true);
3400             }
3401           else
3402             add_to_ops_vec (ops, binlhs);
3403
3404           return;
3405         }
3406
3407       if (dump_file && (dump_flags & TDF_DETAILS))
3408         {
3409           fprintf (dump_file, "swapping operands of ");
3410           print_gimple_stmt (dump_file, stmt, 0, 0);
3411         }
3412
3413       swap_tree_operands (stmt,
3414                           gimple_assign_rhs1_ptr (stmt),
3415                           gimple_assign_rhs2_ptr (stmt));
3416       update_stmt (stmt);
3417
3418       if (dump_file && (dump_flags & TDF_DETAILS))
3419         {
3420           fprintf (dump_file, " is now ");
3421           print_gimple_stmt (dump_file, stmt, 0, 0);
3422         }
3423
3424       /* We want to make it so the lhs is always the reassociative op,
3425          so swap.  */
3426       temp = binlhs;
3427       binlhs = binrhs;
3428       binrhs = temp;
3429     }
3430   else if (binrhsisreassoc)
3431     {
3432       linearize_expr (stmt);
3433       binlhs = gimple_assign_rhs1 (stmt);
3434       binrhs = gimple_assign_rhs2 (stmt);
3435     }
3436
3437   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3438               || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3439                                       rhscode, loop));
3440   linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3441                        is_associative, set_visited);
3442
3443   if (rhscode == MULT_EXPR
3444       && TREE_CODE (binrhs) == SSA_NAME
3445       && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3446     {
3447       add_repeat_to_ops_vec (ops, base, exponent);
3448       gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3449     }
3450   else
3451     add_to_ops_vec (ops, binrhs);
3452 }
3453
3454 /* Repropagate the negates back into subtracts, since no other pass
3455    currently does it.  */
3456
3457 static void
3458 repropagate_negates (void)
3459 {
3460   unsigned int i = 0;
3461   tree negate;
3462
3463   FOR_EACH_VEC_ELT (plus_negates, i, negate)
3464     {
3465       gimple user = get_single_immediate_use (negate);
3466
3467       if (!user || !is_gimple_assign (user))
3468         continue;
3469
3470       /* The negate operand can be either operand of a PLUS_EXPR
3471          (it can be the LHS if the RHS is a constant for example).
3472
3473          Force the negate operand to the RHS of the PLUS_EXPR, then
3474          transform the PLUS_EXPR into a MINUS_EXPR.  */
3475       if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3476         {
3477           /* If the negated operand appears on the LHS of the
3478              PLUS_EXPR, exchange the operands of the PLUS_EXPR
3479              to force the negated operand to the RHS of the PLUS_EXPR.  */
3480           if (gimple_assign_rhs1 (user) == negate)
3481             {
3482               swap_tree_operands (user,
3483                                   gimple_assign_rhs1_ptr (user),
3484                                   gimple_assign_rhs2_ptr (user));
3485             }
3486
3487           /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3488              the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
3489           if (gimple_assign_rhs2 (user) == negate)
3490             {
3491               tree rhs1 = gimple_assign_rhs1 (user);
3492               tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3493               gimple_stmt_iterator gsi = gsi_for_stmt (user);
3494               gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3495               update_stmt (user);
3496             }
3497         }
3498       else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3499         {
3500           if (gimple_assign_rhs1 (user) == negate)
3501             {
3502               /* We have
3503                    x = -a
3504                    y = x - b
3505                  which we transform into
3506                    x = a + b
3507                    y = -x .
3508                  This pushes down the negate which we possibly can merge
3509                  into some other operation, hence insert it into the
3510                  plus_negates vector.  */
3511               gimple feed = SSA_NAME_DEF_STMT (negate);
3512               tree a = gimple_assign_rhs1 (feed);
3513               tree rhs2 = gimple_assign_rhs2 (user);
3514               gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
3515               gimple_replace_lhs (feed, negate);
3516               gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
3517               update_stmt (gsi_stmt (gsi));
3518               gsi2 = gsi_for_stmt (user);
3519               gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
3520               update_stmt (gsi_stmt (gsi2));
3521               gsi_move_before (&gsi, &gsi2);
3522               plus_negates.safe_push (gimple_assign_lhs (gsi_stmt (gsi2)));
3523             }
3524           else
3525             {
3526               /* Transform "x = -a; y = b - x" into "y = b + a", getting
3527                  rid of one operation.  */
3528               gimple feed = SSA_NAME_DEF_STMT (negate);
3529               tree a = gimple_assign_rhs1 (feed);
3530               tree rhs1 = gimple_assign_rhs1 (user);
3531               gimple_stmt_iterator gsi = gsi_for_stmt (user);
3532               gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3533               update_stmt (gsi_stmt (gsi));
3534             }
3535         }
3536     }
3537 }
3538
3539 /* Returns true if OP is of a type for which we can do reassociation.
3540    That is for integral or non-saturating fixed-point types, and for
3541    floating point type when associative-math is enabled.  */
3542
3543 static bool
3544 can_reassociate_p (tree op)
3545 {
3546   tree type = TREE_TYPE (op);
3547   if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3548       || NON_SAT_FIXED_POINT_TYPE_P (type)
3549       || (flag_associative_math && FLOAT_TYPE_P (type)))
3550     return true;
3551   return false;
3552 }
3553
3554 /* Break up subtract operations in block BB.
3555
3556    We do this top down because we don't know whether the subtract is
3557    part of a possible chain of reassociation except at the top.
3558
3559    IE given
3560    d = f + g
3561    c = a + e
3562    b = c - d
3563    q = b - r
3564    k = t - q
3565
3566    we want to break up k = t - q, but we won't until we've transformed q
3567    = b - r, which won't be broken up until we transform b = c - d.
3568
3569    En passant, clear the GIMPLE visited flag on every statement.  */
3570
3571 static void
3572 break_up_subtract_bb (basic_block bb)
3573 {
3574   gimple_stmt_iterator gsi;
3575   basic_block son;
3576
3577   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3578     {
3579       gimple stmt = gsi_stmt (gsi);
3580       gimple_set_visited (stmt, false);
3581
3582       if (!is_gimple_assign (stmt)
3583           || !can_reassociate_p (gimple_assign_lhs (stmt)))
3584         continue;
3585
3586       /* Look for simple gimple subtract operations.  */
3587       if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3588         {
3589           if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3590               || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3591             continue;
3592
3593           /* Check for a subtract used only in an addition.  If this
3594              is the case, transform it into add of a negate for better
3595              reassociation.  IE transform C = A-B into C = A + -B if C
3596              is only used in an addition.  */
3597           if (should_break_up_subtract (stmt))
3598             break_up_subtract (stmt, &gsi);
3599         }
3600       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3601                && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3602         plus_negates.safe_push (gimple_assign_lhs (stmt));
3603     }
3604   for (son = first_dom_son (CDI_DOMINATORS, bb);
3605        son;
3606        son = next_dom_son (CDI_DOMINATORS, son))
3607     break_up_subtract_bb (son);
3608 }
3609
3610 /* Used for repeated factor analysis.  */
3611 struct repeat_factor_d
3612 {
3613   /* An SSA name that occurs in a multiply chain.  */
3614   tree factor;
3615
3616   /* Cached rank of the factor.  */
3617   unsigned rank;
3618
3619   /* Number of occurrences of the factor in the chain.  */
3620   HOST_WIDE_INT count;
3621
3622   /* An SSA name representing the product of this factor and
3623      all factors appearing later in the repeated factor vector.  */
3624   tree repr;
3625 };
3626
3627 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3628 typedef const struct repeat_factor_d *const_repeat_factor_t;
3629
3630
3631 static vec<repeat_factor> repeat_factor_vec;
3632
3633 /* Used for sorting the repeat factor vector.  Sort primarily by
3634    ascending occurrence count, secondarily by descending rank.  */
3635
3636 static int
3637 compare_repeat_factors (const void *x1, const void *x2)
3638 {
3639   const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3640   const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3641
3642   if (rf1->count != rf2->count)
3643     return rf1->count - rf2->count;
3644
3645   return rf2->rank - rf1->rank;
3646 }
3647
3648 /* Look for repeated operands in OPS in the multiply tree rooted at
3649    STMT.  Replace them with an optimal sequence of multiplies and powi
3650    builtin calls, and remove the used operands from OPS.  Return an
3651    SSA name representing the value of the replacement sequence.  */
3652
3653 static tree
3654 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
3655 {
3656   unsigned i, j, vec_len;
3657   int ii;
3658   operand_entry_t oe;
3659   repeat_factor_t rf1, rf2;
3660   repeat_factor rfnew;
3661   tree result = NULL_TREE;
3662   tree target_ssa, iter_result;
3663   tree type = TREE_TYPE (gimple_get_lhs (stmt));
3664   tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
3665   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3666   gimple mul_stmt, pow_stmt;
3667
3668   /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3669      target.  */
3670   if (!powi_fndecl)
3671     return NULL_TREE;
3672
3673   /* Allocate the repeated factor vector.  */
3674   repeat_factor_vec.create (10);
3675
3676   /* Scan the OPS vector for all SSA names in the product and build
3677      up a vector of occurrence counts for each factor.  */
3678   FOR_EACH_VEC_ELT (*ops, i, oe)
3679     {
3680       if (TREE_CODE (oe->op) == SSA_NAME)
3681         {
3682           FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3683             {
3684               if (rf1->factor == oe->op)
3685                 {
3686                   rf1->count += oe->count;
3687                   break;
3688                 }
3689             }
3690
3691           if (j >= repeat_factor_vec.length ())
3692             {
3693               rfnew.factor = oe->op;
3694               rfnew.rank = oe->rank;
3695               rfnew.count = oe->count;
3696               rfnew.repr = NULL_TREE;
3697               repeat_factor_vec.safe_push (rfnew);
3698             }
3699         }
3700     }
3701
3702   /* Sort the repeated factor vector by (a) increasing occurrence count,
3703      and (b) decreasing rank.  */
3704   repeat_factor_vec.qsort (compare_repeat_factors);
3705
3706   /* It is generally best to combine as many base factors as possible
3707      into a product before applying __builtin_powi to the result.
3708      However, the sort order chosen for the repeated factor vector
3709      allows us to cache partial results for the product of the base
3710      factors for subsequent use.  When we already have a cached partial
3711      result from a previous iteration, it is best to make use of it
3712      before looking for another __builtin_pow opportunity.
3713
3714      As an example, consider x * x * y * y * y * z * z * z * z.
3715      We want to first compose the product x * y * z, raise it to the
3716      second power, then multiply this by y * z, and finally multiply
3717      by z.  This can be done in 5 multiplies provided we cache y * z
3718      for use in both expressions:
3719
3720         t1 = y * z
3721         t2 = t1 * x
3722         t3 = t2 * t2
3723         t4 = t1 * t3
3724         result = t4 * z
3725
3726      If we instead ignored the cached y * z and first multiplied by
3727      the __builtin_pow opportunity z * z, we would get the inferior:
3728
3729         t1 = y * z
3730         t2 = t1 * x
3731         t3 = t2 * t2
3732         t4 = z * z
3733         t5 = t3 * t4
3734         result = t5 * y  */
3735
3736   vec_len = repeat_factor_vec.length ();
3737
3738   /* Repeatedly look for opportunities to create a builtin_powi call.  */
3739   while (true)
3740     {
3741       HOST_WIDE_INT power;
3742
3743       /* First look for the largest cached product of factors from
3744          preceding iterations.  If found, create a builtin_powi for
3745          it if the minimum occurrence count for its factors is at
3746          least 2, or just use this cached product as our next
3747          multiplicand if the minimum occurrence count is 1.  */
3748       FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3749         {
3750           if (rf1->repr && rf1->count > 0)
3751             break;
3752         }
3753
3754       if (j < vec_len)
3755         {
3756           power = rf1->count;
3757
3758           if (power == 1)
3759             {
3760               iter_result = rf1->repr;
3761
3762               if (dump_file && (dump_flags & TDF_DETAILS))
3763                 {
3764                   unsigned elt;
3765                   repeat_factor_t rf;
3766                   fputs ("Multiplying by cached product ", dump_file);
3767                   for (elt = j; elt < vec_len; elt++)
3768                     {
3769                       rf = &repeat_factor_vec[elt];
3770                       print_generic_expr (dump_file, rf->factor, 0);
3771                       if (elt < vec_len - 1)
3772                         fputs (" * ", dump_file);
3773                     }
3774                   fputs ("\n", dump_file);
3775                 }
3776             }
3777           else
3778             {
3779               iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3780               pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3781                                             build_int_cst (integer_type_node,
3782                                                            power));
3783               gimple_call_set_lhs (pow_stmt, iter_result);
3784               gimple_set_location (pow_stmt, gimple_location (stmt));
3785               gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3786
3787               if (dump_file && (dump_flags & TDF_DETAILS))
3788                 {
3789                   unsigned elt;
3790                   repeat_factor_t rf;
3791                   fputs ("Building __builtin_pow call for cached product (",
3792                          dump_file);
3793                   for (elt = j; elt < vec_len; elt++)
3794                     {
3795                       rf = &repeat_factor_vec[elt];
3796                       print_generic_expr (dump_file, rf->factor, 0);
3797                       if (elt < vec_len - 1)
3798                         fputs (" * ", dump_file);
3799                     }
3800                   fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
3801                            power);
3802                 }
3803             }
3804         }
3805       else
3806         {
3807           /* Otherwise, find the first factor in the repeated factor
3808              vector whose occurrence count is at least 2.  If no such
3809              factor exists, there are no builtin_powi opportunities
3810              remaining.  */
3811           FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3812             {
3813               if (rf1->count >= 2)
3814                 break;
3815             }
3816
3817           if (j >= vec_len)
3818             break;
3819
3820           power = rf1->count;
3821
3822           if (dump_file && (dump_flags & TDF_DETAILS))
3823             {
3824               unsigned elt;
3825               repeat_factor_t rf;
3826               fputs ("Building __builtin_pow call for (", dump_file);
3827               for (elt = j; elt < vec_len; elt++)
3828                 {
3829                   rf = &repeat_factor_vec[elt];
3830                   print_generic_expr (dump_file, rf->factor, 0);
3831                   if (elt < vec_len - 1)
3832                     fputs (" * ", dump_file);
3833                 }
3834               fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
3835             }
3836
3837           reassociate_stats.pows_created++;
3838
3839           /* Visit each element of the vector in reverse order (so that
3840              high-occurrence elements are visited first, and within the
3841              same occurrence count, lower-ranked elements are visited
3842              first).  Form a linear product of all elements in this order
3843              whose occurrencce count is at least that of element J.
3844              Record the SSA name representing the product of each element
3845              with all subsequent elements in the vector.  */
3846           if (j == vec_len - 1)
3847             rf1->repr = rf1->factor;
3848           else
3849             {
3850               for (ii = vec_len - 2; ii >= (int)j; ii--)
3851                 {
3852                   tree op1, op2;
3853
3854                   rf1 = &repeat_factor_vec[ii];
3855                   rf2 = &repeat_factor_vec[ii + 1];
3856
3857                   /* Init the last factor's representative to be itself.  */
3858                   if (!rf2->repr)
3859                     rf2->repr = rf2->factor;
3860
3861                   op1 = rf1->factor;
3862                   op2 = rf2->repr;
3863
3864                   target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
3865                   mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
3866                                                            target_ssa,
3867                                                            op1, op2);
3868                   gimple_set_location (mul_stmt, gimple_location (stmt));
3869                   gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3870                   rf1->repr = target_ssa;
3871
3872                   /* Don't reprocess the multiply we just introduced.  */
3873                   gimple_set_visited (mul_stmt, true);
3874                 }
3875             }
3876
3877           /* Form a call to __builtin_powi for the maximum product
3878              just formed, raised to the power obtained earlier.  */
3879           rf1 = &repeat_factor_vec[j];
3880           iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3881           pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3882                                         build_int_cst (integer_type_node,
3883                                                        power));
3884           gimple_call_set_lhs (pow_stmt, iter_result);
3885           gimple_set_location (pow_stmt, gimple_location (stmt));
3886           gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3887         }
3888
3889       /* If we previously formed at least one other builtin_powi call,
3890          form the product of this one and those others.  */
3891       if (result)
3892         {
3893           tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
3894           mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
3895                                                    result, iter_result);
3896           gimple_set_location (mul_stmt, gimple_location (stmt));
3897           gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3898           gimple_set_visited (mul_stmt, true);
3899           result = new_result;
3900         }
3901       else
3902         result = iter_result;
3903
3904       /* Decrement the occurrence count of each element in the product
3905          by the count found above, and remove this many copies of each
3906          factor from OPS.  */
3907       for (i = j; i < vec_len; i++)
3908         {
3909           unsigned k = power;
3910           unsigned n;
3911
3912           rf1 = &repeat_factor_vec[i];
3913           rf1->count -= power;
3914
3915           FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
3916             {
3917               if (oe->op == rf1->factor)
3918                 {
3919                   if (oe->count <= k)
3920                     {
3921                       ops->ordered_remove (n);
3922                       k -= oe->count;
3923
3924                       if (k == 0)
3925                         break;
3926                     }
3927                   else
3928                     {
3929                       oe->count -= k;
3930                       break;
3931                     }
3932                 }
3933             }
3934         }
3935     }
3936
3937   /* At this point all elements in the repeated factor vector have a
3938      remaining occurrence count of 0 or 1, and those with a count of 1
3939      don't have cached representatives.  Re-sort the ops vector and
3940      clean up.  */
3941   ops->qsort (sort_by_operand_rank);
3942   repeat_factor_vec.release ();
3943
3944   /* Return the final product computed herein.  Note that there may
3945      still be some elements with single occurrence count left in OPS;
3946      those will be handled by the normal reassociation logic.  */
3947   return result;
3948 }
3949
3950 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
3951
3952 static void
3953 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
3954 {
3955   tree rhs1;
3956
3957   if (dump_file && (dump_flags & TDF_DETAILS))
3958     {
3959       fprintf (dump_file, "Transforming ");
3960       print_gimple_stmt (dump_file, stmt, 0, 0);
3961     }
3962
3963   rhs1 = gimple_assign_rhs1 (stmt);
3964   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
3965   update_stmt (stmt);
3966   remove_visited_stmt_chain (rhs1);
3967
3968   if (dump_file && (dump_flags & TDF_DETAILS))
3969     {
3970       fprintf (dump_file, " into ");
3971       print_gimple_stmt (dump_file, stmt, 0, 0);
3972     }
3973 }
3974
3975 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */
3976
3977 static void
3978 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
3979                             tree rhs1, tree rhs2)
3980 {
3981   if (dump_file && (dump_flags & TDF_DETAILS))
3982     {
3983       fprintf (dump_file, "Transforming ");
3984       print_gimple_stmt (dump_file, stmt, 0, 0);
3985     }
3986
3987   gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
3988   update_stmt (gsi_stmt (*gsi));
3989   remove_visited_stmt_chain (rhs1);
3990
3991   if (dump_file && (dump_flags & TDF_DETAILS))
3992     {
3993       fprintf (dump_file, " into ");
3994       print_gimple_stmt (dump_file, stmt, 0, 0);
3995     }
3996 }
3997
3998 /* Reassociate expressions in basic block BB and its post-dominator as
3999    children.  */
4000
4001 static void
4002 reassociate_bb (basic_block bb)
4003 {
4004   gimple_stmt_iterator gsi;
4005   basic_block son;
4006   gimple stmt = last_stmt (bb);
4007
4008   if (stmt && !gimple_visited_p (stmt))
4009     maybe_optimize_range_tests (stmt);
4010
4011   for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4012     {
4013       stmt = gsi_stmt (gsi);
4014
4015       if (is_gimple_assign (stmt)
4016           && !stmt_could_throw_p (stmt))
4017         {
4018           tree lhs, rhs1, rhs2;
4019           enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4020
4021           /* If this is not a gimple binary expression, there is
4022              nothing for us to do with it.  */
4023           if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4024             continue;
4025
4026           /* If this was part of an already processed statement,
4027              we don't need to touch it again. */
4028           if (gimple_visited_p (stmt))
4029             {
4030               /* This statement might have become dead because of previous
4031                  reassociations.  */
4032               if (has_zero_uses (gimple_get_lhs (stmt)))
4033                 {
4034                   gsi_remove (&gsi, true);
4035                   release_defs (stmt);
4036                   /* We might end up removing the last stmt above which
4037                      places the iterator to the end of the sequence.
4038                      Reset it to the last stmt in this case which might
4039                      be the end of the sequence as well if we removed
4040                      the last statement of the sequence.  In which case
4041                      we need to bail out.  */
4042                   if (gsi_end_p (gsi))
4043                     {
4044                       gsi = gsi_last_bb (bb);
4045                       if (gsi_end_p (gsi))
4046                         break;
4047                     }
4048                 }
4049               continue;
4050             }
4051
4052           lhs = gimple_assign_lhs (stmt);
4053           rhs1 = gimple_assign_rhs1 (stmt);
4054           rhs2 = gimple_assign_rhs2 (stmt);
4055
4056           /* For non-bit or min/max operations we can't associate
4057              all types.  Verify that here.  */
4058           if (rhs_code != BIT_IOR_EXPR
4059               && rhs_code != BIT_AND_EXPR
4060               && rhs_code != BIT_XOR_EXPR
4061               && rhs_code != MIN_EXPR
4062               && rhs_code != MAX_EXPR
4063               && (!can_reassociate_p (lhs)
4064                   || !can_reassociate_p (rhs1)
4065                   || !can_reassociate_p (rhs2)))
4066             continue;
4067
4068           if (associative_tree_code (rhs_code))
4069             {
4070               vec<operand_entry_t> ops = vec<operand_entry_t>();
4071               tree powi_result = NULL_TREE;
4072
4073               /* There may be no immediate uses left by the time we
4074                  get here because we may have eliminated them all.  */
4075               if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4076                 continue;
4077
4078               gimple_set_visited (stmt, true);
4079               linearize_expr_tree (&ops, stmt, true, true);
4080               ops.qsort (sort_by_operand_rank);
4081               optimize_ops_list (rhs_code, &ops);
4082               if (undistribute_ops_list (rhs_code, &ops,
4083                                          loop_containing_stmt (stmt)))
4084                 {
4085                   ops.qsort (sort_by_operand_rank);
4086                   optimize_ops_list (rhs_code, &ops);
4087                 }
4088
4089               if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4090                 optimize_range_tests (rhs_code, &ops);
4091
4092               if (first_pass_instance
4093                   && rhs_code == MULT_EXPR
4094                   && flag_unsafe_math_optimizations)
4095                 powi_result = attempt_builtin_powi (stmt, &ops);
4096
4097               /* If the operand vector is now empty, all operands were
4098                  consumed by the __builtin_powi optimization.  */
4099               if (ops.length () == 0)
4100                 transform_stmt_to_copy (&gsi, stmt, powi_result);
4101               else if (ops.length () == 1)
4102                 {
4103                   tree last_op = ops.last ()->op;
4104
4105                   if (powi_result)
4106                     transform_stmt_to_multiply (&gsi, stmt, last_op,
4107                                                 powi_result);
4108                   else
4109                     transform_stmt_to_copy (&gsi, stmt, last_op);
4110                 }
4111               else
4112                 {
4113                   enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4114                   int ops_num = ops.length ();
4115                   int width = get_reassociation_width (ops_num, rhs_code, mode);
4116
4117                   if (dump_file && (dump_flags & TDF_DETAILS))
4118                     fprintf (dump_file,
4119                              "Width = %d was chosen for reassociation\n", width);
4120
4121                   if (width > 1
4122                       && ops.length () > 3)
4123                     rewrite_expr_tree_parallel (stmt, width, ops);
4124                   else
4125                     rewrite_expr_tree (stmt, 0, ops, false);
4126
4127                   /* If we combined some repeated factors into a
4128                      __builtin_powi call, multiply that result by the
4129                      reassociated operands.  */
4130                   if (powi_result)
4131                     {
4132                       gimple mul_stmt;
4133                       tree type = TREE_TYPE (gimple_get_lhs (stmt));
4134                       tree target_ssa = make_temp_ssa_name (type, NULL,
4135                                                             "reassocpow");
4136                       gimple_set_lhs (stmt, target_ssa);
4137                       update_stmt (stmt);
4138                       mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4139                                                                powi_result,
4140                                                                target_ssa);
4141                       gimple_set_location (mul_stmt, gimple_location (stmt));
4142                       gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4143                     }
4144                 }
4145
4146               ops.release ();
4147             }
4148         }
4149     }
4150   for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4151        son;
4152        son = next_dom_son (CDI_POST_DOMINATORS, son))
4153     reassociate_bb (son);
4154 }
4155
4156 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4157 void debug_ops_vector (vec<operand_entry_t> ops);
4158
4159 /* Dump the operand entry vector OPS to FILE.  */
4160
4161 void
4162 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4163 {
4164   operand_entry_t oe;
4165   unsigned int i;
4166
4167   FOR_EACH_VEC_ELT (ops, i, oe)
4168     {
4169       fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4170       print_generic_expr (file, oe->op, 0);
4171     }
4172 }
4173
4174 /* Dump the operand entry vector OPS to STDERR.  */
4175
4176 DEBUG_FUNCTION void
4177 debug_ops_vector (vec<operand_entry_t> ops)
4178 {
4179   dump_ops_vector (stderr, ops);
4180 }
4181
4182 static void
4183 do_reassoc (void)
4184 {
4185   break_up_subtract_bb (ENTRY_BLOCK_PTR);
4186   reassociate_bb (EXIT_BLOCK_PTR);
4187 }
4188
4189 /* Initialize the reassociation pass.  */
4190
4191 static void
4192 init_reassoc (void)
4193 {
4194   int i;
4195   long rank = 2;
4196   int *bbs = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
4197
4198   /* Find the loops, so that we can prevent moving calculations in
4199      them.  */
4200   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4201
4202   memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4203
4204   operand_entry_pool = create_alloc_pool ("operand entry pool",
4205                                           sizeof (struct operand_entry), 30);
4206   next_operand_entry_id = 0;
4207
4208   /* Reverse RPO (Reverse Post Order) will give us something where
4209      deeper loops come later.  */
4210   pre_and_rev_post_order_compute (NULL, bbs, false);
4211   bb_rank = XCNEWVEC (long, last_basic_block);
4212   operand_rank = pointer_map_create ();
4213
4214   /* Give each default definition a distinct rank.  This includes
4215      parameters and the static chain.  Walk backwards over all
4216      SSA names so that we get proper rank ordering according
4217      to tree_swap_operands_p.  */
4218   for (i = num_ssa_names - 1; i > 0; --i)
4219     {
4220       tree name = ssa_name (i);
4221       if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4222         insert_operand_rank (name, ++rank);
4223     }
4224
4225   /* Set up rank for each BB  */
4226   for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
4227     bb_rank[bbs[i]] = ++rank  << 16;
4228
4229   free (bbs);
4230   calculate_dominance_info (CDI_POST_DOMINATORS);
4231   plus_negates = vec<tree>();
4232 }
4233
4234 /* Cleanup after the reassociation pass, and print stats if
4235    requested.  */
4236
4237 static void
4238 fini_reassoc (void)
4239 {
4240   statistics_counter_event (cfun, "Linearized",
4241                             reassociate_stats.linearized);
4242   statistics_counter_event (cfun, "Constants eliminated",
4243                             reassociate_stats.constants_eliminated);
4244   statistics_counter_event (cfun, "Ops eliminated",
4245                             reassociate_stats.ops_eliminated);
4246   statistics_counter_event (cfun, "Statements rewritten",
4247                             reassociate_stats.rewritten);
4248   statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4249                             reassociate_stats.pows_encountered);
4250   statistics_counter_event (cfun, "Built-in powi calls created",
4251                             reassociate_stats.pows_created);
4252
4253   pointer_map_destroy (operand_rank);
4254   free_alloc_pool (operand_entry_pool);
4255   free (bb_rank);
4256   plus_negates.release ();
4257   free_dominance_info (CDI_POST_DOMINATORS);
4258   loop_optimizer_finalize ();
4259 }
4260
4261 /* Gate and execute functions for Reassociation.  */
4262
4263 static unsigned int
4264 execute_reassoc (void)
4265 {
4266   init_reassoc ();
4267
4268   do_reassoc ();
4269   repropagate_negates ();
4270
4271   fini_reassoc ();
4272   return 0;
4273 }
4274
4275 static bool
4276 gate_tree_ssa_reassoc (void)
4277 {
4278   return flag_tree_reassoc != 0;
4279 }
4280
4281 struct gimple_opt_pass pass_reassoc =
4282 {
4283  {
4284   GIMPLE_PASS,
4285   "reassoc",                            /* name */
4286   OPTGROUP_NONE,                        /* optinfo_flags */
4287   gate_tree_ssa_reassoc,                /* gate */
4288   execute_reassoc,                      /* execute */
4289   NULL,                                 /* sub */
4290   NULL,                                 /* next */
4291   0,                                    /* static_pass_number */
4292   TV_TREE_REASSOC,                      /* tv_id */
4293   PROP_cfg | PROP_ssa,                  /* properties_required */
4294   0,                                    /* properties_provided */
4295   0,                                    /* properties_destroyed */
4296   0,                                    /* todo_flags_start */
4297   TODO_verify_ssa
4298     | TODO_verify_flow
4299     | TODO_ggc_collect                  /* todo_flags_finish */
4300  }
4301 };