2013-05-15 Richard Biener <rguenther@suse.de>
[official-gcc.git] / gcc / tree-ssa-reassoc.c
blob 4fc19cb551eda29a067b2f656903ed6a03e3ff99
1 /* Reassociation for trees.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "tree.h"
27 #include "basic-block.h"
28 #include "gimple-pretty-print.h"
29 #include "tree-inline.h"
30 #include "tree-flow.h"
31 #include "gimple.h"
32 #include "tree-iterator.h"
33 #include "tree-pass.h"
34 #include "alloc-pool.h"
35 #include "vec.h"
36 #include "langhooks.h"
37 #include "pointer-set.h"
38 #include "cfgloop.h"
39 #include "flags.h"
40 #include "target.h"
41 #include "params.h"
42 #include "diagnostic-core.h"
44 /* This is a simple global reassociation pass. It is, in part, based
45 on the LLVM pass of the same name (They do some things more/less
46 than we do, in different orders, etc).
48 It consists of five steps:
50 1. Breaking up subtract operations into addition + negate, where
51 it would promote the reassociation of adds.
53 2. Left linearization of the expression trees, so that (A+B)+(C+D)
54 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
55 During linearization, we place the operands of the binary
56 expressions into a vector of operand_entry_t
58 3. Optimization of the operand lists, eliminating things like a +
59 -a, a & a, etc.
61 3a. Combine repeated factors with the same occurrence counts
62 into a __builtin_powi call that will later be optimized into
63 an optimal number of multiplies.
65 4. Rewrite the expression trees we linearized and optimized so
66 they are in proper rank order.
68 5. Repropagate negates, as nothing else will clean them up ATM.
70 A bit of theory on #4, since nobody seems to write anything down
71 about why it makes sense to do it the way they do it:
73 We could do this much nicer theoretically, but don't (for reasons
74 explained after how to do it theoretically nice :P).
76 In order to promote the most redundancy elimination, you want
77 binary expressions whose operands are the same rank (or
78 preferably, the same value) exposed to the redundancy eliminator,
79 for possible elimination.
81 So the way to do this if we really cared, is to build the new op
82 tree from the leaves to the roots, merging as you go, and putting the
83 new op on the end of the worklist, until you are left with one
84 thing on the worklist.
86 I.e., if you have to rewrite the following set of operands (listed with
87 rank in parentheses), with opcode PLUS_EXPR:
89 a (1), b (1), c (1), d (2), e (2)
92 We start with our merge worklist empty, and the ops list with all of
93 those on it.
95 You want to first merge all leaves of the same rank, as much as
96 possible.
98 So first build a binary op of
100 mergetmp = a + b, and put "mergetmp" on the merge worklist.
102 Because there is no three operand form of PLUS_EXPR, c is not going to
103 be exposed to redundancy elimination as a rank 1 operand.
105 So you might as well throw it on the merge worklist (you could also
106 consider it to now be a rank two operand, and merge it with d and e,
107 but in this case, you then have evicted e from a binary op. So at
108 least in this situation, you can't win.)
110 Then build a binary op of d + e
111 mergetmp2 = d + e
113 and put mergetmp2 on the merge worklist.
115 so merge worklist = {mergetmp, c, mergetmp2}
117 Continue building binary ops of these operations until you have only
118 one operation left on the worklist.
120 So we have
122 build binary op
123 mergetmp3 = mergetmp + c
125 worklist = {mergetmp2, mergetmp3}
127 mergetmp4 = mergetmp2 + mergetmp3
129 worklist = {mergetmp4}
131 because we have one operation left, we can now just set the original
132 statement equal to the result of that operation.
134 This will at least expose a + b and d + e to redundancy elimination
135 as binary operations.
137 For extra points, you can reuse the old statements to build the
138 mergetmps, since you shouldn't run out.
140 So why don't we do this?
142 Because it's expensive, and rarely will help. Most trees we are
143 reassociating have 3 or fewer ops. If they have 2 ops, they already
144 will be written into a nice single binary op. If you have 3 ops, a
145 single simple check suffices to tell you whether the first two are of the
146 same rank. If so, you know to order it
148 mergetmp = op1 + op2
149 newstmt = mergetmp + op3
151 instead of
152 mergetmp = op2 + op3
153 newstmt = mergetmp + op1
155 If all three are of the same rank, you can't expose them all in a
156 single binary operator anyway, so the above is *still* the best you
157 can do.
159 Thus, this is what we do. When we have three ops left, we check to see
160 what order to put them in, and call it a day. As a nod to vector sum
161 reduction, we check if any of the ops are really a phi node that is a
162 destructive update for the associating op, and keep the destructive
163 update together for vector sum reduction recognition. */
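/* A small end-to-end illustration of the steps above (a sketch only, not
   part of the pass; all names are hypothetical, and reassociation of the
   floating-point operations is assumed to be allowed).  Given

     e = (a + b) + (c + d);
     f = x * x * x * x * x;

   step 2 linearizes the first expression into the operand list
   {a, b, c, d}, step 3 finds nothing to eliminate, and step 4 rewrites
   it in rank order, roughly as

     t1 = a + b;  t2 = t1 + c;  e = t2 + d;

   while step 3a turns the repeated factor x (occurrence count 5) into

     f = __builtin_powi (x, 5);

   which is later expanded into a near-minimal number of multiplies.  */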
166 /* Statistics */
167 static struct
169 int linearized;
170 int constants_eliminated;
171 int ops_eliminated;
172 int rewritten;
173 int pows_encountered;
174 int pows_created;
175 } reassociate_stats;
177 /* Operator, rank pair. */
178 typedef struct operand_entry
180 unsigned int rank;
181 int id;
182 tree op;
183 unsigned int count;
184 } *operand_entry_t;
186 static alloc_pool operand_entry_pool;
188 /* This is used to assign a unique ID to each struct operand_entry
189 so that qsort results are identical on different hosts. */
190 static int next_operand_entry_id;
192 /* Starting rank number for a given basic block, so that we can rank
193 operations using unmovable instructions in that BB based on the bb
194 depth. */
195 static long *bb_rank;
197 /* Operand->rank hashtable. */
198 static struct pointer_map_t *operand_rank;
200 /* Forward decls. */
201 static long get_rank (tree);
204 /* Bias amount for loop-carried phis. We want this to be larger than
205 the depth of any reassociation tree we can see, but not larger than
206 the rank difference between two blocks. */
207 #define PHI_LOOP_BIAS (1 << 15)
209 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
210 an innermost loop, and the phi has only a single use which is inside
211 the loop, then the rank is the block rank of the loop latch plus an
212 extra bias for the loop-carried dependence. This causes expressions
213 calculated into an accumulator variable to be independent for each
214 iteration of the loop. If STMT is some other phi, the rank is the
215 block rank of its containing block. */
216 static long
217 phi_rank (gimple stmt)
219 basic_block bb = gimple_bb (stmt);
220 struct loop *father = bb->loop_father;
221 tree res;
222 unsigned i;
223 use_operand_p use;
224 gimple use_stmt;
226 /* We only care about real loops (those with a latch). */
227 if (!father->latch)
228 return bb_rank[bb->index];
230 /* Interesting phis must be in headers of innermost loops. */
231 if (bb != father->header
232 || father->inner)
233 return bb_rank[bb->index];
235 /* Ignore virtual SSA_NAMEs. */
236 res = gimple_phi_result (stmt);
237 if (virtual_operand_p (res))
238 return bb_rank[bb->index];
240 /* The phi definition must have a single use, and that use must be
241 within the loop. Otherwise this isn't an accumulator pattern. */
242 if (!single_imm_use (res, &use, &use_stmt)
243 || gimple_bb (use_stmt)->loop_father != father)
244 return bb_rank[bb->index];
246 /* Look for phi arguments from within the loop. If found, bias this phi. */
247 for (i = 0; i < gimple_phi_num_args (stmt); i++)
249 tree arg = gimple_phi_arg_def (stmt, i);
250 if (TREE_CODE (arg) == SSA_NAME
251 && !SSA_NAME_IS_DEFAULT_DEF (arg))
253 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
254 if (gimple_bb (def_stmt)->loop_father == father)
255 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
259 /* Must be an uninteresting phi. */
260 return bb_rank[bb->index];
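/* Illustration of the accumulator pattern phi_rank looks for
   (hypothetical code, not part of the pass).  With floating-point
   reassociation enabled, the loop-carried phi for SUM gets the latch
   block rank plus PHI_LOOP_BIAS, so the rewritten adds place SUM last
   and the per-iteration additions no longer form one serial chain
   through the accumulator.  */
#if 0
static double
accumulate3 (const double *a, const double *b, const double *c, int n)
{
  int i;
  double sum = 0.0;
  for (i = 0; i < n; i++)
    /* Without the bias the chain stays serial:
         sum = ((sum + a[i]) + b[i]) + c[i];
       with the bias it becomes roughly:
         t = a[i] + b[i];  t2 = t + c[i];  sum = t2 + sum;  */
    sum = sum + a[i] + b[i] + c[i];
  return sum;
}
#endif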
263 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
264 loop-carried dependence of an innermost loop, return TRUE; else
265 return FALSE. */
266 static bool
267 loop_carried_phi (tree exp)
269 gimple phi_stmt;
270 long block_rank;
272 if (TREE_CODE (exp) != SSA_NAME
273 || SSA_NAME_IS_DEFAULT_DEF (exp))
274 return false;
276 phi_stmt = SSA_NAME_DEF_STMT (exp);
278 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
279 return false;
281 /* Non-loop-carried phis have block rank. Loop-carried phis have
282 an additional bias added in. If this phi doesn't have block rank,
283 it's biased and should not be propagated. */
284 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
286 if (phi_rank (phi_stmt) != block_rank)
287 return true;
289 return false;
292 /* Return the maximum of RANK and the rank that should be propagated
293 from expression OP. For most operands, this is just the rank of OP.
294 For loop-carried phis, the value is zero to avoid undoing the bias
295 in favor of the phi. */
296 static long
297 propagate_rank (long rank, tree op)
299 long op_rank;
301 if (loop_carried_phi (op))
302 return rank;
304 op_rank = get_rank (op);
306 return MAX (rank, op_rank);
309 /* Look up the operand rank structure for expression E. */
311 static inline long
312 find_operand_rank (tree e)
314 void **slot = pointer_map_contains (operand_rank, e);
315 return slot ? (long) (intptr_t) *slot : -1;
318 /* Insert {E,RANK} into the operand rank hashtable. */
320 static inline void
321 insert_operand_rank (tree e, long rank)
323 void **slot;
324 gcc_assert (rank > 0);
325 slot = pointer_map_insert (operand_rank, e);
326 gcc_assert (!*slot);
327 *slot = (void *) (intptr_t) rank;
330 /* Given an expression E, return the rank of the expression. */
332 static long
333 get_rank (tree e)
335 /* Constants have rank 0. */
336 if (is_gimple_min_invariant (e))
337 return 0;
339 /* SSA_NAME's have the rank of the expression they are the result
340 of.
341 For globals and uninitialized values, the rank is 0.
342 For function arguments, use the pre-setup rank.
343 For PHI nodes, stores, asm statements, etc, we use the rank of
344 the BB.
345 For simple operations, the rank is the maximum rank of any of
346 its operands, or the bb_rank, whichever is less.
347 I make no claims that this is optimal, however, it gives good
348 results. */
350 /* We make an exception to the normal ranking system to break
351 dependences of accumulator variables in loops. Suppose we
352 have a simple one-block loop containing:
354 x_1 = phi(x_0, x_2)
355 b = a + x_1
356 c = b + d
357 x_2 = c + e
359 As shown, each iteration of the calculation into x is fully
360 dependent upon the iteration before it. We would prefer to
361 see this in the form:
363 x_1 = phi(x_0, x_2)
364 b = a + d
365 c = b + e
366 x_2 = c + x_1
368 If the loop is unrolled, the calculations of b and c from
369 different iterations can be interleaved.
371 To obtain this result during reassociation, we bias the rank
372 of the phi definition x_1 upward, when it is recognized as an
373 accumulator pattern. The artificial rank causes it to be
374 added last, providing the desired independence. */
376 if (TREE_CODE (e) == SSA_NAME)
378 gimple stmt;
379 long rank;
380 int i, n;
381 tree op;
383 if (SSA_NAME_IS_DEFAULT_DEF (e))
384 return find_operand_rank (e);
386 stmt = SSA_NAME_DEF_STMT (e);
387 if (gimple_code (stmt) == GIMPLE_PHI)
388 return phi_rank (stmt);
390 if (!is_gimple_assign (stmt)
391 || gimple_vdef (stmt))
392 return bb_rank[gimple_bb (stmt)->index];
394 /* If we already have a rank for this expression, use that. */
395 rank = find_operand_rank (e);
396 if (rank != -1)
397 return rank;
399 /* Otherwise, find the maximum rank for the operands. As an
400 exception, remove the bias from loop-carried phis when propagating
401 the rank so that dependent operations are not also biased. */
402 rank = 0;
403 if (gimple_assign_single_p (stmt))
405 tree rhs = gimple_assign_rhs1 (stmt);
406 n = TREE_OPERAND_LENGTH (rhs);
407 if (n == 0)
408 rank = propagate_rank (rank, rhs);
409 else
411 for (i = 0; i < n; i++)
413 op = TREE_OPERAND (rhs, i);
415 if (op != NULL_TREE)
416 rank = propagate_rank (rank, op);
420 else
422 n = gimple_num_ops (stmt);
423 for (i = 1; i < n; i++)
425 op = gimple_op (stmt, i);
426 gcc_assert (op);
427 rank = propagate_rank (rank, op);
431 if (dump_file && (dump_flags & TDF_DETAILS))
433 fprintf (dump_file, "Rank for ");
434 print_generic_expr (dump_file, e, 0);
435 fprintf (dump_file, " is %ld\n", (rank + 1));
438 /* Note the rank in the hashtable so we don't recompute it. */
439 insert_operand_rank (e, (rank + 1));
440 return (rank + 1);
443 /* Globals, etc, are rank 0 */
444 return 0;
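/* Worked example of the ranking rules above (hypothetical ranks, not
   part of the pass): if operand a has rank 3, b has rank 3 and c has
   rank 4, then for

     t1 = a + b;        t1 gets MAX (3, 3) + 1 == 4
     t2 = t1 + c;       t2 gets MAX (4, 4) + 1 == 5

   so later computations always rank strictly higher than the values
   they consume, which is what sort_by_operand_rank below relies on.  */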
448 /* We want integer ones to end up last no matter what, since they are
449 the ones we can do the most with. */
450 #define INTEGER_CONST_TYPE 1 << 3
451 #define FLOAT_CONST_TYPE 1 << 2
452 #define OTHER_CONST_TYPE 1 << 1
454 /* Classify an invariant tree into integer, float, or other, so that
455 we can sort them to be near other constants of the same type. */
456 static inline int
457 constant_type (tree t)
459 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
460 return INTEGER_CONST_TYPE;
461 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
462 return FLOAT_CONST_TYPE;
463 else
464 return OTHER_CONST_TYPE;
467 /* qsort comparison function to sort operand entries PA and PB by rank
468 so that the sorted array is ordered by rank in decreasing order. */
469 static int
470 sort_by_operand_rank (const void *pa, const void *pb)
472 const operand_entry_t oea = *(const operand_entry_t *)pa;
473 const operand_entry_t oeb = *(const operand_entry_t *)pb;
475 /* It's nicer for optimize_expression if constants that are likely
476 to fold when added/multiplied/whatever are put next to each
477 other. Since all constants have rank 0, order them by type. */
478 if (oeb->rank == 0 && oea->rank == 0)
480 if (constant_type (oeb->op) != constant_type (oea->op))
481 return constant_type (oeb->op) - constant_type (oea->op);
482 else
483 /* To make sorting result stable, we use unique IDs to determine
484 order. */
485 return oeb->id - oea->id;
488 /* Lastly, make sure the versions that are the same go next to each
489 other. We use SSA_NAME_VERSION because it's stable. */
490 if ((oeb->rank - oea->rank == 0)
491 && TREE_CODE (oea->op) == SSA_NAME
492 && TREE_CODE (oeb->op) == SSA_NAME)
494 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
495 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
496 else
497 return oeb->id - oea->id;
500 if (oeb->rank != oea->rank)
501 return oeb->rank - oea->rank;
502 else
503 return oeb->id - oea->id;
506 /* Add an operand entry to *OPS for the tree operand OP. */
508 static void
509 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
511 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
513 oe->op = op;
514 oe->rank = get_rank (op);
515 oe->id = next_operand_entry_id++;
516 oe->count = 1;
517 ops->safe_push (oe);
520 /* Add an operand entry to *OPS for the tree operand OP with repeat
521 count REPEAT. */
523 static void
524 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
525 HOST_WIDE_INT repeat)
527 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
529 oe->op = op;
530 oe->rank = get_rank (op);
531 oe->id = next_operand_entry_id++;
532 oe->count = repeat;
533 ops->safe_push (oe);
535 reassociate_stats.pows_encountered++;
538 /* Return true if STMT is a reassociable operation containing a binary
539 operation with tree code CODE, and is inside LOOP. */
541 static bool
542 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
544 basic_block bb = gimple_bb (stmt);
546 if (gimple_bb (stmt) == NULL)
547 return false;
549 if (!flow_bb_inside_loop_p (loop, bb))
550 return false;
552 if (is_gimple_assign (stmt)
553 && gimple_assign_rhs_code (stmt) == code
554 && has_single_use (gimple_assign_lhs (stmt)))
555 return true;
557 return false;
561 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
562 operand of that unary operation. Otherwise, return NULL_TREE. */
564 static tree
565 get_unary_op (tree name, enum tree_code opcode)
567 gimple stmt = SSA_NAME_DEF_STMT (name);
569 if (!is_gimple_assign (stmt))
570 return NULL_TREE;
572 if (gimple_assign_rhs_code (stmt) == opcode)
573 return gimple_assign_rhs1 (stmt);
574 return NULL_TREE;
577 /* If CURR and LAST are a pair of ops that OPCODE allows us to
578 eliminate through equivalences, do so, remove them from OPS, and
579 return true. Otherwise, return false. */
581 static bool
582 eliminate_duplicate_pair (enum tree_code opcode,
583 vec<operand_entry_t> *ops,
584 bool *all_done,
585 unsigned int i,
586 operand_entry_t curr,
587 operand_entry_t last)
590 /* If we have two of the same op, and the opcode is &, |, min, or max,
591 we can eliminate one of them.
592 If we have two of the same op, and the opcode is ^, we can
593 eliminate both of them. */
595 if (last && last->op == curr->op)
597 switch (opcode)
599 case MAX_EXPR:
600 case MIN_EXPR:
601 case BIT_IOR_EXPR:
602 case BIT_AND_EXPR:
603 if (dump_file && (dump_flags & TDF_DETAILS))
605 fprintf (dump_file, "Equivalence: ");
606 print_generic_expr (dump_file, curr->op, 0);
607 fprintf (dump_file, " [&|minmax] ");
608 print_generic_expr (dump_file, last->op, 0);
609 fprintf (dump_file, " -> ");
610 print_generic_stmt (dump_file, last->op, 0);
613 ops->ordered_remove (i);
614 reassociate_stats.ops_eliminated ++;
616 return true;
618 case BIT_XOR_EXPR:
619 if (dump_file && (dump_flags & TDF_DETAILS))
621 fprintf (dump_file, "Equivalence: ");
622 print_generic_expr (dump_file, curr->op, 0);
623 fprintf (dump_file, " ^ ");
624 print_generic_expr (dump_file, last->op, 0);
625 fprintf (dump_file, " -> nothing\n");
628 reassociate_stats.ops_eliminated += 2;
630 if (ops->length () == 2)
632 ops->create (0);
633 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
634 *all_done = true;
636 else
638 ops->ordered_remove (i-1);
639 ops->ordered_remove (i-1);
642 return true;
644 default:
645 break;
648 return false;
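/* Illustration (hypothetical operand lists, not part of the pass): for
   BIT_AND_EXPR the list {a, a, b} becomes {a, b}, since a & a == a; for
   BIT_XOR_EXPR both copies are simply removed, so {a, a, b} becomes {b},
   and if the pair was all that was left the list is replaced by a single
   zero constant and *ALL_DONE is set.  */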
651 static vec<tree> plus_negates;
653 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
654 expression, look in OPS for a corresponding positive operation to cancel
655 it out. If we find one, remove the other from OPS, replace
656 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
657 return false. */
659 static bool
660 eliminate_plus_minus_pair (enum tree_code opcode,
661 vec<operand_entry_t> *ops,
662 unsigned int currindex,
663 operand_entry_t curr)
665 tree negateop;
666 tree notop;
667 unsigned int i;
668 operand_entry_t oe;
670 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
671 return false;
673 negateop = get_unary_op (curr->op, NEGATE_EXPR);
674 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
675 if (negateop == NULL_TREE && notop == NULL_TREE)
676 return false;
678 /* Any non-negated version will have a rank that is one less than
679 the current rank. So once we hit those ranks, if we don't find
680 one, we can stop. */
682 for (i = currindex + 1;
683 ops->iterate (i, &oe)
684 && oe->rank >= curr->rank - 1 ;
685 i++)
687 if (oe->op == negateop)
690 if (dump_file && (dump_flags & TDF_DETAILS))
692 fprintf (dump_file, "Equivalence: ");
693 print_generic_expr (dump_file, negateop, 0);
694 fprintf (dump_file, " + -");
695 print_generic_expr (dump_file, oe->op, 0);
696 fprintf (dump_file, " -> 0\n");
699 ops->ordered_remove (i);
700 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
701 ops->ordered_remove (currindex);
702 reassociate_stats.ops_eliminated ++;
704 return true;
706 else if (oe->op == notop)
708 tree op_type = TREE_TYPE (oe->op);
710 if (dump_file && (dump_flags & TDF_DETAILS))
712 fprintf (dump_file, "Equivalence: ");
713 print_generic_expr (dump_file, notop, 0);
714 fprintf (dump_file, " + ~");
715 print_generic_expr (dump_file, oe->op, 0);
716 fprintf (dump_file, " -> -1\n");
719 ops->ordered_remove (i);
720 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
721 ops->ordered_remove (currindex);
722 reassociate_stats.ops_eliminated ++;
724 return true;
728 /* CURR->OP is a negate expr in a plus expr: save it for later
729 inspection in repropagate_negates(). */
730 if (negateop != NULL_TREE)
731 plus_negates.safe_push (curr->op);
733 return false;
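/* Illustration (hypothetical operand lists, not part of the pass): for a
   PLUS_EXPR chain {-x, x, y} the pair cancels and is replaced by a zero
   constant, leaving {y, 0}; similarly {~x, x, y} becomes {y, -1}, since
   x + ~x == -1 in two's complement.  A negate that finds no partner is
   recorded in plus_negates for repropagate_negates.  */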
736 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
737 bitwise not expression, look in OPS for a corresponding operand to
738 cancel it out. If we find one, remove the other from OPS, replace
739 OPS[CURRINDEX] with 0 (for AND) or an all-ones constant (for IOR),
740 and return true. Otherwise, return false. */
742 static bool
743 eliminate_not_pairs (enum tree_code opcode,
744 vec<operand_entry_t> *ops,
745 unsigned int currindex,
746 operand_entry_t curr)
748 tree notop;
749 unsigned int i;
750 operand_entry_t oe;
752 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
753 || TREE_CODE (curr->op) != SSA_NAME)
754 return false;
756 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
757 if (notop == NULL_TREE)
758 return false;
760 /* Any non-not version will have a rank that is one less than
761 the current rank. So once we hit those ranks, if we don't find
762 one, we can stop. */
764 for (i = currindex + 1;
765 ops->iterate (i, &oe)
766 && oe->rank >= curr->rank - 1;
767 i++)
769 if (oe->op == notop)
771 if (dump_file && (dump_flags & TDF_DETAILS))
773 fprintf (dump_file, "Equivalence: ");
774 print_generic_expr (dump_file, notop, 0);
775 if (opcode == BIT_AND_EXPR)
776 fprintf (dump_file, " & ~");
777 else if (opcode == BIT_IOR_EXPR)
778 fprintf (dump_file, " | ~");
779 print_generic_expr (dump_file, oe->op, 0);
780 if (opcode == BIT_AND_EXPR)
781 fprintf (dump_file, " -> 0\n");
782 else if (opcode == BIT_IOR_EXPR)
783 fprintf (dump_file, " -> -1\n");
786 if (opcode == BIT_AND_EXPR)
787 oe->op = build_zero_cst (TREE_TYPE (oe->op));
788 else if (opcode == BIT_IOR_EXPR)
789 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
790 TYPE_PRECISION (TREE_TYPE (oe->op)));
792 reassociate_stats.ops_eliminated += ops->length () - 1;
793 ops->truncate (0);
794 ops->quick_push (oe);
795 return true;
799 return false;
802 /* Use constant value that may be present in OPS to try to eliminate
803 operands. Note that this function is only really used when we've
804 eliminated ops for other reasons, or merged constants. Across
805 single statements, fold already does all of this, plus more. There
806 is little point in duplicating logic, so I've only included the
807 identities that I could ever construct testcases to trigger. */
809 static void
810 eliminate_using_constants (enum tree_code opcode,
811 vec<operand_entry_t> *ops)
813 operand_entry_t oelast = ops->last ();
814 tree type = TREE_TYPE (oelast->op);
816 if (oelast->rank == 0
817 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
819 switch (opcode)
821 case BIT_AND_EXPR:
822 if (integer_zerop (oelast->op))
824 if (ops->length () != 1)
826 if (dump_file && (dump_flags & TDF_DETAILS))
827 fprintf (dump_file, "Found & 0, removing all other ops\n");
829 reassociate_stats.ops_eliminated += ops->length () - 1;
831 ops->truncate (0);
832 ops->quick_push (oelast);
833 return;
836 else if (integer_all_onesp (oelast->op))
838 if (ops->length () != 1)
840 if (dump_file && (dump_flags & TDF_DETAILS))
841 fprintf (dump_file, "Found & -1, removing\n");
842 ops->pop ();
843 reassociate_stats.ops_eliminated++;
846 break;
847 case BIT_IOR_EXPR:
848 if (integer_all_onesp (oelast->op))
850 if (ops->length () != 1)
852 if (dump_file && (dump_flags & TDF_DETAILS))
853 fprintf (dump_file, "Found | -1, removing all other ops\n");
855 reassociate_stats.ops_eliminated += ops->length () - 1;
857 ops->truncate (0);
858 ops->quick_push (oelast);
859 return;
862 else if (integer_zerop (oelast->op))
864 if (ops->length () != 1)
866 if (dump_file && (dump_flags & TDF_DETAILS))
867 fprintf (dump_file, "Found | 0, removing\n");
868 ops->pop ();
869 reassociate_stats.ops_eliminated++;
872 break;
873 case MULT_EXPR:
874 if (integer_zerop (oelast->op)
875 || (FLOAT_TYPE_P (type)
876 && !HONOR_NANS (TYPE_MODE (type))
877 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
878 && real_zerop (oelast->op)))
880 if (ops->length () != 1)
882 if (dump_file && (dump_flags & TDF_DETAILS))
883 fprintf (dump_file, "Found * 0, removing all other ops\n");
885 reassociate_stats.ops_eliminated += ops->length () - 1;
886 ops->truncate (1);
887 ops->quick_push (oelast);
888 return;
891 else if (integer_onep (oelast->op)
892 || (FLOAT_TYPE_P (type)
893 && !HONOR_SNANS (TYPE_MODE (type))
894 && real_onep (oelast->op)))
896 if (ops->length () != 1)
898 if (dump_file && (dump_flags & TDF_DETAILS))
899 fprintf (dump_file, "Found * 1, removing\n");
900 ops->pop ();
901 reassociate_stats.ops_eliminated++;
902 return;
905 break;
906 case BIT_XOR_EXPR:
907 case PLUS_EXPR:
908 case MINUS_EXPR:
909 if (integer_zerop (oelast->op)
910 || (FLOAT_TYPE_P (type)
911 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
912 && fold_real_zero_addition_p (type, oelast->op,
913 opcode == MINUS_EXPR)))
915 if (ops->length () != 1)
917 if (dump_file && (dump_flags & TDF_DETAILS))
918 fprintf (dump_file, "Found [|^+] 0, removing\n");
919 ops->pop ();
920 reassociate_stats.ops_eliminated++;
921 return;
924 break;
925 default:
926 break;
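/* Summary of the identities handled above, at the expression level
   (illustration only; X and Y are hypothetical operands):

     X & 0  -> 0          X & -1 -> X
     X | -1 -> -1         X | 0  -> X
     X * 0  -> 0          X * 1  -> X
     X + 0  -> X          X ^ 0  -> X

   For floating-point types the * and + cases are only applied when the
   NaN / signed-zero / sNaN semantics checked above allow it.  */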
932 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
933 bool, bool);
935 /* Structure for tracking and counting operands. */
936 typedef struct oecount_s {
937 int cnt;
938 int id;
939 enum tree_code oecode;
940 tree op;
941 } oecount;
944 /* The heap for the oecount hashtable and the sorted list of operands. */
945 static vec<oecount> cvec;
948 /* Oecount hashtable helpers. */
950 struct oecount_hasher : typed_noop_remove <void>
952 /* Note that this hash table stores integers, not pointers.
953 So, observe the casting in the member functions. */
954 typedef void value_type;
955 typedef void compare_type;
956 static inline hashval_t hash (const value_type *);
957 static inline bool equal (const value_type *, const compare_type *);
960 /* Hash function for oecount. */
962 inline hashval_t
963 oecount_hasher::hash (const value_type *p)
965 const oecount *c = &cvec[(size_t)p - 42];
966 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
969 /* Comparison function for oecount. */
971 inline bool
972 oecount_hasher::equal (const value_type *p1, const compare_type *p2)
974 const oecount *c1 = &cvec[(size_t)p1 - 42];
975 const oecount *c2 = &cvec[(size_t)p2 - 42];
976 return (c1->oecode == c2->oecode
977 && c1->op == c2->op);
980 /* Comparison function for qsort sorting oecount elements by count. */
982 static int
983 oecount_cmp (const void *p1, const void *p2)
985 const oecount *c1 = (const oecount *)p1;
986 const oecount *c2 = (const oecount *)p2;
987 if (c1->cnt != c2->cnt)
988 return c1->cnt - c2->cnt;
989 else
990 /* If counts are identical, use unique IDs to stabilize qsort. */
991 return c1->id - c2->id;
994 /* Return TRUE iff STMT represents a builtin call that raises OP
995 to some exponent. */
997 static bool
998 stmt_is_power_of_op (gimple stmt, tree op)
1000 tree fndecl;
1002 if (!is_gimple_call (stmt))
1003 return false;
1005 fndecl = gimple_call_fndecl (stmt);
1007 if (!fndecl
1008 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1009 return false;
1011 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1013 CASE_FLT_FN (BUILT_IN_POW):
1014 CASE_FLT_FN (BUILT_IN_POWI):
1015 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1017 default:
1018 return false;
1022 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1023 in place and return the result. Assumes that stmt_is_power_of_op
1024 was previously called for STMT and returned TRUE. */
1026 static HOST_WIDE_INT
1027 decrement_power (gimple stmt)
1029 REAL_VALUE_TYPE c, cint;
1030 HOST_WIDE_INT power;
1031 tree arg1;
1033 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1035 CASE_FLT_FN (BUILT_IN_POW):
1036 arg1 = gimple_call_arg (stmt, 1);
1037 c = TREE_REAL_CST (arg1);
1038 power = real_to_integer (&c) - 1;
1039 real_from_integer (&cint, VOIDmode, power, 0, 0);
1040 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1041 return power;
1043 CASE_FLT_FN (BUILT_IN_POWI):
1044 arg1 = gimple_call_arg (stmt, 1);
1045 power = TREE_INT_CST_LOW (arg1) - 1;
1046 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1047 return power;
1049 default:
1050 gcc_unreachable ();
1054 /* Find the single immediate use of STMT's LHS, and replace it
1055 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1056 replace *DEF with OP as well. */
1058 static void
1059 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1061 tree lhs;
1062 gimple use_stmt;
1063 use_operand_p use;
1064 gimple_stmt_iterator gsi;
1066 if (is_gimple_call (stmt))
1067 lhs = gimple_call_lhs (stmt);
1068 else
1069 lhs = gimple_assign_lhs (stmt);
1071 gcc_assert (has_single_use (lhs));
1072 single_imm_use (lhs, &use, &use_stmt);
1073 if (lhs == *def)
1074 *def = op;
1075 SET_USE (use, op);
1076 if (TREE_CODE (op) != SSA_NAME)
1077 update_stmt (use_stmt);
1078 gsi = gsi_for_stmt (stmt);
1079 unlink_stmt_vdef (stmt);
1080 gsi_remove (&gsi, true);
1081 release_defs (stmt);
1084 /* Walks the linear chain with result *DEF searching for an operation
1085 with operand OP and code OPCODE, removing that from the chain. *DEF
1086 is updated if there is only one operand but no operation left. */
1088 static void
1089 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1091 gimple stmt = SSA_NAME_DEF_STMT (*def);
1095 tree name;
1097 if (opcode == MULT_EXPR
1098 && stmt_is_power_of_op (stmt, op))
1100 if (decrement_power (stmt) == 1)
1101 propagate_op_to_single_use (op, stmt, def);
1102 return;
1105 name = gimple_assign_rhs1 (stmt);
1107 /* If this is the operation we look for and one of the operands
1108 is ours, simply propagate the other operand into the stmt's
1109 single use. */
1110 if (gimple_assign_rhs_code (stmt) == opcode
1111 && (name == op
1112 || gimple_assign_rhs2 (stmt) == op))
1114 if (name == op)
1115 name = gimple_assign_rhs2 (stmt);
1116 propagate_op_to_single_use (name, stmt, def);
1117 return;
1120 /* We might have a multiply of two __builtin_pow* calls, and
1121 the operand might be hiding in the rightmost one. */
1122 if (opcode == MULT_EXPR
1123 && gimple_assign_rhs_code (stmt) == opcode
1124 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1125 && has_single_use (gimple_assign_rhs2 (stmt)))
1127 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1128 if (stmt_is_power_of_op (stmt2, op))
1130 if (decrement_power (stmt2) == 1)
1131 propagate_op_to_single_use (op, stmt2, def);
1132 return;
1136 /* Continue walking the chain. */
1137 gcc_assert (name != op
1138 && TREE_CODE (name) == SSA_NAME);
1139 stmt = SSA_NAME_DEF_STMT (name);
1141 while (1);
1144 /* Builds one statement performing OP1 OPCODE OP2, creating a new SSA
1145 name of type TYPE for the result. Places the statement after the definition of either
1146 OP1 or OP2. Returns the new statement. */
1148 static gimple
1149 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1151 gimple op1def = NULL, op2def = NULL;
1152 gimple_stmt_iterator gsi;
1153 tree op;
1154 gimple sum;
1156 /* Create the addition statement. */
1157 op = make_ssa_name (type, NULL);
1158 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1160 /* Find an insertion place and insert. */
1161 if (TREE_CODE (op1) == SSA_NAME)
1162 op1def = SSA_NAME_DEF_STMT (op1);
1163 if (TREE_CODE (op2) == SSA_NAME)
1164 op2def = SSA_NAME_DEF_STMT (op2);
1165 if ((!op1def || gimple_nop_p (op1def))
1166 && (!op2def || gimple_nop_p (op2def)))
1168 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1169 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1171 else if ((!op1def || gimple_nop_p (op1def))
1172 || (op2def && !gimple_nop_p (op2def)
1173 && stmt_dominates_stmt_p (op1def, op2def)))
1175 if (gimple_code (op2def) == GIMPLE_PHI)
1177 gsi = gsi_after_labels (gimple_bb (op2def));
1178 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1180 else
1182 if (!stmt_ends_bb_p (op2def))
1184 gsi = gsi_for_stmt (op2def);
1185 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1187 else
1189 edge e;
1190 edge_iterator ei;
1192 FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1193 if (e->flags & EDGE_FALLTHRU)
1194 gsi_insert_on_edge_immediate (e, sum);
1198 else
1200 if (gimple_code (op1def) == GIMPLE_PHI)
1202 gsi = gsi_after_labels (gimple_bb (op1def));
1203 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1205 else
1207 if (!stmt_ends_bb_p (op1def))
1209 gsi = gsi_for_stmt (op1def);
1210 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1212 else
1214 edge e;
1215 edge_iterator ei;
1217 FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1218 if (e->flags & EDGE_FALLTHRU)
1219 gsi_insert_on_edge_immediate (e, sum);
1223 update_stmt (sum);
1225 return sum;
1228 /* Perform un-distribution of divisions and multiplications.
1229 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1230 to (A + B) / X for real X.
1232 The algorithm is organized as follows.
1234 - First we walk the addition chain *OPS looking for summands that
1235 are defined by a multiplication or a real division. This results
1236 in the candidates bitmap with relevant indices into *OPS.
1238 - Second we build the chains of multiplications or divisions for
1239 these candidates, counting the number of occurrences of (operand, code)
1240 pairs in all of the candidates chains.
1242 - Third we sort the (operand, code) pairs by number of occurrence and
1243 process them starting with the pair with the most uses.
1245 * For each such pair we walk the candidates again to build a
1246 second candidate bitmap noting all multiplication/division chains
1247 that have at least one occurrence of (operand, code).
1249 * We build an alternate addition chain only covering these
1250 candidates with one (operand, code) operation removed from their
1251 multiplication/division chain.
1253 * The first candidate gets replaced by the alternate addition chain
1254 multiplied/divided by the operand.
1256 * All candidate chains get disabled for further processing and
1257 processing of (operand, code) pairs continues.
1259 The alternate addition chains built are re-processed by the main
1260 reassociation algorithm which allows optimizing a * x * y + b * y * x
1261 to (a + b) * x * y in one invocation of the reassociation pass. */
1263 static bool
1264 undistribute_ops_list (enum tree_code opcode,
1265 vec<operand_entry_t> *ops, struct loop *loop)
1267 unsigned int length = ops->length ();
1268 operand_entry_t oe1;
1269 unsigned i, j;
1270 sbitmap candidates, candidates2;
1271 unsigned nr_candidates, nr_candidates2;
1272 sbitmap_iterator sbi0;
1273 vec<operand_entry_t> *subops;
1274 hash_table <oecount_hasher> ctable;
1275 bool changed = false;
1276 int next_oecount_id = 0;
1278 if (length <= 1
1279 || opcode != PLUS_EXPR)
1280 return false;
1282 /* Build a list of candidates to process. */
1283 candidates = sbitmap_alloc (length);
1284 bitmap_clear (candidates);
1285 nr_candidates = 0;
1286 FOR_EACH_VEC_ELT (*ops, i, oe1)
1288 enum tree_code dcode;
1289 gimple oe1def;
1291 if (TREE_CODE (oe1->op) != SSA_NAME)
1292 continue;
1293 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1294 if (!is_gimple_assign (oe1def))
1295 continue;
1296 dcode = gimple_assign_rhs_code (oe1def);
1297 if ((dcode != MULT_EXPR
1298 && dcode != RDIV_EXPR)
1299 || !is_reassociable_op (oe1def, dcode, loop))
1300 continue;
1302 bitmap_set_bit (candidates, i);
1303 nr_candidates++;
1306 if (nr_candidates < 2)
1308 sbitmap_free (candidates);
1309 return false;
1312 if (dump_file && (dump_flags & TDF_DETAILS))
1314 fprintf (dump_file, "searching for un-distribute opportunities ");
1315 print_generic_expr (dump_file,
1316 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1317 fprintf (dump_file, " %d\n", nr_candidates);
1320 /* Build linearized sub-operand lists and the counting table. */
1321 cvec.create (0);
1322 ctable.create (15);
1323 /* ??? Macro arguments cannot have multi-argument template types in
1324 them. This typedef is needed to work around that limitation. */
1325 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1326 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1327 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1329 gimple oedef;
1330 enum tree_code oecode;
1331 unsigned j;
1333 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1334 oecode = gimple_assign_rhs_code (oedef);
1335 linearize_expr_tree (&subops[i], oedef,
1336 associative_tree_code (oecode), false);
1338 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1340 oecount c;
1341 void **slot;
1342 size_t idx;
1343 c.oecode = oecode;
1344 c.cnt = 1;
1345 c.id = next_oecount_id++;
1346 c.op = oe1->op;
1347 cvec.safe_push (c);
1348 idx = cvec.length () + 41;
1349 slot = ctable.find_slot ((void *)idx, INSERT);
1350 if (!*slot)
1352 *slot = (void *)idx;
1354 else
1356 cvec.pop ();
1357 cvec[(size_t)*slot - 42].cnt++;
1361 ctable.dispose ();
1363 /* Sort the counting table. */
1364 cvec.qsort (oecount_cmp);
1366 if (dump_file && (dump_flags & TDF_DETAILS))
1368 oecount *c;
1369 fprintf (dump_file, "Candidates:\n");
1370 FOR_EACH_VEC_ELT (cvec, j, c)
1372 fprintf (dump_file, " %u %s: ", c->cnt,
1373 c->oecode == MULT_EXPR
1374 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1375 print_generic_expr (dump_file, c->op, 0);
1376 fprintf (dump_file, "\n");
1380 /* Process the (operand, code) pairs in order of most occurrence. */
1381 candidates2 = sbitmap_alloc (length);
1382 while (!cvec.is_empty ())
1384 oecount *c = &cvec.last ();
1385 if (c->cnt < 2)
1386 break;
1388 /* Now collect the operands in the outer chain that contain
1389 the common operand in their inner chain. */
1390 bitmap_clear (candidates2);
1391 nr_candidates2 = 0;
1392 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1394 gimple oedef;
1395 enum tree_code oecode;
1396 unsigned j;
1397 tree op = (*ops)[i]->op;
1399 /* If we undistributed in this chain already, this may be
1400 a constant. */
1401 if (TREE_CODE (op) != SSA_NAME)
1402 continue;
1404 oedef = SSA_NAME_DEF_STMT (op);
1405 oecode = gimple_assign_rhs_code (oedef);
1406 if (oecode != c->oecode)
1407 continue;
1409 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1411 if (oe1->op == c->op)
1413 bitmap_set_bit (candidates2, i);
1414 ++nr_candidates2;
1415 break;
1420 if (nr_candidates2 >= 2)
1422 operand_entry_t oe1, oe2;
1423 gimple prod;
1424 int first = bitmap_first_set_bit (candidates2);
1426 /* Build the new addition chain. */
1427 oe1 = (*ops)[first];
1428 if (dump_file && (dump_flags & TDF_DETAILS))
1430 fprintf (dump_file, "Building (");
1431 print_generic_expr (dump_file, oe1->op, 0);
1433 zero_one_operation (&oe1->op, c->oecode, c->op);
1434 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1436 gimple sum;
1437 oe2 = (*ops)[i];
1438 if (dump_file && (dump_flags & TDF_DETAILS))
1440 fprintf (dump_file, " + ");
1441 print_generic_expr (dump_file, oe2->op, 0);
1443 zero_one_operation (&oe2->op, c->oecode, c->op);
1444 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1445 oe1->op, oe2->op, opcode);
1446 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1447 oe2->rank = 0;
1448 oe1->op = gimple_get_lhs (sum);
1451 /* Apply the multiplication/division. */
1452 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1453 oe1->op, c->op, c->oecode);
1454 if (dump_file && (dump_flags & TDF_DETAILS))
1456 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1457 print_generic_expr (dump_file, c->op, 0);
1458 fprintf (dump_file, "\n");
1461 /* Record it in the addition chain and disable further
1462 undistribution with this op. */
1463 oe1->op = gimple_assign_lhs (prod);
1464 oe1->rank = get_rank (oe1->op);
1465 subops[first].release ();
1467 changed = true;
1470 cvec.pop ();
1473 for (i = 0; i < ops->length (); ++i)
1474 subops[i].release ();
1475 free (subops);
1476 cvec.release ();
1477 sbitmap_free (candidates);
1478 sbitmap_free (candidates2);
1480 return changed;
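/* Illustration of undistribution (hypothetical operands, not part of the
   pass): for the addition chain a*x + b*x + c the (x, *) pair occurs
   twice, so both candidate chains get the factor x stripped, their
   remainders are summed, and the factor is applied once:

     t = a + b;
     t2 = t * x;          the operand list becomes {t2, 0, c}

   The rebuilt chain is fed back into the normal reassociation machinery,
   which is how a*x*y + b*y*x ends up as (a + b) * x * y within a single
   invocation of the pass.  */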
1483 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1484 expression, examine the other OPS to see if any of them are comparisons
1485 of the same values, which we may be able to combine or eliminate.
1486 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1488 static bool
1489 eliminate_redundant_comparison (enum tree_code opcode,
1490 vec<operand_entry_t> *ops,
1491 unsigned int currindex,
1492 operand_entry_t curr)
1494 tree op1, op2;
1495 enum tree_code lcode, rcode;
1496 gimple def1, def2;
1497 int i;
1498 operand_entry_t oe;
1500 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1501 return false;
1503 /* Check that CURR is a comparison. */
1504 if (TREE_CODE (curr->op) != SSA_NAME)
1505 return false;
1506 def1 = SSA_NAME_DEF_STMT (curr->op);
1507 if (!is_gimple_assign (def1))
1508 return false;
1509 lcode = gimple_assign_rhs_code (def1);
1510 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1511 return false;
1512 op1 = gimple_assign_rhs1 (def1);
1513 op2 = gimple_assign_rhs2 (def1);
1515 /* Now look for a similar comparison in the remaining OPS. */
1516 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1518 tree t;
1520 if (TREE_CODE (oe->op) != SSA_NAME)
1521 continue;
1522 def2 = SSA_NAME_DEF_STMT (oe->op);
1523 if (!is_gimple_assign (def2))
1524 continue;
1525 rcode = gimple_assign_rhs_code (def2);
1526 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1527 continue;
1529 /* If we got here, we have a match. See if we can combine the
1530 two comparisons. */
1531 if (opcode == BIT_IOR_EXPR)
1532 t = maybe_fold_or_comparisons (lcode, op1, op2,
1533 rcode, gimple_assign_rhs1 (def2),
1534 gimple_assign_rhs2 (def2));
1535 else
1536 t = maybe_fold_and_comparisons (lcode, op1, op2,
1537 rcode, gimple_assign_rhs1 (def2),
1538 gimple_assign_rhs2 (def2));
1539 if (!t)
1540 continue;
1542 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1543 always give us a boolean_type_node value back. If the original
1544 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1545 we need to convert. */
1546 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1547 t = fold_convert (TREE_TYPE (curr->op), t);
1549 if (TREE_CODE (t) != INTEGER_CST
1550 && !operand_equal_p (t, curr->op, 0))
1552 enum tree_code subcode;
1553 tree newop1, newop2;
1554 if (!COMPARISON_CLASS_P (t))
1555 continue;
1556 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1557 STRIP_USELESS_TYPE_CONVERSION (newop1);
1558 STRIP_USELESS_TYPE_CONVERSION (newop2);
1559 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1560 continue;
1563 if (dump_file && (dump_flags & TDF_DETAILS))
1565 fprintf (dump_file, "Equivalence: ");
1566 print_generic_expr (dump_file, curr->op, 0);
1567 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1568 print_generic_expr (dump_file, oe->op, 0);
1569 fprintf (dump_file, " -> ");
1570 print_generic_expr (dump_file, t, 0);
1571 fprintf (dump_file, "\n");
1574 /* Now we can delete oe, as it has been subsumed by the new combined
1575 expression t. */
1576 ops->ordered_remove (i);
1577 reassociate_stats.ops_eliminated ++;
1579 /* If t is the same as curr->op, we're done. Otherwise we must
1580 replace curr->op with t. Special case is if we got a constant
1581 back, in which case we add it to the end instead of in place of
1582 the current entry. */
1583 if (TREE_CODE (t) == INTEGER_CST)
1585 ops->ordered_remove (currindex);
1586 add_to_ops_vec (ops, t);
1588 else if (!operand_equal_p (t, curr->op, 0))
1590 gimple sum;
1591 enum tree_code subcode;
1592 tree newop1;
1593 tree newop2;
1594 gcc_assert (COMPARISON_CLASS_P (t));
1595 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1596 STRIP_USELESS_TYPE_CONVERSION (newop1);
1597 STRIP_USELESS_TYPE_CONVERSION (newop2);
1598 gcc_checking_assert (is_gimple_val (newop1)
1599 && is_gimple_val (newop2));
1600 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1601 curr->op = gimple_get_lhs (sum);
1603 return true;
1606 return false;
1609 /* Perform various identities and other optimizations on the list of
1610 operand entries, stored in OPS. The tree code for the binary
1611 operation between all the operands is OPCODE. */
1613 static void
1614 optimize_ops_list (enum tree_code opcode,
1615 vec<operand_entry_t> *ops)
1617 unsigned int length = ops->length ();
1618 unsigned int i;
1619 operand_entry_t oe;
1620 operand_entry_t oelast = NULL;
1621 bool iterate = false;
1623 if (length == 1)
1624 return;
1626 oelast = ops->last ();
1628 /* If the last two are constants, pop the constants off, merge them
1629 and try the next two. */
1630 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1632 operand_entry_t oelm1 = (*ops)[length - 2];
1634 if (oelm1->rank == 0
1635 && is_gimple_min_invariant (oelm1->op)
1636 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1637 TREE_TYPE (oelast->op)))
1639 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1640 oelm1->op, oelast->op);
1642 if (folded && is_gimple_min_invariant (folded))
1644 if (dump_file && (dump_flags & TDF_DETAILS))
1645 fprintf (dump_file, "Merging constants\n");
1647 ops->pop ();
1648 ops->pop ();
1650 add_to_ops_vec (ops, folded);
1651 reassociate_stats.constants_eliminated++;
1653 optimize_ops_list (opcode, ops);
1654 return;
1659 eliminate_using_constants (opcode, ops);
1660 oelast = NULL;
1662 for (i = 0; ops->iterate (i, &oe);)
1664 bool done = false;
1666 if (eliminate_not_pairs (opcode, ops, i, oe))
1667 return;
1668 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1669 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1670 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1672 if (done)
1673 return;
1674 iterate = true;
1675 oelast = NULL;
1676 continue;
1678 oelast = oe;
1679 i++;
1682 length = ops->length ();
1683 oelast = ops->last ();
1685 if (iterate)
1686 optimize_ops_list (opcode, ops);
1689 /* The following functions are subroutines to optimize_range_tests and allow
1690 it to try to change a logical combination of comparisons into a range
1691 test.
1693 For example, both
1694 X == 2 || X == 5 || X == 3 || X == 4
1695 and
1696 X >= 2 && X <= 5
1697 are converted to
1698 (unsigned) (X - 2) <= 3
1700 For more information see comments above fold_range_test in fold-const.c;
1701 this implementation is for GIMPLE. */
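/* Worked example for the conversion above (not part of the pass):
   subtracting the low bound maps the accepted values {2, 3, 4, 5} onto
   {0, 1, 2, 3}, and doing the comparison in an unsigned type makes any
   X below 2 wrap around to a very large value, so the single test
   (unsigned) (X - 2) <= 3 accepts exactly the same inputs as the
   original chain of comparisons.  */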
1703 struct range_entry
1705 tree exp;
1706 tree low;
1707 tree high;
1708 bool in_p;
1709 bool strict_overflow_p;
1710 unsigned int idx, next;
1713 /* This is similar to make_range in fold-const.c, but on top of
1714 GIMPLE instead of trees. If EXP is non-NULL, it should be
1715 an SSA_NAME and the STMT argument is ignored; otherwise the STMT
1716 argument should be a GIMPLE_COND. */
1718 static void
1719 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1721 int in_p;
1722 tree low, high;
1723 bool is_bool, strict_overflow_p;
1725 r->exp = NULL_TREE;
1726 r->in_p = false;
1727 r->strict_overflow_p = false;
1728 r->low = NULL_TREE;
1729 r->high = NULL_TREE;
1730 if (exp != NULL_TREE
1731 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1732 return;
1734 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1735 and see if we can refine the range. Some of the cases below may not
1736 happen, but it doesn't seem worth worrying about this. We "continue"
1737 the outer loop when we've changed something; otherwise we "break"
1738 the switch, which will "break" the while. */
1739 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1740 high = low;
1741 in_p = 0;
1742 strict_overflow_p = false;
1743 is_bool = false;
1744 if (exp == NULL_TREE)
1745 is_bool = true;
1746 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1748 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1749 is_bool = true;
1750 else
1751 return;
1753 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1754 is_bool = true;
1756 while (1)
1758 enum tree_code code;
1759 tree arg0, arg1, exp_type;
1760 tree nexp;
1761 location_t loc;
1763 if (exp != NULL_TREE)
1765 if (TREE_CODE (exp) != SSA_NAME)
1766 break;
1768 stmt = SSA_NAME_DEF_STMT (exp);
1769 if (!is_gimple_assign (stmt))
1770 break;
1772 code = gimple_assign_rhs_code (stmt);
1773 arg0 = gimple_assign_rhs1 (stmt);
1774 arg1 = gimple_assign_rhs2 (stmt);
1775 exp_type = TREE_TYPE (exp);
1777 else
1779 code = gimple_cond_code (stmt);
1780 arg0 = gimple_cond_lhs (stmt);
1781 arg1 = gimple_cond_rhs (stmt);
1782 exp_type = boolean_type_node;
1785 if (TREE_CODE (arg0) != SSA_NAME)
1786 break;
1787 loc = gimple_location (stmt);
1788 switch (code)
1790 case BIT_NOT_EXPR:
1791 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1793 in_p = !in_p;
1794 exp = arg0;
1795 continue;
1797 break;
1798 case SSA_NAME:
1799 exp = arg0;
1800 continue;
1801 CASE_CONVERT:
1802 if (is_bool)
1803 goto do_default;
1804 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1806 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1807 is_bool = true;
1808 else
1809 return;
1811 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1812 is_bool = true;
1813 goto do_default;
1814 case EQ_EXPR:
1815 case NE_EXPR:
1816 case LT_EXPR:
1817 case LE_EXPR:
1818 case GE_EXPR:
1819 case GT_EXPR:
1820 is_bool = true;
1821 /* FALLTHRU */
1822 default:
1823 if (!is_bool)
1824 return;
1825 do_default:
1826 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1827 &low, &high, &in_p,
1828 &strict_overflow_p);
1829 if (nexp != NULL_TREE)
1831 exp = nexp;
1832 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1833 continue;
1835 break;
1837 break;
1839 if (is_bool)
1841 r->exp = exp;
1842 r->in_p = in_p;
1843 r->low = low;
1844 r->high = high;
1845 r->strict_overflow_p = strict_overflow_p;
1849 /* Comparison function for qsort. Sort entries
1850 without SSA_NAME exp first, then with SSA_NAMEs sorted
1851 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1852 by increasing ->low and if ->low is the same, by increasing
1853 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1854 maximum. */
1856 static int
1857 range_entry_cmp (const void *a, const void *b)
1859 const struct range_entry *p = (const struct range_entry *) a;
1860 const struct range_entry *q = (const struct range_entry *) b;
1862 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1864 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1866 /* Group range_entries for the same SSA_NAME together. */
1867 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1868 return -1;
1869 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1870 return 1;
1871 /* If ->low is different, NULL low goes first, then by
1872 ascending low. */
1873 if (p->low != NULL_TREE)
1875 if (q->low != NULL_TREE)
1877 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1878 p->low, q->low);
1879 if (tem && integer_onep (tem))
1880 return -1;
1881 tem = fold_binary (GT_EXPR, boolean_type_node,
1882 p->low, q->low);
1883 if (tem && integer_onep (tem))
1884 return 1;
1886 else
1887 return 1;
1889 else if (q->low != NULL_TREE)
1890 return -1;
1891 /* If ->high is different, NULL high goes last, before that by
1892 ascending high. */
1893 if (p->high != NULL_TREE)
1895 if (q->high != NULL_TREE)
1897 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1898 p->high, q->high);
1899 if (tem && integer_onep (tem))
1900 return -1;
1901 tem = fold_binary (GT_EXPR, boolean_type_node,
1902 p->high, q->high);
1903 if (tem && integer_onep (tem))
1904 return 1;
1906 else
1907 return -1;
1909 else if (p->high != NULL_TREE)
1910 return 1;
1911 /* If both ranges are the same, sort below by ascending idx. */
1913 else
1914 return 1;
1916 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1917 return -1;
1919 if (p->idx < q->idx)
1920 return -1;
1921 else
1923 gcc_checking_assert (p->idx > q->idx);
1924 return 1;
1928 /* Helper routine of optimize_range_test.
1929 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1930 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1931 OPCODE and OPS are arguments of optimize_range_tests. Return
1932 true if the range merge has been successful.
1933 If OPCODE is ERROR_MARK, this is called from within
1934 maybe_optimize_range_tests and is performing inter-bb range optimization.
1936 Changes should then be performed right away, and whether an op is
1936 BIT_AND_EXPR or BIT_IOR_EXPR is found in oe->rank. */
1938 static bool
1939 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1940 unsigned int count, enum tree_code opcode,
1941 vec<operand_entry_t> *ops, tree exp, bool in_p,
1942 tree low, tree high, bool strict_overflow_p)
1944 operand_entry_t oe = (*ops)[range->idx];
1945 tree op = oe->op;
1946 gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id));
1947 location_t loc = gimple_location (stmt);
1948 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
1949 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
1950 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1951 gimple_stmt_iterator gsi;
1953 if (tem == NULL_TREE)
1954 return false;
1956 if (strict_overflow_p && issue_strict_overflow_warning (wc))
1957 warning_at (loc, OPT_Wstrict_overflow,
1958 "assuming signed overflow does not occur "
1959 "when simplifying range test");
1961 if (dump_file && (dump_flags & TDF_DETAILS))
1963 struct range_entry *r;
1964 fprintf (dump_file, "Optimizing range tests ");
1965 print_generic_expr (dump_file, range->exp, 0);
1966 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1967 print_generic_expr (dump_file, range->low, 0);
1968 fprintf (dump_file, ", ");
1969 print_generic_expr (dump_file, range->high, 0);
1970 fprintf (dump_file, "]");
1971 for (r = otherrange; r < otherrange + count; r++)
1973 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
1974 print_generic_expr (dump_file, r->low, 0);
1975 fprintf (dump_file, ", ");
1976 print_generic_expr (dump_file, r->high, 0);
1977 fprintf (dump_file, "]");
1979 fprintf (dump_file, "\n into ");
1980 print_generic_expr (dump_file, tem, 0);
1981 fprintf (dump_file, "\n");
1984 if (opcode == BIT_IOR_EXPR
1985 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
1986 tem = invert_truthvalue_loc (loc, tem);
1988 tem = fold_convert_loc (loc, optype, tem);
1989 gsi = gsi_for_stmt (stmt);
1990 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
1991 GSI_SAME_STMT);
1993 /* If doing inter-bb range test optimization, update the
1994 stmts immediately. Start with changing the first range test
1995 immediate use to the new value (TEM), or, if the first range
1996 test is a GIMPLE_COND stmt, change that condition. */
1997 if (opcode == ERROR_MARK)
1999 if (op)
2001 imm_use_iterator iter;
2002 use_operand_p use_p;
2003 gimple use_stmt;
2005 FOR_EACH_IMM_USE_STMT (use_stmt, iter, op)
2007 if (is_gimple_debug (use_stmt))
2008 continue;
2009 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2010 SET_USE (use_p, tem);
2011 update_stmt (use_stmt);
2014 else
2016 gimple_cond_set_code (stmt, NE_EXPR);
2017 gimple_cond_set_lhs (stmt, tem);
2018 gimple_cond_set_rhs (stmt, boolean_false_node);
2019 update_stmt (stmt);
2022 oe->op = tem;
2023 range->exp = exp;
2024 range->low = low;
2025 range->high = high;
2026 range->in_p = in_p;
2027 range->strict_overflow_p = false;
2029 for (range = otherrange; range < otherrange + count; range++)
2031 oe = (*ops)[range->idx];
2032 /* Now change all the other range test immediate uses, so that
2033 those tests will be optimized away. */
2034 if (opcode == ERROR_MARK)
2036 if (oe->op)
2038 imm_use_iterator iter;
2039 use_operand_p use_p;
2040 gimple use_stmt;
2042 FOR_EACH_IMM_USE_STMT (use_stmt, iter, oe->op)
2044 if (is_gimple_debug (use_stmt))
2045 continue;
2046 /* If imm use of _8 is a statement like _7 = _8 | _9;,
2047 adjust it into _7 = _9;. */
2048 if (is_gimple_assign (use_stmt)
2049 && gimple_assign_rhs_code (use_stmt) == oe->rank)
2051 tree expr = NULL_TREE;
2052 if (oe->op == gimple_assign_rhs1 (use_stmt))
2053 expr = gimple_assign_rhs2 (use_stmt);
2054 else if (oe->op == gimple_assign_rhs2 (use_stmt))
2055 expr = gimple_assign_rhs1 (use_stmt);
2056 if (expr
2057 && expr != oe->op
2058 && TREE_CODE (expr) == SSA_NAME)
2060 gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt);
2061 gimple_assign_set_rhs_with_ops (&gsi2, SSA_NAME,
2062 expr, NULL_TREE);
2063 update_stmt (use_stmt);
2064 continue;
2067 /* If imm use of _8 is a statement like _7 = (int) _8;,
2068 adjust it into _7 = 0; or _7 = 1;. */
2069 if (gimple_assign_cast_p (use_stmt)
2070 && oe->op == gimple_assign_rhs1 (use_stmt))
2072 tree lhs = gimple_assign_lhs (use_stmt);
2073 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
2075 gimple_stmt_iterator gsi2
2076 = gsi_for_stmt (use_stmt);
2077 tree expr = build_int_cst (TREE_TYPE (lhs),
2078 oe->rank == BIT_IOR_EXPR
2079 ? 0 : 1);
2080 gimple_assign_set_rhs_with_ops (&gsi2,
2081 INTEGER_CST,
2082 expr, NULL_TREE);
2083 update_stmt (use_stmt);
2084 continue;
2087 /* Otherwise replace the use with 0 or 1. */
2088 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2089 SET_USE (use_p,
2090 build_int_cst (TREE_TYPE (oe->op),
2091 oe->rank == BIT_IOR_EXPR
2092 ? 0 : 1));
2093 update_stmt (use_stmt);
2096 else
2098 /* If range test was a GIMPLE_COND, simply change it
2099 into an always false or always true condition. */
2100 stmt = last_stmt (BASIC_BLOCK (oe->id));
2101 if (oe->rank == BIT_IOR_EXPR)
2102 gimple_cond_make_false (stmt);
2103 else
2104 gimple_cond_make_true (stmt);
2105 update_stmt (stmt);
2108 oe->op = error_mark_node;
2109 range->exp = NULL_TREE;
2111 return true;
2114 /* Optimize range tests, similarly to how fold_range_test optimizes
2115 them on trees. The tree code for the binary
2116 operation between all the operands is OPCODE.
2117 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2118 maybe_optimize_range_tests for inter-bb range optimization.
2119 In that case, if oe->op is NULL, oe->id is the index of the
2120 basic block whose GIMPLE_COND is && or ||ed into the test, and oe->rank says
2121 the actual opcode. */
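/* An illustrative sketch of the kind of rewrite performed below (source-level
   view, hypothetical variable x, not taken from the code itself): the test

     x == 0 || x == 1 || x == 2 || x == 3

   is parsed by init_range_entry into four singleton ranges on x, which the
   merge loop combines via merge_ranges into the single range [0, 3], so
   update_range_test can emit one unsigned check such as x <= 3 in place of
   the four comparisons.  */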
2123 static void
2124 optimize_range_tests (enum tree_code opcode,
2125 vec<operand_entry_t> *ops)
2127 unsigned int length = ops->length (), i, j, first;
2128 operand_entry_t oe;
2129 struct range_entry *ranges;
2130 bool any_changes = false;
2132 if (length == 1)
2133 return;
2135 ranges = XNEWVEC (struct range_entry, length);
2136 for (i = 0; i < length; i++)
2138 oe = (*ops)[i];
2139 ranges[i].idx = i;
2140 init_range_entry (ranges + i, oe->op,
2141 oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id)));
2142 /* For |, invert it now; we will invert it again before emitting
2143 the optimized expression. */
2144 if (opcode == BIT_IOR_EXPR
2145 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2146 ranges[i].in_p = !ranges[i].in_p;
2149 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2150 for (i = 0; i < length; i++)
2151 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2152 break;
2154 /* Try to merge ranges. */
2155 for (first = i; i < length; i++)
2157 tree low = ranges[i].low;
2158 tree high = ranges[i].high;
2159 int in_p = ranges[i].in_p;
2160 bool strict_overflow_p = ranges[i].strict_overflow_p;
2161 int update_fail_count = 0;
2163 for (j = i + 1; j < length; j++)
2165 if (ranges[i].exp != ranges[j].exp)
2166 break;
2167 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2168 ranges[j].in_p, ranges[j].low, ranges[j].high))
2169 break;
2170 strict_overflow_p |= ranges[j].strict_overflow_p;
2173 if (j == i + 1)
2174 continue;
2176 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2177 ops, ranges[i].exp, in_p, low, high,
2178 strict_overflow_p))
2180 i = j - 1;
2181 any_changes = true;
2183 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2184 while update_range_test would fail. */
2185 else if (update_fail_count == 64)
2186 i = j - 1;
2187 else
2188 ++update_fail_count;
2191 /* Optimize X == CST1 || X == CST2
2192 if popcount (CST1 ^ CST2) == 1 into
2193 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2194 Similarly for ranges. E.g.
2195 X != 2 && X != 3 && X != 10 && X != 11
2196 will be transformed by the above loop into
2197 (X - 2U) <= 1U && (X - 10U) <= 1U
2198 and this loop can transform that into
2199 ((X & ~8) - 2U) <= 1U. */
2200 for (i = first; i < length; i++)
2202 tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
2204 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2205 continue;
2206 type = TREE_TYPE (ranges[i].exp);
2207 if (!INTEGRAL_TYPE_P (type))
2208 continue;
2209 lowi = ranges[i].low;
2210 if (lowi == NULL_TREE)
2211 lowi = TYPE_MIN_VALUE (type);
2212 highi = ranges[i].high;
2213 if (highi == NULL_TREE)
2214 continue;
2215 for (j = i + 1; j < length && j < i + 64; j++)
2217 if (ranges[j].exp == NULL_TREE)
2218 continue;
2219 if (ranges[i].exp != ranges[j].exp)
2220 break;
2221 if (ranges[j].in_p)
2222 continue;
2223 lowj = ranges[j].low;
2224 if (lowj == NULL_TREE)
2225 continue;
2226 highj = ranges[j].high;
2227 if (highj == NULL_TREE)
2228 highj = TYPE_MAX_VALUE (type);
2229 tem = fold_binary (GT_EXPR, boolean_type_node,
2230 lowj, highi);
2231 if (tem == NULL_TREE || !integer_onep (tem))
2232 continue;
2233 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2234 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2235 continue;
2236 gcc_checking_assert (!integer_zerop (lowxor));
2237 tem = fold_binary (MINUS_EXPR, type, lowxor,
2238 build_int_cst (type, 1));
2239 if (tem == NULL_TREE)
2240 continue;
2241 tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
2242 if (tem == NULL_TREE || !integer_zerop (tem))
2243 continue;
2244 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2245 if (!tree_int_cst_equal (lowxor, highxor))
2246 continue;
2247 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2248 exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
2249 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2250 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2251 if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
2252 ranges[i].in_p, lowj, highj,
2253 ranges[i].strict_overflow_p
2254 || ranges[j].strict_overflow_p))
2256 any_changes = true;
2257 break;
2262 if (any_changes && opcode != ERROR_MARK)
2264 j = 0;
2265 FOR_EACH_VEC_ELT (*ops, i, oe)
2267 if (oe->op == error_mark_node)
2268 continue;
2269 else if (i != j)
2270 (*ops)[j] = oe;
2271 j++;
2273 ops->truncate (j);
2276 XDELETEVEC (ranges);
2279 /* Return true if STMT is a cast like:
2280 <bb N>:
2282 _123 = (int) _234;
2284 <bb M>:
2285 # _345 = PHI <_123(N), 1(...), 1(...)>
2286 where _234 has bool type, _123 has a single use and
2287 bb N has a single successor M. This is commonly used in
2288 the last block of a range test. */
2290 static bool
2291 final_range_test_p (gimple stmt)
2293 basic_block bb, rhs_bb;
2294 edge e;
2295 tree lhs, rhs;
2296 use_operand_p use_p;
2297 gimple use_stmt;
2299 if (!gimple_assign_cast_p (stmt))
2300 return false;
2301 bb = gimple_bb (stmt);
2302 if (!single_succ_p (bb))
2303 return false;
2304 e = single_succ_edge (bb);
2305 if (e->flags & EDGE_COMPLEX)
2306 return false;
2308 lhs = gimple_assign_lhs (stmt);
2309 rhs = gimple_assign_rhs1 (stmt);
2310 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2311 || TREE_CODE (rhs) != SSA_NAME
2312 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2313 return false;
2315 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2316 if (!single_imm_use (lhs, &use_p, &use_stmt))
2317 return false;
2319 if (gimple_code (use_stmt) != GIMPLE_PHI
2320 || gimple_bb (use_stmt) != e->dest)
2321 return false;
2323 /* And that the rhs is defined in the same loop. */
2324 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2325 if (rhs_bb == NULL
2326 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2327 return false;
2329 return true;
2332 /* Return true if BB is suitable basic block for inter-bb range test
2333 optimization. If BACKWARD is true, BB should be the only predecessor
2334 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2335 or compared against to find a common basic block to which all conditions
2336 branch if true resp. false. If BACKWARD is false, TEST_BB should
2337 be the only predecessor of BB. */
2339 static bool
2340 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2341 bool backward)
2343 edge_iterator ei, ei2;
2344 edge e, e2;
2345 gimple stmt;
2346 gimple_stmt_iterator gsi;
2347 bool other_edge_seen = false;
2348 bool is_cond;
2350 if (test_bb == bb)
2351 return false;
2352 /* Check last stmt first. */
2353 stmt = last_stmt (bb);
2354 if (stmt == NULL
2355 || (gimple_code (stmt) != GIMPLE_COND
2356 && (backward || !final_range_test_p (stmt)))
2357 || gimple_visited_p (stmt)
2358 || stmt_could_throw_p (stmt)
2359 || *other_bb == bb)
2360 return false;
2361 is_cond = gimple_code (stmt) == GIMPLE_COND;
2362 if (is_cond)
2364 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2365 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2366 to *OTHER_BB (if not set yet, try to find it out). */
2367 if (EDGE_COUNT (bb->succs) != 2)
2368 return false;
2369 FOR_EACH_EDGE (e, ei, bb->succs)
2371 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2372 return false;
2373 if (e->dest == test_bb)
2375 if (backward)
2376 continue;
2377 else
2378 return false;
2380 if (e->dest == bb)
2381 return false;
2382 if (*other_bb == NULL)
2384 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2385 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2386 return false;
2387 else if (e->dest == e2->dest)
2388 *other_bb = e->dest;
2389 if (*other_bb == NULL)
2390 return false;
2392 if (e->dest == *other_bb)
2393 other_edge_seen = true;
2394 else if (backward)
2395 return false;
2397 if (*other_bb == NULL || !other_edge_seen)
2398 return false;
2400 else if (single_succ (bb) != *other_bb)
2401 return false;
2403 /* Now check all PHIs of *OTHER_BB. */
2404 e = find_edge (bb, *other_bb);
2405 e2 = find_edge (test_bb, *other_bb);
2406 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2408 gimple phi = gsi_stmt (gsi);
2409 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2410 corresponding to BB and TEST_BB predecessor must be the same. */
2411 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2412 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2414 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2415 one of the PHIs should have the lhs of the last stmt in
2416 that block as PHI arg and that PHI should have 0 or 1
2417 corresponding to it in all other range test basic blocks
2418 considered. */
2419 if (!is_cond)
2421 if (gimple_phi_arg_def (phi, e->dest_idx)
2422 == gimple_assign_lhs (stmt)
2423 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2424 || integer_onep (gimple_phi_arg_def (phi,
2425 e2->dest_idx))))
2426 continue;
2428 else
2430 gimple test_last = last_stmt (test_bb);
2431 if (gimple_code (test_last) != GIMPLE_COND
2432 && gimple_phi_arg_def (phi, e2->dest_idx)
2433 == gimple_assign_lhs (test_last)
2434 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2435 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2436 continue;
2439 return false;
2442 return true;
2445 /* Return true if BB doesn't have side-effects that would disallow
2446 range test optimization, all SSA_NAMEs set in the bb are consumed
2447 in the bb and there are no PHIs. */
2449 static bool
2450 no_side_effect_bb (basic_block bb)
2452 gimple_stmt_iterator gsi;
2453 gimple last;
2455 if (!gimple_seq_empty_p (phi_nodes (bb)))
2456 return false;
2457 last = last_stmt (bb);
2458 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2460 gimple stmt = gsi_stmt (gsi);
2461 tree lhs;
2462 imm_use_iterator imm_iter;
2463 use_operand_p use_p;
2465 if (is_gimple_debug (stmt))
2466 continue;
2467 if (gimple_has_side_effects (stmt))
2468 return false;
2469 if (stmt == last)
2470 return true;
2471 if (!is_gimple_assign (stmt))
2472 return false;
2473 lhs = gimple_assign_lhs (stmt);
2474 if (TREE_CODE (lhs) != SSA_NAME)
2475 return false;
2476 if (gimple_assign_rhs_could_trap_p (stmt))
2477 return false;
2478 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2480 gimple use_stmt = USE_STMT (use_p);
2481 if (is_gimple_debug (use_stmt))
2482 continue;
2483 if (gimple_bb (use_stmt) != bb)
2484 return false;
2487 return false;
2490 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2491 return true and fill in *OPS recursively. */
2493 static bool
2494 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2495 struct loop *loop)
2497 gimple stmt = SSA_NAME_DEF_STMT (var);
2498 tree rhs[2];
2499 int i;
2501 if (!is_reassociable_op (stmt, code, loop))
2502 return false;
2504 rhs[0] = gimple_assign_rhs1 (stmt);
2505 rhs[1] = gimple_assign_rhs2 (stmt);
2506 gimple_set_visited (stmt, true);
2507 for (i = 0; i < 2; i++)
2508 if (TREE_CODE (rhs[i]) == SSA_NAME
2509 && !get_ops (rhs[i], code, ops, loop)
2510 && has_single_use (rhs[i]))
2512 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2514 oe->op = rhs[i];
2515 oe->rank = code;
2516 oe->id = 0;
2517 oe->count = 1;
2518 ops->safe_push (oe);
2520 return true;
2523 /* Inter-bb range test optimization. */
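/* Illustrative sketch (hypothetical CFG, not taken from the code below):
   when a test such as

     if (a == 3 || a == 4)
       foo ();

   is spread across two basic blocks, each ending in its own GIMPLE_COND and
   both branching to the same destination when true, no single statement
   chain contains the whole expression.  maybe_optimize_range_tests walks
   those blocks, collects the individual conditions into one operand vector
   and calls optimize_range_tests with opcode ERROR_MARK, which here could
   merge the two singleton ranges into a check like (unsigned) (a - 3) <= 1.  */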
2525 static void
2526 maybe_optimize_range_tests (gimple stmt)
2528 basic_block first_bb = gimple_bb (stmt);
2529 basic_block last_bb = first_bb;
2530 basic_block other_bb = NULL;
2531 basic_block bb;
2532 edge_iterator ei;
2533 edge e;
2534 vec<operand_entry_t> ops = vNULL;
2536 /* Consider only basic blocks that end with GIMPLE_COND or
2537 a cast statement satisfying final_range_test_p. All
2538 but the last bb in the first_bb .. last_bb range
2539 should end with GIMPLE_COND. */
2540 if (gimple_code (stmt) == GIMPLE_COND)
2542 if (EDGE_COUNT (first_bb->succs) != 2)
2543 return;
2545 else if (final_range_test_p (stmt))
2546 other_bb = single_succ (first_bb);
2547 else
2548 return;
2550 if (stmt_could_throw_p (stmt))
2551 return;
2553 /* As relative ordering of post-dominator sons isn't fixed,
2554 maybe_optimize_range_tests can be called first on any
2555 bb in the range we want to optimize. So, start searching
2556 backwards, if first_bb can be set to a predecessor. */
2557 while (single_pred_p (first_bb))
2559 basic_block pred_bb = single_pred (first_bb);
2560 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2561 break;
2562 if (!no_side_effect_bb (first_bb))
2563 break;
2564 first_bb = pred_bb;
2566 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2567 Before starting forward search in last_bb successors, find
2568 out the other_bb. */
2569 if (first_bb == last_bb)
2571 other_bb = NULL;
2572 /* As non-GIMPLE_COND last stmt always terminates the range,
2573 if forward search didn't discover anything, just give up. */
2574 if (gimple_code (stmt) != GIMPLE_COND)
2575 return;
2576 /* Look at both successors. Either it ends with a GIMPLE_COND
2577 and satisfies suitable_cond_bb, or ends with a cast and
2578 other_bb is that cast's successor. */
2579 FOR_EACH_EDGE (e, ei, first_bb->succs)
2580 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2581 || e->dest == first_bb)
2582 return;
2583 else if (single_pred_p (e->dest))
2585 stmt = last_stmt (e->dest);
2586 if (stmt
2587 && gimple_code (stmt) == GIMPLE_COND
2588 && EDGE_COUNT (e->dest->succs) == 2)
2590 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2591 break;
2592 else
2593 other_bb = NULL;
2595 else if (stmt
2596 && final_range_test_p (stmt)
2597 && find_edge (first_bb, single_succ (e->dest)))
2599 other_bb = single_succ (e->dest);
2600 if (other_bb == first_bb)
2601 other_bb = NULL;
2604 if (other_bb == NULL)
2605 return;
2607 /* Now do the forward search, moving last_bb to successor bbs
2608 that aren't other_bb. */
2609 while (EDGE_COUNT (last_bb->succs) == 2)
2611 FOR_EACH_EDGE (e, ei, last_bb->succs)
2612 if (e->dest != other_bb)
2613 break;
2614 if (e == NULL)
2615 break;
2616 if (!single_pred_p (e->dest))
2617 break;
2618 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2619 break;
2620 if (!no_side_effect_bb (e->dest))
2621 break;
2622 last_bb = e->dest;
2624 if (first_bb == last_bb)
2625 return;
2626 /* Here basic blocks first_bb through last_bb's predecessor
2627 end with GIMPLE_COND, all of them have one of the edges to
2628 other_bb and another to another block in the range,
2629 all blocks except first_bb don't have side-effects and
2630 last_bb ends with either GIMPLE_COND, or cast satisfying
2631 final_range_test_p. */
2632 for (bb = last_bb; ; bb = single_pred (bb))
2634 enum tree_code code;
2635 tree lhs, rhs;
2637 e = find_edge (bb, other_bb);
2638 stmt = last_stmt (bb);
2639 gimple_set_visited (stmt, true);
2640 if (gimple_code (stmt) != GIMPLE_COND)
2642 use_operand_p use_p;
2643 gimple phi;
2644 edge e2;
2645 unsigned int d;
2647 lhs = gimple_assign_lhs (stmt);
2648 rhs = gimple_assign_rhs1 (stmt);
2649 gcc_assert (bb == last_bb);
2651 /* stmt is
2652 _123 = (int) _234;
2654 followed by:
2655 <bb M>:
2656 # _345 = PHI <_123(N), 1(...), 1(...)>
2658 or 0 instead of 1. If it is 0, the _234
2659 range test is anded together with all the
2660 other range tests, if it is 1, it is ored with
2661 them. */
2662 single_imm_use (lhs, &use_p, &phi);
2663 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2664 e2 = find_edge (first_bb, other_bb);
2665 d = e2->dest_idx;
2666 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2667 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2668 code = BIT_AND_EXPR;
2669 else
2671 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2672 code = BIT_IOR_EXPR;
2675 /* If _234 SSA_NAME_DEF_STMT is
2676 _234 = _567 | _789;
2677 (or &, corresponding to 1/0 in the phi arguments),
2678 push into ops the individual range test arguments
2679 of the bitwise or resp. and, recursively. */
2680 if (!get_ops (rhs, code, &ops,
2681 loop_containing_stmt (stmt))
2682 && has_single_use (rhs))
2684 /* Otherwise, push the _234 range test itself. */
2685 operand_entry_t oe
2686 = (operand_entry_t) pool_alloc (operand_entry_pool);
2688 oe->op = rhs;
2689 oe->rank = code;
2690 oe->id = 0;
2691 oe->count = 1;
2692 ops.safe_push (oe);
2694 continue;
2696 /* Otherwise stmt is GIMPLE_COND. */
2697 code = gimple_cond_code (stmt);
2698 lhs = gimple_cond_lhs (stmt);
2699 rhs = gimple_cond_rhs (stmt);
2700 if (TREE_CODE (lhs) == SSA_NAME
2701 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2702 && ((code != EQ_EXPR && code != NE_EXPR)
2703 || rhs != boolean_false_node
2704 /* Either push into ops the individual bitwise
2705 or resp. and operands, depending on which
2706 edge is other_bb. */
2707 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2708 ^ (code == EQ_EXPR))
2709 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2710 loop_containing_stmt (stmt))))
2712 /* Or push the GIMPLE_COND stmt itself. */
2713 operand_entry_t oe
2714 = (operand_entry_t) pool_alloc (operand_entry_pool);
2716 oe->op = NULL;
2717 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2718 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2719 /* oe->op = NULL signals that there is no SSA_NAME
2720 for the range test, and oe->id instead is the
2721 number of the basic block at whose end the
2722 GIMPLE_COND is found. */
2723 oe->id = bb->index;
2724 oe->count = 1;
2725 ops.safe_push (oe);
2727 if (bb == first_bb)
2728 break;
2730 if (ops.length () > 1)
2731 optimize_range_tests (ERROR_MARK, &ops);
2732 ops.release ();
2735 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2736 of STMT in its operands. This is also known as a "destructive
2737 update" operation. */
2739 static bool
2740 is_phi_for_stmt (gimple stmt, tree operand)
2742 gimple def_stmt;
2743 tree lhs;
2744 use_operand_p arg_p;
2745 ssa_op_iter i;
2747 if (TREE_CODE (operand) != SSA_NAME)
2748 return false;
2750 lhs = gimple_assign_lhs (stmt);
2752 def_stmt = SSA_NAME_DEF_STMT (operand);
2753 if (gimple_code (def_stmt) != GIMPLE_PHI)
2754 return false;
2756 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2757 if (lhs == USE_FROM_PTR (arg_p))
2758 return true;
2759 return false;
2762 /* Remove def stmt of VAR if VAR has zero uses and recurse
2763 on rhs1 operand if so. */
2765 static void
2766 remove_visited_stmt_chain (tree var)
2768 gimple stmt;
2769 gimple_stmt_iterator gsi;
2771 while (1)
2773 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2774 return;
2775 stmt = SSA_NAME_DEF_STMT (var);
2776 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
2778 var = gimple_assign_rhs1 (stmt);
2779 gsi = gsi_for_stmt (stmt);
2780 gsi_remove (&gsi, true);
2781 release_defs (stmt);
2783 else
2784 return;
2788 /* This function checks three consecutive operands in
2789 the passed operand vector OPS, starting from OPINDEX, and
2790 swaps two operands if it is profitable for the binary operation
2791 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
2793 We pair ops with the same rank if possible.
2795 The alternative we try is to see if STMT is a destructive
2796 update style statement, which is like:
2797 b = phi (a, ...)
2798 a = c + b;
2799 In that case, we want to use the destructive update form to
2800 expose the possible vectorizer sum reduction opportunity.
2801 In that case, the third operand will be the phi node. This
2802 check is not performed if STMT is null.
2804 We could, of course, try to be better as noted above, and do a
2805 lot of work to try to find these opportunities in >3 operand
2806 cases, but it is unlikely to be worth it. */
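/* A small made-up illustration of the rank-pairing case: if the three
   operands starting at OPINDEX have ranks 3, 3 and 5, the first and third
   are swapped, leaving 5, 3, 3, so the two rank-3 operands end up feeding
   the same binary statement, which gives later redundancy elimination a
   better chance of finding an identical computation elsewhere.  */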
2808 static void
2809 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
2810 unsigned int opindex, gimple stmt)
2812 operand_entry_t oe1, oe2, oe3;
2814 oe1 = ops[opindex];
2815 oe2 = ops[opindex + 1];
2816 oe3 = ops[opindex + 2];
2818 if ((oe1->rank == oe2->rank
2819 && oe2->rank != oe3->rank)
2820 || (stmt && is_phi_for_stmt (stmt, oe3->op)
2821 && !is_phi_for_stmt (stmt, oe1->op)
2822 && !is_phi_for_stmt (stmt, oe2->op)))
2824 struct operand_entry temp = *oe3;
2825 oe3->op = oe1->op;
2826 oe3->rank = oe1->rank;
2827 oe1->op = temp.op;
2828 oe1->rank = temp.rank;
2830 else if ((oe1->rank == oe3->rank
2831 && oe2->rank != oe3->rank)
2832 || (stmt && is_phi_for_stmt (stmt, oe2->op)
2833 && !is_phi_for_stmt (stmt, oe1->op)
2834 && !is_phi_for_stmt (stmt, oe3->op)))
2836 struct operand_entry temp = *oe2;
2837 oe2->op = oe1->op;
2838 oe2->rank = oe1->rank;
2839 oe1->op = temp.op;
2840 oe1->rank = temp.rank;
2844 /* Recursively rewrite our linearized statements so that the operators
2845 match those in OPS[OPINDEX], putting the computation in rank
2846 order. */
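/* Sketch of the effect (hypothetical names): for a linearized chain

     t1 = x + y;
     t2 = t1 + z;

   with a sorted operand vector {o0, o1, o2}, the recursion rewrites the
   innermost statement to t1 = o1 + o2 and the root to t2 = t1 + o0, so the
   chain combines the operands in rank order while reusing the existing
   statements.  */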
2848 static void
2849 rewrite_expr_tree (gimple stmt, unsigned int opindex,
2850 vec<operand_entry_t> ops, bool moved)
2852 tree rhs1 = gimple_assign_rhs1 (stmt);
2853 tree rhs2 = gimple_assign_rhs2 (stmt);
2854 operand_entry_t oe;
2856 /* If we have three operands left, then we want to make sure the ones
2857 that get the double binary op are chosen wisely. */
2858 if (opindex + 3 == ops.length ())
2859 swap_ops_for_binary_stmt (ops, opindex, stmt);
2861 /* The final recursion case for this function is that you have
2862 exactly two operations left.
2863 If we had exactly one op in the entire list to start with, we
2864 would never have called this function, and the tail recursion
2865 rewrites them one at a time. */
2866 if (opindex + 2 == ops.length ())
2868 operand_entry_t oe1, oe2;
2870 oe1 = ops[opindex];
2871 oe2 = ops[opindex + 1];
2873 if (rhs1 != oe1->op || rhs2 != oe2->op)
2875 if (dump_file && (dump_flags & TDF_DETAILS))
2877 fprintf (dump_file, "Transforming ");
2878 print_gimple_stmt (dump_file, stmt, 0, 0);
2881 gimple_assign_set_rhs1 (stmt, oe1->op);
2882 gimple_assign_set_rhs2 (stmt, oe2->op);
2883 update_stmt (stmt);
2884 if (rhs1 != oe1->op && rhs1 != oe2->op)
2885 remove_visited_stmt_chain (rhs1);
2887 if (dump_file && (dump_flags & TDF_DETAILS))
2889 fprintf (dump_file, " into ");
2890 print_gimple_stmt (dump_file, stmt, 0, 0);
2893 return;
2896 /* If we hit here, we should have 3 or more ops left. */
2897 gcc_assert (opindex + 2 < ops.length ());
2899 /* Rewrite the next operator. */
2900 oe = ops[opindex];
2902 if (oe->op != rhs2)
2904 if (!moved)
2906 gimple_stmt_iterator gsinow, gsirhs1;
2907 gimple stmt1 = stmt, stmt2;
2908 unsigned int count;
2910 gsinow = gsi_for_stmt (stmt);
2911 count = ops.length () - opindex - 2;
2912 while (count-- != 0)
2914 stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
2915 gsirhs1 = gsi_for_stmt (stmt2);
2916 gsi_move_before (&gsirhs1, &gsinow);
2917 gsi_prev (&gsinow);
2918 stmt1 = stmt2;
2920 moved = true;
2923 if (dump_file && (dump_flags & TDF_DETAILS))
2925 fprintf (dump_file, "Transforming ");
2926 print_gimple_stmt (dump_file, stmt, 0, 0);
2929 gimple_assign_set_rhs2 (stmt, oe->op);
2930 update_stmt (stmt);
2932 if (dump_file && (dump_flags & TDF_DETAILS))
2934 fprintf (dump_file, " into ");
2935 print_gimple_stmt (dump_file, stmt, 0, 0);
2938 /* Recurse on the LHS of the binary operator, which is guaranteed to
2939 be the non-leaf side. */
2940 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
2943 /* Find out how many cycles we need to compute a statement chain.
2944 OPS_NUM holds the number of statements in a chain. CPU_WIDTH is the
2945 maximum number of independent statements we may execute per cycle. */
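/* Worked example (illustrative only): for ops_num = 10 and cpu_width = 2 we
   first spend 10 / (2 * 2) = 2 cycles reducing the operand count by
   cpu_width per cycle (10 -> 8 -> 6), which leaves rest = 6 operands; 6 is
   not a power of two, so we need floor_log2 (6) + 1 = 3 more halving
   cycles, for a total of 5.  */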
2947 static int
2948 get_required_cycles (int ops_num, int cpu_width)
2950 int res;
2951 int elog;
2952 unsigned int rest;
2954 /* While we have more than 2 * cpu_width operands
2955 we may reduce the number of operands by cpu_width
2956 per cycle. */
2957 res = ops_num / (2 * cpu_width);
2959 /* The remaining operand count may be reduced twice per cycle
2960 until we have only one operand. */
2961 rest = (unsigned)(ops_num - res * cpu_width);
2962 elog = exact_log2 (rest);
2963 if (elog >= 0)
2964 res += elog;
2965 else
2966 res += floor_log2 (rest) + 1;
2968 return res;
2971 /* Returns an optimal number of registers to use for computation of
2972 given statements. */
2974 static int
2975 get_reassociation_width (int ops_num, enum tree_code opc,
2976 enum machine_mode mode)
2978 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
2979 int width;
2980 int width_min;
2981 int cycles_best;
2983 if (param_width > 0)
2984 width = param_width;
2985 else
2986 width = targetm.sched.reassociation_width (opc, mode);
2988 if (width == 1)
2989 return width;
2991 /* Get the minimal time required for sequence computation. */
2992 cycles_best = get_required_cycles (ops_num, width);
2994 /* Check if we may use a smaller width and still compute the sequence in
2995 the same time. That will allow us to reduce register usage.
2996 get_required_cycles is monotonically increasing with lower width
2997 so we can perform a binary search for the minimal width that still
2998 results in the optimal cycle count. */
2999 width_min = 1;
3000 while (width > width_min)
3002 int width_mid = (width + width_min) / 2;
3004 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3005 width = width_mid;
3006 else if (width_min < width_mid)
3007 width_min = width_mid;
3008 else
3009 break;
3012 return width;
3015 /* Recursively rewrite our linearized statements so that the operators
3016 match those in OPS[OPINDEX], putting the computation in rank
3017 order and trying to allow operations to be executed in
3018 parallel. */
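/* For instance (illustrative), with width 2 and four operands a, b, c and d,
   the serial chain ((a + b) + c) + d, which needs three dependent additions,
   can instead be emitted as

     t1 = a + b;
     t2 = c + d;
     t3 = t1 + t2;

   where t1 and t2 are independent and may issue in the same cycle.  */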
3020 static void
3021 rewrite_expr_tree_parallel (gimple stmt, int width,
3022 vec<operand_entry_t> ops)
3024 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3025 int op_num = ops.length ();
3026 int stmt_num = op_num - 1;
3027 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3028 int op_index = op_num - 1;
3029 int stmt_index = 0;
3030 int ready_stmts_end = 0;
3031 int i = 0;
3032 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3034 /* We start expression rewriting from the top statements.
3035 So, in this loop we create a full list of statements
3036 we will work with. */
3037 stmts[stmt_num - 1] = stmt;
3038 for (i = stmt_num - 2; i >= 0; i--)
3039 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3041 for (i = 0; i < stmt_num; i++)
3043 tree op1, op2;
3045 /* Determine whether we should use results of
3046 already handled statements or not. */
3047 if (ready_stmts_end == 0
3048 && (i - stmt_index >= width || op_index < 1))
3049 ready_stmts_end = i;
3051 /* Now we choose operands for the next statement. A nonzero
3052 value in ready_stmts_end means that we should use
3053 the result of already generated statements as the new operand. */
3054 if (ready_stmts_end > 0)
3056 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3057 if (ready_stmts_end > stmt_index)
3058 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3059 else if (op_index >= 0)
3060 op2 = ops[op_index--]->op;
3061 else
3063 gcc_assert (stmt_index < i);
3064 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3067 if (stmt_index >= ready_stmts_end)
3068 ready_stmts_end = 0;
3070 else
3072 if (op_index > 1)
3073 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3074 op2 = ops[op_index--]->op;
3075 op1 = ops[op_index--]->op;
3078 /* If we emit the last statement then we should put
3079 operands into the last statement. It will also
3080 break the loop. */
3081 if (op_index < 0 && stmt_index == i)
3082 i = stmt_num - 1;
3084 if (dump_file && (dump_flags & TDF_DETAILS))
3086 fprintf (dump_file, "Transforming ");
3087 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3090 /* We keep the original statement only for the last one. All
3091 others are recreated. */
3092 if (i == stmt_num - 1)
3094 gimple_assign_set_rhs1 (stmts[i], op1);
3095 gimple_assign_set_rhs2 (stmts[i], op2);
3096 update_stmt (stmts[i]);
3098 else
3099 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3101 if (dump_file && (dump_flags & TDF_DETAILS))
3103 fprintf (dump_file, " into ");
3104 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3108 remove_visited_stmt_chain (last_rhs1);
3111 /* Transform STMT, which is really (A + B) + (C + D), into the left
3112 linear form, ((A+B)+C)+D.
3113 Recurse on D if necessary. */
3115 static void
3116 linearize_expr (gimple stmt)
3118 gimple_stmt_iterator gsinow, gsirhs;
3119 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3120 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3121 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3122 gimple newbinrhs = NULL;
3123 struct loop *loop = loop_containing_stmt (stmt);
3125 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3126 && is_reassociable_op (binrhs, rhscode, loop));
3128 gsinow = gsi_for_stmt (stmt);
3129 gsirhs = gsi_for_stmt (binrhs);
3130 gsi_move_before (&gsirhs, &gsinow);
3132 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3133 gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
3134 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3136 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3137 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3139 if (dump_file && (dump_flags & TDF_DETAILS))
3141 fprintf (dump_file, "Linearized: ");
3142 print_gimple_stmt (dump_file, stmt, 0, 0);
3145 reassociate_stats.linearized++;
3146 update_stmt (binrhs);
3147 update_stmt (binlhs);
3148 update_stmt (stmt);
3150 gimple_set_visited (stmt, true);
3151 gimple_set_visited (binlhs, true);
3152 gimple_set_visited (binrhs, true);
3154 /* Tail recurse on the new rhs if it still needs reassociation. */
3155 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3156 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3157 want to change the algorithm while converting to tuples. */
3158 linearize_expr (stmt);
3161 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3162 it. Otherwise, return NULL. */
3164 static gimple
3165 get_single_immediate_use (tree lhs)
3167 use_operand_p immuse;
3168 gimple immusestmt;
3170 if (TREE_CODE (lhs) == SSA_NAME
3171 && single_imm_use (lhs, &immuse, &immusestmt)
3172 && is_gimple_assign (immusestmt))
3173 return immusestmt;
3175 return NULL;
3178 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3179 representing the negated value. Insertions of any necessary
3180 instructions go before GSI.
3181 This function is recursive in that, if you hand it "a_5" as the
3182 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3183 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3185 static tree
3186 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
3188 gimple negatedefstmt = NULL;
3189 tree resultofnegate;
3191 /* If we are trying to negate a name, defined by an add, negate the
3192 add operands instead. */
3193 if (TREE_CODE (tonegate) == SSA_NAME)
3194 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3195 if (TREE_CODE (tonegate) == SSA_NAME
3196 && is_gimple_assign (negatedefstmt)
3197 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3198 && has_single_use (gimple_assign_lhs (negatedefstmt))
3199 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3201 gimple_stmt_iterator gsi;
3202 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3203 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3205 gsi = gsi_for_stmt (negatedefstmt);
3206 rhs1 = negate_value (rhs1, &gsi);
3207 gimple_assign_set_rhs1 (negatedefstmt, rhs1);
3209 gsi = gsi_for_stmt (negatedefstmt);
3210 rhs2 = negate_value (rhs2, &gsi);
3211 gimple_assign_set_rhs2 (negatedefstmt, rhs2);
3213 update_stmt (negatedefstmt);
3214 return gimple_assign_lhs (negatedefstmt);
3217 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3218 resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
3219 NULL_TREE, true, GSI_SAME_STMT);
3220 return resultofnegate;
3223 /* Return true if we should break up the subtract in STMT into an add
3224 with negate. This is true when the subtract operands are really
3225 adds, or the subtract itself is used in an add expression. In
3226 either case, breaking up the subtract into an add with negate
3227 exposes the adds to reassociation. */
3229 static bool
3230 should_break_up_subtract (gimple stmt)
3232 tree lhs = gimple_assign_lhs (stmt);
3233 tree binlhs = gimple_assign_rhs1 (stmt);
3234 tree binrhs = gimple_assign_rhs2 (stmt);
3235 gimple immusestmt;
3236 struct loop *loop = loop_containing_stmt (stmt);
3238 if (TREE_CODE (binlhs) == SSA_NAME
3239 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3240 return true;
3242 if (TREE_CODE (binrhs) == SSA_NAME
3243 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3244 return true;
3246 if (TREE_CODE (lhs) == SSA_NAME
3247 && (immusestmt = get_single_immediate_use (lhs))
3248 && is_gimple_assign (immusestmt)
3249 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3250 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3251 return true;
3252 return false;
3255 /* Transform STMT from A - B into A + -B. */
3257 static void
3258 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3260 tree rhs1 = gimple_assign_rhs1 (stmt);
3261 tree rhs2 = gimple_assign_rhs2 (stmt);
3263 if (dump_file && (dump_flags & TDF_DETAILS))
3265 fprintf (dump_file, "Breaking up subtract ");
3266 print_gimple_stmt (dump_file, stmt, 0, 0);
3269 rhs2 = negate_value (rhs2, gsip);
3270 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3271 update_stmt (stmt);
3274 /* Determine whether STMT is a builtin call that raises an SSA name
3275 to an integer power and has only one use. If so, and this is early
3276 reassociation and unsafe math optimizations are permitted, place
3277 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3278 If any of these conditions does not hold, return FALSE. */
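/* For example (hypothetical names, assuming unsafe math optimizations are
   enabled): for a single-use call

     t_1 = __builtin_pow (x_2, 3.0);

   *BASE is set to x_2 and *EXPONENT to 3.  A call with a fractional or
   negative constant exponent, or with an exponent of 0 or 1, is rejected.  */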
3280 static bool
3281 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3283 tree fndecl, arg1;
3284 REAL_VALUE_TYPE c, cint;
3286 if (!first_pass_instance
3287 || !flag_unsafe_math_optimizations
3288 || !is_gimple_call (stmt)
3289 || !has_single_use (gimple_call_lhs (stmt)))
3290 return false;
3292 fndecl = gimple_call_fndecl (stmt);
3294 if (!fndecl
3295 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3296 return false;
3298 switch (DECL_FUNCTION_CODE (fndecl))
3300 CASE_FLT_FN (BUILT_IN_POW):
3301 *base = gimple_call_arg (stmt, 0);
3302 arg1 = gimple_call_arg (stmt, 1);
3304 if (TREE_CODE (arg1) != REAL_CST)
3305 return false;
3307 c = TREE_REAL_CST (arg1);
3309 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3310 return false;
3312 *exponent = real_to_integer (&c);
3313 real_from_integer (&cint, VOIDmode, *exponent,
3314 *exponent < 0 ? -1 : 0, 0);
3315 if (!real_identical (&c, &cint))
3316 return false;
3318 break;
3320 CASE_FLT_FN (BUILT_IN_POWI):
3321 *base = gimple_call_arg (stmt, 0);
3322 arg1 = gimple_call_arg (stmt, 1);
3324 if (!host_integerp (arg1, 0))
3325 return false;
3327 *exponent = TREE_INT_CST_LOW (arg1);
3328 break;
3330 default:
3331 return false;
3334 /* Expanding negative exponents is generally unproductive, so we don't
3335 complicate matters with those. Exponents of zero and one should
3336 have been handled by expression folding. */
3337 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3338 return false;
3340 return true;
3343 /* Recursively linearize a binary expression that is the RHS of STMT.
3344 Place the operands of the expression tree in the vector named OPS. */
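/* As a rough illustration (made-up names): for

     t1 = a + b;
     t2 = c + d;
     t3 = t1 + t2;

   called on the t3 statement with PLUS_EXPR, the reassociable
   sub-expressions are walked recursively and OPS ends up holding the leaves
   a, b, c and d (for MULT_EXPR chains, operands that are suitable pow/powi
   calls are instead recorded via add_repeat_to_ops_vec).  */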
3346 static void
3347 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3348 bool is_associative, bool set_visited)
3350 tree binlhs = gimple_assign_rhs1 (stmt);
3351 tree binrhs = gimple_assign_rhs2 (stmt);
3352 gimple binlhsdef = NULL, binrhsdef = NULL;
3353 bool binlhsisreassoc = false;
3354 bool binrhsisreassoc = false;
3355 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3356 struct loop *loop = loop_containing_stmt (stmt);
3357 tree base = NULL_TREE;
3358 HOST_WIDE_INT exponent = 0;
3360 if (set_visited)
3361 gimple_set_visited (stmt, true);
3363 if (TREE_CODE (binlhs) == SSA_NAME)
3365 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3366 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3367 && !stmt_could_throw_p (binlhsdef));
3370 if (TREE_CODE (binrhs) == SSA_NAME)
3372 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3373 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3374 && !stmt_could_throw_p (binrhsdef));
3377 /* If the LHS is not reassociable, but the RHS is, we need to swap
3378 them. If neither is reassociable, there is nothing we can do, so
3379 just put them in the ops vector. If the LHS is reassociable,
3380 linearize it. If both are reassociable, then linearize the RHS
3381 and the LHS. */
3383 if (!binlhsisreassoc)
3385 tree temp;
3387 /* If this is not an associative operation like division, give up. */
3388 if (!is_associative)
3390 add_to_ops_vec (ops, binrhs);
3391 return;
3394 if (!binrhsisreassoc)
3396 if (rhscode == MULT_EXPR
3397 && TREE_CODE (binrhs) == SSA_NAME
3398 && acceptable_pow_call (binrhsdef, &base, &exponent))
3400 add_repeat_to_ops_vec (ops, base, exponent);
3401 gimple_set_visited (binrhsdef, true);
3403 else
3404 add_to_ops_vec (ops, binrhs);
3406 if (rhscode == MULT_EXPR
3407 && TREE_CODE (binlhs) == SSA_NAME
3408 && acceptable_pow_call (binlhsdef, &base, &exponent))
3410 add_repeat_to_ops_vec (ops, base, exponent);
3411 gimple_set_visited (binlhsdef, true);
3413 else
3414 add_to_ops_vec (ops, binlhs);
3416 return;
3419 if (dump_file && (dump_flags & TDF_DETAILS))
3421 fprintf (dump_file, "swapping operands of ");
3422 print_gimple_stmt (dump_file, stmt, 0, 0);
3425 swap_tree_operands (stmt,
3426 gimple_assign_rhs1_ptr (stmt),
3427 gimple_assign_rhs2_ptr (stmt));
3428 update_stmt (stmt);
3430 if (dump_file && (dump_flags & TDF_DETAILS))
3432 fprintf (dump_file, " is now ");
3433 print_gimple_stmt (dump_file, stmt, 0, 0);
3436 /* We want to make it so the lhs is always the reassociative op,
3437 so swap. */
3438 temp = binlhs;
3439 binlhs = binrhs;
3440 binrhs = temp;
3442 else if (binrhsisreassoc)
3444 linearize_expr (stmt);
3445 binlhs = gimple_assign_rhs1 (stmt);
3446 binrhs = gimple_assign_rhs2 (stmt);
3449 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3450 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3451 rhscode, loop));
3452 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3453 is_associative, set_visited);
3455 if (rhscode == MULT_EXPR
3456 && TREE_CODE (binrhs) == SSA_NAME
3457 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3459 add_repeat_to_ops_vec (ops, base, exponent);
3460 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3462 else
3463 add_to_ops_vec (ops, binrhs);
3466 /* Repropagate the negates back into subtracts, since no other pass
3467 currently does it. */
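/* A minimal example of what this buys us (hypothetical names): after the
   earlier break-up and rewriting phases we may be left with

     x = -a;
     y = b + x;

   which this pass turns back into y = b - a, removing the separate negation
   instead of leaving it behind.  */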
3469 static void
3470 repropagate_negates (void)
3472 unsigned int i = 0;
3473 tree negate;
3475 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3477 gimple user = get_single_immediate_use (negate);
3479 if (!user || !is_gimple_assign (user))
3480 continue;
3482 /* The negate operand can be either operand of a PLUS_EXPR
3483 (it can be the LHS if the RHS is a constant for example).
3485 Force the negate operand to the RHS of the PLUS_EXPR, then
3486 transform the PLUS_EXPR into a MINUS_EXPR. */
3487 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3489 /* If the negated operand appears on the LHS of the
3490 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3491 to force the negated operand to the RHS of the PLUS_EXPR. */
3492 if (gimple_assign_rhs1 (user) == negate)
3494 swap_tree_operands (user,
3495 gimple_assign_rhs1_ptr (user),
3496 gimple_assign_rhs2_ptr (user));
3499 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3500 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3501 if (gimple_assign_rhs2 (user) == negate)
3503 tree rhs1 = gimple_assign_rhs1 (user);
3504 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3505 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3506 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3507 update_stmt (user);
3510 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3512 if (gimple_assign_rhs1 (user) == negate)
3514 /* We have
3515 x = -a
3516 y = x - b
3517 which we transform into
3518 x = a + b
3519 y = -x .
3520 This pushes down the negate which we possibly can merge
3521 into some other operation, hence insert it into the
3522 plus_negates vector. */
3523 gimple feed = SSA_NAME_DEF_STMT (negate);
3524 tree a = gimple_assign_rhs1 (feed);
3525 tree rhs2 = gimple_assign_rhs2 (user);
3526 gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
3527 gimple_replace_lhs (feed, negate);
3528 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
3529 update_stmt (gsi_stmt (gsi));
3530 gsi2 = gsi_for_stmt (user);
3531 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
3532 update_stmt (gsi_stmt (gsi2));
3533 gsi_move_before (&gsi, &gsi2);
3534 plus_negates.safe_push (gimple_assign_lhs (gsi_stmt (gsi2)));
3536 else
3538 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3539 rid of one operation. */
3540 gimple feed = SSA_NAME_DEF_STMT (negate);
3541 tree a = gimple_assign_rhs1 (feed);
3542 tree rhs1 = gimple_assign_rhs1 (user);
3543 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3544 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3545 update_stmt (gsi_stmt (gsi));
3551 /* Returns true if OP is of a type for which we can do reassociation.
3552 That is for integral or non-saturating fixed-point types, and for
3553 floating point types when associative-math is enabled. */
3555 static bool
3556 can_reassociate_p (tree op)
3558 tree type = TREE_TYPE (op);
3559 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3560 || NON_SAT_FIXED_POINT_TYPE_P (type)
3561 || (flag_associative_math && FLOAT_TYPE_P (type)))
3562 return true;
3563 return false;
3566 /* Break up subtract operations in block BB.
3568 We do this top down because we don't know whether the subtract is
3569 part of a possible chain of reassociation except at the top.
3571 IE given
3572 d = f + g
3573 c = a + e
3574 b = c - d
3575 q = b - r
3576 k = t - q
3578 we want to break up k = t - q, but we won't until we've transformed q
3579 = b - r, which won't be broken up until we transform b = c - d.
3581 En passant, clear the GIMPLE visited flag on every statement. */
3583 static void
3584 break_up_subtract_bb (basic_block bb)
3586 gimple_stmt_iterator gsi;
3587 basic_block son;
3589 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3591 gimple stmt = gsi_stmt (gsi);
3592 gimple_set_visited (stmt, false);
3594 if (!is_gimple_assign (stmt)
3595 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3596 continue;
3598 /* Look for simple gimple subtract operations. */
3599 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3601 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3602 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3603 continue;
3605 /* Check for a subtract used only in an addition. If this
3606 is the case, transform it into add of a negate for better
3607 reassociation. IE transform C = A-B into C = A + -B if C
3608 is only used in an addition. */
3609 if (should_break_up_subtract (stmt))
3610 break_up_subtract (stmt, &gsi);
3612 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3613 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3614 plus_negates.safe_push (gimple_assign_lhs (stmt));
3616 for (son = first_dom_son (CDI_DOMINATORS, bb);
3617 son;
3618 son = next_dom_son (CDI_DOMINATORS, son))
3619 break_up_subtract_bb (son);
3622 /* Used for repeated factor analysis. */
3623 struct repeat_factor_d
3625 /* An SSA name that occurs in a multiply chain. */
3626 tree factor;
3628 /* Cached rank of the factor. */
3629 unsigned rank;
3631 /* Number of occurrences of the factor in the chain. */
3632 HOST_WIDE_INT count;
3634 /* An SSA name representing the product of this factor and
3635 all factors appearing later in the repeated factor vector. */
3636 tree repr;
3639 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3640 typedef const struct repeat_factor_d *const_repeat_factor_t;
3643 static vec<repeat_factor> repeat_factor_vec;
3645 /* Used for sorting the repeat factor vector. Sort primarily by
3646 ascending occurrence count, secondarily by descending rank. */
3648 static int
3649 compare_repeat_factors (const void *x1, const void *x2)
3651 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3652 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3654 if (rf1->count != rf2->count)
3655 return rf1->count - rf2->count;
3657 return rf2->rank - rf1->rank;
3660 /* Look for repeated operands in OPS in the multiply tree rooted at
3661 STMT. Replace them with an optimal sequence of multiplies and powi
3662 builtin calls, and remove the used operands from OPS. Return an
3663 SSA name representing the value of the replacement sequence. */
3665 static tree
3666 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
3668 unsigned i, j, vec_len;
3669 int ii;
3670 operand_entry_t oe;
3671 repeat_factor_t rf1, rf2;
3672 repeat_factor rfnew;
3673 tree result = NULL_TREE;
3674 tree target_ssa, iter_result;
3675 tree type = TREE_TYPE (gimple_get_lhs (stmt));
3676 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
3677 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3678 gimple mul_stmt, pow_stmt;
3680 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3681 target. */
3682 if (!powi_fndecl)
3683 return NULL_TREE;
3685 /* Allocate the repeated factor vector. */
3686 repeat_factor_vec.create (10);
3688 /* Scan the OPS vector for all SSA names in the product and build
3689 up a vector of occurrence counts for each factor. */
3690 FOR_EACH_VEC_ELT (*ops, i, oe)
3692 if (TREE_CODE (oe->op) == SSA_NAME)
3694 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3696 if (rf1->factor == oe->op)
3698 rf1->count += oe->count;
3699 break;
3703 if (j >= repeat_factor_vec.length ())
3705 rfnew.factor = oe->op;
3706 rfnew.rank = oe->rank;
3707 rfnew.count = oe->count;
3708 rfnew.repr = NULL_TREE;
3709 repeat_factor_vec.safe_push (rfnew);
3714 /* Sort the repeated factor vector by (a) increasing occurrence count,
3715 and (b) decreasing rank. */
3716 repeat_factor_vec.qsort (compare_repeat_factors);
3718 /* It is generally best to combine as many base factors as possible
3719 into a product before applying __builtin_powi to the result.
3720 However, the sort order chosen for the repeated factor vector
3721 allows us to cache partial results for the product of the base
3722 factors for subsequent use. When we already have a cached partial
3723 result from a previous iteration, it is best to make use of it
3724 before looking for another __builtin_pow opportunity.
3726 As an example, consider x * x * y * y * y * z * z * z * z.
3727 We want to first compose the product x * y * z, raise it to the
3728 second power, then multiply this by y * z, and finally multiply
3729 by z. This can be done in 5 multiplies provided we cache y * z
3730 for use in both expressions:
3732 t1 = y * z
3733 t2 = t1 * x
3734 t3 = t2 * t2
3735 t4 = t1 * t3
3736 result = t4 * z
3738 If we instead ignored the cached y * z and first multiplied by
3739 the __builtin_pow opportunity z * z, we would get the inferior:
3741 t1 = y * z
3742 t2 = t1 * x
3743 t3 = t2 * t2
3744 t4 = z * z
3745 t5 = t3 * t4
3746 result = t5 * y */
3748 vec_len = repeat_factor_vec.length ();
3750 /* Repeatedly look for opportunities to create a builtin_powi call. */
3751 while (true)
3753 HOST_WIDE_INT power;
3755 /* First look for the largest cached product of factors from
3756 preceding iterations. If found, create a builtin_powi for
3757 it if the minimum occurrence count for its factors is at
3758 least 2, or just use this cached product as our next
3759 multiplicand if the minimum occurrence count is 1. */
3760 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3762 if (rf1->repr && rf1->count > 0)
3763 break;
3766 if (j < vec_len)
3768 power = rf1->count;
3770 if (power == 1)
3772 iter_result = rf1->repr;
3774 if (dump_file && (dump_flags & TDF_DETAILS))
3776 unsigned elt;
3777 repeat_factor_t rf;
3778 fputs ("Multiplying by cached product ", dump_file);
3779 for (elt = j; elt < vec_len; elt++)
3781 rf = &repeat_factor_vec[elt];
3782 print_generic_expr (dump_file, rf->factor, 0);
3783 if (elt < vec_len - 1)
3784 fputs (" * ", dump_file);
3786 fputs ("\n", dump_file);
3789 else
3791 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3792 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3793 build_int_cst (integer_type_node,
3794 power));
3795 gimple_call_set_lhs (pow_stmt, iter_result);
3796 gimple_set_location (pow_stmt, gimple_location (stmt));
3797 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3799 if (dump_file && (dump_flags & TDF_DETAILS))
3801 unsigned elt;
3802 repeat_factor_t rf;
3803 fputs ("Building __builtin_pow call for cached product (",
3804 dump_file);
3805 for (elt = j; elt < vec_len; elt++)
3807 rf = &repeat_factor_vec[elt];
3808 print_generic_expr (dump_file, rf->factor, 0);
3809 if (elt < vec_len - 1)
3810 fputs (" * ", dump_file);
3812 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
3813 power);
3817 else
3819 /* Otherwise, find the first factor in the repeated factor
3820 vector whose occurrence count is at least 2. If no such
3821 factor exists, there are no builtin_powi opportunities
3822 remaining. */
3823 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3825 if (rf1->count >= 2)
3826 break;
3829 if (j >= vec_len)
3830 break;
3832 power = rf1->count;
3834 if (dump_file && (dump_flags & TDF_DETAILS))
3836 unsigned elt;
3837 repeat_factor_t rf;
3838 fputs ("Building __builtin_pow call for (", dump_file);
3839 for (elt = j; elt < vec_len; elt++)
3841 rf = &repeat_factor_vec[elt];
3842 print_generic_expr (dump_file, rf->factor, 0);
3843 if (elt < vec_len - 1)
3844 fputs (" * ", dump_file);
3846 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
3849 reassociate_stats.pows_created++;
3851 /* Visit each element of the vector in reverse order (so that
3852 high-occurrence elements are visited first, and within the
3853 same occurrence count, lower-ranked elements are visited
3854 first). Form a linear product of all elements in this order
3855 whose occurrence count is at least that of element J.
3856 Record the SSA name representing the product of each element
3857 with all subsequent elements in the vector. */
3858 if (j == vec_len - 1)
3859 rf1->repr = rf1->factor;
3860 else
3862 for (ii = vec_len - 2; ii >= (int)j; ii--)
3864 tree op1, op2;
3866 rf1 = &repeat_factor_vec[ii];
3867 rf2 = &repeat_factor_vec[ii + 1];
3869 /* Init the last factor's representative to be itself. */
3870 if (!rf2->repr)
3871 rf2->repr = rf2->factor;
3873 op1 = rf1->factor;
3874 op2 = rf2->repr;
3876 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
3877 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
3878 target_ssa,
3879 op1, op2);
3880 gimple_set_location (mul_stmt, gimple_location (stmt));
3881 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3882 rf1->repr = target_ssa;
3884 /* Don't reprocess the multiply we just introduced. */
3885 gimple_set_visited (mul_stmt, true);
3889 /* Form a call to __builtin_powi for the maximum product
3890 just formed, raised to the power obtained earlier. */
3891 rf1 = &repeat_factor_vec[j];
3892 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3893 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3894 build_int_cst (integer_type_node,
3895 power));
3896 gimple_call_set_lhs (pow_stmt, iter_result);
3897 gimple_set_location (pow_stmt, gimple_location (stmt));
3898 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3901 /* If we previously formed at least one other builtin_powi call,
3902 form the product of this one and those others. */
3903 if (result)
3905 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
3906 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
3907 result, iter_result);
3908 gimple_set_location (mul_stmt, gimple_location (stmt));
3909 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3910 gimple_set_visited (mul_stmt, true);
3911 result = new_result;
3913 else
3914 result = iter_result;
3916 /* Decrement the occurrence count of each element in the product
3917 by the count found above, and remove this many copies of each
3918 factor from OPS. */
3919 for (i = j; i < vec_len; i++)
3921 unsigned k = power;
3922 unsigned n;
3924 rf1 = &repeat_factor_vec[i];
3925 rf1->count -= power;
3927 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
3929 if (oe->op == rf1->factor)
3931 if (oe->count <= k)
3933 ops->ordered_remove (n);
3934 k -= oe->count;
3936 if (k == 0)
3937 break;
3939 else
3941 oe->count -= k;
3942 break;
3949 /* At this point all elements in the repeated factor vector have a
3950 remaining occurrence count of 0 or 1, and those with a count of 1
3951 don't have cached representatives. Re-sort the ops vector and
3952 clean up. */
3953 ops->qsort (sort_by_operand_rank);
3954 repeat_factor_vec.release ();
3956 /* Return the final product computed herein. Note that there may
3957 still be some elements with single occurrence count left in OPS;
3958 those will be handled by the normal reassociation logic. */
3959 return result;
3962 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
3964 static void
3965 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
3967 tree rhs1;
3969 if (dump_file && (dump_flags & TDF_DETAILS))
3971 fprintf (dump_file, "Transforming ");
3972 print_gimple_stmt (dump_file, stmt, 0, 0);
3975 rhs1 = gimple_assign_rhs1 (stmt);
3976 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
3977 update_stmt (stmt);
3978 remove_visited_stmt_chain (rhs1);
3980 if (dump_file && (dump_flags & TDF_DETAILS))
3982 fprintf (dump_file, " into ");
3983 print_gimple_stmt (dump_file, stmt, 0, 0);
3987 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
3989 static void
3990 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
3991 tree rhs1, tree rhs2)
3993 if (dump_file && (dump_flags & TDF_DETAILS))
3995 fprintf (dump_file, "Transforming ");
3996 print_gimple_stmt (dump_file, stmt, 0, 0);
3999 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4000 update_stmt (gsi_stmt (*gsi));
4001 remove_visited_stmt_chain (rhs1);
4003 if (dump_file && (dump_flags & TDF_DETAILS))
4005 fprintf (dump_file, " into ");
4006 print_gimple_stmt (dump_file, stmt, 0, 0);
4010 /* Reassociate expressions in basic block BB and its post-dominator as
4011 children. */
4013 static void
4014 reassociate_bb (basic_block bb)
4016 gimple_stmt_iterator gsi;
4017 basic_block son;
4018 gimple stmt = last_stmt (bb);
4020 if (stmt && !gimple_visited_p (stmt))
4021 maybe_optimize_range_tests (stmt);
4023 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4025 stmt = gsi_stmt (gsi);
4027 if (is_gimple_assign (stmt)
4028 && !stmt_could_throw_p (stmt))
4030 tree lhs, rhs1, rhs2;
4031 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4033 /* If this is not a gimple binary expression, there is
4034 nothing for us to do with it. */
4035 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4036 continue;
4038 /* If this was part of an already processed statement,
4039 we don't need to touch it again. */
4040 if (gimple_visited_p (stmt))
4042 /* This statement might have become dead because of previous
4043 reassociations. */
4044 if (has_zero_uses (gimple_get_lhs (stmt)))
4046 gsi_remove (&gsi, true);
4047 release_defs (stmt);
4048 /* Removing the stmt above may leave the iterator at the
4049 end of the sequence. In that case reset it to the
4050 last stmt of the block; if the block is now empty,
4051 the iterator is still at the end of the sequence
4052 and we need to bail out. */
4054 if (gsi_end_p (gsi))
4056 gsi = gsi_last_bb (bb);
4057 if (gsi_end_p (gsi))
4058 break;
4061 continue;
4064 lhs = gimple_assign_lhs (stmt);
4065 rhs1 = gimple_assign_rhs1 (stmt);
4066 rhs2 = gimple_assign_rhs2 (stmt);
4068 /* Only bitwise and min/max operations can be reassociated for
4069 all types; for any other code, verify the types here. */
4070 if (rhs_code != BIT_IOR_EXPR
4071 && rhs_code != BIT_AND_EXPR
4072 && rhs_code != BIT_XOR_EXPR
4073 && rhs_code != MIN_EXPR
4074 && rhs_code != MAX_EXPR
4075 && (!can_reassociate_p (lhs)
4076 || !can_reassociate_p (rhs1)
4077 || !can_reassociate_p (rhs2)))
4078 continue;
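/* can_reassociate_p is defined earlier in this file; roughly, it
   accepts wrapping integer types unconditionally but floating-point
   types only when -fassociative-math is in effect.  */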
4080 if (associative_tree_code (rhs_code))
4082 vec<operand_entry_t> ops = vNULL;
4083 tree powi_result = NULL_TREE;
4085 /* There may be no immediate uses left by the time we
4086 get here because we may have eliminated them all. */
4087 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4088 continue;
4090 gimple_set_visited (stmt, true);
4091 linearize_expr_tree (&ops, stmt, true, true);
4092 ops.qsort (sort_by_operand_rank);
4093 optimize_ops_list (rhs_code, &ops);
4094 if (undistribute_ops_list (rhs_code, &ops,
4095 loop_containing_stmt (stmt)))
4097 ops.qsort (sort_by_operand_rank);
4098 optimize_ops_list (rhs_code, &ops);
4101 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4102 optimize_range_tests (rhs_code, &ops);
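/* The reassociation pass is scheduled twice; repeated-factor
   combination into __builtin_powi is only attempted on the first
   instance, and only for multiplies under unsafe math optimizations.  */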
4104 if (first_pass_instance
4105 && rhs_code == MULT_EXPR
4106 && flag_unsafe_math_optimizations)
4107 powi_result = attempt_builtin_powi (stmt, &ops);
4109 /* If the operand vector is now empty, all operands were
4110 consumed by the __builtin_powi optimization. */
4111 if (ops.length () == 0)
4112 transform_stmt_to_copy (&gsi, stmt, powi_result);
4113 else if (ops.length () == 1)
4115 tree last_op = ops.last ()->op;
4117 if (powi_result)
4118 transform_stmt_to_multiply (&gsi, stmt, last_op,
4119 powi_result);
4120 else
4121 transform_stmt_to_copy (&gsi, stmt, last_op);
4123 else
4125 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4126 int ops_num = ops.length ();
4127 int width = get_reassociation_width (ops_num, rhs_code, mode);
4129 if (dump_file && (dump_flags & TDF_DETAILS))
4130 fprintf (dump_file,
4131 "Width = %d was chosen for reassociation\n", width);
4133 if (width > 1
4134 && ops.length () > 3)
4135 rewrite_expr_tree_parallel (stmt, width, ops);
4136 else
4137 rewrite_expr_tree (stmt, 0, ops, false);
4139 /* If we combined some repeated factors into a
4140 __builtin_powi call, multiply that result by the
4141 reassociated operands. */
4142 if (powi_result)
4144 gimple mul_stmt;
4145 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4146 tree target_ssa = make_temp_ssa_name (type, NULL,
4147 "reassocpow");
4148 gimple_set_lhs (stmt, target_ssa);
4149 update_stmt (stmt);
4150 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4151 powi_result,
4152 target_ssa);
4153 gimple_set_location (mul_stmt, gimple_location (stmt));
4154 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
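/* The original LHS now receives powi_result * target_ssa, where
   target_ssa holds the product of the remaining reassociated
   operands.  */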
4158 ops.release ();
4162 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4163 son;
4164 son = next_dom_son (CDI_POST_DOMINATORS, son))
4165 reassociate_bb (son);
4168 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4169 void debug_ops_vector (vec<operand_entry_t> ops);
4171 /* Dump the operand entry vector OPS to FILE. */
4173 void
4174 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4176 operand_entry_t oe;
4177 unsigned int i;
4179 FOR_EACH_VEC_ELT (ops, i, oe)
4181 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4182 print_generic_expr (file, oe->op, 0);
4186 /* Dump the operand entry vector OPS to STDERR. */
4188 DEBUG_FUNCTION void
4189 debug_ops_vector (vec<operand_entry_t> ops)
4191 dump_ops_vector (stderr, ops);
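/* debug_ops_vector is marked DEBUG_FUNCTION so that it survives in the
   binary and can be called by hand from a debugger, even though nothing
   in the compiler references it.  */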
4194 static void
4195 do_reassoc (void)
4197 break_up_subtract_bb (ENTRY_BLOCK_PTR);
4198 reassociate_bb (EXIT_BLOCK_PTR);
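/* break_up_subtract_bb recurses over the dominator tree starting at the
   entry block, while reassociate_bb recurses over the post-dominator
   tree starting at the exit block; together the two walks cover the
   function's basic blocks.  */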
4201 /* Initialize the reassociation pass. */
4203 static void
4204 init_reassoc (void)
4206 int i;
4207 long rank = 2;
4208 int *bbs = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
4210 /* Find the loops, so that we can prevent moving calculations into
4211 them. */
4212 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4214 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4216 operand_entry_pool = create_alloc_pool ("operand entry pool",
4217 sizeof (struct operand_entry), 30);
4218 next_operand_entry_id = 0;
4220 /* Reverse RPO (Reverse Post Order) will give us something where
4221 deeper loops come later. */
4222 pre_and_rev_post_order_compute (NULL, bbs, false);
4223 bb_rank = XCNEWVEC (long, last_basic_block);
4224 operand_rank = pointer_map_create ();
4226 /* Give each default definition a distinct rank. This includes
4227 parameters and the static chain. Walk backwards over all
4228 SSA names so that we get proper rank ordering according
4229 to tree_swap_operands_p. */
4230 for (i = num_ssa_names - 1; i > 0; --i)
4232 tree name = ssa_name (i);
4233 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4234 insert_operand_rank (name, ++rank);
4237 /* Set up rank for each BB */
4238 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
4239 bb_rank[bbs[i]] = ++rank << 16;
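/* The shift by 16 spaces the per-block base ranks far apart; ranks of
   individual expressions grow by small increments from the ranks of
   their operands, so this normally keeps them below the next block's
   base rank.  */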
4241 free (bbs);
4242 calculate_dominance_info (CDI_POST_DOMINATORS);
4243 plus_negates = vNULL;
4246 /* Cleanup after the reassociation pass, and print stats if
4247 requested. */
4249 static void
4250 fini_reassoc (void)
4252 statistics_counter_event (cfun, "Linearized",
4253 reassociate_stats.linearized);
4254 statistics_counter_event (cfun, "Constants eliminated",
4255 reassociate_stats.constants_eliminated);
4256 statistics_counter_event (cfun, "Ops eliminated",
4257 reassociate_stats.ops_eliminated);
4258 statistics_counter_event (cfun, "Statements rewritten",
4259 reassociate_stats.rewritten);
4260 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4261 reassociate_stats.pows_encountered);
4262 statistics_counter_event (cfun, "Built-in powi calls created",
4263 reassociate_stats.pows_created);
4265 pointer_map_destroy (operand_rank);
4266 free_alloc_pool (operand_entry_pool);
4267 free (bb_rank);
4268 plus_negates.release ();
4269 free_dominance_info (CDI_POST_DOMINATORS);
4270 loop_optimizer_finalize ();
4273 /* Gate and execute functions for Reassociation. */
4275 static unsigned int
4276 execute_reassoc (void)
4278 init_reassoc ();
4280 do_reassoc ();
4281 repropagate_negates ();
4283 fini_reassoc ();
4284 return 0;
4287 static bool
4288 gate_tree_ssa_reassoc (void)
4290 return flag_tree_reassoc != 0;
4293 struct gimple_opt_pass pass_reassoc =
4296 GIMPLE_PASS,
4297 "reassoc", /* name */
4298 OPTGROUP_NONE, /* optinfo_flags */
4299 gate_tree_ssa_reassoc, /* gate */
4300 execute_reassoc, /* execute */
4301 NULL, /* sub */
4302 NULL, /* next */
4303 0, /* static_pass_number */
4304 TV_TREE_REASSOC, /* tv_id */
4305 PROP_cfg | PROP_ssa, /* properties_required */
4306 0, /* properties_provided */
4307 0, /* properties_destroyed */
4308 0, /* todo_flags_start */
4309 TODO_verify_ssa
4310 | TODO_update_ssa_only_virtuals
4311 | TODO_verify_flow /* todo_flags_finish */