1 /* Reassociation for trees.
2 Copyright (C) 2005, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Daniel Berlin <dan@dberlin.org>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
27 #include "basic-block.h"
28 #include "gimple-pretty-print.h"
29 #include "tree-inline.h"
30 #include "tree-flow.h"
32 #include "tree-iterator.h"
33 #include "tree-pass.h"
34 #include "alloc-pool.h"
36 #include "langhooks.h"
37 #include "pointer-set.h"
42 #include "diagnostic-core.h"
44 /* This is a simple global reassociation pass. It is, in part, based
45 on the LLVM pass of the same name (They do some things more/less
46 than we do, in different orders, etc).
48 It consists of five steps:
50 1. Breaking up subtract operations into addition + negate, where
51 it would promote the reassociation of adds.
53 2. Left linearization of the expression trees, so that (A+B)+(C+D)
54 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
55 During linearization, we place the operands of the binary
56 expressions into a vector of operand_entry_t
58 3. Optimization of the operand lists, eliminating things like a +
61 3a. Combine repeated factors with the same occurrence counts
62 into a __builtin_powi call that will later be optimized into
63 an optimal number of multiplies.
65 4. Rewrite the expression trees we linearized and optimized so
66 they are in proper rank order.
68 5. Repropagate negates, as nothing else will clean it up ATM.
70 A bit of theory on #4, since nobody seems to write anything down
71 about why it makes sense to do it the way they do it:
73 We could do this much nicer theoretically, but don't (for reasons
74 explained after how to do it theoretically nice :P).
76 In order to promote the most redundancy elimination, you want
77 binary expressions whose operands are the same rank (or
78 preferably, the same value) exposed to the redundancy eliminator,
79 for possible elimination.
81 So the way to do this if we really cared, is to build the new op
82 tree from the leaves to the roots, merging as you go, and putting the
83 new op on the end of the worklist, until you are left with one
84 thing on the worklist.
86 IE if you have to rewrite the following set of operands (listed with
87 rank in parentheses), with opcode PLUS_EXPR:
89 a (1), b (1), c (1), d (2), e (2)
92 We start with our merge worklist empty, and the ops list with all of
95 You want to first merge all leaves of the same rank, as much as
98 So first build a binary op of
100 mergetmp = a + b, and put "mergetmp" on the merge worklist.
102 Because there is no three operand form of PLUS_EXPR, c is not going to
103 be exposed to redundancy elimination as a rank 1 operand.
105 So you might as well throw it on the merge worklist (you could also
106 consider it to now be a rank two operand, and merge it with d and e,
107 but in this case, you then have evicted e from a binary op. So at
108 least in this situation, you can't win.)
110 Then build a binary op of d + e
113 and put mergetmp2 on the merge worklist.
115 so merge worklist = {mergetmp, c, mergetmp2}
117 Continue building binary ops of these operations until you have only
118 one operation left on the worklist.
123 mergetmp3 = mergetmp + c
125 worklist = {mergetmp2, mergetmp3}
127 mergetmp4 = mergetmp2 + mergetmp3
129 worklist = {mergetmp4}
131 because we have one operation left, we can now just set the original
132 statement equal to the result of that operation.
134 This will at least expose a + b and d + e to redundancy elimination
135 as binary operations.
137 For extra points, you can reuse the old statements to build the
138 mergetmps, since you shouldn't run out.
140 So why don't we do this?
142 Because it's expensive, and rarely will help. Most trees we are
143 reassociating have 3 or less ops. If they have 2 ops, they already
144 will be written into a nice single binary op. If you have 3 ops, a
145 single simple check suffices to tell you whether the first two are of the
146 same rank. If so, you know to order it
149 newstmt = mergetmp + op3
153 newstmt = mergetmp + op1
155 If all three are of the same rank, you can't expose them all in a
156 single binary operator anyway, so the above is *still* the best you
159 Thus, this is what we do. When we have three ops left, we check to see
160 what order to put them in, and call it a day. As a nod to vector sum
161 reduction, we check if any of the ops are really a phi node that is a
162 destructive update for the associating op, and keep the destructive
163 update together for vector sum reduction recognition. */
170 int constants_eliminated
;
173 int pows_encountered
;
177 /* Operator, rank pair. */
178 typedef struct operand_entry
186 static alloc_pool operand_entry_pool
;
188 /* This is used to assign a unique ID to each struct operand_entry
189 so that qsort results are identical on different hosts. */
190 static int next_operand_entry_id
;
192 /* Starting rank number for a given basic block, so that we can rank
193 operations using unmovable instructions in that BB based on the bb
195 static long *bb_rank
;
197 /* Operand->rank hashtable. */
198 static struct pointer_map_t
*operand_rank
;
201 static long get_rank (tree
);
204 /* Bias amount for loop-carried phis. We want this to be larger than
205 the depth of any reassociation tree we can see, but not larger than
206 the rank difference between two blocks. */
207 #define PHI_LOOP_BIAS (1 << 15)
209 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
210 an innermost loop, and the phi has only a single use which is inside
211 the loop, then the rank is the block rank of the loop latch plus an
212 extra bias for the loop-carried dependence. This causes expressions
213 calculated into an accumulator variable to be independent for each
214 iteration of the loop. If STMT is some other phi, the rank is the
215 block rank of its containing block. */
217 phi_rank (gimple stmt
)
219 basic_block bb
= gimple_bb (stmt
);
220 struct loop
*father
= bb
->loop_father
;
226 /* We only care about real loops (those with a latch). */
228 return bb_rank
[bb
->index
];
230 /* Interesting phis must be in headers of innermost loops. */
231 if (bb
!= father
->header
233 return bb_rank
[bb
->index
];
235 /* Ignore virtual SSA_NAMEs. */
236 res
= gimple_phi_result (stmt
);
237 if (!is_gimple_reg (SSA_NAME_VAR (res
)))
238 return bb_rank
[bb
->index
];
240 /* The phi definition must have a single use, and that use must be
241 within the loop. Otherwise this isn't an accumulator pattern. */
242 if (!single_imm_use (res
, &use
, &use_stmt
)
243 || gimple_bb (use_stmt
)->loop_father
!= father
)
244 return bb_rank
[bb
->index
];
246 /* Look for phi arguments from within the loop. If found, bias this phi. */
247 for (i
= 0; i
< gimple_phi_num_args (stmt
); i
++)
249 tree arg
= gimple_phi_arg_def (stmt
, i
);
250 if (TREE_CODE (arg
) == SSA_NAME
251 && !SSA_NAME_IS_DEFAULT_DEF (arg
))
253 gimple def_stmt
= SSA_NAME_DEF_STMT (arg
);
254 if (gimple_bb (def_stmt
)->loop_father
== father
)
255 return bb_rank
[father
->latch
->index
] + PHI_LOOP_BIAS
;
259 /* Must be an uninteresting phi. */
260 return bb_rank
[bb
->index
];
263 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
264 loop-carried dependence of an innermost loop, return TRUE; else
267 loop_carried_phi (tree exp
)
272 if (TREE_CODE (exp
) != SSA_NAME
273 || SSA_NAME_IS_DEFAULT_DEF (exp
))
276 phi_stmt
= SSA_NAME_DEF_STMT (exp
);
278 if (gimple_code (SSA_NAME_DEF_STMT (exp
)) != GIMPLE_PHI
)
281 /* Non-loop-carried phis have block rank. Loop-carried phis have
282 an additional bias added in. If this phi doesn't have block rank,
283 it's biased and should not be propagated. */
284 block_rank
= bb_rank
[gimple_bb (phi_stmt
)->index
];
286 if (phi_rank (phi_stmt
) != block_rank
)
292 /* Return the maximum of RANK and the rank that should be propagated
293 from expression OP. For most operands, this is just the rank of OP.
294 For loop-carried phis, the value is zero to avoid undoing the bias
295 in favor of the phi. */
297 propagate_rank (long rank
, tree op
)
301 if (loop_carried_phi (op
))
304 op_rank
= get_rank (op
);
306 return MAX (rank
, op_rank
);
309 /* Look up the operand rank structure for expression E. */
312 find_operand_rank (tree e
)
314 void **slot
= pointer_map_contains (operand_rank
, e
);
315 return slot
? (long) (intptr_t) *slot
: -1;
318 /* Insert {E,RANK} into the operand rank hashtable. */
321 insert_operand_rank (tree e
, long rank
)
324 gcc_assert (rank
> 0);
325 slot
= pointer_map_insert (operand_rank
, e
);
327 *slot
= (void *) (intptr_t) rank
;
330 /* Given an expression E, return the rank of the expression. */
335 /* Constants have rank 0. */
336 if (is_gimple_min_invariant (e
))
339 /* SSA_NAME's have the rank of the expression they are the result
341 For globals and uninitialized values, the rank is 0.
342 For function arguments, use the pre-setup rank.
343 For PHI nodes, stores, asm statements, etc, we use the rank of
345 For simple operations, the rank is the maximum rank of any of
346 its operands, or the bb_rank, whichever is less.
347 I make no claims that this is optimal, however, it gives good
350 /* We make an exception to the normal ranking system to break
351 dependences of accumulator variables in loops. Suppose we
352 have a simple one-block loop containing:
359 As shown, each iteration of the calculation into x is fully
360 dependent upon the iteration before it. We would prefer to
361 see this in the form:
368 If the loop is unrolled, the calculations of b and c from
369 different iterations can be interleaved.
371 To obtain this result during reassociation, we bias the rank
372 of the phi definition x_1 upward, when it is recognized as an
373 accumulator pattern. The artificial rank causes it to be
374 added last, providing the desired independence. */
376 if (TREE_CODE (e
) == SSA_NAME
)
383 if (SSA_NAME_IS_DEFAULT_DEF (e
))
384 return find_operand_rank (e
);
386 stmt
= SSA_NAME_DEF_STMT (e
);
387 if (gimple_code (stmt
) == GIMPLE_PHI
)
388 return phi_rank (stmt
);
390 if (!is_gimple_assign (stmt
)
391 || gimple_vdef (stmt
))
392 return bb_rank
[gimple_bb (stmt
)->index
];
394 /* If we already have a rank for this expression, use that. */
395 rank
= find_operand_rank (e
);
399 /* Otherwise, find the maximum rank for the operands. As an
400 exception, remove the bias from loop-carried phis when propagating
401 the rank so that dependent operations are not also biased. */
403 if (gimple_assign_single_p (stmt
))
405 tree rhs
= gimple_assign_rhs1 (stmt
);
406 n
= TREE_OPERAND_LENGTH (rhs
);
408 rank
= propagate_rank (rank
, rhs
);
411 for (i
= 0; i
< n
; i
++)
413 op
= TREE_OPERAND (rhs
, i
);
416 rank
= propagate_rank (rank
, op
);
422 n
= gimple_num_ops (stmt
);
423 for (i
= 1; i
< n
; i
++)
425 op
= gimple_op (stmt
, i
);
427 rank
= propagate_rank (rank
, op
);
431 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
433 fprintf (dump_file
, "Rank for ");
434 print_generic_expr (dump_file
, e
, 0);
435 fprintf (dump_file
, " is %ld\n", (rank
+ 1));
438 /* Note the rank in the hashtable so we don't recompute it. */
439 insert_operand_rank (e
, (rank
+ 1));
443 /* Globals, etc, are rank 0 */
447 DEF_VEC_P(operand_entry_t
);
448 DEF_VEC_ALLOC_P(operand_entry_t
, heap
);
450 /* We want integer ones to end up last no matter what, since they are
451 the ones we can do the most with. */
452 #define INTEGER_CONST_TYPE 1 << 3
453 #define FLOAT_CONST_TYPE 1 << 2
454 #define OTHER_CONST_TYPE 1 << 1
456 /* Classify an invariant tree into integer, float, or other, so that
457 we can sort them to be near other constants of the same type. */
459 constant_type (tree t
)
461 if (INTEGRAL_TYPE_P (TREE_TYPE (t
)))
462 return INTEGER_CONST_TYPE
;
463 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t
)))
464 return FLOAT_CONST_TYPE
;
466 return OTHER_CONST_TYPE
;
469 /* qsort comparison function to sort operand entries PA and PB by rank
470 so that the sorted array is ordered by rank in decreasing order. */
472 sort_by_operand_rank (const void *pa
, const void *pb
)
474 const operand_entry_t oea
= *(const operand_entry_t
*)pa
;
475 const operand_entry_t oeb
= *(const operand_entry_t
*)pb
;
477 /* It's nicer for optimize_expression if constants that are likely
478 to fold when added/multiplied//whatever are put next to each
479 other. Since all constants have rank 0, order them by type. */
480 if (oeb
->rank
== 0 && oea
->rank
== 0)
482 if (constant_type (oeb
->op
) != constant_type (oea
->op
))
483 return constant_type (oeb
->op
) - constant_type (oea
->op
);
485 /* To make sorting result stable, we use unique IDs to determine
487 return oeb
->id
- oea
->id
;
490 /* Lastly, make sure the versions that are the same go next to each
491 other. We use SSA_NAME_VERSION because it's stable. */
492 if ((oeb
->rank
- oea
->rank
== 0)
493 && TREE_CODE (oea
->op
) == SSA_NAME
494 && TREE_CODE (oeb
->op
) == SSA_NAME
)
496 if (SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
497 return SSA_NAME_VERSION (oeb
->op
) - SSA_NAME_VERSION (oea
->op
);
499 return oeb
->id
- oea
->id
;
502 if (oeb
->rank
!= oea
->rank
)
503 return oeb
->rank
- oea
->rank
;
505 return oeb
->id
- oea
->id
;
508 /* Add an operand entry to *OPS for the tree operand OP. */
511 add_to_ops_vec (VEC(operand_entry_t
, heap
) **ops
, tree op
)
513 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
516 oe
->rank
= get_rank (op
);
517 oe
->id
= next_operand_entry_id
++;
519 VEC_safe_push (operand_entry_t
, heap
, *ops
, oe
);
522 /* Add an operand entry to *OPS for the tree operand OP with repeat
526 add_repeat_to_ops_vec (VEC(operand_entry_t
, heap
) **ops
, tree op
,
527 HOST_WIDE_INT repeat
)
529 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
532 oe
->rank
= get_rank (op
);
533 oe
->id
= next_operand_entry_id
++;
535 VEC_safe_push (operand_entry_t
, heap
, *ops
, oe
);
537 reassociate_stats
.pows_encountered
++;
540 /* Return true if STMT is reassociable operation containing a binary
541 operation with tree code CODE, and is inside LOOP. */
544 is_reassociable_op (gimple stmt
, enum tree_code code
, struct loop
*loop
)
546 basic_block bb
= gimple_bb (stmt
);
548 if (gimple_bb (stmt
) == NULL
)
551 if (!flow_bb_inside_loop_p (loop
, bb
))
554 if (is_gimple_assign (stmt
)
555 && gimple_assign_rhs_code (stmt
) == code
556 && has_single_use (gimple_assign_lhs (stmt
)))
563 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
564 operand of the negate operation. Otherwise, return NULL. */
567 get_unary_op (tree name
, enum tree_code opcode
)
569 gimple stmt
= SSA_NAME_DEF_STMT (name
);
571 if (!is_gimple_assign (stmt
))
574 if (gimple_assign_rhs_code (stmt
) == opcode
)
575 return gimple_assign_rhs1 (stmt
);
579 /* If CURR and LAST are a pair of ops that OPCODE allows us to
580 eliminate through equivalences, do so, remove them from OPS, and
581 return true. Otherwise, return false. */
584 eliminate_duplicate_pair (enum tree_code opcode
,
585 VEC (operand_entry_t
, heap
) **ops
,
588 operand_entry_t curr
,
589 operand_entry_t last
)
592 /* If we have two of the same op, and the opcode is & |, min, or max,
593 we can eliminate one of them.
594 If we have two of the same op, and the opcode is ^, we can
595 eliminate both of them. */
597 if (last
&& last
->op
== curr
->op
)
605 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
607 fprintf (dump_file
, "Equivalence: ");
608 print_generic_expr (dump_file
, curr
->op
, 0);
609 fprintf (dump_file
, " [&|minmax] ");
610 print_generic_expr (dump_file
, last
->op
, 0);
611 fprintf (dump_file
, " -> ");
612 print_generic_stmt (dump_file
, last
->op
, 0);
615 VEC_ordered_remove (operand_entry_t
, *ops
, i
);
616 reassociate_stats
.ops_eliminated
++;
621 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
623 fprintf (dump_file
, "Equivalence: ");
624 print_generic_expr (dump_file
, curr
->op
, 0);
625 fprintf (dump_file
, " ^ ");
626 print_generic_expr (dump_file
, last
->op
, 0);
627 fprintf (dump_file
, " -> nothing\n");
630 reassociate_stats
.ops_eliminated
+= 2;
632 if (VEC_length (operand_entry_t
, *ops
) == 2)
634 VEC_free (operand_entry_t
, heap
, *ops
);
636 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (last
->op
)));
641 VEC_ordered_remove (operand_entry_t
, *ops
, i
-1);
642 VEC_ordered_remove (operand_entry_t
, *ops
, i
-1);
654 static VEC(tree
, heap
) *plus_negates
;
656 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
657 expression, look in OPS for a corresponding positive operation to cancel
658 it out. If we find one, remove the other from OPS, replace
659 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
663 eliminate_plus_minus_pair (enum tree_code opcode
,
664 VEC (operand_entry_t
, heap
) **ops
,
665 unsigned int currindex
,
666 operand_entry_t curr
)
673 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
676 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
677 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
678 if (negateop
== NULL_TREE
&& notop
== NULL_TREE
)
681 /* Any non-negated version will have a rank that is one less than
682 the current rank. So once we hit those ranks, if we don't find
685 for (i
= currindex
+ 1;
686 VEC_iterate (operand_entry_t
, *ops
, i
, oe
)
687 && oe
->rank
>= curr
->rank
- 1 ;
690 if (oe
->op
== negateop
)
693 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
695 fprintf (dump_file
, "Equivalence: ");
696 print_generic_expr (dump_file
, negateop
, 0);
697 fprintf (dump_file
, " + -");
698 print_generic_expr (dump_file
, oe
->op
, 0);
699 fprintf (dump_file
, " -> 0\n");
702 VEC_ordered_remove (operand_entry_t
, *ops
, i
);
703 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (oe
->op
)));
704 VEC_ordered_remove (operand_entry_t
, *ops
, currindex
);
705 reassociate_stats
.ops_eliminated
++;
709 else if (oe
->op
== notop
)
711 tree op_type
= TREE_TYPE (oe
->op
);
713 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
715 fprintf (dump_file
, "Equivalence: ");
716 print_generic_expr (dump_file
, notop
, 0);
717 fprintf (dump_file
, " + ~");
718 print_generic_expr (dump_file
, oe
->op
, 0);
719 fprintf (dump_file
, " -> -1\n");
722 VEC_ordered_remove (operand_entry_t
, *ops
, i
);
723 add_to_ops_vec (ops
, build_int_cst_type (op_type
, -1));
724 VEC_ordered_remove (operand_entry_t
, *ops
, currindex
);
725 reassociate_stats
.ops_eliminated
++;
731 /* CURR->OP is a negate expr in a plus expr: save it for later
732 inspection in repropagate_negates(). */
733 if (negateop
!= NULL_TREE
)
734 VEC_safe_push (tree
, heap
, plus_negates
, curr
->op
);
739 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
740 bitwise not expression, look in OPS for a corresponding operand to
741 cancel it out. If we find one, remove the other from OPS, replace
742 OPS[CURRINDEX] with 0, and return true. Otherwise, return
746 eliminate_not_pairs (enum tree_code opcode
,
747 VEC (operand_entry_t
, heap
) **ops
,
748 unsigned int currindex
,
749 operand_entry_t curr
)
755 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
756 || TREE_CODE (curr
->op
) != SSA_NAME
)
759 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
760 if (notop
== NULL_TREE
)
763 /* Any non-not version will have a rank that is one less than
764 the current rank. So once we hit those ranks, if we don't find
767 for (i
= currindex
+ 1;
768 VEC_iterate (operand_entry_t
, *ops
, i
, oe
)
769 && oe
->rank
>= curr
->rank
- 1;
774 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
776 fprintf (dump_file
, "Equivalence: ");
777 print_generic_expr (dump_file
, notop
, 0);
778 if (opcode
== BIT_AND_EXPR
)
779 fprintf (dump_file
, " & ~");
780 else if (opcode
== BIT_IOR_EXPR
)
781 fprintf (dump_file
, " | ~");
782 print_generic_expr (dump_file
, oe
->op
, 0);
783 if (opcode
== BIT_AND_EXPR
)
784 fprintf (dump_file
, " -> 0\n");
785 else if (opcode
== BIT_IOR_EXPR
)
786 fprintf (dump_file
, " -> -1\n");
789 if (opcode
== BIT_AND_EXPR
)
790 oe
->op
= build_zero_cst (TREE_TYPE (oe
->op
));
791 else if (opcode
== BIT_IOR_EXPR
)
792 oe
->op
= build_low_bits_mask (TREE_TYPE (oe
->op
),
793 TYPE_PRECISION (TREE_TYPE (oe
->op
)));
795 reassociate_stats
.ops_eliminated
796 += VEC_length (operand_entry_t
, *ops
) - 1;
797 VEC_free (operand_entry_t
, heap
, *ops
);
799 VEC_safe_push (operand_entry_t
, heap
, *ops
, oe
);
807 /* Use constant value that may be present in OPS to try to eliminate
808 operands. Note that this function is only really used when we've
809 eliminated ops for other reasons, or merged constants. Across
810 single statements, fold already does all of this, plus more. There
811 is little point in duplicating logic, so I've only included the
812 identities that I could ever construct testcases to trigger. */
815 eliminate_using_constants (enum tree_code opcode
,
816 VEC(operand_entry_t
, heap
) **ops
)
818 operand_entry_t oelast
= VEC_last (operand_entry_t
, *ops
);
819 tree type
= TREE_TYPE (oelast
->op
);
821 if (oelast
->rank
== 0
822 && (INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
827 if (integer_zerop (oelast
->op
))
829 if (VEC_length (operand_entry_t
, *ops
) != 1)
831 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
832 fprintf (dump_file
, "Found & 0, removing all other ops\n");
834 reassociate_stats
.ops_eliminated
835 += VEC_length (operand_entry_t
, *ops
) - 1;
837 VEC_free (operand_entry_t
, heap
, *ops
);
839 VEC_safe_push (operand_entry_t
, heap
, *ops
, oelast
);
843 else if (integer_all_onesp (oelast
->op
))
845 if (VEC_length (operand_entry_t
, *ops
) != 1)
847 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
848 fprintf (dump_file
, "Found & -1, removing\n");
849 VEC_pop (operand_entry_t
, *ops
);
850 reassociate_stats
.ops_eliminated
++;
855 if (integer_all_onesp (oelast
->op
))
857 if (VEC_length (operand_entry_t
, *ops
) != 1)
859 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
860 fprintf (dump_file
, "Found | -1, removing all other ops\n");
862 reassociate_stats
.ops_eliminated
863 += VEC_length (operand_entry_t
, *ops
) - 1;
865 VEC_free (operand_entry_t
, heap
, *ops
);
867 VEC_safe_push (operand_entry_t
, heap
, *ops
, oelast
);
871 else if (integer_zerop (oelast
->op
))
873 if (VEC_length (operand_entry_t
, *ops
) != 1)
875 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
876 fprintf (dump_file
, "Found | 0, removing\n");
877 VEC_pop (operand_entry_t
, *ops
);
878 reassociate_stats
.ops_eliminated
++;
883 if (integer_zerop (oelast
->op
)
884 || (FLOAT_TYPE_P (type
)
885 && !HONOR_NANS (TYPE_MODE (type
))
886 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type
))
887 && real_zerop (oelast
->op
)))
889 if (VEC_length (operand_entry_t
, *ops
) != 1)
891 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
892 fprintf (dump_file
, "Found * 0, removing all other ops\n");
894 reassociate_stats
.ops_eliminated
895 += VEC_length (operand_entry_t
, *ops
) - 1;
896 VEC_free (operand_entry_t
, heap
, *ops
);
898 VEC_safe_push (operand_entry_t
, heap
, *ops
, oelast
);
902 else if (integer_onep (oelast
->op
)
903 || (FLOAT_TYPE_P (type
)
904 && !HONOR_SNANS (TYPE_MODE (type
))
905 && real_onep (oelast
->op
)))
907 if (VEC_length (operand_entry_t
, *ops
) != 1)
909 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
910 fprintf (dump_file
, "Found * 1, removing\n");
911 VEC_pop (operand_entry_t
, *ops
);
912 reassociate_stats
.ops_eliminated
++;
920 if (integer_zerop (oelast
->op
)
921 || (FLOAT_TYPE_P (type
)
922 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
923 && fold_real_zero_addition_p (type
, oelast
->op
,
924 opcode
== MINUS_EXPR
)))
926 if (VEC_length (operand_entry_t
, *ops
) != 1)
928 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
929 fprintf (dump_file
, "Found [|^+] 0, removing\n");
930 VEC_pop (operand_entry_t
, *ops
);
931 reassociate_stats
.ops_eliminated
++;
943 static void linearize_expr_tree (VEC(operand_entry_t
, heap
) **, gimple
,
946 /* Structure for tracking and counting operands. */
947 typedef struct oecount_s
{
950 enum tree_code oecode
;
955 DEF_VEC_ALLOC_O(oecount
,heap
);
957 /* The heap for the oecount hashtable and the sorted list of operands. */
958 static VEC (oecount
, heap
) *cvec
;
960 /* Hash function for oecount. */
963 oecount_hash (const void *p
)
965 const oecount
*c
= VEC_index (oecount
, cvec
, (size_t)p
- 42);
966 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
969 /* Comparison function for oecount. */
972 oecount_eq (const void *p1
, const void *p2
)
974 const oecount
*c1
= VEC_index (oecount
, cvec
, (size_t)p1
- 42);
975 const oecount
*c2
= VEC_index (oecount
, cvec
, (size_t)p2
- 42);
976 return (c1
->oecode
== c2
->oecode
977 && c1
->op
== c2
->op
);
980 /* Comparison function for qsort sorting oecount elements by count. */
983 oecount_cmp (const void *p1
, const void *p2
)
985 const oecount
*c1
= (const oecount
*)p1
;
986 const oecount
*c2
= (const oecount
*)p2
;
987 if (c1
->cnt
!= c2
->cnt
)
988 return c1
->cnt
- c2
->cnt
;
990 /* If counts are identical, use unique IDs to stabilize qsort. */
991 return c1
->id
- c2
->id
;
994 /* Return TRUE iff STMT represents a builtin call that raises OP
998 stmt_is_power_of_op (gimple stmt
, tree op
)
1002 if (!is_gimple_call (stmt
))
1005 fndecl
= gimple_call_fndecl (stmt
);
1008 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
1011 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1013 CASE_FLT_FN (BUILT_IN_POW
):
1014 CASE_FLT_FN (BUILT_IN_POWI
):
1015 return (operand_equal_p (gimple_call_arg (stmt
, 0), op
, 0));
1022 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1023 in place and return the result. Assumes that stmt_is_power_of_op
1024 was previously called for STMT and returned TRUE. */
1026 static HOST_WIDE_INT
1027 decrement_power (gimple stmt
)
1029 REAL_VALUE_TYPE c
, cint
;
1030 HOST_WIDE_INT power
;
1033 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1035 CASE_FLT_FN (BUILT_IN_POW
):
1036 arg1
= gimple_call_arg (stmt
, 1);
1037 c
= TREE_REAL_CST (arg1
);
1038 power
= real_to_integer (&c
) - 1;
1039 real_from_integer (&cint
, VOIDmode
, power
, 0, 0);
1040 gimple_call_set_arg (stmt
, 1, build_real (TREE_TYPE (arg1
), cint
));
1043 CASE_FLT_FN (BUILT_IN_POWI
):
1044 arg1
= gimple_call_arg (stmt
, 1);
1045 power
= TREE_INT_CST_LOW (arg1
) - 1;
1046 gimple_call_set_arg (stmt
, 1, build_int_cst (TREE_TYPE (arg1
), power
));
1054 /* Find the single immediate use of STMT's LHS, and replace it
1055 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1056 replace *DEF with OP as well. */
1059 propagate_op_to_single_use (tree op
, gimple stmt
, tree
*def
)
1064 gimple_stmt_iterator gsi
;
1066 if (is_gimple_call (stmt
))
1067 lhs
= gimple_call_lhs (stmt
);
1069 lhs
= gimple_assign_lhs (stmt
);
1071 gcc_assert (has_single_use (lhs
));
1072 single_imm_use (lhs
, &use
, &use_stmt
);
1076 if (TREE_CODE (op
) != SSA_NAME
)
1077 update_stmt (use_stmt
);
1078 gsi
= gsi_for_stmt (stmt
);
1079 gsi_remove (&gsi
, true);
1080 release_defs (stmt
);
1082 if (is_gimple_call (stmt
))
1083 unlink_stmt_vdef (stmt
);
1086 /* Walks the linear chain with result *DEF searching for an operation
1087 with operand OP and code OPCODE removing that from the chain. *DEF
1088 is updated if there is only one operand but no operation left. */
1091 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
1093 gimple stmt
= SSA_NAME_DEF_STMT (*def
);
1099 if (opcode
== MULT_EXPR
1100 && stmt_is_power_of_op (stmt
, op
))
1102 if (decrement_power (stmt
) == 1)
1103 propagate_op_to_single_use (op
, stmt
, def
);
1107 name
= gimple_assign_rhs1 (stmt
);
1109 /* If this is the operation we look for and one of the operands
1110 is ours simply propagate the other operand into the stmts
1112 if (gimple_assign_rhs_code (stmt
) == opcode
1114 || gimple_assign_rhs2 (stmt
) == op
))
1117 name
= gimple_assign_rhs2 (stmt
);
1118 propagate_op_to_single_use (name
, stmt
, def
);
1122 /* We might have a multiply of two __builtin_pow* calls, and
1123 the operand might be hiding in the rightmost one. */
1124 if (opcode
== MULT_EXPR
1125 && gimple_assign_rhs_code (stmt
) == opcode
1126 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
1128 gimple stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1129 if (stmt_is_power_of_op (stmt2
, op
))
1131 if (decrement_power (stmt2
) == 1)
1132 propagate_op_to_single_use (op
, stmt2
, def
);
1137 /* Continue walking the chain. */
1138 gcc_assert (name
!= op
1139 && TREE_CODE (name
) == SSA_NAME
);
1140 stmt
= SSA_NAME_DEF_STMT (name
);
1145 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1146 the result. Places the statement after the definition of either
1147 OP1 or OP2. Returns the new statement. */
1150 build_and_add_sum (tree tmpvar
, tree op1
, tree op2
, enum tree_code opcode
)
1152 gimple op1def
= NULL
, op2def
= NULL
;
1153 gimple_stmt_iterator gsi
;
1157 /* Create the addition statement. */
1158 sum
= gimple_build_assign_with_ops (opcode
, tmpvar
, op1
, op2
);
1159 op
= make_ssa_name (tmpvar
, sum
);
1160 gimple_assign_set_lhs (sum
, op
);
1162 /* Find an insertion place and insert. */
1163 if (TREE_CODE (op1
) == SSA_NAME
)
1164 op1def
= SSA_NAME_DEF_STMT (op1
);
1165 if (TREE_CODE (op2
) == SSA_NAME
)
1166 op2def
= SSA_NAME_DEF_STMT (op2
);
1167 if ((!op1def
|| gimple_nop_p (op1def
))
1168 && (!op2def
|| gimple_nop_p (op2def
)))
1170 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR
));
1171 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1173 else if ((!op1def
|| gimple_nop_p (op1def
))
1174 || (op2def
&& !gimple_nop_p (op2def
)
1175 && stmt_dominates_stmt_p (op1def
, op2def
)))
1177 if (gimple_code (op2def
) == GIMPLE_PHI
)
1179 gsi
= gsi_after_labels (gimple_bb (op2def
));
1180 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1184 if (!stmt_ends_bb_p (op2def
))
1186 gsi
= gsi_for_stmt (op2def
);
1187 gsi_insert_after (&gsi
, sum
, GSI_NEW_STMT
);
1194 FOR_EACH_EDGE (e
, ei
, gimple_bb (op2def
)->succs
)
1195 if (e
->flags
& EDGE_FALLTHRU
)
1196 gsi_insert_on_edge_immediate (e
, sum
);
1202 if (gimple_code (op1def
) == GIMPLE_PHI
)
1204 gsi
= gsi_after_labels (gimple_bb (op1def
));
1205 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1209 if (!stmt_ends_bb_p (op1def
))
1211 gsi
= gsi_for_stmt (op1def
);
1212 gsi_insert_after (&gsi
, sum
, GSI_NEW_STMT
);
1219 FOR_EACH_EDGE (e
, ei
, gimple_bb (op1def
)->succs
)
1220 if (e
->flags
& EDGE_FALLTHRU
)
1221 gsi_insert_on_edge_immediate (e
, sum
);
1230 /* Perform un-distribution of divisions and multiplications.
1231 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1232 to (A + B) / X for real X.
1234 The algorithm is organized as follows.
1236 - First we walk the addition chain *OPS looking for summands that
1237 are defined by a multiplication or a real division. This results
1238 in the candidates bitmap with relevant indices into *OPS.
1240 - Second we build the chains of multiplications or divisions for
1241 these candidates, counting the number of occurrences of (operand, code)
1242 pairs in all of the candidates chains.
1244 - Third we sort the (operand, code) pairs by number of occurrence and
1245 process them starting with the pair with the most uses.
1247 * For each such pair we walk the candidates again to build a
1248 second candidate bitmap noting all multiplication/division chains
1249 that have at least one occurrence of (operand, code).
1251 * We build an alternate addition chain only covering these
1252 candidates with one (operand, code) operation removed from their
1253 multiplication/division chain.
1255 * The first candidate gets replaced by the alternate addition chain
1256 multiplied/divided by the operand.
1258 * All candidate chains get disabled for further processing and
1259 processing of (operand, code) pairs continues.
1261 The alternate addition chains built are re-processed by the main
1262 reassociation algorithm which allows optimizing a * x * y + b * y * x
1263 to (a + b ) * x * y in one invocation of the reassociation pass. */
1266 undistribute_ops_list (enum tree_code opcode
,
1267 VEC (operand_entry_t
, heap
) **ops
, struct loop
*loop
)
1269 unsigned int length
= VEC_length (operand_entry_t
, *ops
);
1270 operand_entry_t oe1
;
1272 sbitmap candidates
, candidates2
;
1273 unsigned nr_candidates
, nr_candidates2
;
1274 sbitmap_iterator sbi0
;
1275 VEC (operand_entry_t
, heap
) **subops
;
1277 bool changed
= false;
1278 int next_oecount_id
= 0;
1281 || opcode
!= PLUS_EXPR
)
1284 /* Build a list of candidates to process. */
1285 candidates
= sbitmap_alloc (length
);
1286 sbitmap_zero (candidates
);
1288 FOR_EACH_VEC_ELT (operand_entry_t
, *ops
, i
, oe1
)
1290 enum tree_code dcode
;
1293 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1295 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1296 if (!is_gimple_assign (oe1def
))
1298 dcode
= gimple_assign_rhs_code (oe1def
);
1299 if ((dcode
!= MULT_EXPR
1300 && dcode
!= RDIV_EXPR
)
1301 || !is_reassociable_op (oe1def
, dcode
, loop
))
1304 SET_BIT (candidates
, i
);
1308 if (nr_candidates
< 2)
1310 sbitmap_free (candidates
);
1314 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1316 fprintf (dump_file
, "searching for un-distribute opportunities ");
1317 print_generic_expr (dump_file
,
1318 VEC_index (operand_entry_t
, *ops
,
1319 sbitmap_first_set_bit (candidates
))->op
, 0);
1320 fprintf (dump_file
, " %d\n", nr_candidates
);
1323 /* Build linearized sub-operand lists and the counting table. */
1325 ctable
= htab_create (15, oecount_hash
, oecount_eq
, NULL
);
1326 subops
= XCNEWVEC (VEC (operand_entry_t
, heap
) *,
1327 VEC_length (operand_entry_t
, *ops
));
1328 EXECUTE_IF_SET_IN_SBITMAP (candidates
, 0, i
, sbi0
)
1331 enum tree_code oecode
;
1334 oedef
= SSA_NAME_DEF_STMT (VEC_index (operand_entry_t
, *ops
, i
)->op
);
1335 oecode
= gimple_assign_rhs_code (oedef
);
1336 linearize_expr_tree (&subops
[i
], oedef
,
1337 associative_tree_code (oecode
), false);
1339 FOR_EACH_VEC_ELT (operand_entry_t
, subops
[i
], j
, oe1
)
1346 c
.id
= next_oecount_id
++;
1348 VEC_safe_push (oecount
, heap
, cvec
, &c
);
1349 idx
= VEC_length (oecount
, cvec
) + 41;
1350 slot
= htab_find_slot (ctable
, (void *)idx
, INSERT
);
1353 *slot
= (void *)idx
;
1357 VEC_pop (oecount
, cvec
);
1358 VEC_index (oecount
, cvec
, (size_t)*slot
- 42)->cnt
++;
1362 htab_delete (ctable
);
1364 /* Sort the counting table. */
1365 VEC_qsort (oecount
, cvec
, oecount_cmp
);
1367 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1370 fprintf (dump_file
, "Candidates:\n");
1371 FOR_EACH_VEC_ELT (oecount
, cvec
, j
, c
)
1373 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1374 c
->oecode
== MULT_EXPR
1375 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1376 print_generic_expr (dump_file
, c
->op
, 0);
1377 fprintf (dump_file
, "\n");
1381 /* Process the (operand, code) pairs in order of most occurence. */
1382 candidates2
= sbitmap_alloc (length
);
1383 while (!VEC_empty (oecount
, cvec
))
1385 oecount
*c
= VEC_last (oecount
, cvec
);
1389 /* Now collect the operands in the outer chain that contain
1390 the common operand in their inner chain. */
1391 sbitmap_zero (candidates2
);
1393 EXECUTE_IF_SET_IN_SBITMAP (candidates
, 0, i
, sbi0
)
1396 enum tree_code oecode
;
1398 tree op
= VEC_index (operand_entry_t
, *ops
, i
)->op
;
1400 /* If we undistributed in this chain already this may be
1402 if (TREE_CODE (op
) != SSA_NAME
)
1405 oedef
= SSA_NAME_DEF_STMT (op
);
1406 oecode
= gimple_assign_rhs_code (oedef
);
1407 if (oecode
!= c
->oecode
)
1410 FOR_EACH_VEC_ELT (operand_entry_t
, subops
[i
], j
, oe1
)
1412 if (oe1
->op
== c
->op
)
1414 SET_BIT (candidates2
, i
);
1421 if (nr_candidates2
>= 2)
1423 operand_entry_t oe1
, oe2
;
1426 int first
= sbitmap_first_set_bit (candidates2
);
1428 /* Build the new addition chain. */
1429 oe1
= VEC_index (operand_entry_t
, *ops
, first
);
1430 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1432 fprintf (dump_file
, "Building (");
1433 print_generic_expr (dump_file
, oe1
->op
, 0);
1435 tmpvar
= create_tmp_reg (TREE_TYPE (oe1
->op
), NULL
);
1436 add_referenced_var (tmpvar
);
1437 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1438 EXECUTE_IF_SET_IN_SBITMAP (candidates2
, first
+1, i
, sbi0
)
1441 oe2
= VEC_index (operand_entry_t
, *ops
, i
);
1442 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1444 fprintf (dump_file
, " + ");
1445 print_generic_expr (dump_file
, oe2
->op
, 0);
1447 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1448 sum
= build_and_add_sum (tmpvar
, oe1
->op
, oe2
->op
, opcode
);
1449 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1451 oe1
->op
= gimple_get_lhs (sum
);
1454 /* Apply the multiplication/division. */
1455 prod
= build_and_add_sum (tmpvar
, oe1
->op
, c
->op
, c
->oecode
);
1456 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1458 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1459 print_generic_expr (dump_file
, c
->op
, 0);
1460 fprintf (dump_file
, "\n");
1463 /* Record it in the addition chain and disable further
1464 undistribution with this op. */
1465 oe1
->op
= gimple_assign_lhs (prod
);
1466 oe1
->rank
= get_rank (oe1
->op
);
1467 VEC_free (operand_entry_t
, heap
, subops
[first
]);
1472 VEC_pop (oecount
, cvec
);
1475 for (i
= 0; i
< VEC_length (operand_entry_t
, *ops
); ++i
)
1476 VEC_free (operand_entry_t
, heap
, subops
[i
]);
1478 VEC_free (oecount
, heap
, cvec
);
1479 sbitmap_free (candidates
);
1480 sbitmap_free (candidates2
);
1485 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1486 expression, examine the other OPS to see if any of them are comparisons
1487 of the same values, which we may be able to combine or eliminate.
1488 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1491 eliminate_redundant_comparison (enum tree_code opcode
,
1492 VEC (operand_entry_t
, heap
) **ops
,
1493 unsigned int currindex
,
1494 operand_entry_t curr
)
1497 enum tree_code lcode
, rcode
;
1502 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
1505 /* Check that CURR is a comparison. */
1506 if (TREE_CODE (curr
->op
) != SSA_NAME
)
1508 def1
= SSA_NAME_DEF_STMT (curr
->op
);
1509 if (!is_gimple_assign (def1
))
1511 lcode
= gimple_assign_rhs_code (def1
);
1512 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
1514 op1
= gimple_assign_rhs1 (def1
);
1515 op2
= gimple_assign_rhs2 (def1
);
1517 /* Now look for a similar comparison in the remaining OPS. */
1518 for (i
= currindex
+ 1;
1519 VEC_iterate (operand_entry_t
, *ops
, i
, oe
);
1524 if (TREE_CODE (oe
->op
) != SSA_NAME
)
1526 def2
= SSA_NAME_DEF_STMT (oe
->op
);
1527 if (!is_gimple_assign (def2
))
1529 rcode
= gimple_assign_rhs_code (def2
);
1530 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
1533 /* If we got here, we have a match. See if we can combine the
1535 if (opcode
== BIT_IOR_EXPR
)
1536 t
= maybe_fold_or_comparisons (lcode
, op1
, op2
,
1537 rcode
, gimple_assign_rhs1 (def2
),
1538 gimple_assign_rhs2 (def2
));
1540 t
= maybe_fold_and_comparisons (lcode
, op1
, op2
,
1541 rcode
, gimple_assign_rhs1 (def2
),
1542 gimple_assign_rhs2 (def2
));
1546 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1547 always give us a boolean_type_node value back. If the original
1548 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1549 we need to convert. */
1550 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
1551 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
1553 if (TREE_CODE (t
) != INTEGER_CST
1554 && !operand_equal_p (t
, curr
->op
, 0))
1556 enum tree_code subcode
;
1557 tree newop1
, newop2
;
1558 if (!COMPARISON_CLASS_P (t
))
1560 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1561 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1562 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1563 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
1567 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1569 fprintf (dump_file
, "Equivalence: ");
1570 print_generic_expr (dump_file
, curr
->op
, 0);
1571 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
1572 print_generic_expr (dump_file
, oe
->op
, 0);
1573 fprintf (dump_file
, " -> ");
1574 print_generic_expr (dump_file
, t
, 0);
1575 fprintf (dump_file
, "\n");
1578 /* Now we can delete oe, as it has been subsumed by the new combined
1580 VEC_ordered_remove (operand_entry_t
, *ops
, i
);
1581 reassociate_stats
.ops_eliminated
++;
1583 /* If t is the same as curr->op, we're done. Otherwise we must
1584 replace curr->op with t. Special case is if we got a constant
1585 back, in which case we add it to the end instead of in place of
1586 the current entry. */
1587 if (TREE_CODE (t
) == INTEGER_CST
)
1589 VEC_ordered_remove (operand_entry_t
, *ops
, currindex
);
1590 add_to_ops_vec (ops
, t
);
1592 else if (!operand_equal_p (t
, curr
->op
, 0))
1596 enum tree_code subcode
;
1599 gcc_assert (COMPARISON_CLASS_P (t
));
1600 tmpvar
= create_tmp_var (TREE_TYPE (t
), NULL
);
1601 add_referenced_var (tmpvar
);
1602 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1603 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1604 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1605 gcc_checking_assert (is_gimple_val (newop1
)
1606 && is_gimple_val (newop2
));
1607 sum
= build_and_add_sum (tmpvar
, newop1
, newop2
, subcode
);
1608 curr
->op
= gimple_get_lhs (sum
);
1616 /* Perform various identities and other optimizations on the list of
1617 operand entries, stored in OPS. The tree code for the binary
1618 operation between all the operands is OPCODE. */
1621 optimize_ops_list (enum tree_code opcode
,
1622 VEC (operand_entry_t
, heap
) **ops
)
1624 unsigned int length
= VEC_length (operand_entry_t
, *ops
);
1627 operand_entry_t oelast
= NULL
;
1628 bool iterate
= false;
1633 oelast
= VEC_last (operand_entry_t
, *ops
);
1635 /* If the last two are constants, pop the constants off, merge them
1636 and try the next two. */
1637 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
1639 operand_entry_t oelm1
= VEC_index (operand_entry_t
, *ops
, length
- 2);
1641 if (oelm1
->rank
== 0
1642 && is_gimple_min_invariant (oelm1
->op
)
1643 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
1644 TREE_TYPE (oelast
->op
)))
1646 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
1647 oelm1
->op
, oelast
->op
);
1649 if (folded
&& is_gimple_min_invariant (folded
))
1651 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1652 fprintf (dump_file
, "Merging constants\n");
1654 VEC_pop (operand_entry_t
, *ops
);
1655 VEC_pop (operand_entry_t
, *ops
);
1657 add_to_ops_vec (ops
, folded
);
1658 reassociate_stats
.constants_eliminated
++;
1660 optimize_ops_list (opcode
, ops
);
1666 eliminate_using_constants (opcode
, ops
);
1669 for (i
= 0; VEC_iterate (operand_entry_t
, *ops
, i
, oe
);)
1673 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
1675 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
1676 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
1677 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
1689 length
= VEC_length (operand_entry_t
, *ops
);
1690 oelast
= VEC_last (operand_entry_t
, *ops
);
1693 optimize_ops_list (opcode
, ops
);
1696 /* The following functions are subroutines to optimize_range_tests and allow
1697 it to try to change a logical combination of comparisons into a range
1701 X == 2 || X == 5 || X == 3 || X == 4
1705 (unsigned) (X - 2) <= 3
1707 For more information see comments above fold_test_range in fold-const.c,
1708 this implementation is for GIMPLE. */
1716 bool strict_overflow_p
;
1717 unsigned int idx
, next
;
1720 /* This is similar to make_range in fold-const.c, but on top of
1721 GIMPLE instead of trees. */
1724 init_range_entry (struct range_entry
*r
, tree exp
)
1728 bool is_bool
, strict_overflow_p
;
1732 r
->strict_overflow_p
= false;
1734 r
->high
= NULL_TREE
;
1735 if (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
)))
1738 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1739 and see if we can refine the range. Some of the cases below may not
1740 happen, but it doesn't seem worth worrying about this. We "continue"
1741 the outer loop when we've changed something; otherwise we "break"
1742 the switch, which will "break" the while. */
1743 low
= build_int_cst (TREE_TYPE (exp
), 0);
1746 strict_overflow_p
= false;
1748 if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
1750 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
1755 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
1761 enum tree_code code
;
1762 tree arg0
, arg1
, exp_type
;
1766 if (TREE_CODE (exp
) != SSA_NAME
)
1769 stmt
= SSA_NAME_DEF_STMT (exp
);
1770 if (!is_gimple_assign (stmt
))
1773 code
= gimple_assign_rhs_code (stmt
);
1774 arg0
= gimple_assign_rhs1 (stmt
);
1775 if (TREE_CODE (arg0
) != SSA_NAME
)
1777 arg1
= gimple_assign_rhs2 (stmt
);
1778 exp_type
= TREE_TYPE (exp
);
1779 loc
= gimple_location (stmt
);
1783 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
1796 if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
1798 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
1803 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
1818 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
1820 &strict_overflow_p
);
1821 if (nexp
!= NULL_TREE
)
1824 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
1837 r
->strict_overflow_p
= strict_overflow_p
;
1841 /* Comparison function for qsort. Sort entries
1842 without SSA_NAME exp first, then with SSA_NAMEs sorted
1843 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1844 by increasing ->low and if ->low is the same, by increasing
1845 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1849 range_entry_cmp (const void *a
, const void *b
)
1851 const struct range_entry
*p
= (const struct range_entry
*) a
;
1852 const struct range_entry
*q
= (const struct range_entry
*) b
;
1854 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
1856 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
1858 /* Group range_entries for the same SSA_NAME together. */
1859 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
1861 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
1863 /* If ->low is different, NULL low goes first, then by
1865 if (p
->low
!= NULL_TREE
)
1867 if (q
->low
!= NULL_TREE
)
1869 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
1871 if (tem
&& integer_onep (tem
))
1873 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
1875 if (tem
&& integer_onep (tem
))
1881 else if (q
->low
!= NULL_TREE
)
1883 /* If ->high is different, NULL high goes last, before that by
1885 if (p
->high
!= NULL_TREE
)
1887 if (q
->high
!= NULL_TREE
)
1889 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
1891 if (tem
&& integer_onep (tem
))
1893 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
1895 if (tem
&& integer_onep (tem
))
1901 else if (p
->high
!= NULL_TREE
)
1903 /* If both ranges are the same, sort below by ascending idx. */
1908 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
1911 if (p
->idx
< q
->idx
)
1915 gcc_checking_assert (p
->idx
> q
->idx
);
1920 /* Helper routine of optimize_range_test.
1921 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1922 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1923 OPCODE and OPS are arguments of optimize_range_tests. Return
1924 true if the range merge has been successful. */
1927 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
1928 unsigned int count
, enum tree_code opcode
,
1929 VEC (operand_entry_t
, heap
) **ops
, tree exp
, bool in_p
,
1930 tree low
, tree high
, bool strict_overflow_p
)
1932 tree op
= VEC_index (operand_entry_t
, *ops
, range
->idx
)->op
;
1933 location_t loc
= gimple_location (SSA_NAME_DEF_STMT (op
));
1934 tree tem
= build_range_check (loc
, TREE_TYPE (op
), exp
, in_p
, low
, high
);
1935 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
1936 gimple_stmt_iterator gsi
;
1938 if (tem
== NULL_TREE
)
1941 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
1942 warning_at (loc
, OPT_Wstrict_overflow
,
1943 "assuming signed overflow does not occur "
1944 "when simplifying range test");
1946 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1948 struct range_entry
*r
;
1949 fprintf (dump_file
, "Optimizing range tests ");
1950 print_generic_expr (dump_file
, range
->exp
, 0);
1951 fprintf (dump_file
, " %c[", range
->in_p
? '+' : '-');
1952 print_generic_expr (dump_file
, range
->low
, 0);
1953 fprintf (dump_file
, ", ");
1954 print_generic_expr (dump_file
, range
->high
, 0);
1955 fprintf (dump_file
, "]");
1956 for (r
= otherrange
; r
< otherrange
+ count
; r
++)
1958 fprintf (dump_file
, " and %c[", r
->in_p
? '+' : '-');
1959 print_generic_expr (dump_file
, r
->low
, 0);
1960 fprintf (dump_file
, ", ");
1961 print_generic_expr (dump_file
, r
->high
, 0);
1962 fprintf (dump_file
, "]");
1964 fprintf (dump_file
, "\n into ");
1965 print_generic_expr (dump_file
, tem
, 0);
1966 fprintf (dump_file
, "\n");
1969 if (opcode
== BIT_IOR_EXPR
)
1970 tem
= invert_truthvalue_loc (loc
, tem
);
1972 tem
= fold_convert_loc (loc
, TREE_TYPE (op
), tem
);
1973 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (op
));
1974 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, true,
1977 VEC_index (operand_entry_t
, *ops
, range
->idx
)->op
= tem
;
1982 range
->strict_overflow_p
= false;
1984 for (range
= otherrange
; range
< otherrange
+ count
; range
++)
1986 VEC_index (operand_entry_t
, *ops
, range
->idx
)->op
= error_mark_node
;
1987 range
->exp
= NULL_TREE
;
1992 /* Optimize range tests, similarly how fold_range_test optimizes
1993 it on trees. The tree code for the binary
1994 operation between all the operands is OPCODE. */
1997 optimize_range_tests (enum tree_code opcode
,
1998 VEC (operand_entry_t
, heap
) **ops
)
2000 unsigned int length
= VEC_length (operand_entry_t
, *ops
), i
, j
, first
;
2002 struct range_entry
*ranges
;
2003 bool any_changes
= false;
2008 ranges
= XNEWVEC (struct range_entry
, length
);
2009 for (i
= 0; i
< length
; i
++)
2012 init_range_entry (ranges
+ i
, VEC_index (operand_entry_t
, *ops
, i
)->op
);
2013 /* For | invert it now, we will invert it again before emitting
2014 the optimized expression. */
2015 if (opcode
== BIT_IOR_EXPR
)
2016 ranges
[i
].in_p
= !ranges
[i
].in_p
;
2019 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
2020 for (i
= 0; i
< length
; i
++)
2021 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
2024 /* Try to merge ranges. */
2025 for (first
= i
; i
< length
; i
++)
2027 tree low
= ranges
[i
].low
;
2028 tree high
= ranges
[i
].high
;
2029 int in_p
= ranges
[i
].in_p
;
2030 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
2031 int update_fail_count
= 0;
2033 for (j
= i
+ 1; j
< length
; j
++)
2035 if (ranges
[i
].exp
!= ranges
[j
].exp
)
2037 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
2038 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
2040 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
2046 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, j
- i
- 1, opcode
,
2047 ops
, ranges
[i
].exp
, in_p
, low
, high
,
2053 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2054 while update_range_test would fail. */
2055 else if (update_fail_count
== 64)
2058 ++update_fail_count
;
2061 /* Optimize X == CST1 || X == CST2
2062 if popcount (CST1 ^ CST2) == 1 into
2063 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2064 Similarly for ranges. E.g.
2065 X != 2 && X != 3 && X != 10 && X != 11
2066 will be transformed by the above loop into
2067 (X - 2U) <= 1U && (X - 10U) <= 1U
2068 and this loop can transform that into
2069 ((X & ~8) - 2U) <= 1U. */
2070 for (i
= first
; i
< length
; i
++)
2072 tree lowi
, highi
, lowj
, highj
, type
, lowxor
, highxor
, tem
, exp
;
2074 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2076 type
= TREE_TYPE (ranges
[i
].exp
);
2077 if (!INTEGRAL_TYPE_P (type
))
2079 lowi
= ranges
[i
].low
;
2080 if (lowi
== NULL_TREE
)
2081 lowi
= TYPE_MIN_VALUE (type
);
2082 highi
= ranges
[i
].high
;
2083 if (highi
== NULL_TREE
)
2085 for (j
= i
+ 1; j
< length
&& j
< i
+ 64; j
++)
2087 if (ranges
[j
].exp
== NULL_TREE
)
2089 if (ranges
[i
].exp
!= ranges
[j
].exp
)
2093 lowj
= ranges
[j
].low
;
2094 if (lowj
== NULL_TREE
)
2096 highj
= ranges
[j
].high
;
2097 if (highj
== NULL_TREE
)
2098 highj
= TYPE_MAX_VALUE (type
);
2099 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2101 if (tem
== NULL_TREE
|| !integer_onep (tem
))
2103 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
2104 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
2106 gcc_checking_assert (!integer_zerop (lowxor
));
2107 tem
= fold_binary (MINUS_EXPR
, type
, lowxor
,
2108 build_int_cst (type
, 1));
2109 if (tem
== NULL_TREE
)
2111 tem
= fold_binary (BIT_AND_EXPR
, type
, lowxor
, tem
);
2112 if (tem
== NULL_TREE
|| !integer_zerop (tem
))
2114 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
2115 if (!tree_int_cst_equal (lowxor
, highxor
))
2117 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
2118 exp
= fold_build2 (BIT_AND_EXPR
, type
, ranges
[i
].exp
, tem
);
2119 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
2120 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
2121 if (update_range_test (ranges
+ i
, ranges
+ j
, 1, opcode
, ops
, exp
,
2122 ranges
[i
].in_p
, lowj
, highj
,
2123 ranges
[i
].strict_overflow_p
2124 || ranges
[j
].strict_overflow_p
))
2135 FOR_EACH_VEC_ELT (operand_entry_t
, *ops
, i
, oe
)
2137 if (oe
->op
== error_mark_node
)
2140 VEC_replace (operand_entry_t
, *ops
, j
, oe
);
2143 VEC_truncate (operand_entry_t
, *ops
, j
);
2146 XDELETEVEC (ranges
);
2149 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2150 of STMT in it's operands. This is also known as a "destructive
2151 update" operation. */
2154 is_phi_for_stmt (gimple stmt
, tree operand
)
2158 use_operand_p arg_p
;
2161 if (TREE_CODE (operand
) != SSA_NAME
)
2164 lhs
= gimple_assign_lhs (stmt
);
2166 def_stmt
= SSA_NAME_DEF_STMT (operand
);
2167 if (gimple_code (def_stmt
) != GIMPLE_PHI
)
2170 FOR_EACH_PHI_ARG (arg_p
, def_stmt
, i
, SSA_OP_USE
)
2171 if (lhs
== USE_FROM_PTR (arg_p
))
2176 /* Remove def stmt of VAR if VAR has zero uses and recurse
2177 on rhs1 operand if so. */
2180 remove_visited_stmt_chain (tree var
)
2183 gimple_stmt_iterator gsi
;
2187 if (TREE_CODE (var
) != SSA_NAME
|| !has_zero_uses (var
))
2189 stmt
= SSA_NAME_DEF_STMT (var
);
2190 if (is_gimple_assign (stmt
) && gimple_visited_p (stmt
))
2192 var
= gimple_assign_rhs1 (stmt
);
2193 gsi
= gsi_for_stmt (stmt
);
2194 gsi_remove (&gsi
, true);
2195 release_defs (stmt
);
2202 /* This function checks three consecutive operands in
2203 passed operands vector OPS starting from OPINDEX and
2204 swaps two operands if it is profitable for binary operation
2205 consuming OPINDEX + 1 and OPINDEX + 2 operands.
2207 We pair ops with the same rank if possible.
2209 The alternative we try is to see if STMT is a destructive
2210 update style statement, which is like:
2213 In that case, we want to use the destructive update form to
2214 expose the possible vectorizer sum reduction opportunity.
2215 In that case, the third operand will be the phi node. This
2216 check is not performed if STMT is null.
2218 We could, of course, try to be better as noted above, and do a
2219 lot of work to try to find these opportunities in >3 operand
2220 cases, but it is unlikely to be worth it. */
2223 swap_ops_for_binary_stmt (VEC(operand_entry_t
, heap
) * ops
,
2224 unsigned int opindex
, gimple stmt
)
2226 operand_entry_t oe1
, oe2
, oe3
;
2228 oe1
= VEC_index (operand_entry_t
, ops
, opindex
);
2229 oe2
= VEC_index (operand_entry_t
, ops
, opindex
+ 1);
2230 oe3
= VEC_index (operand_entry_t
, ops
, opindex
+ 2);
2232 if ((oe1
->rank
== oe2
->rank
2233 && oe2
->rank
!= oe3
->rank
)
2234 || (stmt
&& is_phi_for_stmt (stmt
, oe3
->op
)
2235 && !is_phi_for_stmt (stmt
, oe1
->op
)
2236 && !is_phi_for_stmt (stmt
, oe2
->op
)))
2238 struct operand_entry temp
= *oe3
;
2240 oe3
->rank
= oe1
->rank
;
2242 oe1
->rank
= temp
.rank
;
2244 else if ((oe1
->rank
== oe3
->rank
2245 && oe2
->rank
!= oe3
->rank
)
2246 || (stmt
&& is_phi_for_stmt (stmt
, oe2
->op
)
2247 && !is_phi_for_stmt (stmt
, oe1
->op
)
2248 && !is_phi_for_stmt (stmt
, oe3
->op
)))
2250 struct operand_entry temp
= *oe2
;
2252 oe2
->rank
= oe1
->rank
;
2254 oe1
->rank
= temp
.rank
;
2258 /* Recursively rewrite our linearized statements so that the operators
2259 match those in OPS[OPINDEX], putting the computation in rank
2263 rewrite_expr_tree (gimple stmt
, unsigned int opindex
,
2264 VEC(operand_entry_t
, heap
) * ops
, bool moved
)
2266 tree rhs1
= gimple_assign_rhs1 (stmt
);
2267 tree rhs2
= gimple_assign_rhs2 (stmt
);
2270 /* If we have three operands left, then we want to make sure the ones
2271 that get the double binary op are chosen wisely. */
2272 if (opindex
+ 3 == VEC_length (operand_entry_t
, ops
))
2273 swap_ops_for_binary_stmt (ops
, opindex
, stmt
);
2275 /* The final recursion case for this function is that you have
2276 exactly two operations left.
2277 If we had one exactly one op in the entire list to start with, we
2278 would have never called this function, and the tail recursion
2279 rewrites them one at a time. */
2280 if (opindex
+ 2 == VEC_length (operand_entry_t
, ops
))
2282 operand_entry_t oe1
, oe2
;
2284 oe1
= VEC_index (operand_entry_t
, ops
, opindex
);
2285 oe2
= VEC_index (operand_entry_t
, ops
, opindex
+ 1);
2287 if (rhs1
!= oe1
->op
|| rhs2
!= oe2
->op
)
2289 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2291 fprintf (dump_file
, "Transforming ");
2292 print_gimple_stmt (dump_file
, stmt
, 0, 0);
2295 gimple_assign_set_rhs1 (stmt
, oe1
->op
);
2296 gimple_assign_set_rhs2 (stmt
, oe2
->op
);
2298 if (rhs1
!= oe1
->op
&& rhs1
!= oe2
->op
)
2299 remove_visited_stmt_chain (rhs1
);
2301 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2303 fprintf (dump_file
, " into ");
2304 print_gimple_stmt (dump_file
, stmt
, 0, 0);
2310 /* If we hit here, we should have 3 or more ops left. */
2311 gcc_assert (opindex
+ 2 < VEC_length (operand_entry_t
, ops
));
2313 /* Rewrite the next operator. */
2314 oe
= VEC_index (operand_entry_t
, ops
, opindex
);
2320 gimple_stmt_iterator gsinow
, gsirhs1
;
2321 gimple stmt1
= stmt
, stmt2
;
2324 gsinow
= gsi_for_stmt (stmt
);
2325 count
= VEC_length (operand_entry_t
, ops
) - opindex
- 2;
2326 while (count
-- != 0)
2328 stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1
));
2329 gsirhs1
= gsi_for_stmt (stmt2
);
2330 gsi_move_before (&gsirhs1
, &gsinow
);
2337 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2339 fprintf (dump_file
, "Transforming ");
2340 print_gimple_stmt (dump_file
, stmt
, 0, 0);
2343 gimple_assign_set_rhs2 (stmt
, oe
->op
);
2346 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2348 fprintf (dump_file
, " into ");
2349 print_gimple_stmt (dump_file
, stmt
, 0, 0);
2352 /* Recurse on the LHS of the binary operator, which is guaranteed to
2353 be the non-leaf side. */
2354 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1
), opindex
+ 1, ops
, moved
);
2357 /* Find out how many cycles we need to compute statements chain.
2358 OPS_NUM holds number of statements in a chain. CPU_WIDTH is a
2359 maximum number of independent statements we may execute per cycle. */
2362 get_required_cycles (int ops_num
, int cpu_width
)
2368 /* While we have more than 2 * cpu_width operands
2369 we may reduce number of operands by cpu_width
2371 res
= ops_num
/ (2 * cpu_width
);
2373 /* Remained operands count may be reduced twice per cycle
2374 until we have only one operand. */
2375 rest
= (unsigned)(ops_num
- res
* cpu_width
);
2376 elog
= exact_log2 (rest
);
2380 res
+= floor_log2 (rest
) + 1;
2385 /* Returns an optimal number of registers to use for computation of
2386 given statements. */
2389 get_reassociation_width (int ops_num
, enum tree_code opc
,
2390 enum machine_mode mode
)
2392 int param_width
= PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH
);
2397 if (param_width
> 0)
2398 width
= param_width
;
2400 width
= targetm
.sched
.reassociation_width (opc
, mode
);
2405 /* Get the minimal time required for sequence computation. */
2406 cycles_best
= get_required_cycles (ops_num
, width
);
2408 /* Check if we may use less width and still compute sequence for
2409 the same time. It will allow us to reduce registers usage.
2410 get_required_cycles is monotonically increasing with lower width
2411 so we can perform a binary search for the minimal width that still
2412 results in the optimal cycle count. */
2414 while (width
> width_min
)
2416 int width_mid
= (width
+ width_min
) / 2;
2418 if (get_required_cycles (ops_num
, width_mid
) == cycles_best
)
2420 else if (width_min
< width_mid
)
2421 width_min
= width_mid
;
2429 /* Recursively rewrite our linearized statements so that the operators
2430 match those in OPS[OPINDEX], putting the computation in rank
2431 order and trying to allow operations to be executed in
2435 rewrite_expr_tree_parallel (gimple stmt
, int width
,
2436 VEC(operand_entry_t
, heap
) * ops
)
2438 enum tree_code opcode
= gimple_assign_rhs_code (stmt
);
2439 int op_num
= VEC_length (operand_entry_t
, ops
);
2440 int stmt_num
= op_num
- 1;
2441 gimple
*stmts
= XALLOCAVEC (gimple
, stmt_num
);
2442 int op_index
= op_num
- 1;
2444 int ready_stmts_end
= 0;
2446 tree last_rhs1
= gimple_assign_rhs1 (stmt
);
2449 /* We start expression rewriting from the top statements.
2450 So, in this loop we create a full list of statements
2451 we will work with. */
2452 stmts
[stmt_num
- 1] = stmt
;
2453 for (i
= stmt_num
- 2; i
>= 0; i
--)
2454 stmts
[i
] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts
[i
+1]));
2456 lhs_var
= create_tmp_reg (TREE_TYPE (last_rhs1
), NULL
);
2457 add_referenced_var (lhs_var
);
2459 for (i
= 0; i
< stmt_num
; i
++)
2463 /* Determine whether we should use results of
2464 already handled statements or not. */
2465 if (ready_stmts_end
== 0
2466 && (i
- stmt_index
>= width
|| op_index
< 1))
2467 ready_stmts_end
= i
;
2469 /* Now we choose operands for the next statement. Non zero
2470 value in ready_stmts_end means here that we should use
2471 the result of already generated statements as new operand. */
2472 if (ready_stmts_end
> 0)
2474 op1
= gimple_assign_lhs (stmts
[stmt_index
++]);
2475 if (ready_stmts_end
> stmt_index
)
2476 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
2477 else if (op_index
>= 0)
2478 op2
= VEC_index (operand_entry_t
, ops
, op_index
--)->op
;
2481 gcc_assert (stmt_index
< i
);
2482 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
2485 if (stmt_index
>= ready_stmts_end
)
2486 ready_stmts_end
= 0;
2491 swap_ops_for_binary_stmt (ops
, op_index
- 2, NULL
);
2492 op2
= VEC_index (operand_entry_t
, ops
, op_index
--)->op
;
2493 op1
= VEC_index (operand_entry_t
, ops
, op_index
--)->op
;
2496 /* If we emit the last statement then we should put
2497 operands into the last statement. It will also
2499 if (op_index
< 0 && stmt_index
== i
)
2502 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2504 fprintf (dump_file
, "Transforming ");
2505 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
2508 /* We keep original statement only for the last one. All
2509 others are recreated. */
2510 if (i
== stmt_num
- 1)
2512 gimple_assign_set_rhs1 (stmts
[i
], op1
);
2513 gimple_assign_set_rhs2 (stmts
[i
], op2
);
2514 update_stmt (stmts
[i
]);
2517 stmts
[i
] = build_and_add_sum (lhs_var
, op1
, op2
, opcode
);
2519 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2521 fprintf (dump_file
, " into ");
2522 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
2526 remove_visited_stmt_chain (last_rhs1
);
2529 /* Transform STMT, which is really (A +B) + (C + D) into the left
2530 linear form, ((A+B)+C)+D.
2531 Recurse on D if necessary. */
2534 linearize_expr (gimple stmt
)
2536 gimple_stmt_iterator gsinow
, gsirhs
;
2537 gimple binlhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
2538 gimple binrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
2539 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
2540 gimple newbinrhs
= NULL
;
2541 struct loop
*loop
= loop_containing_stmt (stmt
);
2543 gcc_assert (is_reassociable_op (binlhs
, rhscode
, loop
)
2544 && is_reassociable_op (binrhs
, rhscode
, loop
));
2546 gsinow
= gsi_for_stmt (stmt
);
2547 gsirhs
= gsi_for_stmt (binrhs
);
2548 gsi_move_before (&gsirhs
, &gsinow
);
2550 gimple_assign_set_rhs2 (stmt
, gimple_assign_rhs1 (binrhs
));
2551 gimple_assign_set_rhs1 (binrhs
, gimple_assign_lhs (binlhs
));
2552 gimple_assign_set_rhs1 (stmt
, gimple_assign_lhs (binrhs
));
2554 if (TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
2555 newbinrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
2557 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2559 fprintf (dump_file
, "Linearized: ");
2560 print_gimple_stmt (dump_file
, stmt
, 0, 0);
2563 reassociate_stats
.linearized
++;
2564 update_stmt (binrhs
);
2565 update_stmt (binlhs
);
2568 gimple_set_visited (stmt
, true);
2569 gimple_set_visited (binlhs
, true);
2570 gimple_set_visited (binrhs
, true);
2572 /* Tail recurse on the new rhs if it still needs reassociation. */
2573 if (newbinrhs
&& is_reassociable_op (newbinrhs
, rhscode
, loop
))
2574 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
2575 want to change the algorithm while converting to tuples. */
2576 linearize_expr (stmt
);
2579 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
2580 it. Otherwise, return NULL. */
2583 get_single_immediate_use (tree lhs
)
2585 use_operand_p immuse
;
2588 if (TREE_CODE (lhs
) == SSA_NAME
2589 && single_imm_use (lhs
, &immuse
, &immusestmt
)
2590 && is_gimple_assign (immusestmt
))
2596 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
2597 representing the negated value. Insertions of any necessary
2598 instructions go before GSI.
2599 This function is recursive in that, if you hand it "a_5" as the
2600 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
2601 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
2604 negate_value (tree tonegate
, gimple_stmt_iterator
*gsi
)
2606 gimple negatedefstmt
= NULL
;
2607 tree resultofnegate
;
2609 /* If we are trying to negate a name, defined by an add, negate the
2610 add operands instead. */
2611 if (TREE_CODE (tonegate
) == SSA_NAME
)
2612 negatedefstmt
= SSA_NAME_DEF_STMT (tonegate
);
2613 if (TREE_CODE (tonegate
) == SSA_NAME
2614 && is_gimple_assign (negatedefstmt
)
2615 && TREE_CODE (gimple_assign_lhs (negatedefstmt
)) == SSA_NAME
2616 && has_single_use (gimple_assign_lhs (negatedefstmt
))
2617 && gimple_assign_rhs_code (negatedefstmt
) == PLUS_EXPR
)
2619 gimple_stmt_iterator gsi
;
2620 tree rhs1
= gimple_assign_rhs1 (negatedefstmt
);
2621 tree rhs2
= gimple_assign_rhs2 (negatedefstmt
);
2623 gsi
= gsi_for_stmt (negatedefstmt
);
2624 rhs1
= negate_value (rhs1
, &gsi
);
2625 gimple_assign_set_rhs1 (negatedefstmt
, rhs1
);
2627 gsi
= gsi_for_stmt (negatedefstmt
);
2628 rhs2
= negate_value (rhs2
, &gsi
);
2629 gimple_assign_set_rhs2 (negatedefstmt
, rhs2
);
2631 update_stmt (negatedefstmt
);
2632 return gimple_assign_lhs (negatedefstmt
);
2635 tonegate
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (tonegate
), tonegate
);
2636 resultofnegate
= force_gimple_operand_gsi (gsi
, tonegate
, true,
2637 NULL_TREE
, true, GSI_SAME_STMT
);
2638 return resultofnegate
;
2641 /* Return true if we should break up the subtract in STMT into an add
2642 with negate. This is true when we the subtract operands are really
2643 adds, or the subtract itself is used in an add expression. In
2644 either case, breaking up the subtract into an add with negate
2645 exposes the adds to reassociation. */
2648 should_break_up_subtract (gimple stmt
)
2650 tree lhs
= gimple_assign_lhs (stmt
);
2651 tree binlhs
= gimple_assign_rhs1 (stmt
);
2652 tree binrhs
= gimple_assign_rhs2 (stmt
);
2654 struct loop
*loop
= loop_containing_stmt (stmt
);
2656 if (TREE_CODE (binlhs
) == SSA_NAME
2657 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs
), PLUS_EXPR
, loop
))
2660 if (TREE_CODE (binrhs
) == SSA_NAME
2661 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
), PLUS_EXPR
, loop
))
2664 if (TREE_CODE (lhs
) == SSA_NAME
2665 && (immusestmt
= get_single_immediate_use (lhs
))
2666 && is_gimple_assign (immusestmt
)
2667 && (gimple_assign_rhs_code (immusestmt
) == PLUS_EXPR
2668 || gimple_assign_rhs_code (immusestmt
) == MULT_EXPR
))
2673 /* Transform STMT from A - B into A + -B. */
2676 break_up_subtract (gimple stmt
, gimple_stmt_iterator
*gsip
)
2678 tree rhs1
= gimple_assign_rhs1 (stmt
);
2679 tree rhs2
= gimple_assign_rhs2 (stmt
);
2681 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2683 fprintf (dump_file
, "Breaking up subtract ");
2684 print_gimple_stmt (dump_file
, stmt
, 0, 0);
2687 rhs2
= negate_value (rhs2
, gsip
);
2688 gimple_assign_set_rhs_with_ops (gsip
, PLUS_EXPR
, rhs1
, rhs2
);
2692 /* Determine whether STMT is a builtin call that raises an SSA name
2693 to an integer power and has only one use. If so, and this is early
2694 reassociation and unsafe math optimizations are permitted, place
2695 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
2696 If any of these conditions does not hold, return FALSE. */
2699 acceptable_pow_call (gimple stmt
, tree
*base
, HOST_WIDE_INT
*exponent
)
2702 REAL_VALUE_TYPE c
, cint
;
2704 if (!first_pass_instance
2705 || !flag_unsafe_math_optimizations
2706 || !is_gimple_call (stmt
)
2707 || !has_single_use (gimple_call_lhs (stmt
)))
2710 fndecl
= gimple_call_fndecl (stmt
);
2713 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
2716 switch (DECL_FUNCTION_CODE (fndecl
))
2718 CASE_FLT_FN (BUILT_IN_POW
):
2719 *base
= gimple_call_arg (stmt
, 0);
2720 arg1
= gimple_call_arg (stmt
, 1);
2722 if (TREE_CODE (arg1
) != REAL_CST
)
2725 c
= TREE_REAL_CST (arg1
);
2727 if (REAL_EXP (&c
) > HOST_BITS_PER_WIDE_INT
)
2730 *exponent
= real_to_integer (&c
);
2731 real_from_integer (&cint
, VOIDmode
, *exponent
,
2732 *exponent
< 0 ? -1 : 0, 0);
2733 if (!real_identical (&c
, &cint
))
2738 CASE_FLT_FN (BUILT_IN_POWI
):
2739 *base
= gimple_call_arg (stmt
, 0);
2740 arg1
= gimple_call_arg (stmt
, 1);
2742 if (!host_integerp (arg1
, 0))
2745 *exponent
= TREE_INT_CST_LOW (arg1
);
2752 /* Expanding negative exponents is generally unproductive, so we don't
2753 complicate matters with those. Exponents of zero and one should
2754 have been handled by expression folding. */
2755 if (*exponent
< 2 || TREE_CODE (*base
) != SSA_NAME
)
2761 /* Recursively linearize a binary expression that is the RHS of STMT.
2762 Place the operands of the expression tree in the vector named OPS. */
2765 linearize_expr_tree (VEC(operand_entry_t
, heap
) **ops
, gimple stmt
,
2766 bool is_associative
, bool set_visited
)
2768 tree binlhs
= gimple_assign_rhs1 (stmt
);
2769 tree binrhs
= gimple_assign_rhs2 (stmt
);
2770 gimple binlhsdef
= NULL
, binrhsdef
= NULL
;
2771 bool binlhsisreassoc
= false;
2772 bool binrhsisreassoc
= false;
2773 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
2774 struct loop
*loop
= loop_containing_stmt (stmt
);
2775 tree base
= NULL_TREE
;
2776 HOST_WIDE_INT exponent
= 0;
2779 gimple_set_visited (stmt
, true);
2781 if (TREE_CODE (binlhs
) == SSA_NAME
)
2783 binlhsdef
= SSA_NAME_DEF_STMT (binlhs
);
2784 binlhsisreassoc
= (is_reassociable_op (binlhsdef
, rhscode
, loop
)
2785 && !stmt_could_throw_p (binlhsdef
));
2788 if (TREE_CODE (binrhs
) == SSA_NAME
)
2790 binrhsdef
= SSA_NAME_DEF_STMT (binrhs
);
2791 binrhsisreassoc
= (is_reassociable_op (binrhsdef
, rhscode
, loop
)
2792 && !stmt_could_throw_p (binrhsdef
));
2795 /* If the LHS is not reassociable, but the RHS is, we need to swap
2796 them. If neither is reassociable, there is nothing we can do, so
2797 just put them in the ops vector. If the LHS is reassociable,
2798 linearize it. If both are reassociable, then linearize the RHS
2801 if (!binlhsisreassoc
)
2805 /* If this is not a associative operation like division, give up. */
2806 if (!is_associative
)
2808 add_to_ops_vec (ops
, binrhs
);
2812 if (!binrhsisreassoc
)
2814 if (rhscode
== MULT_EXPR
2815 && TREE_CODE (binrhs
) == SSA_NAME
2816 && acceptable_pow_call (binrhsdef
, &base
, &exponent
))
2818 add_repeat_to_ops_vec (ops
, base
, exponent
);
2819 gimple_set_visited (binrhsdef
, true);
2822 add_to_ops_vec (ops
, binrhs
);
2824 if (rhscode
== MULT_EXPR
2825 && TREE_CODE (binlhs
) == SSA_NAME
2826 && acceptable_pow_call (binlhsdef
, &base
, &exponent
))
2828 add_repeat_to_ops_vec (ops
, base
, exponent
);
2829 gimple_set_visited (binlhsdef
, true);
2832 add_to_ops_vec (ops
, binlhs
);
2837 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2839 fprintf (dump_file
, "swapping operands of ");
2840 print_gimple_stmt (dump_file
, stmt
, 0, 0);
2843 swap_tree_operands (stmt
,
2844 gimple_assign_rhs1_ptr (stmt
),
2845 gimple_assign_rhs2_ptr (stmt
));
2848 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2850 fprintf (dump_file
, " is now ");
2851 print_gimple_stmt (dump_file
, stmt
, 0, 0);
2854 /* We want to make it so the lhs is always the reassociative op,
2860 else if (binrhsisreassoc
)
2862 linearize_expr (stmt
);
2863 binlhs
= gimple_assign_rhs1 (stmt
);
2864 binrhs
= gimple_assign_rhs2 (stmt
);
2867 gcc_assert (TREE_CODE (binrhs
) != SSA_NAME
2868 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
),
2870 linearize_expr_tree (ops
, SSA_NAME_DEF_STMT (binlhs
),
2871 is_associative
, set_visited
);
2873 if (rhscode
== MULT_EXPR
2874 && TREE_CODE (binrhs
) == SSA_NAME
2875 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs
), &base
, &exponent
))
2877 add_repeat_to_ops_vec (ops
, base
, exponent
);
2878 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs
), true);
2881 add_to_ops_vec (ops
, binrhs
);
2884 /* Repropagate the negates back into subtracts, since no other pass
2885 currently does it. */
2888 repropagate_negates (void)
2893 FOR_EACH_VEC_ELT (tree
, plus_negates
, i
, negate
)
2895 gimple user
= get_single_immediate_use (negate
);
2897 if (!user
|| !is_gimple_assign (user
))
2900 /* The negate operand can be either operand of a PLUS_EXPR
2901 (it can be the LHS if the RHS is a constant for example).
2903 Force the negate operand to the RHS of the PLUS_EXPR, then
2904 transform the PLUS_EXPR into a MINUS_EXPR. */
2905 if (gimple_assign_rhs_code (user
) == PLUS_EXPR
)
2907 /* If the negated operand appears on the LHS of the
2908 PLUS_EXPR, exchange the operands of the PLUS_EXPR
2909 to force the negated operand to the RHS of the PLUS_EXPR. */
2910 if (gimple_assign_rhs1 (user
) == negate
)
2912 swap_tree_operands (user
,
2913 gimple_assign_rhs1_ptr (user
),
2914 gimple_assign_rhs2_ptr (user
));
2917 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
2918 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
2919 if (gimple_assign_rhs2 (user
) == negate
)
2921 tree rhs1
= gimple_assign_rhs1 (user
);
2922 tree rhs2
= get_unary_op (negate
, NEGATE_EXPR
);
2923 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
2924 gimple_assign_set_rhs_with_ops (&gsi
, MINUS_EXPR
, rhs1
, rhs2
);
2928 else if (gimple_assign_rhs_code (user
) == MINUS_EXPR
)
2930 if (gimple_assign_rhs1 (user
) == negate
)
2935 which we transform into
2938 This pushes down the negate which we possibly can merge
2939 into some other operation, hence insert it into the
2940 plus_negates vector. */
2941 gimple feed
= SSA_NAME_DEF_STMT (negate
);
2942 tree a
= gimple_assign_rhs1 (feed
);
2943 tree rhs2
= gimple_assign_rhs2 (user
);
2944 gimple_stmt_iterator gsi
= gsi_for_stmt (feed
), gsi2
;
2945 gimple_replace_lhs (feed
, negate
);
2946 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, a
, rhs2
);
2947 update_stmt (gsi_stmt (gsi
));
2948 gsi2
= gsi_for_stmt (user
);
2949 gimple_assign_set_rhs_with_ops (&gsi2
, NEGATE_EXPR
, negate
, NULL
);
2950 update_stmt (gsi_stmt (gsi2
));
2951 gsi_move_before (&gsi
, &gsi2
);
2952 VEC_safe_push (tree
, heap
, plus_negates
,
2953 gimple_assign_lhs (gsi_stmt (gsi2
)));
2957 /* Transform "x = -a; y = b - x" into "y = b + a", getting
2958 rid of one operation. */
2959 gimple feed
= SSA_NAME_DEF_STMT (negate
);
2960 tree a
= gimple_assign_rhs1 (feed
);
2961 tree rhs1
= gimple_assign_rhs1 (user
);
2962 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
2963 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, rhs1
, a
);
2964 update_stmt (gsi_stmt (gsi
));
2970 /* Returns true if OP is of a type for which we can do reassociation.
2971 That is for integral or non-saturating fixed-point types, and for
2972 floating point type when associative-math is enabled. */
2975 can_reassociate_p (tree op
)
2977 tree type
= TREE_TYPE (op
);
2978 if ((INTEGRAL_TYPE_P (type
) && TYPE_OVERFLOW_WRAPS (type
))
2979 || NON_SAT_FIXED_POINT_TYPE_P (type
)
2980 || (flag_associative_math
&& FLOAT_TYPE_P (type
)))
2985 /* Break up subtract operations in block BB.
2987 We do this top down because we don't know whether the subtract is
2988 part of a possible chain of reassociation except at the top.
2997 we want to break up k = t - q, but we won't until we've transformed q
2998 = b - r, which won't be broken up until we transform b = c - d.
3000 En passant, clear the GIMPLE visited flag on every statement. */
3003 break_up_subtract_bb (basic_block bb
)
3005 gimple_stmt_iterator gsi
;
3008 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3010 gimple stmt
= gsi_stmt (gsi
);
3011 gimple_set_visited (stmt
, false);
3013 if (!is_gimple_assign (stmt
)
3014 || !can_reassociate_p (gimple_assign_lhs (stmt
)))
3017 /* Look for simple gimple subtract operations. */
3018 if (gimple_assign_rhs_code (stmt
) == MINUS_EXPR
)
3020 if (!can_reassociate_p (gimple_assign_rhs1 (stmt
))
3021 || !can_reassociate_p (gimple_assign_rhs2 (stmt
)))
3024 /* Check for a subtract used only in an addition. If this
3025 is the case, transform it into add of a negate for better
3026 reassociation. IE transform C = A-B into C = A + -B if C
3027 is only used in an addition. */
3028 if (should_break_up_subtract (stmt
))
3029 break_up_subtract (stmt
, &gsi
);
3031 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
3032 && can_reassociate_p (gimple_assign_rhs1 (stmt
)))
3033 VEC_safe_push (tree
, heap
, plus_negates
, gimple_assign_lhs (stmt
));
3035 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
3037 son
= next_dom_son (CDI_DOMINATORS
, son
))
3038 break_up_subtract_bb (son
);
3041 /* Used for repeated factor analysis. */
3042 struct repeat_factor_d
3044 /* An SSA name that occurs in a multiply chain. */
3047 /* Cached rank of the factor. */
3050 /* Number of occurrences of the factor in the chain. */
3051 HOST_WIDE_INT count
;
3053 /* An SSA name representing the product of this factor and
3054 all factors appearing later in the repeated factor vector. */
3058 typedef struct repeat_factor_d repeat_factor
, *repeat_factor_t
;
3059 typedef const struct repeat_factor_d
*const_repeat_factor_t
;
3061 DEF_VEC_O (repeat_factor
);
3062 DEF_VEC_ALLOC_O (repeat_factor
, heap
);
3064 static VEC (repeat_factor
, heap
) *repeat_factor_vec
;
3066 /* Used for sorting the repeat factor vector. Sort primarily by
3067 ascending occurrence count, secondarily by descending rank. */
3070 compare_repeat_factors (const void *x1
, const void *x2
)
3072 const_repeat_factor_t rf1
= (const_repeat_factor_t
) x1
;
3073 const_repeat_factor_t rf2
= (const_repeat_factor_t
) x2
;
3075 if (rf1
->count
!= rf2
->count
)
3076 return rf1
->count
- rf2
->count
;
3078 return rf2
->rank
- rf1
->rank
;
3081 /* Get a new SSA name for register variable *TARGET of type TYPE.
3082 If *TARGET is null or incompatible with TYPE, create the variable
3086 get_reassoc_pow_ssa_name (tree
*target
, tree type
)
3088 if (!*target
|| !types_compatible_p (type
, TREE_TYPE (*target
)))
3090 *target
= create_tmp_reg (type
, "reassocpow");
3091 add_referenced_var (*target
);
3094 return make_ssa_name (*target
, NULL
);
3097 /* Look for repeated operands in OPS in the multiply tree rooted at
3098 STMT. Replace them with an optimal sequence of multiplies and powi
3099 builtin calls, and remove the used operands from OPS. Return an
3100 SSA name representing the value of the replacement sequence. */
3103 attempt_builtin_powi (gimple stmt
, VEC(operand_entry_t
, heap
) **ops
,
3106 unsigned i
, j
, vec_len
;
3109 repeat_factor_t rf1
, rf2
;
3110 repeat_factor rfnew
;
3111 tree result
= NULL_TREE
;
3112 tree target_ssa
, iter_result
;
3113 tree type
= TREE_TYPE (gimple_get_lhs (stmt
));
3114 tree powi_fndecl
= mathfn_built_in (type
, BUILT_IN_POWI
);
3115 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3116 gimple mul_stmt
, pow_stmt
;
3118 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3123 /* Allocate the repeated factor vector. */
3124 repeat_factor_vec
= VEC_alloc (repeat_factor
, heap
, 10);
3126 /* Scan the OPS vector for all SSA names in the product and build
3127 up a vector of occurrence counts for each factor. */
3128 FOR_EACH_VEC_ELT (operand_entry_t
, *ops
, i
, oe
)
3130 if (TREE_CODE (oe
->op
) == SSA_NAME
)
3132 FOR_EACH_VEC_ELT (repeat_factor
, repeat_factor_vec
, j
, rf1
)
3134 if (rf1
->factor
== oe
->op
)
3136 rf1
->count
+= oe
->count
;
3141 if (j
>= VEC_length (repeat_factor
, repeat_factor_vec
))
3143 rfnew
.factor
= oe
->op
;
3144 rfnew
.rank
= oe
->rank
;
3145 rfnew
.count
= oe
->count
;
3146 rfnew
.repr
= NULL_TREE
;
3147 VEC_safe_push (repeat_factor
, heap
, repeat_factor_vec
, &rfnew
);
3152 /* Sort the repeated factor vector by (a) increasing occurrence count,
3153 and (b) decreasing rank. */
3154 VEC_qsort (repeat_factor
, repeat_factor_vec
, compare_repeat_factors
);
3156 /* It is generally best to combine as many base factors as possible
3157 into a product before applying __builtin_powi to the result.
3158 However, the sort order chosen for the repeated factor vector
3159 allows us to cache partial results for the product of the base
3160 factors for subsequent use. When we already have a cached partial
3161 result from a previous iteration, it is best to make use of it
3162 before looking for another __builtin_pow opportunity.
3164 As an example, consider x * x * y * y * y * z * z * z * z.
3165 We want to first compose the product x * y * z, raise it to the
3166 second power, then multiply this by y * z, and finally multiply
3167 by z. This can be done in 5 multiplies provided we cache y * z
3168 for use in both expressions:
3176 If we instead ignored the cached y * z and first multiplied by
3177 the __builtin_pow opportunity z * z, we would get the inferior:
3186 vec_len
= VEC_length (repeat_factor
, repeat_factor_vec
);
3188 /* Repeatedly look for opportunities to create a builtin_powi call. */
3191 HOST_WIDE_INT power
;
3193 /* First look for the largest cached product of factors from
3194 preceding iterations. If found, create a builtin_powi for
3195 it if the minimum occurrence count for its factors is at
3196 least 2, or just use this cached product as our next
3197 multiplicand if the minimum occurrence count is 1. */
3198 FOR_EACH_VEC_ELT (repeat_factor
, repeat_factor_vec
, j
, rf1
)
3200 if (rf1
->repr
&& rf1
->count
> 0)
3210 iter_result
= rf1
->repr
;
3212 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3216 fputs ("Multiplying by cached product ", dump_file
);
3217 for (elt
= j
; elt
< vec_len
; elt
++)
3219 rf
= VEC_index (repeat_factor
, repeat_factor_vec
, elt
);
3220 print_generic_expr (dump_file
, rf
->factor
, 0);
3221 if (elt
< vec_len
- 1)
3222 fputs (" * ", dump_file
);
3224 fputs ("\n", dump_file
);
3229 iter_result
= get_reassoc_pow_ssa_name (target
, type
);
3230 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
3231 build_int_cst (integer_type_node
,
3233 gimple_call_set_lhs (pow_stmt
, iter_result
);
3234 gimple_set_location (pow_stmt
, gimple_location (stmt
));
3235 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
3237 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3241 fputs ("Building __builtin_pow call for cached product (",
3243 for (elt
= j
; elt
< vec_len
; elt
++)
3245 rf
= VEC_index (repeat_factor
, repeat_factor_vec
, elt
);
3246 print_generic_expr (dump_file
, rf
->factor
, 0);
3247 if (elt
< vec_len
- 1)
3248 fputs (" * ", dump_file
);
3250 fprintf (dump_file
, ")^"HOST_WIDE_INT_PRINT_DEC
"\n",
3257 /* Otherwise, find the first factor in the repeated factor
3258 vector whose occurrence count is at least 2. If no such
3259 factor exists, there are no builtin_powi opportunities
3261 FOR_EACH_VEC_ELT (repeat_factor
, repeat_factor_vec
, j
, rf1
)
3263 if (rf1
->count
>= 2)
3272 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3276 fputs ("Building __builtin_pow call for (", dump_file
);
3277 for (elt
= j
; elt
< vec_len
; elt
++)
3279 rf
= VEC_index (repeat_factor
, repeat_factor_vec
, elt
);
3280 print_generic_expr (dump_file
, rf
->factor
, 0);
3281 if (elt
< vec_len
- 1)
3282 fputs (" * ", dump_file
);
3284 fprintf (dump_file
, ")^"HOST_WIDE_INT_PRINT_DEC
"\n", power
);
3287 reassociate_stats
.pows_created
++;
3289 /* Visit each element of the vector in reverse order (so that
3290 high-occurrence elements are visited first, and within the
3291 same occurrence count, lower-ranked elements are visited
3292 first). Form a linear product of all elements in this order
3293 whose occurrencce count is at least that of element J.
3294 Record the SSA name representing the product of each element
3295 with all subsequent elements in the vector. */
3296 if (j
== vec_len
- 1)
3297 rf1
->repr
= rf1
->factor
;
3300 for (ii
= vec_len
- 2; ii
>= (int)j
; ii
--)
3304 rf1
= VEC_index (repeat_factor
, repeat_factor_vec
, ii
);
3305 rf2
= VEC_index (repeat_factor
, repeat_factor_vec
, ii
+ 1);
3307 /* Init the last factor's representative to be itself. */
3309 rf2
->repr
= rf2
->factor
;
3314 target_ssa
= get_reassoc_pow_ssa_name (target
, type
);
3315 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
,
3318 gimple_set_location (mul_stmt
, gimple_location (stmt
));
3319 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
3320 rf1
->repr
= target_ssa
;
3322 /* Don't reprocess the multiply we just introduced. */
3323 gimple_set_visited (mul_stmt
, true);
3327 /* Form a call to __builtin_powi for the maximum product
3328 just formed, raised to the power obtained earlier. */
3329 rf1
= VEC_index (repeat_factor
, repeat_factor_vec
, j
);
3330 iter_result
= get_reassoc_pow_ssa_name (target
, type
);
3331 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
3332 build_int_cst (integer_type_node
,
3334 gimple_call_set_lhs (pow_stmt
, iter_result
);
3335 gimple_set_location (pow_stmt
, gimple_location (stmt
));
3336 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
3339 /* If we previously formed at least one other builtin_powi call,
3340 form the product of this one and those others. */
3343 tree new_result
= get_reassoc_pow_ssa_name (target
, type
);
3344 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
, new_result
,
3345 result
, iter_result
);
3346 gimple_set_location (mul_stmt
, gimple_location (stmt
));
3347 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
3348 gimple_set_visited (mul_stmt
, true);
3349 result
= new_result
;
3352 result
= iter_result
;
3354 /* Decrement the occurrence count of each element in the product
3355 by the count found above, and remove this many copies of each
3357 for (i
= j
; i
< vec_len
; i
++)
3362 rf1
= VEC_index (repeat_factor
, repeat_factor_vec
, i
);
3363 rf1
->count
-= power
;
3365 FOR_EACH_VEC_ELT_REVERSE (operand_entry_t
, *ops
, n
, oe
)
3367 if (oe
->op
== rf1
->factor
)
3371 VEC_ordered_remove (operand_entry_t
, *ops
, n
);
3387 /* At this point all elements in the repeated factor vector have a
3388 remaining occurrence count of 0 or 1, and those with a count of 1
3389 don't have cached representatives. Re-sort the ops vector and
3391 VEC_qsort (operand_entry_t
, *ops
, sort_by_operand_rank
);
3392 VEC_free (repeat_factor
, heap
, repeat_factor_vec
);
3394 /* Return the final product computed herein. Note that there may
3395 still be some elements with single occurrence count left in OPS;
3396 those will be handled by the normal reassociation logic. */
3400 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
3403 transform_stmt_to_copy (gimple_stmt_iterator
*gsi
, gimple stmt
, tree new_rhs
)
3407 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3409 fprintf (dump_file
, "Transforming ");
3410 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3413 rhs1
= gimple_assign_rhs1 (stmt
);
3414 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
3416 remove_visited_stmt_chain (rhs1
);
3418 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3420 fprintf (dump_file
, " into ");
3421 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3425 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
3428 transform_stmt_to_multiply (gimple_stmt_iterator
*gsi
, gimple stmt
,
3429 tree rhs1
, tree rhs2
)
3431 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3433 fprintf (dump_file
, "Transforming ");
3434 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3437 gimple_assign_set_rhs_with_ops (gsi
, MULT_EXPR
, rhs1
, rhs2
);
3438 update_stmt (gsi_stmt (*gsi
));
3439 remove_visited_stmt_chain (rhs1
);
3441 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3443 fprintf (dump_file
, " into ");
3444 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3448 /* Reassociate expressions in basic block BB and its post-dominator as
3452 reassociate_bb (basic_block bb
)
3454 gimple_stmt_iterator gsi
;
3456 tree target
= NULL_TREE
;
3458 for (gsi
= gsi_last_bb (bb
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
3460 gimple stmt
= gsi_stmt (gsi
);
3462 if (is_gimple_assign (stmt
)
3463 && !stmt_could_throw_p (stmt
))
3465 tree lhs
, rhs1
, rhs2
;
3466 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
3468 /* If this is not a gimple binary expression, there is
3469 nothing for us to do with it. */
3470 if (get_gimple_rhs_class (rhs_code
) != GIMPLE_BINARY_RHS
)
3473 /* If this was part of an already processed statement,
3474 we don't need to touch it again. */
3475 if (gimple_visited_p (stmt
))
3477 /* This statement might have become dead because of previous
3479 if (has_zero_uses (gimple_get_lhs (stmt
)))
3481 gsi_remove (&gsi
, true);
3482 release_defs (stmt
);
3483 /* We might end up removing the last stmt above which
3484 places the iterator to the end of the sequence.
3485 Reset it to the last stmt in this case which might
3486 be the end of the sequence as well if we removed
3487 the last statement of the sequence. In which case
3488 we need to bail out. */
3489 if (gsi_end_p (gsi
))
3491 gsi
= gsi_last_bb (bb
);
3492 if (gsi_end_p (gsi
))
3499 lhs
= gimple_assign_lhs (stmt
);
3500 rhs1
= gimple_assign_rhs1 (stmt
);
3501 rhs2
= gimple_assign_rhs2 (stmt
);
3503 /* For non-bit or min/max operations we can't associate
3504 all types. Verify that here. */
3505 if (rhs_code
!= BIT_IOR_EXPR
3506 && rhs_code
!= BIT_AND_EXPR
3507 && rhs_code
!= BIT_XOR_EXPR
3508 && rhs_code
!= MIN_EXPR
3509 && rhs_code
!= MAX_EXPR
3510 && (!can_reassociate_p (lhs
)
3511 || !can_reassociate_p (rhs1
)
3512 || !can_reassociate_p (rhs2
)))
3515 if (associative_tree_code (rhs_code
))
3517 VEC(operand_entry_t
, heap
) *ops
= NULL
;
3518 tree powi_result
= NULL_TREE
;
3520 /* There may be no immediate uses left by the time we
3521 get here because we may have eliminated them all. */
3522 if (TREE_CODE (lhs
) == SSA_NAME
&& has_zero_uses (lhs
))
3525 gimple_set_visited (stmt
, true);
3526 linearize_expr_tree (&ops
, stmt
, true, true);
3527 VEC_qsort (operand_entry_t
, ops
, sort_by_operand_rank
);
3528 optimize_ops_list (rhs_code
, &ops
);
3529 if (undistribute_ops_list (rhs_code
, &ops
,
3530 loop_containing_stmt (stmt
)))
3532 VEC_qsort (operand_entry_t
, ops
, sort_by_operand_rank
);
3533 optimize_ops_list (rhs_code
, &ops
);
3536 if (rhs_code
== BIT_IOR_EXPR
|| rhs_code
== BIT_AND_EXPR
)
3537 optimize_range_tests (rhs_code
, &ops
);
3539 if (first_pass_instance
3540 && rhs_code
== MULT_EXPR
3541 && flag_unsafe_math_optimizations
)
3542 powi_result
= attempt_builtin_powi (stmt
, &ops
, &target
);
3544 /* If the operand vector is now empty, all operands were
3545 consumed by the __builtin_powi optimization. */
3546 if (VEC_length (operand_entry_t
, ops
) == 0)
3547 transform_stmt_to_copy (&gsi
, stmt
, powi_result
);
3548 else if (VEC_length (operand_entry_t
, ops
) == 1)
3550 tree last_op
= VEC_last (operand_entry_t
, ops
)->op
;
3553 transform_stmt_to_multiply (&gsi
, stmt
, last_op
,
3556 transform_stmt_to_copy (&gsi
, stmt
, last_op
);
3560 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (lhs
));
3561 int ops_num
= VEC_length (operand_entry_t
, ops
);
3562 int width
= get_reassociation_width (ops_num
, rhs_code
, mode
);
3564 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3566 "Width = %d was chosen for reassociation\n", width
);
3569 && VEC_length (operand_entry_t
, ops
) > 3)
3570 rewrite_expr_tree_parallel (stmt
, width
, ops
);
3572 rewrite_expr_tree (stmt
, 0, ops
, false);
3574 /* If we combined some repeated factors into a
3575 __builtin_powi call, multiply that result by the
3576 reassociated operands. */
3580 tree type
= TREE_TYPE (gimple_get_lhs (stmt
));
3581 tree target_ssa
= get_reassoc_pow_ssa_name (&target
,
3583 gimple_set_lhs (stmt
, target_ssa
);
3585 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
, lhs
,
3588 gimple_set_location (mul_stmt
, gimple_location (stmt
));
3589 gsi_insert_after (&gsi
, mul_stmt
, GSI_NEW_STMT
);
3593 VEC_free (operand_entry_t
, heap
, ops
);
3597 for (son
= first_dom_son (CDI_POST_DOMINATORS
, bb
);
3599 son
= next_dom_son (CDI_POST_DOMINATORS
, son
))
3600 reassociate_bb (son
);
3603 void dump_ops_vector (FILE *file
, VEC (operand_entry_t
, heap
) *ops
);
3604 void debug_ops_vector (VEC (operand_entry_t
, heap
) *ops
);
3606 /* Dump the operand entry vector OPS to FILE. */
3609 dump_ops_vector (FILE *file
, VEC (operand_entry_t
, heap
) *ops
)
3614 FOR_EACH_VEC_ELT (operand_entry_t
, ops
, i
, oe
)
3616 fprintf (file
, "Op %d -> rank: %d, tree: ", i
, oe
->rank
);
3617 print_generic_expr (file
, oe
->op
, 0);
3621 /* Dump the operand entry vector OPS to STDERR. */
3624 debug_ops_vector (VEC (operand_entry_t
, heap
) *ops
)
3626 dump_ops_vector (stderr
, ops
);
3632 break_up_subtract_bb (ENTRY_BLOCK_PTR
);
3633 reassociate_bb (EXIT_BLOCK_PTR
);
3636 /* Initialize the reassociation pass. */
3643 int *bbs
= XNEWVEC (int, last_basic_block
+ 1);
3645 /* Find the loops, so that we can prevent moving calculations in
3647 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
3649 memset (&reassociate_stats
, 0, sizeof (reassociate_stats
));
3651 operand_entry_pool
= create_alloc_pool ("operand entry pool",
3652 sizeof (struct operand_entry
), 30);
3653 next_operand_entry_id
= 0;
3655 /* Reverse RPO (Reverse Post Order) will give us something where
3656 deeper loops come later. */
3657 pre_and_rev_post_order_compute (NULL
, bbs
, false);
3658 bb_rank
= XCNEWVEC (long, last_basic_block
+ 1);
3659 operand_rank
= pointer_map_create ();
3661 /* Give each default definition a distinct rank. This includes
3662 parameters and the static chain. Walk backwards over all
3663 SSA names so that we get proper rank ordering according
3664 to tree_swap_operands_p. */
3665 for (i
= num_ssa_names
- 1; i
> 0; --i
)
3667 tree name
= ssa_name (i
);
3668 if (name
&& SSA_NAME_IS_DEFAULT_DEF (name
))
3669 insert_operand_rank (name
, ++rank
);
3672 /* Set up rank for each BB */
3673 for (i
= 0; i
< n_basic_blocks
- NUM_FIXED_BLOCKS
; i
++)
3674 bb_rank
[bbs
[i
]] = ++rank
<< 16;
3677 calculate_dominance_info (CDI_POST_DOMINATORS
);
3678 plus_negates
= NULL
;
3681 /* Cleanup after the reassociation pass, and print stats if
3687 statistics_counter_event (cfun
, "Linearized",
3688 reassociate_stats
.linearized
);
3689 statistics_counter_event (cfun
, "Constants eliminated",
3690 reassociate_stats
.constants_eliminated
);
3691 statistics_counter_event (cfun
, "Ops eliminated",
3692 reassociate_stats
.ops_eliminated
);
3693 statistics_counter_event (cfun
, "Statements rewritten",
3694 reassociate_stats
.rewritten
);
3695 statistics_counter_event (cfun
, "Built-in pow[i] calls encountered",
3696 reassociate_stats
.pows_encountered
);
3697 statistics_counter_event (cfun
, "Built-in powi calls created",
3698 reassociate_stats
.pows_created
);
3700 pointer_map_destroy (operand_rank
);
3701 free_alloc_pool (operand_entry_pool
);
3703 VEC_free (tree
, heap
, plus_negates
);
3704 free_dominance_info (CDI_POST_DOMINATORS
);
3705 loop_optimizer_finalize ();
/* Gate and execute functions for Reassociation.  */

static unsigned int
execute_reassoc (void)
{
  init_reassoc ();

  do_reassoc ();
  repropagate_negates ();

  fini_reassoc ();
  return 0;
}
3723 gate_tree_ssa_reassoc (void)
3725 return flag_tree_reassoc
!= 0;
3728 struct gimple_opt_pass pass_reassoc
=
3732 "reassoc", /* name */
3733 gate_tree_ssa_reassoc
, /* gate */
3734 execute_reassoc
, /* execute */
3737 0, /* static_pass_number */
3738 TV_TREE_REASSOC
, /* tv_id */
3739 PROP_cfg
| PROP_ssa
, /* properties_required */
3740 0, /* properties_provided */
3741 0, /* properties_destroyed */
3742 0, /* todo_flags_start */
3745 | TODO_ggc_collect
/* todo_flags_finish */