gcc/tree-ssa-reassoc.c

   1 /* Reassociation for trees.
   2    Copyright (C) 2005, 2007, 2008, 2009, 2010, 2011
   3    Free Software Foundation, Inc.
   4    Contributed by Daniel Berlin <dan@dberlin.org>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 3, or (at your option)
  11 any later version.
  12
  13 GCC is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "tree.h"
  27 #include "basic-block.h"
  28 #include "tree-pretty-print.h"
  29 #include "gimple-pretty-print.h"
  30 #include "tree-inline.h"
  31 #include "tree-flow.h"
  32 #include "gimple.h"
  33 #include "tree-dump.h"
  34 #include "timevar.h"
  35 #include "tree-iterator.h"
  36 #include "tree-pass.h"
  37 #include "alloc-pool.h"
  38 #include "vec.h"
  39 #include "langhooks.h"
  40 #include "pointer-set.h"
  41 #include "cfgloop.h"
  42 #include "flags.h"
  43 #include "target.h"
  44 #include "params.h"
  45 #include "diagnostic-core.h"
  46
  47 /*  This is a simple global reassociation pass.  It is, in part, based
  48     on the LLVM pass of the same name (They do some things more/less
  49     than we do, in different orders, etc).
  50
  51     It consists of five steps:
  52
  53     1. Breaking up subtract operations into addition + negate, where
  54     it would promote the reassociation of adds.
  55
  56     2. Left linearization of the expression trees, so that (A+B)+(C+D)
  57     becomes (((A+B)+C)+D), which is easier for us to rewrite later.
  58     During linearization, we place the operands of the binary
  59     expressions into a vector of operand_entry_t
  60
  61     3. Optimization of the operand lists, eliminating things like a +
  62     -a, a & a, etc.
  63
  64     4. Rewrite the expression trees we linearized and optimized so
  65     they are in proper rank order.
  66
  67     5. Repropagate negates, as nothing else will clean it up ATM.
  68
  69     A bit of theory on #4, since nobody seems to write anything down
  70     about why it makes sense to do it the way they do it:
  71
  72     We could do this much nicer theoretically, but don't (for reasons
  73     explained after how to do it theoretically nice :P).
  74
  75     In order to promote the most redundancy elimination, you want
  76     binary expressions whose operands are the same rank (or
  77     preferably, the same value) exposed to the redundancy eliminator,
  78     for possible elimination.
  79
  80     So the way to do this if we really cared, is to build the new op
  81     tree from the leaves to the roots, merging as you go, and putting the
  82     new op on the end of the worklist, until you are left with one
  83     thing on the worklist.
  84
  85     IE if you have to rewrite the following set of operands (listed with
  86     rank in parentheses), with opcode PLUS_EXPR:
  87
  88     a (1),  b (1),  c (1),  d (2), e (2)
  89
  90
  91     We start with our merge worklist empty, and the ops list with all of
  92     those on it.
  93
  94     You want to first merge all leaves of the same rank, as much as
  95     possible.
  96
  97     So first build a binary op of
  98
  99     mergetmp = a + b, and put "mergetmp" on the merge worklist.
 100
 101     Because there is no three operand form of PLUS_EXPR, c is not going to
 102     be exposed to redundancy elimination as a rank 1 operand.
 103
 104     So you might as well throw it on the merge worklist (you could also
 105     consider it to now be a rank two operand, and merge it with d and e,
 106     but in this case, you then have evicted e from a binary op. So at
 107     least in this situation, you can't win.)
 108
 109     Then build a binary op of d + e
 110     mergetmp2 = d + e
 111
 112     and put mergetmp2 on the merge worklist.
 113
 114     so merge worklist = {mergetmp, c, mergetmp2}
 115
 116     Continue building binary ops of these operations until you have only
 117     one operation left on the worklist.
 118
 119     So we have
 120
 121     build binary op
 122     mergetmp3 = mergetmp + c
 123
 124     worklist = {mergetmp2, mergetmp3}
 125
 126     mergetmp4 = mergetmp2 + mergetmp3
 127
 128     worklist = {mergetmp4}
 129
 130     because we have one operation left, we can now just set the original
 131     statement equal to the result of that operation.
 132
 133     This will at least expose a + b  and d + e to redundancy elimination
 134     as binary operations.
 135
 136     For extra points, you can reuse the old statements to build the
 137     mergetmps, since you shouldn't run out.
 138
 139     So why don't we do this?
 140
 141     Because it's expensive, and rarely will help.  Most trees we are
 142     reassociating have 3 or less ops.  If they have 2 ops, they already
 143     will be written into a nice single binary op.  If you have 3 ops, a
 144     single simple check suffices to tell you whether the first two are of the
 145     same rank.  If so, you know to order it
 146
 147     mergetmp = op1 + op2
 148     newstmt = mergetmp + op3
 149
 150     instead of
 151     mergetmp = op2 + op3
 152     newstmt = mergetmp + op1
 153
 154     If all three are of the same rank, you can't expose them all in a
 155     single binary operator anyway, so the above is *still* the best you
 156     can do.
 157
 158     Thus, this is what we do.  When we have three ops left, we check to see
 159     what order to put them in, and call it a day.  As a nod to vector sum
 160     reduction, we check if any of the ops are really a phi node that is a
 161     destructive update for the associating op, and keep the destructive
 162     update together for vector sum reduction recognition.  */
 163
 164
 165 /* Statistics */
 166 static struct
 167 {
 168   int linearized;
 169   int constants_eliminated;
 170   int ops_eliminated;
 171   int rewritten;
 172 } reassociate_stats;
 173
 174 /* Operator, rank pair.  */
 175 typedef struct operand_entry
 176 {
 177   unsigned int rank;
 178   int id;
 179   tree op;
 180 } *operand_entry_t;
 181
 182 static alloc_pool operand_entry_pool;
 183
 184 /* This is used to assign a unique ID to each struct operand_entry
 185    so that qsort results are identical on different hosts.  */
 186 static int next_operand_entry_id;
 187
 188 /* Starting rank number for a given basic block, so that we can rank
 189    operations using unmovable instructions in that BB based on the bb
 190    depth.  */
 191 static long *bb_rank;
 192
 193 /* Operand->rank hashtable.  */
 194 static struct pointer_map_t *operand_rank;
 195
 196 /* Forward decls.  */
 197 static long get_rank (tree);
 198
 199
 200 /* Bias amount for loop-carried phis.  We want this to be larger than
 201    the depth of any reassociation tree we can see, but not larger than
 202    the rank difference between two blocks.  */
 203 #define PHI_LOOP_BIAS (1 << 15)
 204
 205 /* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
 206    an innermost loop, and the phi has only a single use which is inside
 207    the loop, then the rank is the block rank of the loop latch plus an
 208    extra bias for the loop-carried dependence.  This causes expressions
 209    calculated into an accumulator variable to be independent for each
 210    iteration of the loop.  If STMT is some other phi, the rank is the
 211    block rank of its containing block.  */
 212 static long
 213 phi_rank (gimple stmt)
 214 {
 215   basic_block bb = gimple_bb (stmt);
 216   struct loop *father = bb->loop_father;
 217   tree res;
 218   unsigned i;
 219   use_operand_p use;
 220   gimple use_stmt;
 221
 222   /* We only care about real loops (those with a latch).  */
 223   if (!father->latch)
 224     return bb_rank[bb->index];
 225
 226   /* Interesting phis must be in headers of innermost loops.  */
 227   if (bb != father->header
 228       || father->inner)
 229     return bb_rank[bb->index];
 230
 231   /* Ignore virtual SSA_NAMEs.  */
 232   res = gimple_phi_result (stmt);
 233   if (!is_gimple_reg (SSA_NAME_VAR (res)))
 234     return bb_rank[bb->index];
 235
 236   /* The phi definition must have a single use, and that use must be
 237      within the loop.  Otherwise this isn't an accumulator pattern.  */
 238   if (!single_imm_use (res, &use, &use_stmt)
 239       || gimple_bb (use_stmt)->loop_father != father)
 240     return bb_rank[bb->index];
 241
 242   /* Look for phi arguments from within the loop.  If found, bias this phi.  */
 243   for (i = 0; i < gimple_phi_num_args (stmt); i++)
 244     {
 245       tree arg = gimple_phi_arg_def (stmt, i);
 246       if (TREE_CODE (arg) == SSA_NAME
 247           && !SSA_NAME_IS_DEFAULT_DEF (arg))
 248         {
 249           gimple def_stmt = SSA_NAME_DEF_STMT (arg);
 250           if (gimple_bb (def_stmt)->loop_father == father)
 251             return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
 252         }
 253     }
 254
 255   /* Must be an uninteresting phi.  */
 256   return bb_rank[bb->index];
 257 }
 258
 259 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
 260    loop-carried dependence of an innermost loop, return TRUE; else
 261    return FALSE.  */
 262 static bool
 263 loop_carried_phi (tree exp)
 264 {
 265   gimple phi_stmt;
 266   long block_rank;
 267
 268   if (TREE_CODE (exp) != SSA_NAME
 269       || SSA_NAME_IS_DEFAULT_DEF (exp))
 270     return false;
 271
 272   phi_stmt = SSA_NAME_DEF_STMT (exp);
 273
 274   if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
 275     return false;
 276
 277   /* Non-loop-carried phis have block rank.  Loop-carried phis have
 278      an additional bias added in.  If this phi doesn't have block rank,
 279      it's biased and should not be propagated.  */
 280   block_rank = bb_rank[gimple_bb (phi_stmt)->index];
 281
 282   if (phi_rank (phi_stmt) != block_rank)
 283     return true;
 284
 285   return false;
 286 }
 287
 288 /* Return the maximum of RANK and the rank that should be propagated
 289    from expression OP.  For most operands, this is just the rank of OP.
 290    For loop-carried phis, the value is zero to avoid undoing the bias
 291    in favor of the phi.  */
 292 static long
 293 propagate_rank (long rank, tree op)
 294 {
 295   long op_rank;
 296
 297   if (loop_carried_phi (op))
 298     return rank;
 299
 300   op_rank = get_rank (op);
 301
 302   return MAX (rank, op_rank);
 303 }
 304
 305 /* Look up the operand rank structure for expression E.  */
 306
 307 static inline long
 308 find_operand_rank (tree e)
 309 {
 310   void **slot = pointer_map_contains (operand_rank, e);
 311   return slot ? (long) (intptr_t) *slot : -1;
 312 }
 313
 314 /* Insert {E,RANK} into the operand rank hashtable.  */
 315
 316 static inline void
 317 insert_operand_rank (tree e, long rank)
 318 {
 319   void **slot;
 320   gcc_assert (rank > 0);
 321   slot = pointer_map_insert (operand_rank, e);
 322   gcc_assert (!*slot);
 323   *slot = (void *) (intptr_t) rank;
 324 }
 325
 326 /* Given an expression E, return the rank of the expression.  */
 327
 328 static long
 329 get_rank (tree e)
 330 {
 331   /* Constants have rank 0.  */
 332   if (is_gimple_min_invariant (e))
 333     return 0;
 334
 335   /* SSA_NAME's have the rank of the expression they are the result
 336      of.
 337      For globals and uninitialized values, the rank is 0.
 338      For function arguments, use the pre-setup rank.
 339      For PHI nodes, stores, asm statements, etc, we use the rank of
 340      the BB.
 341      For simple operations, the rank is the maximum rank of any of
 342      its operands, or the bb_rank, whichever is less.
 343      I make no claims that this is optimal, however, it gives good
 344      results.  */
 345
 346   /* We make an exception to the normal ranking system to break
 347      dependences of accumulator variables in loops.  Suppose we
 348      have a simple one-block loop containing:
 349
 350        x_1 = phi(x_0, x_2)
 351        b = a + x_1
 352        c = b + d
 353        x_2 = c + e
 354
 355      As shown, each iteration of the calculation into x is fully
 356      dependent upon the iteration before it.  We would prefer to
 357      see this in the form:
 358
 359        x_1 = phi(x_0, x_2)
 360        b = a + d
 361        c = b + e
 362        x_2 = c + x_1
 363
 364      If the loop is unrolled, the calculations of b and c from
 365      different iterations can be interleaved.
 366
 367      To obtain this result during reassociation, we bias the rank
 368      of the phi definition x_1 upward, when it is recognized as an
 369      accumulator pattern.  The artificial rank causes it to be
 370      added last, providing the desired independence.  */
 371
 372   if (TREE_CODE (e) == SSA_NAME)
 373     {
 374       gimple stmt;
 375       long rank;
 376       int i, n;
 377       tree op;
 378
 379       if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL
 380           && SSA_NAME_IS_DEFAULT_DEF (e))
 381         return find_operand_rank (e);
 382
 383       stmt = SSA_NAME_DEF_STMT (e);
 384       if (gimple_bb (stmt) == NULL)
 385         return 0;
 386
 387       if (gimple_code (stmt) == GIMPLE_PHI)
 388         return phi_rank (stmt);
 389
 390       if (!is_gimple_assign (stmt)
 391           || gimple_vdef (stmt))
 392         return bb_rank[gimple_bb (stmt)->index];
 393
 394       /* If we already have a rank for this expression, use that.  */
 395       rank = find_operand_rank (e);
 396       if (rank != -1)
 397         return rank;
 398
 399       /* Otherwise, find the maximum rank for the operands.  As an
 400          exception, remove the bias from loop-carried phis when propagating
 401          the rank so that dependent operations are not also biased.  */
 402       rank = 0;
 403       if (gimple_assign_single_p (stmt))
 404         {
 405           tree rhs = gimple_assign_rhs1 (stmt);
 406           n = TREE_OPERAND_LENGTH (rhs);
 407           if (n == 0)
 408             rank = propagate_rank (rank, rhs);
 409           else
 410             {
 411               for (i = 0; i < n; i++)
 412                 {
 413                   op = TREE_OPERAND (rhs, i);
 414
 415                   if (op != NULL_TREE)
 416                     rank = propagate_rank (rank, op);
 417                 }
 418             }
 419         }
 420       else
 421         {
 422           n = gimple_num_ops (stmt);
 423           for (i = 1; i < n; i++)
 424             {
 425               op = gimple_op (stmt, i);
 426               gcc_assert (op);
 427               rank = propagate_rank (rank, op);
 428             }
 429         }
 430
 431       if (dump_file && (dump_flags & TDF_DETAILS))
 432         {
 433           fprintf (dump_file, "Rank for ");
 434           print_generic_expr (dump_file, e, 0);
 435           fprintf (dump_file, " is %ld\n", (rank + 1));
 436         }
 437
 438       /* Note the rank in the hashtable so we don't recompute it.  */
 439       insert_operand_rank (e, (rank + 1));
 440       return (rank + 1);
 441     }
 442
 443   /* Globals, etc,  are rank 0 */
 444   return 0;
 445 }
 446
 447 DEF_VEC_P(operand_entry_t);
 448 DEF_VEC_ALLOC_P(operand_entry_t, heap);
 449
 450 /* We want integer ones to end up last no matter what, since they are
 451    the ones we can do the most with.  */
 452 #define INTEGER_CONST_TYPE 1 << 3
 453 #define FLOAT_CONST_TYPE 1 << 2
 454 #define OTHER_CONST_TYPE 1 << 1
 455
 456 /* Classify an invariant tree into integer, float, or other, so that
 457    we can sort them to be near other constants of the same type.  */
 458 static inline int
 459 constant_type (tree t)
 460 {
 461   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
 462     return INTEGER_CONST_TYPE;
 463   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
 464     return FLOAT_CONST_TYPE;
 465   else
 466     return OTHER_CONST_TYPE;
 467 }
 468
 469 /* qsort comparison function to sort operand entries PA and PB by rank
 470    so that the sorted array is ordered by rank in decreasing order.  */
 471 static int
 472 sort_by_operand_rank (const void *pa, const void *pb)
 473 {
 474   const operand_entry_t oea = *(const operand_entry_t *)pa;
 475   const operand_entry_t oeb = *(const operand_entry_t *)pb;
 476
 477   /* It's nicer for optimize_expression if constants that are likely
 478      to fold when added/multiplied//whatever are put next to each
 479      other.  Since all constants have rank 0, order them by type.  */
 480   if (oeb->rank == 0 &&  oea->rank == 0)
 481     {
 482       if (constant_type (oeb->op) != constant_type (oea->op))
 483         return constant_type (oeb->op) - constant_type (oea->op);
 484       else
 485         /* To make sorting result stable, we use unique IDs to determine
 486            order.  */
 487         return oeb->id - oea->id;
 488     }
 489
 490   /* Lastly, make sure the versions that are the same go next to each
 491      other.  We use SSA_NAME_VERSION because it's stable.  */
 492   if ((oeb->rank - oea->rank == 0)
 493       && TREE_CODE (oea->op) == SSA_NAME
 494       && TREE_CODE (oeb->op) == SSA_NAME)
 495     {
 496       if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
 497         return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
 498       else
 499         return oeb->id - oea->id;
 500     }
 501
 502   if (oeb->rank != oea->rank)
 503     return oeb->rank - oea->rank;
 504   else
 505     return oeb->id - oea->id;
 506 }
 507
 508 /* Add an operand entry to *OPS for the tree operand OP.  */
 509
 510 static void
 511 add_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op)
 512 {
 513   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
 514
 515   oe->op = op;
 516   oe->rank = get_rank (op);
 517   oe->id = next_operand_entry_id++;
 518   VEC_safe_push (operand_entry_t, heap, *ops, oe);
 519 }
 520
 521 /* Return true if STMT is reassociable operation containing a binary
 522    operation with tree code CODE, and is inside LOOP.  */
 523
 524 static bool
 525 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
 526 {
 527   basic_block bb = gimple_bb (stmt);
 528
 529   if (gimple_bb (stmt) == NULL)
 530     return false;
 531
 532   if (!flow_bb_inside_loop_p (loop, bb))
 533     return false;
 534
 535   if (is_gimple_assign (stmt)
 536       && gimple_assign_rhs_code (stmt) == code
 537       && has_single_use (gimple_assign_lhs (stmt)))
 538     return true;
 539
 540   return false;
 541 }
 542
 543
 544 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
 545    operand of the negate operation.  Otherwise, return NULL.  */
 546
 547 static tree
 548 get_unary_op (tree name, enum tree_code opcode)
 549 {
 550   gimple stmt = SSA_NAME_DEF_STMT (name);
 551
 552   if (!is_gimple_assign (stmt))
 553     return NULL_TREE;
 554
 555   if (gimple_assign_rhs_code (stmt) == opcode)
 556     return gimple_assign_rhs1 (stmt);
 557   return NULL_TREE;
 558 }
 559
 560 /* If CURR and LAST are a pair of ops that OPCODE allows us to
 561    eliminate through equivalences, do so, remove them from OPS, and
 562    return true.  Otherwise, return false.  */
 563
 564 static bool
 565 eliminate_duplicate_pair (enum tree_code opcode,
 566                           VEC (operand_entry_t, heap) **ops,
 567                           bool *all_done,
 568                           unsigned int i,
 569                           operand_entry_t curr,
 570                           operand_entry_t last)
 571 {
 572
 573   /* If we have two of the same op, and the opcode is & |, min, or max,
 574      we can eliminate one of them.
 575      If we have two of the same op, and the opcode is ^, we can
 576      eliminate both of them.  */
 577
 578   if (last && last->op == curr->op)
 579     {
 580       switch (opcode)
 581         {
 582         case MAX_EXPR:
 583         case MIN_EXPR:
 584         case BIT_IOR_EXPR:
 585         case BIT_AND_EXPR:
 586           if (dump_file && (dump_flags & TDF_DETAILS))
 587             {
 588               fprintf (dump_file, "Equivalence: ");
 589               print_generic_expr (dump_file, curr->op, 0);
 590               fprintf (dump_file, " [&|minmax] ");
 591               print_generic_expr (dump_file, last->op, 0);
 592               fprintf (dump_file, " -> ");
 593               print_generic_stmt (dump_file, last->op, 0);
 594             }
 595
 596           VEC_ordered_remove (operand_entry_t, *ops, i);
 597           reassociate_stats.ops_eliminated ++;
 598
 599           return true;
 600
 601         case BIT_XOR_EXPR:
 602           if (dump_file && (dump_flags & TDF_DETAILS))
 603             {
 604               fprintf (dump_file, "Equivalence: ");
 605               print_generic_expr (dump_file, curr->op, 0);
 606               fprintf (dump_file, " ^ ");
 607               print_generic_expr (dump_file, last->op, 0);
 608               fprintf (dump_file, " -> nothing\n");
 609             }
 610
 611           reassociate_stats.ops_eliminated += 2;
 612
 613           if (VEC_length (operand_entry_t, *ops) == 2)
 614             {
 615               VEC_free (operand_entry_t, heap, *ops);
 616               *ops = NULL;
 617               add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
 618               *all_done = true;
 619             }
 620           else
 621             {
 622               VEC_ordered_remove (operand_entry_t, *ops, i-1);
 623               VEC_ordered_remove (operand_entry_t, *ops, i-1);
 624             }
 625
 626           return true;
 627
 628         default:
 629           break;
 630         }
 631     }
 632   return false;
 633 }
 634
 635 static VEC(tree, heap) *plus_negates;
 636
 637 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
 638    expression, look in OPS for a corresponding positive operation to cancel
 639    it out.  If we find one, remove the other from OPS, replace
 640    OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
 641    return false. */
 642
 643 static bool
 644 eliminate_plus_minus_pair (enum tree_code opcode,
 645                            VEC (operand_entry_t, heap) **ops,
 646                            unsigned int currindex,
 647                            operand_entry_t curr)
 648 {
 649   tree negateop;
 650   tree notop;
 651   unsigned int i;
 652   operand_entry_t oe;
 653
 654   if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
 655     return false;
 656
 657   negateop = get_unary_op (curr->op, NEGATE_EXPR);
 658   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 659   if (negateop == NULL_TREE && notop == NULL_TREE)
 660     return false;
 661
 662   /* Any non-negated version will have a rank that is one less than
 663      the current rank.  So once we hit those ranks, if we don't find
 664      one, we can stop.  */
 665
 666   for (i = currindex + 1;
 667        VEC_iterate (operand_entry_t, *ops, i, oe)
 668        && oe->rank >= curr->rank - 1 ;
 669        i++)
 670     {
 671       if (oe->op == negateop)
 672         {
 673
 674           if (dump_file && (dump_flags & TDF_DETAILS))
 675             {
 676               fprintf (dump_file, "Equivalence: ");
 677               print_generic_expr (dump_file, negateop, 0);
 678               fprintf (dump_file, " + -");
 679               print_generic_expr (dump_file, oe->op, 0);
 680               fprintf (dump_file, " -> 0\n");
 681             }
 682
 683           VEC_ordered_remove (operand_entry_t, *ops, i);
 684           add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
 685           VEC_ordered_remove (operand_entry_t, *ops, currindex);
 686           reassociate_stats.ops_eliminated ++;
 687
 688           return true;
 689         }
 690       else if (oe->op == notop)
 691         {
 692           tree op_type = TREE_TYPE (oe->op);
 693
 694           if (dump_file && (dump_flags & TDF_DETAILS))
 695             {
 696               fprintf (dump_file, "Equivalence: ");
 697               print_generic_expr (dump_file, notop, 0);
 698               fprintf (dump_file, " + ~");
 699               print_generic_expr (dump_file, oe->op, 0);
 700               fprintf (dump_file, " -> -1\n");
 701             }
 702
 703           VEC_ordered_remove (operand_entry_t, *ops, i);
 704           add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
 705           VEC_ordered_remove (operand_entry_t, *ops, currindex);
 706           reassociate_stats.ops_eliminated ++;
 707
 708           return true;
 709         }
 710     }
 711
 712   /* CURR->OP is a negate expr in a plus expr: save it for later
 713      inspection in repropagate_negates().  */
 714   if (negateop != NULL_TREE)
 715     VEC_safe_push (tree, heap, plus_negates, curr->op);
 716
 717   return false;
 718 }
 719
 720 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
 721    bitwise not expression, look in OPS for a corresponding operand to
 722    cancel it out.  If we find one, remove the other from OPS, replace
 723    OPS[CURRINDEX] with 0, and return true.  Otherwise, return
 724    false. */
 725
 726 static bool
 727 eliminate_not_pairs (enum tree_code opcode,
 728                      VEC (operand_entry_t, heap) **ops,
 729                      unsigned int currindex,
 730                      operand_entry_t curr)
 731 {
 732   tree notop;
 733   unsigned int i;
 734   operand_entry_t oe;
 735
 736   if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
 737       || TREE_CODE (curr->op) != SSA_NAME)
 738     return false;
 739
 740   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 741   if (notop == NULL_TREE)
 742     return false;
 743
 744   /* Any non-not version will have a rank that is one less than
 745      the current rank.  So once we hit those ranks, if we don't find
 746      one, we can stop.  */
 747
 748   for (i = currindex + 1;
 749        VEC_iterate (operand_entry_t, *ops, i, oe)
 750        && oe->rank >= curr->rank - 1;
 751        i++)
 752     {
 753       if (oe->op == notop)
 754         {
 755           if (dump_file && (dump_flags & TDF_DETAILS))
 756             {
 757               fprintf (dump_file, "Equivalence: ");
 758               print_generic_expr (dump_file, notop, 0);
 759               if (opcode == BIT_AND_EXPR)
 760                 fprintf (dump_file, " & ~");
 761               else if (opcode == BIT_IOR_EXPR)
 762                 fprintf (dump_file, " | ~");
 763               print_generic_expr (dump_file, oe->op, 0);
 764               if (opcode == BIT_AND_EXPR)
 765                 fprintf (dump_file, " -> 0\n");
 766               else if (opcode == BIT_IOR_EXPR)
 767                 fprintf (dump_file, " -> -1\n");
 768             }
 769
 770           if (opcode == BIT_AND_EXPR)
 771             oe->op = build_zero_cst (TREE_TYPE (oe->op));
 772           else if (opcode == BIT_IOR_EXPR)
 773             oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
 774                                           TYPE_PRECISION (TREE_TYPE (oe->op)));
 775
 776           reassociate_stats.ops_eliminated
 777             += VEC_length (operand_entry_t, *ops) - 1;
 778           VEC_free (operand_entry_t, heap, *ops);
 779           *ops = NULL;
 780           VEC_safe_push (operand_entry_t, heap, *ops, oe);
 781           return true;
 782         }
 783     }
 784
 785   return false;
 786 }
 787
 788 /* Use constant value that may be present in OPS to try to eliminate
 789    operands.  Note that this function is only really used when we've
 790    eliminated ops for other reasons, or merged constants.  Across
 791    single statements, fold already does all of this, plus more.  There
 792    is little point in duplicating logic, so I've only included the
 793    identities that I could ever construct testcases to trigger.  */
 794
 795 static void
 796 eliminate_using_constants (enum tree_code opcode,
 797                            VEC(operand_entry_t, heap) **ops)
 798 {
 799   operand_entry_t oelast = VEC_last (operand_entry_t, *ops);
 800   tree type = TREE_TYPE (oelast->op);
 801
 802   if (oelast->rank == 0
 803       && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
 804     {
 805       switch (opcode)
 806         {
 807         case BIT_AND_EXPR:
 808           if (integer_zerop (oelast->op))
 809             {
 810               if (VEC_length (operand_entry_t, *ops) != 1)
 811                 {
 812                   if (dump_file && (dump_flags & TDF_DETAILS))
 813                     fprintf (dump_file, "Found & 0, removing all other ops\n");
 814
 815                   reassociate_stats.ops_eliminated
 816                     += VEC_length (operand_entry_t, *ops) - 1;
 817
 818                   VEC_free (operand_entry_t, heap, *ops);
 819                   *ops = NULL;
 820                   VEC_safe_push (operand_entry_t, heap, *ops, oelast);
 821                   return;
 822                 }
 823             }
 824           else if (integer_all_onesp (oelast->op))
 825             {
 826               if (VEC_length (operand_entry_t, *ops) != 1)
 827                 {
 828                   if (dump_file && (dump_flags & TDF_DETAILS))
 829                     fprintf (dump_file, "Found & -1, removing\n");
 830                   VEC_pop (operand_entry_t, *ops);
 831                   reassociate_stats.ops_eliminated++;
 832                 }
 833             }
 834           break;
 835         case BIT_IOR_EXPR:
 836           if (integer_all_onesp (oelast->op))
 837             {
 838               if (VEC_length (operand_entry_t, *ops) != 1)
 839                 {
 840                   if (dump_file && (dump_flags & TDF_DETAILS))
 841                     fprintf (dump_file, "Found | -1, removing all other ops\n");
 842
 843                   reassociate_stats.ops_eliminated
 844                     += VEC_length (operand_entry_t, *ops) - 1;
 845
 846                   VEC_free (operand_entry_t, heap, *ops);
 847                   *ops = NULL;
 848                   VEC_safe_push (operand_entry_t, heap, *ops, oelast);
 849                   return;
 850                 }
 851             }
 852           else if (integer_zerop (oelast->op))
 853             {
 854               if (VEC_length (operand_entry_t, *ops) != 1)
 855                 {
 856                   if (dump_file && (dump_flags & TDF_DETAILS))
 857                     fprintf (dump_file, "Found | 0, removing\n");
 858                   VEC_pop (operand_entry_t, *ops);
 859                   reassociate_stats.ops_eliminated++;
 860                 }
 861             }
 862           break;
 863         case MULT_EXPR:
 864           if (integer_zerop (oelast->op)
 865               || (FLOAT_TYPE_P (type)
 866                   && !HONOR_NANS (TYPE_MODE (type))
 867                   && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
 868                   && real_zerop (oelast->op)))
 869             {
 870               if (VEC_length (operand_entry_t, *ops) != 1)
 871                 {
 872                   if (dump_file && (dump_flags & TDF_DETAILS))
 873                     fprintf (dump_file, "Found * 0, removing all other ops\n");
 874
 875                   reassociate_stats.ops_eliminated
 876                     += VEC_length (operand_entry_t, *ops) - 1;
 877                   VEC_free (operand_entry_t, heap, *ops);
 878                   *ops = NULL;
 879                   VEC_safe_push (operand_entry_t, heap, *ops, oelast);
 880                   return;
 881                 }
 882             }
 883           else if (integer_onep (oelast->op)
 884                    || (FLOAT_TYPE_P (type)
 885                        && !HONOR_SNANS (TYPE_MODE (type))
 886                        && real_onep (oelast->op)))
 887             {
 888               if (VEC_length (operand_entry_t, *ops) != 1)
 889                 {
 890                   if (dump_file && (dump_flags & TDF_DETAILS))
 891                     fprintf (dump_file, "Found * 1, removing\n");
 892                   VEC_pop (operand_entry_t, *ops);
 893                   reassociate_stats.ops_eliminated++;
 894                   return;
 895                 }
 896             }
 897           break;
 898         case BIT_XOR_EXPR:
 899         case PLUS_EXPR:
 900         case MINUS_EXPR:
 901           if (integer_zerop (oelast->op)
 902               || (FLOAT_TYPE_P (type)
 903                   && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
 904                   && fold_real_zero_addition_p (type, oelast->op,
 905                                                 opcode == MINUS_EXPR)))
 906             {
 907               if (VEC_length (operand_entry_t, *ops) != 1)
 908                 {
 909                   if (dump_file && (dump_flags & TDF_DETAILS))
 910                     fprintf (dump_file, "Found [|^+] 0, removing\n");
 911                   VEC_pop (operand_entry_t, *ops);
 912                   reassociate_stats.ops_eliminated++;
 913                   return;
 914                 }
 915             }
 916           break;
 917         default:
 918           break;
 919         }
 920     }
 921 }
 922
 923
 924 static void linearize_expr_tree (VEC(operand_entry_t, heap) **, gimple,
 925                                  bool, bool);
 926
 927 /* Structure for tracking and counting operands.  */
 928 typedef struct oecount_s {
 929   int cnt;
 930   int id;
 931   enum tree_code oecode;
 932   tree op;
 933 } oecount;
 934
 935 DEF_VEC_O(oecount);
 936 DEF_VEC_ALLOC_O(oecount,heap);
 937
 938 /* The heap for the oecount hashtable and the sorted list of operands.  */
 939 static VEC (oecount, heap) *cvec;
 940
 941 /* Hash function for oecount.  */
 942
 943 static hashval_t
 944 oecount_hash (const void *p)
 945 {
 946   const oecount *c = VEC_index (oecount, cvec, (size_t)p - 42);
 947   return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
 948 }
 949
 950 /* Comparison function for oecount.  */
 951
 952 static int
 953 oecount_eq (const void *p1, const void *p2)
 954 {
 955   const oecount *c1 = VEC_index (oecount, cvec, (size_t)p1 - 42);
 956   const oecount *c2 = VEC_index (oecount, cvec, (size_t)p2 - 42);
 957   return (c1->oecode == c2->oecode
 958           && c1->op == c2->op);
 959 }
 960
 961 /* Comparison function for qsort sorting oecount elements by count.  */
 962
 963 static int
 964 oecount_cmp (const void *p1, const void *p2)
 965 {
 966   const oecount *c1 = (const oecount *)p1;
 967   const oecount *c2 = (const oecount *)p2;
 968   if (c1->cnt != c2->cnt)
 969     return c1->cnt - c2->cnt;
 970   else
 971     /* If counts are identical, use unique IDs to stabilize qsort.  */
 972     return c1->id - c2->id;
 973 }
 974
 975 /* Walks the linear chain with result *DEF searching for an operation
 976    with operand OP and code OPCODE removing that from the chain.  *DEF
 977    is updated if there is only one operand but no operation left.  */
 978
 979 static void
 980 zero_one_operation (tree *def, enum tree_code opcode, tree op)
 981 {
 982   gimple stmt = SSA_NAME_DEF_STMT (*def);
 983
 984   do
 985     {
 986       tree name = gimple_assign_rhs1 (stmt);
 987
 988       /* If this is the operation we look for and one of the operands
 989          is ours simply propagate the other operand into the stmts
 990          single use.  */
 991       if (gimple_assign_rhs_code (stmt) == opcode
 992           && (name == op
 993               || gimple_assign_rhs2 (stmt) == op))
 994         {
 995           gimple use_stmt;
 996           use_operand_p use;
 997           gimple_stmt_iterator gsi;
 998           if (name == op)
 999             name = gimple_assign_rhs2 (stmt);
1000           gcc_assert (has_single_use (gimple_assign_lhs (stmt)));
1001           single_imm_use (gimple_assign_lhs (stmt), &use, &use_stmt);
1002           if (gimple_assign_lhs (stmt) == *def)
1003             *def = name;
1004           SET_USE (use, name);
1005           if (TREE_CODE (name) != SSA_NAME)
1006             update_stmt (use_stmt);
1007           gsi = gsi_for_stmt (stmt);
1008           gsi_remove (&gsi, true);
1009           release_defs (stmt);
1010           return;
1011         }
1012
1013       /* Continue walking the chain.  */
1014       gcc_assert (name != op
1015                   && TREE_CODE (name) == SSA_NAME);
1016       stmt = SSA_NAME_DEF_STMT (name);
1017     }
1018   while (1);
1019 }
1020
1021 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1022    the result.  Places the statement after the definition of either
1023    OP1 or OP2.  Returns the new statement.  */
1024
1025 static gimple
1026 build_and_add_sum (tree tmpvar, tree op1, tree op2, enum tree_code opcode)
1027 {
1028   gimple op1def = NULL, op2def = NULL;
1029   gimple_stmt_iterator gsi;
1030   tree op;
1031   gimple sum;
1032
1033   /* Create the addition statement.  */
1034   sum = gimple_build_assign_with_ops (opcode, tmpvar, op1, op2);
1035   op = make_ssa_name (tmpvar, sum);
1036   gimple_assign_set_lhs (sum, op);
1037
1038   /* Find an insertion place and insert.  */
1039   if (TREE_CODE (op1) == SSA_NAME)
1040     op1def = SSA_NAME_DEF_STMT (op1);
1041   if (TREE_CODE (op2) == SSA_NAME)
1042     op2def = SSA_NAME_DEF_STMT (op2);
1043   if ((!op1def || gimple_nop_p (op1def))
1044       && (!op2def || gimple_nop_p (op2def)))
1045     {
1046       gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1047       gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1048     }
1049   else if ((!op1def || gimple_nop_p (op1def))
1050            || (op2def && !gimple_nop_p (op2def)
1051                && stmt_dominates_stmt_p (op1def, op2def)))
1052     {
1053       if (gimple_code (op2def) == GIMPLE_PHI)
1054         {
1055           gsi = gsi_after_labels (gimple_bb (op2def));
1056           gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1057         }
1058       else
1059         {
1060           if (!stmt_ends_bb_p (op2def))
1061             {
1062               gsi = gsi_for_stmt (op2def);
1063               gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1064             }
1065           else
1066             {
1067               edge e;
1068               edge_iterator ei;
1069
1070               FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1071                 if (e->flags & EDGE_FALLTHRU)
1072                   gsi_insert_on_edge_immediate (e, sum);
1073             }
1074         }
1075     }
1076   else
1077     {
1078       if (gimple_code (op1def) == GIMPLE_PHI)
1079         {
1080           gsi = gsi_after_labels (gimple_bb (op1def));
1081           gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1082         }
1083       else
1084         {
1085           if (!stmt_ends_bb_p (op1def))
1086             {
1087               gsi = gsi_for_stmt (op1def);
1088               gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1089             }
1090           else
1091             {
1092               edge e;
1093               edge_iterator ei;
1094
1095               FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1096                 if (e->flags & EDGE_FALLTHRU)
1097                   gsi_insert_on_edge_immediate (e, sum);
1098             }
1099         }
1100     }
1101   update_stmt (sum);
1102
1103   return sum;
1104 }
1105
1106 /* Perform un-distribution of divisions and multiplications.
1107    A * X + B * X is transformed into (A + B) * X and A / X + B / X
1108    to (A + B) / X for real X.
1109
1110    The algorithm is organized as follows.
1111
1112     - First we walk the addition chain *OPS looking for summands that
1113       are defined by a multiplication or a real division.  This results
1114       in the candidates bitmap with relevant indices into *OPS.
1115
1116     - Second we build the chains of multiplications or divisions for
1117       these candidates, counting the number of occurences of (operand, code)
1118       pairs in all of the candidates chains.
1119
1120     - Third we sort the (operand, code) pairs by number of occurence and
1121       process them starting with the pair with the most uses.
1122
1123       * For each such pair we walk the candidates again to build a
1124         second candidate bitmap noting all multiplication/division chains
1125         that have at least one occurence of (operand, code).
1126
1127       * We build an alternate addition chain only covering these
1128         candidates with one (operand, code) operation removed from their
1129         multiplication/division chain.
1130
1131       * The first candidate gets replaced by the alternate addition chain
1132         multiplied/divided by the operand.
1133
1134       * All candidate chains get disabled for further processing and
1135         processing of (operand, code) pairs continues.
1136
1137   The alternate addition chains built are re-processed by the main
1138   reassociation algorithm which allows optimizing a * x * y + b * y * x
1139   to (a + b ) * x * y in one invocation of the reassociation pass.  */
1140
1141 static bool
1142 undistribute_ops_list (enum tree_code opcode,
1143                        VEC (operand_entry_t, heap) **ops, struct loop *loop)
1144 {
1145   unsigned int length = VEC_length (operand_entry_t, *ops);
1146   operand_entry_t oe1;
1147   unsigned i, j;
1148   sbitmap candidates, candidates2;
1149   unsigned nr_candidates, nr_candidates2;
1150   sbitmap_iterator sbi0;
1151   VEC (operand_entry_t, heap) **subops;
1152   htab_t ctable;
1153   bool changed = false;
1154   int next_oecount_id = 0;
1155
1156   if (length <= 1
1157       || opcode != PLUS_EXPR)
1158     return false;
1159
1160   /* Build a list of candidates to process.  */
1161   candidates = sbitmap_alloc (length);
1162   sbitmap_zero (candidates);
1163   nr_candidates = 0;
1164   FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe1)
1165     {
1166       enum tree_code dcode;
1167       gimple oe1def;
1168
1169       if (TREE_CODE (oe1->op) != SSA_NAME)
1170         continue;
1171       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1172       if (!is_gimple_assign (oe1def))
1173         continue;
1174       dcode = gimple_assign_rhs_code (oe1def);
1175       if ((dcode != MULT_EXPR
1176            && dcode != RDIV_EXPR)
1177           || !is_reassociable_op (oe1def, dcode, loop))
1178         continue;
1179
1180       SET_BIT (candidates, i);
1181       nr_candidates++;
1182     }
1183
1184   if (nr_candidates < 2)
1185     {
1186       sbitmap_free (candidates);
1187       return false;
1188     }
1189
1190   if (dump_file && (dump_flags & TDF_DETAILS))
1191     {
1192       fprintf (dump_file, "searching for un-distribute opportunities ");
1193       print_generic_expr (dump_file,
1194         VEC_index (operand_entry_t, *ops,
1195                    sbitmap_first_set_bit (candidates))->op, 0);
1196       fprintf (dump_file, " %d\n", nr_candidates);
1197     }
1198
1199   /* Build linearized sub-operand lists and the counting table.  */
1200   cvec = NULL;
1201   ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
1202   subops = XCNEWVEC (VEC (operand_entry_t, heap) *,
1203                      VEC_length (operand_entry_t, *ops));
1204   EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
1205     {
1206       gimple oedef;
1207       enum tree_code oecode;
1208       unsigned j;
1209
1210       oedef = SSA_NAME_DEF_STMT (VEC_index (operand_entry_t, *ops, i)->op);
1211       oecode = gimple_assign_rhs_code (oedef);
1212       linearize_expr_tree (&subops[i], oedef,
1213                            associative_tree_code (oecode), false);
1214
1215       FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1)
1216         {
1217           oecount c;
1218           void **slot;
1219           size_t idx;
1220           c.oecode = oecode;
1221           c.cnt = 1;
1222           c.id = next_oecount_id++;
1223           c.op = oe1->op;
1224           VEC_safe_push (oecount, heap, cvec, &c);
1225           idx = VEC_length (oecount, cvec) + 41;
1226           slot = htab_find_slot (ctable, (void *)idx, INSERT);
1227           if (!*slot)
1228             {
1229               *slot = (void *)idx;
1230             }
1231           else
1232             {
1233               VEC_pop (oecount, cvec);
1234               VEC_index (oecount, cvec, (size_t)*slot - 42)->cnt++;
1235             }
1236         }
1237     }
1238   htab_delete (ctable);
1239
1240   /* Sort the counting table.  */
1241   VEC_qsort (oecount, cvec, oecount_cmp);
1242
1243   if (dump_file && (dump_flags & TDF_DETAILS))
1244     {
1245       oecount *c;
1246       fprintf (dump_file, "Candidates:\n");
1247       FOR_EACH_VEC_ELT (oecount, cvec, j, c)
1248         {
1249           fprintf (dump_file, "  %u %s: ", c->cnt,
1250                    c->oecode == MULT_EXPR
1251                    ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1252           print_generic_expr (dump_file, c->op, 0);
1253           fprintf (dump_file, "\n");
1254         }
1255     }
1256
1257   /* Process the (operand, code) pairs in order of most occurence.  */
1258   candidates2 = sbitmap_alloc (length);
1259   while (!VEC_empty (oecount, cvec))
1260     {
1261       oecount *c = VEC_last (oecount, cvec);
1262       if (c->cnt < 2)
1263         break;
1264
1265       /* Now collect the operands in the outer chain that contain
1266          the common operand in their inner chain.  */
1267       sbitmap_zero (candidates2);
1268       nr_candidates2 = 0;
1269       EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
1270         {
1271           gimple oedef;
1272           enum tree_code oecode;
1273           unsigned j;
1274           tree op = VEC_index (operand_entry_t, *ops, i)->op;
1275
1276           /* If we undistributed in this chain already this may be
1277              a constant.  */
1278           if (TREE_CODE (op) != SSA_NAME)
1279             continue;
1280
1281           oedef = SSA_NAME_DEF_STMT (op);
1282           oecode = gimple_assign_rhs_code (oedef);
1283           if (oecode != c->oecode)
1284             continue;
1285
1286           FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1)
1287             {
1288               if (oe1->op == c->op)
1289                 {
1290                   SET_BIT (candidates2, i);
1291                   ++nr_candidates2;
1292                   break;
1293                 }
1294             }
1295         }
1296
1297       if (nr_candidates2 >= 2)
1298         {
1299           operand_entry_t oe1, oe2;
1300           tree tmpvar;
1301           gimple prod;
1302           int first = sbitmap_first_set_bit (candidates2);
1303
1304           /* Build the new addition chain.  */
1305           oe1 = VEC_index (operand_entry_t, *ops, first);
1306           if (dump_file && (dump_flags & TDF_DETAILS))
1307             {
1308               fprintf (dump_file, "Building (");
1309               print_generic_expr (dump_file, oe1->op, 0);
1310             }
1311           tmpvar = create_tmp_reg (TREE_TYPE (oe1->op), NULL);
1312           add_referenced_var (tmpvar);
1313           zero_one_operation (&oe1->op, c->oecode, c->op);
1314           EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0)
1315             {
1316               gimple sum;
1317               oe2 = VEC_index (operand_entry_t, *ops, i);
1318               if (dump_file && (dump_flags & TDF_DETAILS))
1319                 {
1320                   fprintf (dump_file, " + ");
1321                   print_generic_expr (dump_file, oe2->op, 0);
1322                 }
1323               zero_one_operation (&oe2->op, c->oecode, c->op);
1324               sum = build_and_add_sum (tmpvar, oe1->op, oe2->op, opcode);
1325               oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1326               oe2->rank = 0;
1327               oe1->op = gimple_get_lhs (sum);
1328             }
1329
1330           /* Apply the multiplication/division.  */
1331           prod = build_and_add_sum (tmpvar, oe1->op, c->op, c->oecode);
1332           if (dump_file && (dump_flags & TDF_DETAILS))
1333             {
1334               fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1335               print_generic_expr (dump_file, c->op, 0);
1336               fprintf (dump_file, "\n");
1337             }
1338
1339           /* Record it in the addition chain and disable further
1340              undistribution with this op.  */
1341           oe1->op = gimple_assign_lhs (prod);
1342           oe1->rank = get_rank (oe1->op);
1343           VEC_free (operand_entry_t, heap, subops[first]);
1344
1345           changed = true;
1346         }
1347
1348       VEC_pop (oecount, cvec);
1349     }
1350
1351   for (i = 0; i < VEC_length (operand_entry_t, *ops); ++i)
1352     VEC_free (operand_entry_t, heap, subops[i]);
1353   free (subops);
1354   VEC_free (oecount, heap, cvec);
1355   sbitmap_free (candidates);
1356   sbitmap_free (candidates2);
1357
1358   return changed;
1359 }
1360
1361 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1362    expression, examine the other OPS to see if any of them are comparisons
1363    of the same values, which we may be able to combine or eliminate.
1364    For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
1365
1366 static bool
1367 eliminate_redundant_comparison (enum tree_code opcode,
1368                                 VEC (operand_entry_t, heap) **ops,
1369                                 unsigned int currindex,
1370                                 operand_entry_t curr)
1371 {
1372   tree op1, op2;
1373   enum tree_code lcode, rcode;
1374   gimple def1, def2;
1375   int i;
1376   operand_entry_t oe;
1377
1378   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1379     return false;
1380
1381   /* Check that CURR is a comparison.  */
1382   if (TREE_CODE (curr->op) != SSA_NAME)
1383     return false;
1384   def1 = SSA_NAME_DEF_STMT (curr->op);
1385   if (!is_gimple_assign (def1))
1386     return false;
1387   lcode = gimple_assign_rhs_code (def1);
1388   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1389     return false;
1390   op1 = gimple_assign_rhs1 (def1);
1391   op2 = gimple_assign_rhs2 (def1);
1392
1393   /* Now look for a similar comparison in the remaining OPS.  */
1394   for (i = currindex + 1;
1395        VEC_iterate (operand_entry_t, *ops, i, oe);
1396        i++)
1397     {
1398       tree t;
1399
1400       if (TREE_CODE (oe->op) != SSA_NAME)
1401         continue;
1402       def2 = SSA_NAME_DEF_STMT (oe->op);
1403       if (!is_gimple_assign (def2))
1404         continue;
1405       rcode = gimple_assign_rhs_code (def2);
1406       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1407         continue;
1408
1409       /* If we got here, we have a match.  See if we can combine the
1410          two comparisons.  */
1411       if (opcode == BIT_IOR_EXPR)
1412         t = maybe_fold_or_comparisons (lcode, op1, op2,
1413                                        rcode, gimple_assign_rhs1 (def2),
1414                                        gimple_assign_rhs2 (def2));
1415       else
1416         t = maybe_fold_and_comparisons (lcode, op1, op2,
1417                                         rcode, gimple_assign_rhs1 (def2),
1418                                         gimple_assign_rhs2 (def2));
1419       if (!t)
1420         continue;
1421
1422       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1423          always give us a boolean_type_node value back.  If the original
1424          BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1425          we need to convert.  */
1426       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1427         t = fold_convert (TREE_TYPE (curr->op), t);
1428
1429       if (TREE_CODE (t) != INTEGER_CST
1430           && !operand_equal_p (t, curr->op, 0))
1431         {
1432           enum tree_code subcode;
1433           tree newop1, newop2;
1434           if (!COMPARISON_CLASS_P (t))
1435             continue;
1436           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1437           STRIP_USELESS_TYPE_CONVERSION (newop1);
1438           STRIP_USELESS_TYPE_CONVERSION (newop2);
1439           if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1440             continue;
1441         }
1442
1443       if (dump_file && (dump_flags & TDF_DETAILS))
1444         {
1445           fprintf (dump_file, "Equivalence: ");
1446           print_generic_expr (dump_file, curr->op, 0);
1447           fprintf (dump_file, " %s ", op_symbol_code (opcode));
1448           print_generic_expr (dump_file, oe->op, 0);
1449           fprintf (dump_file, " -> ");
1450           print_generic_expr (dump_file, t, 0);
1451           fprintf (dump_file, "\n");
1452         }
1453
1454       /* Now we can delete oe, as it has been subsumed by the new combined
1455          expression t.  */
1456       VEC_ordered_remove (operand_entry_t, *ops, i);
1457       reassociate_stats.ops_eliminated ++;
1458
1459       /* If t is the same as curr->op, we're done.  Otherwise we must
1460          replace curr->op with t.  Special case is if we got a constant
1461          back, in which case we add it to the end instead of in place of
1462          the current entry.  */
1463       if (TREE_CODE (t) == INTEGER_CST)
1464         {
1465           VEC_ordered_remove (operand_entry_t, *ops, currindex);
1466           add_to_ops_vec (ops, t);
1467         }
1468       else if (!operand_equal_p (t, curr->op, 0))
1469         {
1470           tree tmpvar;
1471           gimple sum;
1472           enum tree_code subcode;
1473           tree newop1;
1474           tree newop2;
1475           gcc_assert (COMPARISON_CLASS_P (t));
1476           tmpvar = create_tmp_var (TREE_TYPE (t), NULL);
1477           add_referenced_var (tmpvar);
1478           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1479           STRIP_USELESS_TYPE_CONVERSION (newop1);
1480           STRIP_USELESS_TYPE_CONVERSION (newop2);
1481           gcc_checking_assert (is_gimple_val (newop1)
1482                                && is_gimple_val (newop2));
1483           sum = build_and_add_sum (tmpvar, newop1, newop2, subcode);
1484           curr->op = gimple_get_lhs (sum);
1485         }
1486       return true;
1487     }
1488
1489   return false;
1490 }
1491
1492 /* Perform various identities and other optimizations on the list of
1493    operand entries, stored in OPS.  The tree code for the binary
1494    operation between all the operands is OPCODE.  */
1495
1496 static void
1497 optimize_ops_list (enum tree_code opcode,
1498                    VEC (operand_entry_t, heap) **ops)
1499 {
1500   unsigned int length = VEC_length (operand_entry_t, *ops);
1501   unsigned int i;
1502   operand_entry_t oe;
1503   operand_entry_t oelast = NULL;
1504   bool iterate = false;
1505
1506   if (length == 1)
1507     return;
1508
1509   oelast = VEC_last (operand_entry_t, *ops);
1510
1511   /* If the last two are constants, pop the constants off, merge them
1512      and try the next two.  */
1513   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1514     {
1515       operand_entry_t oelm1 = VEC_index (operand_entry_t, *ops, length - 2);
1516
1517       if (oelm1->rank == 0
1518           && is_gimple_min_invariant (oelm1->op)
1519           && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1520                                        TREE_TYPE (oelast->op)))
1521         {
1522           tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1523                                      oelm1->op, oelast->op);
1524
1525           if (folded && is_gimple_min_invariant (folded))
1526             {
1527               if (dump_file && (dump_flags & TDF_DETAILS))
1528                 fprintf (dump_file, "Merging constants\n");
1529
1530               VEC_pop (operand_entry_t, *ops);
1531               VEC_pop (operand_entry_t, *ops);
1532
1533               add_to_ops_vec (ops, folded);
1534               reassociate_stats.constants_eliminated++;
1535
1536               optimize_ops_list (opcode, ops);
1537               return;
1538             }
1539         }
1540     }
1541
1542   eliminate_using_constants (opcode, ops);
1543   oelast = NULL;
1544
1545   for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe);)
1546     {
1547       bool done = false;
1548
1549       if (eliminate_not_pairs (opcode, ops, i, oe))
1550         return;
1551       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1552           || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1553           || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1554         {
1555           if (done)
1556             return;
1557           iterate = true;
1558           oelast = NULL;
1559           continue;
1560         }
1561       oelast = oe;
1562       i++;
1563     }
1564
1565   length  = VEC_length (operand_entry_t, *ops);
1566   oelast = VEC_last (operand_entry_t, *ops);
1567
1568   if (iterate)
1569     optimize_ops_list (opcode, ops);
1570 }
1571
1572 /* The following functions are subroutines to optimize_range_tests and allow
1573    it to try to change a logical combination of comparisons into a range
1574    test.
1575
1576    For example, both
1577         X == 2 || X == 5 || X == 3 || X == 4
1578    and
1579         X >= 2 && X <= 5
1580    are converted to
1581         (unsigned) (X - 2) <= 3
1582
1583    For more information see comments above fold_test_range in fold-const.c,
1584    this implementation is for GIMPLE.  */
1585
1586 struct range_entry
1587 {
1588   tree exp;
1589   tree low;
1590   tree high;
1591   bool in_p;
1592   bool strict_overflow_p;
1593   unsigned int idx, next;
1594 };
1595
1596 /* This is similar to make_range in fold-const.c, but on top of
1597    GIMPLE instead of trees.  */
1598
1599 static void
1600 init_range_entry (struct range_entry *r, tree exp)
1601 {
1602   int in_p;
1603   tree low, high;
1604   bool is_bool, strict_overflow_p;
1605
1606   r->exp = NULL_TREE;
1607   r->in_p = false;
1608   r->strict_overflow_p = false;
1609   r->low = NULL_TREE;
1610   r->high = NULL_TREE;
1611   if (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))
1612     return;
1613
1614   /* Start with simply saying "EXP != 0" and then look at the code of EXP
1615      and see if we can refine the range.  Some of the cases below may not
1616      happen, but it doesn't seem worth worrying about this.  We "continue"
1617      the outer loop when we've changed something; otherwise we "break"
1618      the switch, which will "break" the while.  */
1619   low = build_int_cst (TREE_TYPE (exp), 0);
1620   high = low;
1621   in_p = 0;
1622   strict_overflow_p = false;
1623   is_bool = false;
1624   if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1625     {
1626       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1627         is_bool = true;
1628       else
1629         return;
1630     }
1631   else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1632     is_bool = true;
1633
1634   while (1)
1635     {
1636       gimple stmt;
1637       enum tree_code code;
1638       tree arg0, arg1, exp_type;
1639       tree nexp;
1640       location_t loc;
1641
1642       if (TREE_CODE (exp) != SSA_NAME)
1643         break;
1644
1645       stmt = SSA_NAME_DEF_STMT (exp);
1646       if (!is_gimple_assign (stmt))
1647         break;
1648
1649       code = gimple_assign_rhs_code (stmt);
1650       arg0 = gimple_assign_rhs1 (stmt);
1651       if (TREE_CODE (arg0) != SSA_NAME)
1652         break;
1653       arg1 = gimple_assign_rhs2 (stmt);
1654       exp_type = TREE_TYPE (exp);
1655       loc = gimple_location (stmt);
1656       switch (code)
1657         {
1658         case BIT_NOT_EXPR:
1659           if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1660             {
1661               in_p = !in_p;
1662               exp = arg0;
1663               continue;
1664             }
1665           break;
1666         case SSA_NAME:
1667           exp = arg0;
1668           continue;
1669         CASE_CONVERT:
1670           if (is_bool)
1671             goto do_default;
1672           if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1673             {
1674               if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1675                 is_bool = true;
1676               else
1677                 return;
1678             }
1679           else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1680             is_bool = true;
1681           goto do_default;
1682         case EQ_EXPR:
1683         case NE_EXPR:
1684         case LT_EXPR:
1685         case LE_EXPR:
1686         case GE_EXPR:
1687         case GT_EXPR:
1688           is_bool = true;
1689           /* FALLTHRU */
1690         default:
1691           if (!is_bool)
1692             return;
1693         do_default:
1694           nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1695                                   &low, &high, &in_p,
1696                                   &strict_overflow_p);
1697           if (nexp != NULL_TREE)
1698             {
1699               exp = nexp;
1700               gcc_assert (TREE_CODE (exp) == SSA_NAME);
1701               continue;
1702             }
1703           break;
1704         }
1705       break;
1706     }
1707   if (is_bool)
1708     {
1709       r->exp = exp;
1710       r->in_p = in_p;
1711       r->low = low;
1712       r->high = high;
1713       r->strict_overflow_p = strict_overflow_p;
1714     }
1715 }
1716
1717 /* Comparison function for qsort.  Sort entries
1718    without SSA_NAME exp first, then with SSA_NAMEs sorted
1719    by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1720    by increasing ->low and if ->low is the same, by increasing
1721    ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
1722    maximum.  */
1723
1724 static int
1725 range_entry_cmp (const void *a, const void *b)
1726 {
1727   const struct range_entry *p = (const struct range_entry *) a;
1728   const struct range_entry *q = (const struct range_entry *) b;
1729
1730   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1731     {
1732       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1733         {
1734           /* Group range_entries for the same SSA_NAME together.  */
1735           if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1736             return -1;
1737           else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1738             return 1;
1739           /* If ->low is different, NULL low goes first, then by
1740              ascending low.  */
1741           if (p->low != NULL_TREE)
1742             {
1743               if (q->low != NULL_TREE)
1744                 {
1745                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
1746                                           p->low, q->low);
1747                   if (tem && integer_onep (tem))
1748                     return -1;
1749                   tem = fold_binary (GT_EXPR, boolean_type_node,
1750                                      p->low, q->low);
1751                   if (tem && integer_onep (tem))
1752                     return 1;
1753                 }
1754               else
1755                 return 1;
1756             }
1757           else if (q->low != NULL_TREE)
1758             return -1;
1759           /* If ->high is different, NULL high goes last, before that by
1760              ascending high.  */
1761           if (p->high != NULL_TREE)
1762             {
1763               if (q->high != NULL_TREE)
1764                 {
1765                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
1766                                           p->high, q->high);
1767                   if (tem && integer_onep (tem))
1768                     return -1;
1769                   tem = fold_binary (GT_EXPR, boolean_type_node,
1770                                      p->high, q->high);
1771                   if (tem && integer_onep (tem))
1772                     return 1;
1773                 }
1774               else
1775                 return -1;
1776             }
1777           else if (p->high != NULL_TREE)
1778             return 1;
1779           /* If both ranges are the same, sort below by ascending idx.  */
1780         }
1781       else
1782         return 1;
1783     }
1784   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1785     return -1;
1786
1787   if (p->idx < q->idx)
1788     return -1;
1789   else
1790     {
1791       gcc_checking_assert (p->idx > q->idx);
1792       return 1;
1793     }
1794 }
1795
1796 /* Helper routine of optimize_range_test.
1797    [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1798    RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1799    OPCODE and OPS are arguments of optimize_range_tests.  Return
1800    true if the range merge has been successful.  */
1801
1802 static bool
1803 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1804                    unsigned int count, enum tree_code opcode,
1805                    VEC (operand_entry_t, heap) **ops, tree exp, bool in_p,
1806                    tree low, tree high, bool strict_overflow_p)
1807 {
1808   tree op = VEC_index (operand_entry_t, *ops, range->idx)->op;
1809   location_t loc = gimple_location (SSA_NAME_DEF_STMT (op));
1810   tree tem = build_range_check (loc, TREE_TYPE (op), exp, in_p, low, high);
1811   enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1812   gimple_stmt_iterator gsi;
1813
1814   if (tem == NULL_TREE)
1815     return false;
1816
1817   if (strict_overflow_p && issue_strict_overflow_warning (wc))
1818     warning_at (loc, OPT_Wstrict_overflow,
1819                 "assuming signed overflow does not occur "
1820                 "when simplifying range test");
1821
1822   if (dump_file && (dump_flags & TDF_DETAILS))
1823     {
1824       struct range_entry *r;
1825       fprintf (dump_file, "Optimizing range tests ");
1826       print_generic_expr (dump_file, range->exp, 0);
1827       fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1828       print_generic_expr (dump_file, range->low, 0);
1829       fprintf (dump_file, ", ");
1830       print_generic_expr (dump_file, range->high, 0);
1831       fprintf (dump_file, "]");
1832       for (r = otherrange; r < otherrange + count; r++)
1833         {
1834           fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
1835           print_generic_expr (dump_file, r->low, 0);
1836           fprintf (dump_file, ", ");
1837           print_generic_expr (dump_file, r->high, 0);
1838           fprintf (dump_file, "]");
1839         }
1840       fprintf (dump_file, "\n into ");
1841       print_generic_expr (dump_file, tem, 0);
1842       fprintf (dump_file, "\n");
1843     }
1844
1845   if (opcode == BIT_IOR_EXPR)
1846     tem = invert_truthvalue_loc (loc, tem);
1847
1848   tem = fold_convert_loc (loc, TREE_TYPE (op), tem);
1849   gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (op));
1850   tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
1851                                   GSI_SAME_STMT);
1852
1853   VEC_index (operand_entry_t, *ops, range->idx)->op = tem;
1854   range->exp = exp;
1855   range->low = low;
1856   range->high = high;
1857   range->in_p = in_p;
1858   range->strict_overflow_p = false;
1859
1860   for (range = otherrange; range < otherrange + count; range++)
1861     {
1862       VEC_index (operand_entry_t, *ops, range->idx)->op = error_mark_node;
1863       range->exp = NULL_TREE;
1864     }
1865   return true;
1866 }
1867
1868 /* Optimize range tests, similarly how fold_range_test optimizes
1869    it on trees.  The tree code for the binary
1870    operation between all the operands is OPCODE.  */
1871
1872 static void
1873 optimize_range_tests (enum tree_code opcode,
1874                       VEC (operand_entry_t, heap) **ops)
1875 {
1876   unsigned int length = VEC_length (operand_entry_t, *ops), i, j, first;
1877   operand_entry_t oe;
1878   struct range_entry *ranges;
1879   bool any_changes = false;
1880
1881   if (length == 1)
1882     return;
1883
1884   ranges = XNEWVEC (struct range_entry, length);
1885   for (i = 0; i < length; i++)
1886     {
1887       ranges[i].idx = i;
1888       init_range_entry (ranges + i, VEC_index (operand_entry_t, *ops, i)->op);
1889       /* For | invert it now, we will invert it again before emitting
1890          the optimized expression.  */
1891       if (opcode == BIT_IOR_EXPR)
1892         ranges[i].in_p = !ranges[i].in_p;
1893     }
1894
1895   qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
1896   for (i = 0; i < length; i++)
1897     if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
1898       break;
1899
1900   /* Try to merge ranges.  */
1901   for (first = i; i < length; i++)
1902     {
1903       tree low = ranges[i].low;
1904       tree high = ranges[i].high;
1905       int in_p = ranges[i].in_p;
1906       bool strict_overflow_p = ranges[i].strict_overflow_p;
1907       int update_fail_count = 0;
1908
1909       for (j = i + 1; j < length; j++)
1910         {
1911           if (ranges[i].exp != ranges[j].exp)
1912             break;
1913           if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
1914                              ranges[j].in_p, ranges[j].low, ranges[j].high))
1915             break;
1916           strict_overflow_p |= ranges[j].strict_overflow_p;
1917         }
1918
1919       if (j == i + 1)
1920         continue;
1921
1922       if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
1923                              ops, ranges[i].exp, in_p, low, high,
1924                              strict_overflow_p))
1925         {
1926           i = j - 1;
1927           any_changes = true;
1928         }
1929       /* Avoid quadratic complexity if all merge_ranges calls would succeed,
1930          while update_range_test would fail.  */
1931       else if (update_fail_count == 64)
1932         i = j - 1;
1933       else
1934         ++update_fail_count;
1935     }
1936
1937   /* Optimize X == CST1 || X == CST2
1938      if popcount (CST1 ^ CST2) == 1 into
1939      (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
1940      Similarly for ranges.  E.g.
1941      X != 2 && X != 3 && X != 10 && X != 11
1942      will be transformed by the above loop into
1943      (X - 2U) <= 1U && (X - 10U) <= 1U
1944      and this loop can transform that into
1945      ((X & ~8) - 2U) <= 1U.  */
1946   for (i = first; i < length; i++)
1947     {
1948       tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
1949
1950       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
1951         continue;
1952       type = TREE_TYPE (ranges[i].exp);
1953       if (!INTEGRAL_TYPE_P (type))
1954         continue;
1955       lowi = ranges[i].low;
1956       if (lowi == NULL_TREE)
1957         lowi = TYPE_MIN_VALUE (type);
1958       highi = ranges[i].high;
1959       if (highi == NULL_TREE)
1960         continue;
1961       for (j = i + 1; j < length && j < i + 64; j++)
1962         {
1963           if (ranges[j].exp == NULL_TREE)
1964             continue;
1965           if (ranges[i].exp != ranges[j].exp)
1966             break;
1967           if (ranges[j].in_p)
1968             continue;
1969           lowj = ranges[j].low;
1970           if (lowj == NULL_TREE)
1971             continue;
1972           highj = ranges[j].high;
1973           if (highj == NULL_TREE)
1974             highj = TYPE_MAX_VALUE (type);
1975           tem = fold_binary (GT_EXPR, boolean_type_node,
1976                              lowj, highi);
1977           if (tem == NULL_TREE || !integer_onep (tem))
1978             continue;
1979           lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
1980           if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
1981             continue;
1982           gcc_checking_assert (!integer_zerop (lowxor));
1983           tem = fold_binary (MINUS_EXPR, type, lowxor,
1984                              build_int_cst (type, 1));
1985           if (tem == NULL_TREE)
1986             continue;
1987           tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
1988           if (tem == NULL_TREE || !integer_zerop (tem))
1989             continue;
1990           highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
1991           if (!tree_int_cst_equal (lowxor, highxor))
1992             continue;
1993           tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
1994           exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
1995           lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
1996           highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
1997           if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
1998                                  ranges[i].in_p, lowj, highj,
1999                                  ranges[i].strict_overflow_p
2000                                  || ranges[j].strict_overflow_p))
2001             {
2002               any_changes = true;
2003               break;
2004             }
2005         }
2006     }
2007
2008   if (any_changes)
2009     {
2010       j = 0;
2011       FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe)
2012         {
2013           if (oe->op == error_mark_node)
2014             continue;
2015           else if (i != j)
2016             VEC_replace (operand_entry_t, *ops, j, oe);
2017           j++;
2018         }
2019       VEC_truncate (operand_entry_t, *ops, j);
2020     }
2021
2022   XDELETEVEC (ranges);
2023 }
2024
2025 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2026    of STMT in it's operands.  This is also known as a "destructive
2027    update" operation.  */
2028
2029 static bool
2030 is_phi_for_stmt (gimple stmt, tree operand)
2031 {
2032   gimple def_stmt;
2033   tree lhs;
2034   use_operand_p arg_p;
2035   ssa_op_iter i;
2036
2037   if (TREE_CODE (operand) != SSA_NAME)
2038     return false;
2039
2040   lhs = gimple_assign_lhs (stmt);
2041
2042   def_stmt = SSA_NAME_DEF_STMT (operand);
2043   if (gimple_code (def_stmt) != GIMPLE_PHI)
2044     return false;
2045
2046   FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2047     if (lhs == USE_FROM_PTR (arg_p))
2048       return true;
2049   return false;
2050 }
2051
2052 /* Remove def stmt of VAR if VAR has zero uses and recurse
2053    on rhs1 operand if so.  */
2054
2055 static void
2056 remove_visited_stmt_chain (tree var)
2057 {
2058   gimple stmt;
2059   gimple_stmt_iterator gsi;
2060
2061   while (1)
2062     {
2063       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2064         return;
2065       stmt = SSA_NAME_DEF_STMT (var);
2066       if (!is_gimple_assign (stmt)
2067           || !gimple_visited_p (stmt))
2068         return;
2069       var = gimple_assign_rhs1 (stmt);
2070       gsi = gsi_for_stmt (stmt);
2071       gsi_remove (&gsi, true);
2072       release_defs (stmt);
2073     }
2074 }
2075
2076 /* This function checks three consequtive operands in
2077    passed operands vector OPS starting from OPINDEX and
2078    swaps two operands if it is profitable for binary operation
2079    consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
2080
2081    We pair ops with the same rank if possible.
2082
2083    The alternative we try is to see if STMT is a destructive
2084    update style statement, which is like:
2085    b = phi (a, ...)
2086    a = c + b;
2087    In that case, we want to use the destructive update form to
2088    expose the possible vectorizer sum reduction opportunity.
2089    In that case, the third operand will be the phi node. This
2090    check is not performed if STMT is null.
2091
2092    We could, of course, try to be better as noted above, and do a
2093    lot of work to try to find these opportunities in >3 operand
2094    cases, but it is unlikely to be worth it.  */
2095
2096 static void
2097 swap_ops_for_binary_stmt (VEC(operand_entry_t, heap) * ops,
2098                           unsigned int opindex, gimple stmt)
2099 {
2100   operand_entry_t oe1, oe2, oe3;
2101
2102   oe1 = VEC_index (operand_entry_t, ops, opindex);
2103   oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
2104   oe3 = VEC_index (operand_entry_t, ops, opindex + 2);
2105
2106   if ((oe1->rank == oe2->rank
2107        && oe2->rank != oe3->rank)
2108       || (stmt && is_phi_for_stmt (stmt, oe3->op)
2109           && !is_phi_for_stmt (stmt, oe1->op)
2110           && !is_phi_for_stmt (stmt, oe2->op)))
2111     {
2112       struct operand_entry temp = *oe3;
2113       oe3->op = oe1->op;
2114       oe3->rank = oe1->rank;
2115       oe1->op = temp.op;
2116       oe1->rank= temp.rank;
2117     }
2118   else if ((oe1->rank == oe3->rank
2119             && oe2->rank != oe3->rank)
2120            || (stmt && is_phi_for_stmt (stmt, oe2->op)
2121                && !is_phi_for_stmt (stmt, oe1->op)
2122                && !is_phi_for_stmt (stmt, oe3->op)))
2123     {
2124       struct operand_entry temp = *oe2;
2125       oe2->op = oe1->op;
2126       oe2->rank = oe1->rank;
2127       oe1->op = temp.op;
2128       oe1->rank= temp.rank;
2129     }
2130 }
2131
2132 /* Recursively rewrite our linearized statements so that the operators
2133    match those in OPS[OPINDEX], putting the computation in rank
2134    order.  */
2135
2136 static void
2137 rewrite_expr_tree (gimple stmt, unsigned int opindex,
2138                    VEC(operand_entry_t, heap) * ops, bool moved)
2139 {
2140   tree rhs1 = gimple_assign_rhs1 (stmt);
2141   tree rhs2 = gimple_assign_rhs2 (stmt);
2142   operand_entry_t oe;
2143
2144   /* If we have three operands left, then we want to make sure the ones
2145      that get the double binary op are chosen wisely.  */
2146   if (opindex + 3 == VEC_length (operand_entry_t, ops))
2147     swap_ops_for_binary_stmt (ops, opindex, stmt);
2148
2149   /* The final recursion case for this function is that you have
2150      exactly two operations left.
2151      If we had one exactly one op in the entire list to start with, we
2152      would have never called this function, and the tail recursion
2153      rewrites them one at a time.  */
2154   if (opindex + 2 == VEC_length (operand_entry_t, ops))
2155     {
2156       operand_entry_t oe1, oe2;
2157
2158       oe1 = VEC_index (operand_entry_t, ops, opindex);
2159       oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
2160
2161       if (rhs1 != oe1->op || rhs2 != oe2->op)
2162         {
2163           if (dump_file && (dump_flags & TDF_DETAILS))
2164             {
2165               fprintf (dump_file, "Transforming ");
2166               print_gimple_stmt (dump_file, stmt, 0, 0);
2167             }
2168
2169           gimple_assign_set_rhs1 (stmt, oe1->op);
2170           gimple_assign_set_rhs2 (stmt, oe2->op);
2171           update_stmt (stmt);
2172           if (rhs1 != oe1->op && rhs1 != oe2->op)
2173             remove_visited_stmt_chain (rhs1);
2174
2175           if (dump_file && (dump_flags & TDF_DETAILS))
2176             {
2177               fprintf (dump_file, " into ");
2178               print_gimple_stmt (dump_file, stmt, 0, 0);
2179             }
2180
2181         }
2182       return;
2183     }
2184
2185   /* If we hit here, we should have 3 or more ops left.  */
2186   gcc_assert (opindex + 2 < VEC_length (operand_entry_t, ops));
2187
2188   /* Rewrite the next operator.  */
2189   oe = VEC_index (operand_entry_t, ops, opindex);
2190
2191   if (oe->op != rhs2)
2192     {
2193       if (!moved)
2194         {
2195           gimple_stmt_iterator gsinow, gsirhs1;
2196           gimple stmt1 = stmt, stmt2;
2197           unsigned int count;
2198
2199           gsinow = gsi_for_stmt (stmt);
2200           count = VEC_length (operand_entry_t, ops) - opindex - 2;
2201           while (count-- != 0)
2202             {
2203               stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
2204               gsirhs1 = gsi_for_stmt (stmt2);
2205               gsi_move_before (&gsirhs1, &gsinow);
2206               gsi_prev (&gsinow);
2207               stmt1 = stmt2;
2208             }
2209           moved = true;
2210         }
2211
2212       if (dump_file && (dump_flags & TDF_DETAILS))
2213         {
2214           fprintf (dump_file, "Transforming ");
2215           print_gimple_stmt (dump_file, stmt, 0, 0);
2216         }
2217
2218       gimple_assign_set_rhs2 (stmt, oe->op);
2219       update_stmt (stmt);
2220
2221       if (dump_file && (dump_flags & TDF_DETAILS))
2222         {
2223           fprintf (dump_file, " into ");
2224           print_gimple_stmt (dump_file, stmt, 0, 0);
2225         }
2226     }
2227   /* Recurse on the LHS of the binary operator, which is guaranteed to
2228      be the non-leaf side.  */
2229   rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
2230 }
2231
2232 /* Find out how many cycles we need to compute statements chain.
2233    OPS_NUM holds number os statements in a chain.  CPU_WIDTH is a
2234    maximum number of independent statements we may execute per cycle.  */
2235
2236 static int
2237 get_required_cycles (int ops_num, int cpu_width)
2238 {
2239   int res;
2240   int elog;
2241   unsigned int rest;
2242
2243   /* While we have more than 2 * cpu_width operands
2244      we may reduce number of operands by cpu_width
2245      per cycle.  */
2246   res = ops_num / (2 * cpu_width);
2247
2248   /* Remained operands count may be reduced twice per cycle
2249      until we have only one operand.  */
2250   rest = (unsigned)(ops_num - res * cpu_width);
2251   elog = exact_log2 (rest);
2252   if (elog >= 0)
2253     res += elog;
2254   else
2255     res += floor_log2 (rest) + 1;
2256
2257   return res;
2258 }
2259
2260 /* Returns an optimal number of registers to use for computation of
2261    given statements.  */
2262
2263 static int
2264 get_reassociation_width (int ops_num, enum tree_code opc,
2265                          enum machine_mode mode)
2266 {
2267   int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
2268   int width;
2269   int width_min;
2270   int cycles_best;
2271
2272   if (param_width > 0)
2273     width = param_width;
2274   else
2275     width = targetm.sched.reassociation_width (opc, mode);
2276
2277   if (width == 1)
2278     return width;
2279
2280   /* Get the minimal time required for sequence computation.  */
2281   cycles_best = get_required_cycles (ops_num, width);
2282
2283   /* Check if we may use less width and still compute sequence for
2284      the same time.  It will allow us to reduce registers usage.
2285      get_required_cycles is monotonically increasing with lower width
2286      so we can perform a binary search for the minimal width that still
2287      results in the optimal cycle count.  */
2288   width_min = 1;
2289   while (width > width_min)
2290     {
2291       int width_mid = (width + width_min) / 2;
2292
2293       if (get_required_cycles (ops_num, width_mid) == cycles_best)
2294         width = width_mid;
2295       else if (width_min < width_mid)
2296         width_min = width_mid;
2297       else
2298         break;
2299     }
2300
2301   return width;
2302 }
2303
2304 /* Recursively rewrite our linearized statements so that the operators
2305    match those in OPS[OPINDEX], putting the computation in rank
2306    order and trying to allow operations to be executed in
2307    parallel.  */
2308
2309 static void
2310 rewrite_expr_tree_parallel (gimple stmt, int width,
2311                             VEC(operand_entry_t, heap) * ops)
2312 {
2313   enum tree_code opcode = gimple_assign_rhs_code (stmt);
2314   int op_num = VEC_length (operand_entry_t, ops);
2315   int stmt_num = op_num - 1;
2316   gimple *stmts = XALLOCAVEC (gimple, stmt_num);
2317   int op_index = op_num - 1;
2318   int stmt_index = 0;
2319   int ready_stmts_end = 0;
2320   int i = 0;
2321   tree last_rhs1 = gimple_assign_rhs1 (stmt);
2322   tree lhs_var;
2323
2324   /* We start expression rewriting from the top statements.
2325      So, in this loop we create a full list of statements
2326      we will work with.  */
2327   stmts[stmt_num - 1] = stmt;
2328   for (i = stmt_num - 2; i >= 0; i--)
2329     stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
2330
2331   lhs_var = create_tmp_reg (TREE_TYPE (last_rhs1), NULL);
2332   add_referenced_var (lhs_var);
2333
2334   for (i = 0; i < stmt_num; i++)
2335     {
2336       tree op1, op2;
2337
2338       /* Determine whether we should use results of
2339          already handled statements or not.  */
2340       if (ready_stmts_end == 0
2341           && (i - stmt_index >= width || op_index < 1))
2342         ready_stmts_end = i;
2343
2344       /* Now we choose operands for the next statement.  Non zero
2345          value in ready_stmts_end means here that we should use
2346          the result of already generated statements as new operand.  */
2347       if (ready_stmts_end > 0)
2348         {
2349           op1 = gimple_assign_lhs (stmts[stmt_index++]);
2350           if (ready_stmts_end > stmt_index)
2351             op2 = gimple_assign_lhs (stmts[stmt_index++]);
2352           else if (op_index >= 0)
2353             op2 = VEC_index (operand_entry_t, ops, op_index--)->op;
2354           else
2355             {
2356               gcc_assert (stmt_index < i);
2357               op2 = gimple_assign_lhs (stmts[stmt_index++]);
2358             }
2359
2360           if (stmt_index >= ready_stmts_end)
2361             ready_stmts_end = 0;
2362         }
2363       else
2364         {
2365           if (op_index > 1)
2366             swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
2367           op2 = VEC_index (operand_entry_t, ops, op_index--)->op;
2368           op1 = VEC_index (operand_entry_t, ops, op_index--)->op;
2369         }
2370
2371       /* If we emit the last statement then we should put
2372          operands into the last statement.  It will also
2373          break the loop.  */
2374       if (op_index < 0 && stmt_index == i)
2375         i = stmt_num - 1;
2376
2377       if (dump_file && (dump_flags & TDF_DETAILS))
2378         {
2379           fprintf (dump_file, "Transforming ");
2380           print_gimple_stmt (dump_file, stmts[i], 0, 0);
2381         }
2382
2383       /* We keep original statement only for the last one.  All
2384          others are recreated.  */
2385       if (i == stmt_num - 1)
2386         {
2387           gimple_assign_set_rhs1 (stmts[i], op1);
2388           gimple_assign_set_rhs2 (stmts[i], op2);
2389           update_stmt (stmts[i]);
2390         }
2391       else
2392         stmts[i] = build_and_add_sum (lhs_var, op1, op2, opcode);
2393
2394       if (dump_file && (dump_flags & TDF_DETAILS))
2395         {
2396           fprintf (dump_file, " into ");
2397           print_gimple_stmt (dump_file, stmts[i], 0, 0);
2398         }
2399     }
2400
2401   remove_visited_stmt_chain (last_rhs1);
2402 }
2403
2404 /* Transform STMT, which is really (A +B) + (C + D) into the left
2405    linear form, ((A+B)+C)+D.
2406    Recurse on D if necessary.  */
2407
2408 static void
2409 linearize_expr (gimple stmt)
2410 {
2411   gimple_stmt_iterator gsinow, gsirhs;
2412   gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2413   gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
2414   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
2415   gimple newbinrhs = NULL;
2416   struct loop *loop = loop_containing_stmt (stmt);
2417
2418   gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
2419               && is_reassociable_op (binrhs, rhscode, loop));
2420
2421   gsinow = gsi_for_stmt (stmt);
2422   gsirhs = gsi_for_stmt (binrhs);
2423   gsi_move_before (&gsirhs, &gsinow);
2424
2425   gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
2426   gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
2427   gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
2428
2429   if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
2430     newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
2431
2432   if (dump_file && (dump_flags & TDF_DETAILS))
2433     {
2434       fprintf (dump_file, "Linearized: ");
2435       print_gimple_stmt (dump_file, stmt, 0, 0);
2436     }
2437
2438   reassociate_stats.linearized++;
2439   update_stmt (binrhs);
2440   update_stmt (binlhs);
2441   update_stmt (stmt);
2442
2443   gimple_set_visited (stmt, true);
2444   gimple_set_visited (binlhs, true);
2445   gimple_set_visited (binrhs, true);
2446
2447   /* Tail recurse on the new rhs if it still needs reassociation.  */
2448   if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
2449     /* ??? This should probably be linearize_expr (newbinrhs) but I don't
2450            want to change the algorithm while converting to tuples.  */
2451     linearize_expr (stmt);
2452 }
2453
2454 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
2455    it.  Otherwise, return NULL.  */
2456
2457 static gimple
2458 get_single_immediate_use (tree lhs)
2459 {
2460   use_operand_p immuse;
2461   gimple immusestmt;
2462
2463   if (TREE_CODE (lhs) == SSA_NAME
2464       && single_imm_use (lhs, &immuse, &immusestmt)
2465       && is_gimple_assign (immusestmt))
2466     return immusestmt;
2467
2468   return NULL;
2469 }
2470
2471 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
2472    representing the negated value.  Insertions of any necessary
2473    instructions go before GSI.
2474    This function is recursive in that, if you hand it "a_5" as the
2475    value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
2476    transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
2477
2478 static tree
2479 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
2480 {
2481   gimple negatedefstmt= NULL;
2482   tree resultofnegate;
2483
2484   /* If we are trying to negate a name, defined by an add, negate the
2485      add operands instead.  */
2486   if (TREE_CODE (tonegate) == SSA_NAME)
2487     negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
2488   if (TREE_CODE (tonegate) == SSA_NAME
2489       && is_gimple_assign (negatedefstmt)
2490       && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
2491       && has_single_use (gimple_assign_lhs (negatedefstmt))
2492       && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
2493     {
2494       gimple_stmt_iterator gsi;
2495       tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
2496       tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
2497
2498       gsi = gsi_for_stmt (negatedefstmt);
2499       rhs1 = negate_value (rhs1, &gsi);
2500       gimple_assign_set_rhs1 (negatedefstmt, rhs1);
2501
2502       gsi = gsi_for_stmt (negatedefstmt);
2503       rhs2 = negate_value (rhs2, &gsi);
2504       gimple_assign_set_rhs2 (negatedefstmt, rhs2);
2505
2506       update_stmt (negatedefstmt);
2507       return gimple_assign_lhs (negatedefstmt);
2508     }
2509
2510   tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
2511   resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
2512                                              NULL_TREE, true, GSI_SAME_STMT);
2513   return resultofnegate;
2514 }
2515
2516 /* Return true if we should break up the subtract in STMT into an add
2517    with negate.  This is true when we the subtract operands are really
2518    adds, or the subtract itself is used in an add expression.  In
2519    either case, breaking up the subtract into an add with negate
2520    exposes the adds to reassociation.  */
2521
2522 static bool
2523 should_break_up_subtract (gimple stmt)
2524 {
2525   tree lhs = gimple_assign_lhs (stmt);
2526   tree binlhs = gimple_assign_rhs1 (stmt);
2527   tree binrhs = gimple_assign_rhs2 (stmt);
2528   gimple immusestmt;
2529   struct loop *loop = loop_containing_stmt (stmt);
2530
2531   if (TREE_CODE (binlhs) == SSA_NAME
2532       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
2533     return true;
2534
2535   if (TREE_CODE (binrhs) == SSA_NAME
2536       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
2537     return true;
2538
2539   if (TREE_CODE (lhs) == SSA_NAME
2540       && (immusestmt = get_single_immediate_use (lhs))
2541       && is_gimple_assign (immusestmt)
2542       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
2543           ||  gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
2544     return true;
2545   return false;
2546 }
2547
2548 /* Transform STMT from A - B into A + -B.  */
2549
2550 static void
2551 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
2552 {
2553   tree rhs1 = gimple_assign_rhs1 (stmt);
2554   tree rhs2 = gimple_assign_rhs2 (stmt);
2555
2556   if (dump_file && (dump_flags & TDF_DETAILS))
2557     {
2558       fprintf (dump_file, "Breaking up subtract ");
2559       print_gimple_stmt (dump_file, stmt, 0, 0);
2560     }
2561
2562   rhs2 = negate_value (rhs2, gsip);
2563   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
2564   update_stmt (stmt);
2565 }
2566
2567 /* Recursively linearize a binary expression that is the RHS of STMT.
2568    Place the operands of the expression tree in the vector named OPS.  */
2569
2570 static void
2571 linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt,
2572                      bool is_associative, bool set_visited)
2573 {
2574   tree binlhs = gimple_assign_rhs1 (stmt);
2575   tree binrhs = gimple_assign_rhs2 (stmt);
2576   gimple binlhsdef, binrhsdef;
2577   bool binlhsisreassoc = false;
2578   bool binrhsisreassoc = false;
2579   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
2580   struct loop *loop = loop_containing_stmt (stmt);
2581
2582   if (set_visited)
2583     gimple_set_visited (stmt, true);
2584
2585   if (TREE_CODE (binlhs) == SSA_NAME)
2586     {
2587       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
2588       binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
2589                          && !stmt_could_throw_p (binlhsdef));
2590     }
2591
2592   if (TREE_CODE (binrhs) == SSA_NAME)
2593     {
2594       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
2595       binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
2596                          && !stmt_could_throw_p (binrhsdef));
2597     }
2598
2599   /* If the LHS is not reassociable, but the RHS is, we need to swap
2600      them.  If neither is reassociable, there is nothing we can do, so
2601      just put them in the ops vector.  If the LHS is reassociable,
2602      linearize it.  If both are reassociable, then linearize the RHS
2603      and the LHS.  */
2604
2605   if (!binlhsisreassoc)
2606     {
2607       tree temp;
2608
2609       /* If this is not a associative operation like division, give up.  */
2610       if (!is_associative)
2611         {
2612           add_to_ops_vec (ops, binrhs);
2613           return;
2614         }
2615
2616       if (!binrhsisreassoc)
2617         {
2618           add_to_ops_vec (ops, binrhs);
2619           add_to_ops_vec (ops, binlhs);
2620           return;
2621         }
2622
2623       if (dump_file && (dump_flags & TDF_DETAILS))
2624         {
2625           fprintf (dump_file, "swapping operands of ");
2626           print_gimple_stmt (dump_file, stmt, 0, 0);
2627         }
2628
2629       swap_tree_operands (stmt,
2630                           gimple_assign_rhs1_ptr (stmt),
2631                           gimple_assign_rhs2_ptr (stmt));
2632       update_stmt (stmt);
2633
2634       if (dump_file && (dump_flags & TDF_DETAILS))
2635         {
2636           fprintf (dump_file, " is now ");
2637           print_gimple_stmt (dump_file, stmt, 0, 0);
2638         }
2639
2640       /* We want to make it so the lhs is always the reassociative op,
2641          so swap.  */
2642       temp = binlhs;
2643       binlhs = binrhs;
2644       binrhs = temp;
2645     }
2646   else if (binrhsisreassoc)
2647     {
2648       linearize_expr (stmt);
2649       binlhs = gimple_assign_rhs1 (stmt);
2650       binrhs = gimple_assign_rhs2 (stmt);
2651     }
2652
2653   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
2654               || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
2655                                       rhscode, loop));
2656   linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
2657                        is_associative, set_visited);
2658   add_to_ops_vec (ops, binrhs);
2659 }
2660
2661 /* Repropagate the negates back into subtracts, since no other pass
2662    currently does it.  */
2663
2664 static void
2665 repropagate_negates (void)
2666 {
2667   unsigned int i = 0;
2668   tree negate;
2669
2670   FOR_EACH_VEC_ELT (tree, plus_negates, i, negate)
2671     {
2672       gimple user = get_single_immediate_use (negate);
2673
2674       if (!user || !is_gimple_assign (user))
2675         continue;
2676
2677       /* The negate operand can be either operand of a PLUS_EXPR
2678          (it can be the LHS if the RHS is a constant for example).
2679
2680          Force the negate operand to the RHS of the PLUS_EXPR, then
2681          transform the PLUS_EXPR into a MINUS_EXPR.  */
2682       if (gimple_assign_rhs_code (user) == PLUS_EXPR)
2683         {
2684           /* If the negated operand appears on the LHS of the
2685              PLUS_EXPR, exchange the operands of the PLUS_EXPR
2686              to force the negated operand to the RHS of the PLUS_EXPR.  */
2687           if (gimple_assign_rhs1 (user) == negate)
2688             {
2689               swap_tree_operands (user,
2690                                   gimple_assign_rhs1_ptr (user),
2691                                   gimple_assign_rhs2_ptr (user));
2692             }
2693
2694           /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
2695              the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
2696           if (gimple_assign_rhs2 (user) == negate)
2697             {
2698               tree rhs1 = gimple_assign_rhs1 (user);
2699               tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
2700               gimple_stmt_iterator gsi = gsi_for_stmt (user);
2701               gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
2702               update_stmt (user);
2703             }
2704         }
2705       else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
2706         {
2707           if (gimple_assign_rhs1 (user) == negate)
2708             {
2709               /* We have
2710                    x = -a
2711                    y = x - b
2712                  which we transform into
2713                    x = a + b
2714                    y = -x .
2715                  This pushes down the negate which we possibly can merge
2716                  into some other operation, hence insert it into the
2717                  plus_negates vector.  */
2718               gimple feed = SSA_NAME_DEF_STMT (negate);
2719               tree a = gimple_assign_rhs1 (feed);
2720               tree rhs2 = gimple_assign_rhs2 (user);
2721               gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
2722               gimple_replace_lhs (feed, negate);
2723               gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
2724               update_stmt (gsi_stmt (gsi));
2725               gsi2 = gsi_for_stmt (user);
2726               gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
2727               update_stmt (gsi_stmt (gsi2));
2728               gsi_move_before (&gsi, &gsi2);
2729               VEC_safe_push (tree, heap, plus_negates,
2730                              gimple_assign_lhs (gsi_stmt (gsi2)));
2731             }
2732           else
2733             {
2734               /* Transform "x = -a; y = b - x" into "y = b + a", getting
2735                  rid of one operation.  */
2736               gimple feed = SSA_NAME_DEF_STMT (negate);
2737               tree a = gimple_assign_rhs1 (feed);
2738               tree rhs1 = gimple_assign_rhs1 (user);
2739               gimple_stmt_iterator gsi = gsi_for_stmt (user);
2740               gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
2741               update_stmt (gsi_stmt (gsi));
2742             }
2743         }
2744     }
2745 }
2746
2747 /* Returns true if OP is of a type for which we can do reassociation.
2748    That is for integral or non-saturating fixed-point types, and for
2749    floating point type when associative-math is enabled.  */
2750
2751 static bool
2752 can_reassociate_p (tree op)
2753 {
2754   tree type = TREE_TYPE (op);
2755   if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
2756       || NON_SAT_FIXED_POINT_TYPE_P (type)
2757       || (flag_associative_math && FLOAT_TYPE_P (type)))
2758     return true;
2759   return false;
2760 }
2761
2762 /* Break up subtract operations in block BB.
2763
2764    We do this top down because we don't know whether the subtract is
2765    part of a possible chain of reassociation except at the top.
2766
2767    IE given
2768    d = f + g
2769    c = a + e
2770    b = c - d
2771    q = b - r
2772    k = t - q
2773
2774    we want to break up k = t - q, but we won't until we've transformed q
2775    = b - r, which won't be broken up until we transform b = c - d.
2776
2777    En passant, clear the GIMPLE visited flag on every statement.  */
2778
2779 static void
2780 break_up_subtract_bb (basic_block bb)
2781 {
2782   gimple_stmt_iterator gsi;
2783   basic_block son;
2784
2785   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2786     {
2787       gimple stmt = gsi_stmt (gsi);
2788       gimple_set_visited (stmt, false);
2789
2790       if (!is_gimple_assign (stmt)
2791           || !can_reassociate_p (gimple_assign_lhs (stmt)))
2792         continue;
2793
2794       /* Look for simple gimple subtract operations.  */
2795       if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
2796         {
2797           if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
2798               || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
2799             continue;
2800
2801           /* Check for a subtract used only in an addition.  If this
2802              is the case, transform it into add of a negate for better
2803              reassociation.  IE transform C = A-B into C = A + -B if C
2804              is only used in an addition.  */
2805           if (should_break_up_subtract (stmt))
2806             break_up_subtract (stmt, &gsi);
2807         }
2808       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
2809                && can_reassociate_p (gimple_assign_rhs1 (stmt)))
2810         VEC_safe_push (tree, heap, plus_negates, gimple_assign_lhs (stmt));
2811     }
2812   for (son = first_dom_son (CDI_DOMINATORS, bb);
2813        son;
2814        son = next_dom_son (CDI_DOMINATORS, son))
2815     break_up_subtract_bb (son);
2816 }
2817
2818 /* Reassociate expressions in basic block BB and its post-dominator as
2819    children.  */
2820
2821 static void
2822 reassociate_bb (basic_block bb)
2823 {
2824   gimple_stmt_iterator gsi;
2825   basic_block son;
2826
2827   for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
2828     {
2829       gimple stmt = gsi_stmt (gsi);
2830
2831       if (is_gimple_assign (stmt)
2832           && !stmt_could_throw_p (stmt))
2833         {
2834           tree lhs, rhs1, rhs2;
2835           enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2836
2837           /* If this is not a gimple binary expression, there is
2838              nothing for us to do with it.  */
2839           if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
2840             continue;
2841
2842           /* If this was part of an already processed statement,
2843              we don't need to touch it again. */
2844           if (gimple_visited_p (stmt))
2845             {
2846               /* This statement might have become dead because of previous
2847                  reassociations.  */
2848               if (has_zero_uses (gimple_get_lhs (stmt)))
2849                 {
2850                   gsi_remove (&gsi, true);
2851                   release_defs (stmt);
2852                   /* We might end up removing the last stmt above which
2853                      places the iterator to the end of the sequence.
2854                      Reset it to the last stmt in this case which might
2855                      be the end of the sequence as well if we removed
2856                      the last statement of the sequence.  In which case
2857                      we need to bail out.  */
2858                   if (gsi_end_p (gsi))
2859                     {
2860                       gsi = gsi_last_bb (bb);
2861                       if (gsi_end_p (gsi))
2862                         break;
2863                     }
2864                 }
2865               continue;
2866             }
2867
2868           lhs = gimple_assign_lhs (stmt);
2869           rhs1 = gimple_assign_rhs1 (stmt);
2870           rhs2 = gimple_assign_rhs2 (stmt);
2871
2872           /* For non-bit or min/max operations we can't associate
2873              all types.  Verify that here.  */
2874           if (rhs_code != BIT_IOR_EXPR
2875               && rhs_code != BIT_AND_EXPR
2876               && rhs_code != BIT_XOR_EXPR
2877               && rhs_code != MIN_EXPR
2878               && rhs_code != MAX_EXPR
2879               && (!can_reassociate_p (lhs)
2880                   || !can_reassociate_p (rhs1)
2881                   || !can_reassociate_p (rhs2)))
2882             continue;
2883
2884           if (associative_tree_code (rhs_code))
2885             {
2886               VEC(operand_entry_t, heap) *ops = NULL;
2887
2888               /* There may be no immediate uses left by the time we
2889                  get here because we may have eliminated them all.  */
2890               if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
2891                 continue;
2892
2893               gimple_set_visited (stmt, true);
2894               linearize_expr_tree (&ops, stmt, true, true);
2895               VEC_qsort (operand_entry_t, ops, sort_by_operand_rank);
2896               optimize_ops_list (rhs_code, &ops);
2897               if (undistribute_ops_list (rhs_code, &ops,
2898                                          loop_containing_stmt (stmt)))
2899                 {
2900                   VEC_qsort (operand_entry_t, ops, sort_by_operand_rank);
2901                   optimize_ops_list (rhs_code, &ops);
2902                 }
2903
2904               if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
2905                 optimize_range_tests (rhs_code, &ops);
2906
2907               if (VEC_length (operand_entry_t, ops) == 1)
2908                 {
2909                   if (dump_file && (dump_flags & TDF_DETAILS))
2910                     {
2911                       fprintf (dump_file, "Transforming ");
2912                       print_gimple_stmt (dump_file, stmt, 0, 0);
2913                     }
2914
2915                   rhs1 = gimple_assign_rhs1 (stmt);
2916                   gimple_assign_set_rhs_from_tree (&gsi,
2917                                                    VEC_last (operand_entry_t,
2918                                                              ops)->op);
2919                   update_stmt (stmt);
2920                   remove_visited_stmt_chain (rhs1);
2921
2922                   if (dump_file && (dump_flags & TDF_DETAILS))
2923                     {
2924                       fprintf (dump_file, " into ");
2925                       print_gimple_stmt (dump_file, stmt, 0, 0);
2926                     }
2927                 }
2928               else
2929                 {
2930                   enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
2931                   int ops_num = VEC_length (operand_entry_t, ops);
2932                   int width = get_reassociation_width (ops_num, rhs_code, mode);
2933
2934                   if (dump_file && (dump_flags & TDF_DETAILS))
2935                     fprintf (dump_file,
2936                              "Width = %d was chosen for reassociation\n", width);
2937
2938                   if (width > 1
2939                       && VEC_length (operand_entry_t, ops) > 3)
2940                     rewrite_expr_tree_parallel (stmt, width, ops);
2941                   else
2942                     rewrite_expr_tree (stmt, 0, ops, false);
2943                 }
2944
2945               VEC_free (operand_entry_t, heap, ops);
2946             }
2947         }
2948     }
2949   for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
2950        son;
2951        son = next_dom_son (CDI_POST_DOMINATORS, son))
2952     reassociate_bb (son);
2953 }
2954
2955 void dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops);
2956 void debug_ops_vector (VEC (operand_entry_t, heap) *ops);
2957
2958 /* Dump the operand entry vector OPS to FILE.  */
2959
2960 void
2961 dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops)
2962 {
2963   operand_entry_t oe;
2964   unsigned int i;
2965
2966   FOR_EACH_VEC_ELT (operand_entry_t, ops, i, oe)
2967     {
2968       fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
2969       print_generic_expr (file, oe->op, 0);
2970     }
2971 }
2972
2973 /* Dump the operand entry vector OPS to STDERR.  */
2974
2975 DEBUG_FUNCTION void
2976 debug_ops_vector (VEC (operand_entry_t, heap) *ops)
2977 {
2978   dump_ops_vector (stderr, ops);
2979 }
2980
2981 static void
2982 do_reassoc (void)
2983 {
2984   break_up_subtract_bb (ENTRY_BLOCK_PTR);
2985   reassociate_bb (EXIT_BLOCK_PTR);
2986 }
2987
2988 /* Initialize the reassociation pass.  */
2989
2990 static void
2991 init_reassoc (void)
2992 {
2993   int i;
2994   long rank = 2;
2995   tree param;
2996   int *bbs = XNEWVEC (int, last_basic_block + 1);
2997
2998   /* Find the loops, so that we can prevent moving calculations in
2999      them.  */
3000   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
3001
3002   memset (&reassociate_stats, 0, sizeof (reassociate_stats));
3003
3004   operand_entry_pool = create_alloc_pool ("operand entry pool",
3005                                           sizeof (struct operand_entry), 30);
3006   next_operand_entry_id = 0;
3007
3008   /* Reverse RPO (Reverse Post Order) will give us something where
3009      deeper loops come later.  */
3010   pre_and_rev_post_order_compute (NULL, bbs, false);
3011   bb_rank = XCNEWVEC (long, last_basic_block + 1);
3012   operand_rank = pointer_map_create ();
3013
3014   /* Give each argument a distinct rank.   */
3015   for (param = DECL_ARGUMENTS (current_function_decl);
3016        param;
3017        param = DECL_CHAIN (param))
3018     {
3019       if (gimple_default_def (cfun, param) != NULL)
3020         {
3021           tree def = gimple_default_def (cfun, param);
3022           insert_operand_rank (def, ++rank);
3023         }
3024     }
3025
3026   /* Give the chain decl a distinct rank. */
3027   if (cfun->static_chain_decl != NULL)
3028     {
3029       tree def = gimple_default_def (cfun, cfun->static_chain_decl);
3030       if (def != NULL)
3031         insert_operand_rank (def, ++rank);
3032     }
3033
3034   /* Set up rank for each BB  */
3035   for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
3036     bb_rank[bbs[i]] = ++rank  << 16;
3037
3038   free (bbs);
3039   calculate_dominance_info (CDI_POST_DOMINATORS);
3040   plus_negates = NULL;
3041 }
3042
3043 /* Cleanup after the reassociation pass, and print stats if
3044    requested.  */
3045
3046 static void
3047 fini_reassoc (void)
3048 {
3049   statistics_counter_event (cfun, "Linearized",
3050                             reassociate_stats.linearized);
3051   statistics_counter_event (cfun, "Constants eliminated",
3052                             reassociate_stats.constants_eliminated);
3053   statistics_counter_event (cfun, "Ops eliminated",
3054                             reassociate_stats.ops_eliminated);
3055   statistics_counter_event (cfun, "Statements rewritten",
3056                             reassociate_stats.rewritten);
3057
3058   pointer_map_destroy (operand_rank);
3059   free_alloc_pool (operand_entry_pool);
3060   free (bb_rank);
3061   VEC_free (tree, heap, plus_negates);
3062   free_dominance_info (CDI_POST_DOMINATORS);
3063   loop_optimizer_finalize ();
3064 }
3065
3066 /* Gate and execute functions for Reassociation.  */
3067
3068 static unsigned int
3069 execute_reassoc (void)
3070 {
3071   init_reassoc ();
3072
3073   do_reassoc ();
3074   repropagate_negates ();
3075
3076   fini_reassoc ();
3077   return 0;
3078 }
3079
3080 static bool
3081 gate_tree_ssa_reassoc (void)
3082 {
3083   return flag_tree_reassoc != 0;
3084 }
3085
3086 struct gimple_opt_pass pass_reassoc =
3087 {
3088  {
3089   GIMPLE_PASS,
3090   "reassoc",                            /* name */
3091   gate_tree_ssa_reassoc,                /* gate */
3092   execute_reassoc,                      /* execute */
3093   NULL,                                 /* sub */
3094   NULL,                                 /* next */
3095   0,                                    /* static_pass_number */
3096   TV_TREE_REASSOC,                      /* tv_id */
3097   PROP_cfg | PROP_ssa,                  /* properties_required */
3098   0,                                    /* properties_provided */
3099   0,                                    /* properties_destroyed */
3100   0,                                    /* todo_flags_start */
3101   TODO_verify_ssa
3102     | TODO_verify_flow
3103     | TODO_ggc_collect                  /* todo_flags_finish */
3104  }
3105 };