1 /* Reassociation for trees.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "tree.h"
26 #include "basic-block.h"
27 #include "gimple-pretty-print.h"
28 #include "tree-inline.h"
29 #include "tree-flow.h"
30 #include "gimple.h"
31 #include "tree-iterator.h"
32 #include "tree-pass.h"
33 #include "alloc-pool.h"
34 #include "vec.h"
35 #include "langhooks.h"
36 #include "pointer-set.h"
37 #include "cfgloop.h"
38 #include "flags.h"
39 #include "target.h"
40 #include "params.h"
41 #include "diagnostic-core.h"
43 /* This is a simple global reassociation pass. It is, in part, based
44 on the LLVM pass of the same name (They do some things more/less
45 than we do, in different orders, etc).
47 It consists of five steps:
49 1. Breaking up subtract operations into addition + negate, where
50 it would promote the reassociation of adds.
52 2. Left linearization of the expression trees, so that (A+B)+(C+D)
53 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
54 During linearization, we place the operands of the binary
55 expressions into a vector of operand_entry_t
57 3. Optimization of the operand lists, eliminating things like a +
58 -a, a & a, etc.
60 3a. Combine repeated factors with the same occurrence counts
61 into a __builtin_powi call that will later be optimized into
62 an optimal number of multiplies.
64 4. Rewrite the expression trees we linearized and optimized so
65 they are in proper rank order.
67 5. Repropagate negates, as nothing else will clean it up ATM.
69 A bit of theory on #4, since nobody seems to write anything down
70 about why it makes sense to do it the way they do it:
72 We could do this much nicer theoretically, but don't (for reasons
73 explained after how to do it theoretically nice :P).
75 In order to promote the most redundancy elimination, you want
76 binary expressions whose operands are the same rank (or
77 preferably, the same value) exposed to the redundancy eliminator,
78 for possible elimination.
80 So the way to do this if we really cared, is to build the new op
81 tree from the leaves to the roots, merging as you go, and putting the
82 new op on the end of the worklist, until you are left with one
83 thing on the worklist.
85 IE if you have to rewrite the following set of operands (listed with
86 rank in parentheses), with opcode PLUS_EXPR:
88 a (1), b (1), c (1), d (2), e (2)
91 We start with our merge worklist empty, and the ops list with all of
92 those on it.
94 You want to first merge all leaves of the same rank, as much as
95 possible.
97 So first build a binary op of
99 mergetmp = a + b, and put "mergetmp" on the merge worklist.
101 Because there is no three operand form of PLUS_EXPR, c is not going to
102 be exposed to redundancy elimination as a rank 1 operand.
104 So you might as well throw it on the merge worklist (you could also
105 consider it to now be a rank two operand, and merge it with d and e,
106 but in this case, you then have evicted e from a binary op. So at
107 least in this situation, you can't win.)
109 Then build a binary op of d + e
110 mergetmp2 = d + e
112 and put mergetmp2 on the merge worklist.
114 so merge worklist = {mergetmp, c, mergetmp2}
116 Continue building binary ops of these operations until you have only
117 one operation left on the worklist.
119 So we have
121 build binary op
122 mergetmp3 = mergetmp + c
124 worklist = {mergetmp2, mergetmp3}
126 mergetmp4 = mergetmp2 + mergetmp3
128 worklist = {mergetmp4}
130 because we have one operation left, we can now just set the original
131 statement equal to the result of that operation.
133 This will at least expose a + b and d + e to redundancy elimination
134 as binary operations.
136 For extra points, you can reuse the old statements to build the
137 mergetmps, since you shouldn't run out.
139 So why don't we do this?
141 Because it's expensive, and rarely will help. Most trees we are
142 reassociating have 3 or fewer ops. If they have 2 ops, they already
143 will be written into a nice single binary op. If you have 3 ops, a
144 single simple check suffices to tell you whether the first two are of the
145 same rank. If so, you know to order it
147 mergetmp = op1 + op2
148 newstmt = mergetmp + op3
150 instead of
151 mergetmp = op2 + op3
152 newstmt = mergetmp + op1
154 If all three are of the same rank, you can't expose them all in a
155 single binary operator anyway, so the above is *still* the best you
156 can do.
158 Thus, this is what we do. When we have three ops left, we check to see
159 what order to put them in, and call it a day. As a nod to vector sum
160 reduction, we check if any of the ops are really a phi node that is a
161 destructive update for the associating op, and keep the destructive
162 update together for vector sum reduction recognition. */
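/* An illustrative, source-level sketch of the rank-guided rewrite
   described above.  This is not part of the pass; the function names are
   hypothetical and exist only for this example.  Pairing the equal-rank
   leaves first (a, b, c of rank 1 and d, e of rank 2 in the worked
   example) exposes the partial sums a + b and d + e as separate binary
   operations that redundancy elimination can reuse.  */

int
reassoc_sketch_before (int a, int b, int c, int d, int e)
{
  /* Step 2 would left-linearize this into ((((a + b) + c) + d) + e).  */
  return (a + b) + (c + (d + e));
}

int
reassoc_sketch_after (int a, int b, int c, int d, int e)
{
  int mergetmp = a + b;		/* Equal-rank leaves merged first.  */
  int mergetmp2 = d + e;
  int mergetmp3 = mergetmp + c;
  return mergetmp2 + mergetmp3;	/* Same value, different shape.  */
}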
165 /* Statistics */
166 static struct
168 int linearized;
169 int constants_eliminated;
170 int ops_eliminated;
171 int rewritten;
172 int pows_encountered;
173 int pows_created;
174 } reassociate_stats;
176 /* Operator, rank pair. */
177 typedef struct operand_entry
179 unsigned int rank;
180 int id;
181 tree op;
182 unsigned int count;
183 } *operand_entry_t;
185 static alloc_pool operand_entry_pool;
187 /* This is used to assign a unique ID to each struct operand_entry
188 so that qsort results are identical on different hosts. */
189 static int next_operand_entry_id;
191 /* Starting rank number for a given basic block, so that we can rank
192 operations using unmovable instructions in that BB based on the bb
193 depth. */
194 static long *bb_rank;
196 /* Operand->rank hashtable. */
197 static struct pointer_map_t *operand_rank;
199 /* Forward decls. */
200 static long get_rank (tree);
203 /* Bias amount for loop-carried phis. We want this to be larger than
204 the depth of any reassociation tree we can see, but not larger than
205 the rank difference between two blocks. */
206 #define PHI_LOOP_BIAS (1 << 15)
208 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
209 an innermost loop, and the phi has only a single use which is inside
210 the loop, then the rank is the block rank of the loop latch plus an
211 extra bias for the loop-carried dependence. This causes expressions
212 calculated into an accumulator variable to be independent for each
213 iteration of the loop. If STMT is some other phi, the rank is the
214 block rank of its containing block. */
215 static long
216 phi_rank (gimple stmt)
218 basic_block bb = gimple_bb (stmt);
219 struct loop *father = bb->loop_father;
220 tree res;
221 unsigned i;
222 use_operand_p use;
223 gimple use_stmt;
225 /* We only care about real loops (those with a latch). */
226 if (!father->latch)
227 return bb_rank[bb->index];
229 /* Interesting phis must be in headers of innermost loops. */
230 if (bb != father->header
231 || father->inner)
232 return bb_rank[bb->index];
234 /* Ignore virtual SSA_NAMEs. */
235 res = gimple_phi_result (stmt);
236 if (virtual_operand_p (res))
237 return bb_rank[bb->index];
239 /* The phi definition must have a single use, and that use must be
240 within the loop. Otherwise this isn't an accumulator pattern. */
241 if (!single_imm_use (res, &use, &use_stmt)
242 || gimple_bb (use_stmt)->loop_father != father)
243 return bb_rank[bb->index];
245 /* Look for phi arguments from within the loop. If found, bias this phi. */
246 for (i = 0; i < gimple_phi_num_args (stmt); i++)
248 tree arg = gimple_phi_arg_def (stmt, i);
249 if (TREE_CODE (arg) == SSA_NAME
250 && !SSA_NAME_IS_DEFAULT_DEF (arg))
252 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
253 if (gimple_bb (def_stmt)->loop_father == father)
254 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
258 /* Must be an uninteresting phi. */
259 return bb_rank[bb->index];
262 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
263 loop-carried dependence of an innermost loop, return TRUE; else
264 return FALSE. */
265 static bool
266 loop_carried_phi (tree exp)
268 gimple phi_stmt;
269 long block_rank;
271 if (TREE_CODE (exp) != SSA_NAME
272 || SSA_NAME_IS_DEFAULT_DEF (exp))
273 return false;
275 phi_stmt = SSA_NAME_DEF_STMT (exp);
277 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
278 return false;
280 /* Non-loop-carried phis have block rank. Loop-carried phis have
281 an additional bias added in. If this phi doesn't have block rank,
282 it's biased and should not be propagated. */
283 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
285 if (phi_rank (phi_stmt) != block_rank)
286 return true;
288 return false;
291 /* Return the maximum of RANK and the rank that should be propagated
292 from expression OP. For most operands, this is just the rank of OP.
293 For loop-carried phis, the value is zero to avoid undoing the bias
294 in favor of the phi. */
295 static long
296 propagate_rank (long rank, tree op)
298 long op_rank;
300 if (loop_carried_phi (op))
301 return rank;
303 op_rank = get_rank (op);
305 return MAX (rank, op_rank);
308 /* Look up the operand rank structure for expression E. */
310 static inline long
311 find_operand_rank (tree e)
313 void **slot = pointer_map_contains (operand_rank, e);
314 return slot ? (long) (intptr_t) *slot : -1;
317 /* Insert {E,RANK} into the operand rank hashtable. */
319 static inline void
320 insert_operand_rank (tree e, long rank)
322 void **slot;
323 gcc_assert (rank > 0);
324 slot = pointer_map_insert (operand_rank, e);
325 gcc_assert (!*slot);
326 *slot = (void *) (intptr_t) rank;
329 /* Given an expression E, return the rank of the expression. */
331 static long
332 get_rank (tree e)
334 /* Constants have rank 0. */
335 if (is_gimple_min_invariant (e))
336 return 0;
338 /* SSA_NAME's have the rank of the expression they are the result
339 of.
340 For globals and uninitialized values, the rank is 0.
341 For function arguments, use the pre-setup rank.
342 For PHI nodes, stores, asm statements, etc, we use the rank of
343 the BB.
344 For simple operations, the rank is the maximum rank of any of
345 its operands, or the bb_rank, whichever is less.
346 I make no claims that this is optimal; however, it gives good
347 results. */
349 /* We make an exception to the normal ranking system to break
350 dependences of accumulator variables in loops. Suppose we
351 have a simple one-block loop containing:
353 x_1 = phi(x_0, x_2)
354 b = a + x_1
355 c = b + d
356 x_2 = c + e
358 As shown, each iteration of the calculation into x is fully
359 dependent upon the iteration before it. We would prefer to
360 see this in the form:
362 x_1 = phi(x_0, x_2)
363 b = a + d
364 c = b + e
365 x_2 = c + x_1
367 If the loop is unrolled, the calculations of b and c from
368 different iterations can be interleaved.
370 To obtain this result during reassociation, we bias the rank
371 of the phi definition x_1 upward, when it is recognized as an
372 accumulator pattern. The artificial rank causes it to be
373 added last, providing the desired independence. */
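/* A source-level sketch of this accumulator pattern follows this
   function below.  */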
375 if (TREE_CODE (e) == SSA_NAME)
377 gimple stmt;
378 long rank;
379 int i, n;
380 tree op;
382 if (SSA_NAME_IS_DEFAULT_DEF (e))
383 return find_operand_rank (e);
385 stmt = SSA_NAME_DEF_STMT (e);
386 if (gimple_code (stmt) == GIMPLE_PHI)
387 return phi_rank (stmt);
389 if (!is_gimple_assign (stmt)
390 || gimple_vdef (stmt))
391 return bb_rank[gimple_bb (stmt)->index];
393 /* If we already have a rank for this expression, use that. */
394 rank = find_operand_rank (e);
395 if (rank != -1)
396 return rank;
398 /* Otherwise, find the maximum rank for the operands. As an
399 exception, remove the bias from loop-carried phis when propagating
400 the rank so that dependent operations are not also biased. */
401 rank = 0;
402 if (gimple_assign_single_p (stmt))
404 tree rhs = gimple_assign_rhs1 (stmt);
405 n = TREE_OPERAND_LENGTH (rhs);
406 if (n == 0)
407 rank = propagate_rank (rank, rhs);
408 else
410 for (i = 0; i < n; i++)
412 op = TREE_OPERAND (rhs, i);
414 if (op != NULL_TREE)
415 rank = propagate_rank (rank, op);
419 else
421 n = gimple_num_ops (stmt);
422 for (i = 1; i < n; i++)
424 op = gimple_op (stmt, i);
425 gcc_assert (op);
426 rank = propagate_rank (rank, op);
430 if (dump_file && (dump_flags & TDF_DETAILS))
432 fprintf (dump_file, "Rank for ");
433 print_generic_expr (dump_file, e, 0);
434 fprintf (dump_file, " is %ld\n", (rank + 1));
437 /* Note the rank in the hashtable so we don't recompute it. */
438 insert_operand_rank (e, (rank + 1));
439 return (rank + 1);
442 /* Globals, etc, are rank 0 */
443 return 0;
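/* An illustrative, source-level sketch of the accumulator bias described
   inside get_rank above.  Not part of the pass; function names are
   hypothetical.  Biasing the rank of the loop-carried phi makes it the
   last addend, so the work of one iteration no longer forms a single
   serial dependence chain and unrolled iterations can be interleaved.  */

int
accum_sketch_before (const int *a, const int *d, const int *e, int n)
{
  int i, x = 0;
  for (i = 0; i < n; i++)
    {
      int b = a[i] + x;		/* Depends on the previous iteration's x.  */
      int c = b + d[i];
      x = c + e[i];
    }
  return x;
}

int
accum_sketch_after (const int *a, const int *d, const int *e, int n)
{
  int i, x = 0;
  for (i = 0; i < n; i++)
    {
      int b = a[i] + d[i];	/* Independent of x.  */
      int c = b + e[i];
      x = c + x;		/* Loop-carried value added last.  */
    }
  return x;
}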
447 /* We want integer ones to end up last no matter what, since they are
448 the ones we can do the most with. */
449 #define INTEGER_CONST_TYPE 1 << 3
450 #define FLOAT_CONST_TYPE 1 << 2
451 #define OTHER_CONST_TYPE 1 << 1
453 /* Classify an invariant tree into integer, float, or other, so that
454 we can sort them to be near other constants of the same type. */
455 static inline int
456 constant_type (tree t)
458 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
459 return INTEGER_CONST_TYPE;
460 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
461 return FLOAT_CONST_TYPE;
462 else
463 return OTHER_CONST_TYPE;
466 /* qsort comparison function to sort operand entries PA and PB by rank
467 so that the sorted array is ordered by rank in decreasing order. */
468 static int
469 sort_by_operand_rank (const void *pa, const void *pb)
471 const operand_entry_t oea = *(const operand_entry_t *)pa;
472 const operand_entry_t oeb = *(const operand_entry_t *)pb;
474 /* It's nicer for optimize_expression if constants that are likely
475 to fold when added/multiplied/whatever are put next to each
476 other. Since all constants have rank 0, order them by type. */
477 if (oeb->rank == 0 && oea->rank == 0)
479 if (constant_type (oeb->op) != constant_type (oea->op))
480 return constant_type (oeb->op) - constant_type (oea->op);
481 else
482 /* To make sorting result stable, we use unique IDs to determine
483 order. */
484 return oeb->id - oea->id;
487 /* Lastly, make sure the versions that are the same go next to each
488 other. We use SSA_NAME_VERSION because it's stable. */
489 if ((oeb->rank - oea->rank == 0)
490 && TREE_CODE (oea->op) == SSA_NAME
491 && TREE_CODE (oeb->op) == SSA_NAME)
493 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
494 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
495 else
496 return oeb->id - oea->id;
499 if (oeb->rank != oea->rank)
500 return oeb->rank - oea->rank;
501 else
502 return oeb->id - oea->id;
505 /* Add an operand entry to *OPS for the tree operand OP. */
507 static void
508 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
510 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
512 oe->op = op;
513 oe->rank = get_rank (op);
514 oe->id = next_operand_entry_id++;
515 oe->count = 1;
516 ops->safe_push (oe);
519 /* Add an operand entry to *OPS for the tree operand OP with repeat
520 count REPEAT. */
522 static void
523 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
524 HOST_WIDE_INT repeat)
526 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
528 oe->op = op;
529 oe->rank = get_rank (op);
530 oe->id = next_operand_entry_id++;
531 oe->count = repeat;
532 ops->safe_push (oe);
534 reassociate_stats.pows_encountered++;
537 /* Return true if STMT is a reassociable operation containing a binary
538 operation with tree code CODE, and is inside LOOP. */
540 static bool
541 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
543 basic_block bb = gimple_bb (stmt);
545 if (gimple_bb (stmt) == NULL)
546 return false;
548 if (!flow_bb_inside_loop_p (loop, bb))
549 return false;
551 if (is_gimple_assign (stmt)
552 && gimple_assign_rhs_code (stmt) == code
553 && has_single_use (gimple_assign_lhs (stmt)))
554 return true;
556 return false;
560 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
561 operand of that operation. Otherwise, return NULL_TREE. */
563 static tree
564 get_unary_op (tree name, enum tree_code opcode)
566 gimple stmt = SSA_NAME_DEF_STMT (name);
568 if (!is_gimple_assign (stmt))
569 return NULL_TREE;
571 if (gimple_assign_rhs_code (stmt) == opcode)
572 return gimple_assign_rhs1 (stmt);
573 return NULL_TREE;
576 /* If CURR and LAST are a pair of ops that OPCODE allows us to
577 eliminate through equivalences, do so, remove them from OPS, and
578 return true. Otherwise, return false. */
580 static bool
581 eliminate_duplicate_pair (enum tree_code opcode,
582 vec<operand_entry_t> *ops,
583 bool *all_done,
584 unsigned int i,
585 operand_entry_t curr,
586 operand_entry_t last)
589 /* If we have two of the same op, and the opcode is &, |, min, or max,
590 we can eliminate one of them.
591 If we have two of the same op, and the opcode is ^, we can
592 eliminate both of them. */
594 if (last && last->op == curr->op)
596 switch (opcode)
598 case MAX_EXPR:
599 case MIN_EXPR:
600 case BIT_IOR_EXPR:
601 case BIT_AND_EXPR:
602 if (dump_file && (dump_flags & TDF_DETAILS))
604 fprintf (dump_file, "Equivalence: ");
605 print_generic_expr (dump_file, curr->op, 0);
606 fprintf (dump_file, " [&|minmax] ");
607 print_generic_expr (dump_file, last->op, 0);
608 fprintf (dump_file, " -> ");
609 print_generic_stmt (dump_file, last->op, 0);
612 ops->ordered_remove (i);
613 reassociate_stats.ops_eliminated ++;
615 return true;
617 case BIT_XOR_EXPR:
618 if (dump_file && (dump_flags & TDF_DETAILS))
620 fprintf (dump_file, "Equivalence: ");
621 print_generic_expr (dump_file, curr->op, 0);
622 fprintf (dump_file, " ^ ");
623 print_generic_expr (dump_file, last->op, 0);
624 fprintf (dump_file, " -> nothing\n");
627 reassociate_stats.ops_eliminated += 2;
629 if (ops->length () == 2)
631 ops->create (0);
632 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
633 *all_done = true;
635 else
637 ops->ordered_remove (i-1);
638 ops->ordered_remove (i-1);
641 return true;
643 default:
644 break;
647 return false;
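/* Source-level sketch of the pairs removed by eliminate_duplicate_pair
   above.  Illustrative only; function names are hypothetical.  For &, |,
   min and max a repeated operand is redundant; for ^ the pair cancels
   to zero.  */

int
dup_pair_sketch_before (int a, int b)
{
  return ((a & a) | b) + (a ^ a ^ b);
}

int
dup_pair_sketch_after (int a, int b)
{
  return (a | b) + b;
}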
650 static vec<tree> plus_negates;
652 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
653 expression, look in OPS for a corresponding positive operation to cancel
654 it out. If we find one, remove the other from OPS, replace
655 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
656 return false. */
658 static bool
659 eliminate_plus_minus_pair (enum tree_code opcode,
660 vec<operand_entry_t> *ops,
661 unsigned int currindex,
662 operand_entry_t curr)
664 tree negateop;
665 tree notop;
666 unsigned int i;
667 operand_entry_t oe;
669 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
670 return false;
672 negateop = get_unary_op (curr->op, NEGATE_EXPR);
673 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
674 if (negateop == NULL_TREE && notop == NULL_TREE)
675 return false;
677 /* Any non-negated version will have a rank that is one less than
678 the current rank. So once we hit those ranks, if we don't find
679 one, we can stop. */
681 for (i = currindex + 1;
682 ops->iterate (i, &oe)
683 && oe->rank >= curr->rank - 1 ;
684 i++)
686 if (oe->op == negateop)
689 if (dump_file && (dump_flags & TDF_DETAILS))
691 fprintf (dump_file, "Equivalence: ");
692 print_generic_expr (dump_file, negateop, 0);
693 fprintf (dump_file, " + -");
694 print_generic_expr (dump_file, oe->op, 0);
695 fprintf (dump_file, " -> 0\n");
698 ops->ordered_remove (i);
699 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
700 ops->ordered_remove (currindex);
701 reassociate_stats.ops_eliminated ++;
703 return true;
705 else if (oe->op == notop)
707 tree op_type = TREE_TYPE (oe->op);
709 if (dump_file && (dump_flags & TDF_DETAILS))
711 fprintf (dump_file, "Equivalence: ");
712 print_generic_expr (dump_file, notop, 0);
713 fprintf (dump_file, " + ~");
714 print_generic_expr (dump_file, oe->op, 0);
715 fprintf (dump_file, " -> -1\n");
718 ops->ordered_remove (i);
719 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
720 ops->ordered_remove (currindex);
721 reassociate_stats.ops_eliminated ++;
723 return true;
727 /* CURR->OP is a negate expr in a plus expr: save it for later
728 inspection in repropagate_negates(). */
729 if (negateop != NULL_TREE)
730 plus_negates.safe_push (curr->op);
732 return false;
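/* Source-level sketch of the cancellations performed by
   eliminate_plus_minus_pair above.  Illustrative only; function names
   are hypothetical.  a + -a folds to 0, and a + ~a folds to -1 in a
   two's complement type.  */

int
plus_minus_sketch_before (int a, int b)
{
  return (b + -a + a) + (b + ~a + a);
}

int
plus_minus_sketch_after (int a, int b)
{
  return b + (b + -1);
}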
735 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
736 bitwise not expression, look in OPS for a corresponding operand to
737 cancel it out. If we find one, remove the other from OPS, replace
738 OPS[CURRINDEX] with 0, and return true. Otherwise, return
739 false. */
741 static bool
742 eliminate_not_pairs (enum tree_code opcode,
743 vec<operand_entry_t> *ops,
744 unsigned int currindex,
745 operand_entry_t curr)
747 tree notop;
748 unsigned int i;
749 operand_entry_t oe;
751 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
752 || TREE_CODE (curr->op) != SSA_NAME)
753 return false;
755 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
756 if (notop == NULL_TREE)
757 return false;
759 /* Any non-not version will have a rank that is one less than
760 the current rank. So once we hit those ranks, if we don't find
761 one, we can stop. */
763 for (i = currindex + 1;
764 ops->iterate (i, &oe)
765 && oe->rank >= curr->rank - 1;
766 i++)
768 if (oe->op == notop)
770 if (dump_file && (dump_flags & TDF_DETAILS))
772 fprintf (dump_file, "Equivalence: ");
773 print_generic_expr (dump_file, notop, 0);
774 if (opcode == BIT_AND_EXPR)
775 fprintf (dump_file, " & ~");
776 else if (opcode == BIT_IOR_EXPR)
777 fprintf (dump_file, " | ~");
778 print_generic_expr (dump_file, oe->op, 0);
779 if (opcode == BIT_AND_EXPR)
780 fprintf (dump_file, " -> 0\n");
781 else if (opcode == BIT_IOR_EXPR)
782 fprintf (dump_file, " -> -1\n");
785 if (opcode == BIT_AND_EXPR)
786 oe->op = build_zero_cst (TREE_TYPE (oe->op));
787 else if (opcode == BIT_IOR_EXPR)
788 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
789 TYPE_PRECISION (TREE_TYPE (oe->op)));
791 reassociate_stats.ops_eliminated += ops->length () - 1;
792 ops->truncate (0);
793 ops->quick_push (oe);
794 return true;
798 return false;
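/* Source-level sketch of the identities used by eliminate_not_pairs
   above.  Illustrative only; function names are hypothetical.  x & ~x
   is always 0 and x | ~x is always all ones, so each chain collapses to
   a constant.  */

unsigned int
not_pair_sketch_before (unsigned int x, unsigned int y)
{
  return (x & ~x & y) + (x | ~x | y);
}

unsigned int
not_pair_sketch_after (unsigned int x, unsigned int y)
{
  return 0u + ~0u;
}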
801 /* Use a constant value that may be present in OPS to try to eliminate
802 operands. Note that this function is only really used when we've
803 eliminated ops for other reasons, or merged constants. Across
804 single statements, fold already does all of this, plus more. There
805 is little point in duplicating logic, so I've only included the
806 identities that I could ever construct testcases to trigger. */
808 static void
809 eliminate_using_constants (enum tree_code opcode,
810 vec<operand_entry_t> *ops)
812 operand_entry_t oelast = ops->last ();
813 tree type = TREE_TYPE (oelast->op);
815 if (oelast->rank == 0
816 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
818 switch (opcode)
820 case BIT_AND_EXPR:
821 if (integer_zerop (oelast->op))
823 if (ops->length () != 1)
825 if (dump_file && (dump_flags & TDF_DETAILS))
826 fprintf (dump_file, "Found & 0, removing all other ops\n");
828 reassociate_stats.ops_eliminated += ops->length () - 1;
830 ops->truncate (0);
831 ops->quick_push (oelast);
832 return;
835 else if (integer_all_onesp (oelast->op))
837 if (ops->length () != 1)
839 if (dump_file && (dump_flags & TDF_DETAILS))
840 fprintf (dump_file, "Found & -1, removing\n");
841 ops->pop ();
842 reassociate_stats.ops_eliminated++;
845 break;
846 case BIT_IOR_EXPR:
847 if (integer_all_onesp (oelast->op))
849 if (ops->length () != 1)
851 if (dump_file && (dump_flags & TDF_DETAILS))
852 fprintf (dump_file, "Found | -1, removing all other ops\n");
854 reassociate_stats.ops_eliminated += ops->length () - 1;
856 ops->truncate (0);
857 ops->quick_push (oelast);
858 return;
861 else if (integer_zerop (oelast->op))
863 if (ops->length () != 1)
865 if (dump_file && (dump_flags & TDF_DETAILS))
866 fprintf (dump_file, "Found | 0, removing\n");
867 ops->pop ();
868 reassociate_stats.ops_eliminated++;
871 break;
872 case MULT_EXPR:
873 if (integer_zerop (oelast->op)
874 || (FLOAT_TYPE_P (type)
875 && !HONOR_NANS (TYPE_MODE (type))
876 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
877 && real_zerop (oelast->op)))
879 if (ops->length () != 1)
881 if (dump_file && (dump_flags & TDF_DETAILS))
882 fprintf (dump_file, "Found * 0, removing all other ops\n");
884 reassociate_stats.ops_eliminated += ops->length () - 1;
885 ops->truncate (1);
886 ops->quick_push (oelast);
887 return;
890 else if (integer_onep (oelast->op)
891 || (FLOAT_TYPE_P (type)
892 && !HONOR_SNANS (TYPE_MODE (type))
893 && real_onep (oelast->op)))
895 if (ops->length () != 1)
897 if (dump_file && (dump_flags & TDF_DETAILS))
898 fprintf (dump_file, "Found * 1, removing\n");
899 ops->pop ();
900 reassociate_stats.ops_eliminated++;
901 return;
904 break;
905 case BIT_XOR_EXPR:
906 case PLUS_EXPR:
907 case MINUS_EXPR:
908 if (integer_zerop (oelast->op)
909 || (FLOAT_TYPE_P (type)
910 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
911 && fold_real_zero_addition_p (type, oelast->op,
912 opcode == MINUS_EXPR)))
914 if (ops->length () != 1)
916 if (dump_file && (dump_flags & TDF_DETAILS))
917 fprintf (dump_file, "Found [|^+] 0, removing\n");
918 ops->pop ();
919 reassociate_stats.ops_eliminated++;
920 return;
923 break;
924 default:
925 break;
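/* Source-level sketch of the constant identities handled by
   eliminate_using_constants above.  Illustrative only; function names
   are hypothetical.  A 0 in an & chain or a -1 in a | chain absorbs the
   other operands, while * 1 and + 0 simply drop out.  */

int
using_constants_sketch_before (int a, int b)
{
  return (a & 0) + (b | -1) + (a * 1) + (b + 0);
}

int
using_constants_sketch_after (int a, int b)
{
  return 0 + -1 + a + b;
}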
931 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
932 bool, bool);
934 /* Structure for tracking and counting operands. */
935 typedef struct oecount_s {
936 int cnt;
937 int id;
938 enum tree_code oecode;
939 tree op;
940 } oecount;
943 /* The heap for the oecount hashtable and the sorted list of operands. */
944 static vec<oecount> cvec;
946 /* Hash function for oecount. */
948 static hashval_t
949 oecount_hash (const void *p)
951 const oecount *c = &cvec[(size_t)p - 42];
952 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
955 /* Comparison function for oecount. */
957 static int
958 oecount_eq (const void *p1, const void *p2)
960 const oecount *c1 = &cvec[(size_t)p1 - 42];
961 const oecount *c2 = &cvec[(size_t)p2 - 42];
962 return (c1->oecode == c2->oecode
963 && c1->op == c2->op);
966 /* Comparison function for qsort sorting oecount elements by count. */
968 static int
969 oecount_cmp (const void *p1, const void *p2)
971 const oecount *c1 = (const oecount *)p1;
972 const oecount *c2 = (const oecount *)p2;
973 if (c1->cnt != c2->cnt)
974 return c1->cnt - c2->cnt;
975 else
976 /* If counts are identical, use unique IDs to stabilize qsort. */
977 return c1->id - c2->id;
980 /* Return TRUE iff STMT represents a builtin call that raises OP
981 to some exponent. */
983 static bool
984 stmt_is_power_of_op (gimple stmt, tree op)
986 tree fndecl;
988 if (!is_gimple_call (stmt))
989 return false;
991 fndecl = gimple_call_fndecl (stmt);
993 if (!fndecl
994 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
995 return false;
997 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
999 CASE_FLT_FN (BUILT_IN_POW):
1000 CASE_FLT_FN (BUILT_IN_POWI):
1001 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1003 default:
1004 return false;
1008 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1009 in place and return the result. Assumes that stmt_is_power_of_op
1010 was previously called for STMT and returned TRUE. */
1012 static HOST_WIDE_INT
1013 decrement_power (gimple stmt)
1015 REAL_VALUE_TYPE c, cint;
1016 HOST_WIDE_INT power;
1017 tree arg1;
1019 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1021 CASE_FLT_FN (BUILT_IN_POW):
1022 arg1 = gimple_call_arg (stmt, 1);
1023 c = TREE_REAL_CST (arg1);
1024 power = real_to_integer (&c) - 1;
1025 real_from_integer (&cint, VOIDmode, power, 0, 0);
1026 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1027 return power;
1029 CASE_FLT_FN (BUILT_IN_POWI):
1030 arg1 = gimple_call_arg (stmt, 1);
1031 power = TREE_INT_CST_LOW (arg1) - 1;
1032 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1033 return power;
1035 default:
1036 gcc_unreachable ();
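/* Source-level sketch of how repeated factors become __builtin_powi
   calls (step 3a in the overview) and of what decrement_power above does
   to such a call.  Illustrative only; function names are hypothetical,
   and the multiply-to-powi rewrite itself is only done under unsafe
   math optimizations.  */

double
powi_sketch_before (double x)
{
  return x * x * x * x * x * x;
}

double
powi_sketch_after (double x)
{
  /* decrement_power would turn this into __builtin_powi (x, 5) when one
     factor of x is peeled off.  */
  return __builtin_powi (x, 6);
}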
1040 /* Find the single immediate use of STMT's LHS, and replace it
1041 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1042 replace *DEF with OP as well. */
1044 static void
1045 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1047 tree lhs;
1048 gimple use_stmt;
1049 use_operand_p use;
1050 gimple_stmt_iterator gsi;
1052 if (is_gimple_call (stmt))
1053 lhs = gimple_call_lhs (stmt);
1054 else
1055 lhs = gimple_assign_lhs (stmt);
1057 gcc_assert (has_single_use (lhs));
1058 single_imm_use (lhs, &use, &use_stmt);
1059 if (lhs == *def)
1060 *def = op;
1061 SET_USE (use, op);
1062 if (TREE_CODE (op) != SSA_NAME)
1063 update_stmt (use_stmt);
1064 gsi = gsi_for_stmt (stmt);
1065 unlink_stmt_vdef (stmt);
1066 gsi_remove (&gsi, true);
1067 release_defs (stmt);
1070 /* Walks the linear chain with result *DEF searching for an operation
1071 with operand OP and code OPCODE, removing it from the chain. *DEF
1072 is updated if there is only one operand but no operation left. */
1074 static void
1075 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1077 gimple stmt = SSA_NAME_DEF_STMT (*def);
1081 tree name;
1083 if (opcode == MULT_EXPR
1084 && stmt_is_power_of_op (stmt, op))
1086 if (decrement_power (stmt) == 1)
1087 propagate_op_to_single_use (op, stmt, def);
1088 return;
1091 name = gimple_assign_rhs1 (stmt);
1093 /* If this is the operation we look for and one of the operands
1094 is ours simply propagate the other operand into the stmts
1095 single use. */
1096 if (gimple_assign_rhs_code (stmt) == opcode
1097 && (name == op
1098 || gimple_assign_rhs2 (stmt) == op))
1100 if (name == op)
1101 name = gimple_assign_rhs2 (stmt);
1102 propagate_op_to_single_use (name, stmt, def);
1103 return;
1106 /* We might have a multiply of two __builtin_pow* calls, and
1107 the operand might be hiding in the rightmost one. */
1108 if (opcode == MULT_EXPR
1109 && gimple_assign_rhs_code (stmt) == opcode
1110 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1111 && has_single_use (gimple_assign_rhs2 (stmt)))
1113 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1114 if (stmt_is_power_of_op (stmt2, op))
1116 if (decrement_power (stmt2) == 1)
1117 propagate_op_to_single_use (op, stmt2, def);
1118 return;
1122 /* Continue walking the chain. */
1123 gcc_assert (name != op
1124 && TREE_CODE (name) == SSA_NAME);
1125 stmt = SSA_NAME_DEF_STMT (name);
1127 while (1);
1130 /* Builds one statement performing OP1 OPCODE OP2, making a new SSA name
1131 of TYPE for the result. Places the statement after the definition of either
1132 OP1 or OP2. Returns the new statement. */
1134 static gimple
1135 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1137 gimple op1def = NULL, op2def = NULL;
1138 gimple_stmt_iterator gsi;
1139 tree op;
1140 gimple sum;
1142 /* Create the addition statement. */
1143 op = make_ssa_name (type, NULL);
1144 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1146 /* Find an insertion place and insert. */
1147 if (TREE_CODE (op1) == SSA_NAME)
1148 op1def = SSA_NAME_DEF_STMT (op1);
1149 if (TREE_CODE (op2) == SSA_NAME)
1150 op2def = SSA_NAME_DEF_STMT (op2);
1151 if ((!op1def || gimple_nop_p (op1def))
1152 && (!op2def || gimple_nop_p (op2def)))
1154 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1155 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1157 else if ((!op1def || gimple_nop_p (op1def))
1158 || (op2def && !gimple_nop_p (op2def)
1159 && stmt_dominates_stmt_p (op1def, op2def)))
1161 if (gimple_code (op2def) == GIMPLE_PHI)
1163 gsi = gsi_after_labels (gimple_bb (op2def));
1164 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1166 else
1168 if (!stmt_ends_bb_p (op2def))
1170 gsi = gsi_for_stmt (op2def);
1171 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1173 else
1175 edge e;
1176 edge_iterator ei;
1178 FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1179 if (e->flags & EDGE_FALLTHRU)
1180 gsi_insert_on_edge_immediate (e, sum);
1184 else
1186 if (gimple_code (op1def) == GIMPLE_PHI)
1188 gsi = gsi_after_labels (gimple_bb (op1def));
1189 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1191 else
1193 if (!stmt_ends_bb_p (op1def))
1195 gsi = gsi_for_stmt (op1def);
1196 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1198 else
1200 edge e;
1201 edge_iterator ei;
1203 FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1204 if (e->flags & EDGE_FALLTHRU)
1205 gsi_insert_on_edge_immediate (e, sum);
1209 update_stmt (sum);
1211 return sum;
1214 /* Perform un-distribution of divisions and multiplications.
1215 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1216 to (A + B) / X for real X.
1218 The algorithm is organized as follows.
1220 - First we walk the addition chain *OPS looking for summands that
1221 are defined by a multiplication or a real division. This results
1222 in the candidates bitmap with relevant indices into *OPS.
1224 - Second we build the chains of multiplications or divisions for
1225 these candidates, counting the number of occurrences of (operand, code)
1226 pairs in all of the candidate chains.
1228 - Third we sort the (operand, code) pairs by number of occurrences and
1229 process them starting with the pair with the most uses.
1231 * For each such pair we walk the candidates again to build a
1232 second candidate bitmap noting all multiplication/division chains
1233 that have at least one occurrence of (operand, code).
1235 * We build an alternate addition chain only covering these
1236 candidates with one (operand, code) operation removed from their
1237 multiplication/division chain.
1239 * The first candidate gets replaced by the alternate addition chain
1240 multiplied/divided by the operand.
1242 * All candidate chains get disabled for further processing and
1243 processing of (operand, code) pairs continues.
1245 The alternate addition chains built are re-processed by the main
1246 reassociation algorithm which allows optimizing a * x * y + b * y * x
1247 to (a + b) * x * y in one invocation of the reassociation pass. */
1249 static bool
1250 undistribute_ops_list (enum tree_code opcode,
1251 vec<operand_entry_t> *ops, struct loop *loop)
1253 unsigned int length = ops->length ();
1254 operand_entry_t oe1;
1255 unsigned i, j;
1256 sbitmap candidates, candidates2;
1257 unsigned nr_candidates, nr_candidates2;
1258 sbitmap_iterator sbi0;
1259 vec<operand_entry_t> *subops;
1260 htab_t ctable;
1261 bool changed = false;
1262 int next_oecount_id = 0;
1264 if (length <= 1
1265 || opcode != PLUS_EXPR)
1266 return false;
1268 /* Build a list of candidates to process. */
1269 candidates = sbitmap_alloc (length);
1270 bitmap_clear (candidates);
1271 nr_candidates = 0;
1272 FOR_EACH_VEC_ELT (*ops, i, oe1)
1274 enum tree_code dcode;
1275 gimple oe1def;
1277 if (TREE_CODE (oe1->op) != SSA_NAME)
1278 continue;
1279 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1280 if (!is_gimple_assign (oe1def))
1281 continue;
1282 dcode = gimple_assign_rhs_code (oe1def);
1283 if ((dcode != MULT_EXPR
1284 && dcode != RDIV_EXPR)
1285 || !is_reassociable_op (oe1def, dcode, loop))
1286 continue;
1288 bitmap_set_bit (candidates, i);
1289 nr_candidates++;
1292 if (nr_candidates < 2)
1294 sbitmap_free (candidates);
1295 return false;
1298 if (dump_file && (dump_flags & TDF_DETAILS))
1300 fprintf (dump_file, "searching for un-distribute opportunities ");
1301 print_generic_expr (dump_file,
1302 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1303 fprintf (dump_file, " %d\n", nr_candidates);
1306 /* Build linearized sub-operand lists and the counting table. */
1307 cvec.create (0);
1308 ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
1309 /* ??? Macro arguments cannot have multi-argument template types in
1310 them. This typedef is needed to workaround that limitation. */
1311 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1312 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1313 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1315 gimple oedef;
1316 enum tree_code oecode;
1317 unsigned j;
1319 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1320 oecode = gimple_assign_rhs_code (oedef);
1321 linearize_expr_tree (&subops[i], oedef,
1322 associative_tree_code (oecode), false);
1324 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1326 oecount c;
1327 void **slot;
1328 size_t idx;
1329 c.oecode = oecode;
1330 c.cnt = 1;
1331 c.id = next_oecount_id++;
1332 c.op = oe1->op;
1333 cvec.safe_push (c);
1334 idx = cvec.length () + 41;
1335 slot = htab_find_slot (ctable, (void *)idx, INSERT);
1336 if (!*slot)
1338 *slot = (void *)idx;
1340 else
1342 cvec.pop ();
1343 cvec[(size_t)*slot - 42].cnt++;
1347 htab_delete (ctable);
1349 /* Sort the counting table. */
1350 cvec.qsort (oecount_cmp);
1352 if (dump_file && (dump_flags & TDF_DETAILS))
1354 oecount *c;
1355 fprintf (dump_file, "Candidates:\n");
1356 FOR_EACH_VEC_ELT (cvec, j, c)
1358 fprintf (dump_file, " %u %s: ", c->cnt,
1359 c->oecode == MULT_EXPR
1360 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1361 print_generic_expr (dump_file, c->op, 0);
1362 fprintf (dump_file, "\n");
1366 /* Process the (operand, code) pairs in order of most occurrences. */
1367 candidates2 = sbitmap_alloc (length);
1368 while (!cvec.is_empty ())
1370 oecount *c = &cvec.last ();
1371 if (c->cnt < 2)
1372 break;
1374 /* Now collect the operands in the outer chain that contain
1375 the common operand in their inner chain. */
1376 bitmap_clear (candidates2);
1377 nr_candidates2 = 0;
1378 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1380 gimple oedef;
1381 enum tree_code oecode;
1382 unsigned j;
1383 tree op = (*ops)[i]->op;
1385 /* If we undistributed in this chain already this may be
1386 a constant. */
1387 if (TREE_CODE (op) != SSA_NAME)
1388 continue;
1390 oedef = SSA_NAME_DEF_STMT (op);
1391 oecode = gimple_assign_rhs_code (oedef);
1392 if (oecode != c->oecode)
1393 continue;
1395 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1397 if (oe1->op == c->op)
1399 bitmap_set_bit (candidates2, i);
1400 ++nr_candidates2;
1401 break;
1406 if (nr_candidates2 >= 2)
1408 operand_entry_t oe1, oe2;
1409 gimple prod;
1410 int first = bitmap_first_set_bit (candidates2);
1412 /* Build the new addition chain. */
1413 oe1 = (*ops)[first];
1414 if (dump_file && (dump_flags & TDF_DETAILS))
1416 fprintf (dump_file, "Building (");
1417 print_generic_expr (dump_file, oe1->op, 0);
1419 zero_one_operation (&oe1->op, c->oecode, c->op);
1420 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1422 gimple sum;
1423 oe2 = (*ops)[i];
1424 if (dump_file && (dump_flags & TDF_DETAILS))
1426 fprintf (dump_file, " + ");
1427 print_generic_expr (dump_file, oe2->op, 0);
1429 zero_one_operation (&oe2->op, c->oecode, c->op);
1430 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1431 oe1->op, oe2->op, opcode);
1432 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1433 oe2->rank = 0;
1434 oe1->op = gimple_get_lhs (sum);
1437 /* Apply the multiplication/division. */
1438 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1439 oe1->op, c->op, c->oecode);
1440 if (dump_file && (dump_flags & TDF_DETAILS))
1442 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1443 print_generic_expr (dump_file, c->op, 0);
1444 fprintf (dump_file, "\n");
1447 /* Record it in the addition chain and disable further
1448 undistribution with this op. */
1449 oe1->op = gimple_assign_lhs (prod);
1450 oe1->rank = get_rank (oe1->op);
1451 subops[first].release ();
1453 changed = true;
1456 cvec.pop ();
1459 for (i = 0; i < ops->length (); ++i)
1460 subops[i].release ();
1461 free (subops);
1462 cvec.release ();
1463 sbitmap_free (candidates);
1464 sbitmap_free (candidates2);
1466 return changed;
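/* Source-level sketch of the un-distribution performed by
   undistribute_ops_list above, matching the example from its comment.
   Illustrative only; function names are hypothetical.  */

int
undistribute_sketch_before (int a, int b, int x, int y)
{
  return a * x * y + b * y * x;
}

int
undistribute_sketch_after (int a, int b, int x, int y)
{
  return (a + b) * x * y;
}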
1469 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1470 expression, examine the other OPS to see if any of them are comparisons
1471 of the same values, which we may be able to combine or eliminate.
1472 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1474 static bool
1475 eliminate_redundant_comparison (enum tree_code opcode,
1476 vec<operand_entry_t> *ops,
1477 unsigned int currindex,
1478 operand_entry_t curr)
1480 tree op1, op2;
1481 enum tree_code lcode, rcode;
1482 gimple def1, def2;
1483 int i;
1484 operand_entry_t oe;
1486 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1487 return false;
1489 /* Check that CURR is a comparison. */
1490 if (TREE_CODE (curr->op) != SSA_NAME)
1491 return false;
1492 def1 = SSA_NAME_DEF_STMT (curr->op);
1493 if (!is_gimple_assign (def1))
1494 return false;
1495 lcode = gimple_assign_rhs_code (def1);
1496 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1497 return false;
1498 op1 = gimple_assign_rhs1 (def1);
1499 op2 = gimple_assign_rhs2 (def1);
1501 /* Now look for a similar comparison in the remaining OPS. */
1502 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1504 tree t;
1506 if (TREE_CODE (oe->op) != SSA_NAME)
1507 continue;
1508 def2 = SSA_NAME_DEF_STMT (oe->op);
1509 if (!is_gimple_assign (def2))
1510 continue;
1511 rcode = gimple_assign_rhs_code (def2);
1512 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1513 continue;
1515 /* If we got here, we have a match. See if we can combine the
1516 two comparisons. */
1517 if (opcode == BIT_IOR_EXPR)
1518 t = maybe_fold_or_comparisons (lcode, op1, op2,
1519 rcode, gimple_assign_rhs1 (def2),
1520 gimple_assign_rhs2 (def2));
1521 else
1522 t = maybe_fold_and_comparisons (lcode, op1, op2,
1523 rcode, gimple_assign_rhs1 (def2),
1524 gimple_assign_rhs2 (def2));
1525 if (!t)
1526 continue;
1528 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1529 always give us a boolean_type_node value back. If the original
1530 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1531 we need to convert. */
1532 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1533 t = fold_convert (TREE_TYPE (curr->op), t);
1535 if (TREE_CODE (t) != INTEGER_CST
1536 && !operand_equal_p (t, curr->op, 0))
1538 enum tree_code subcode;
1539 tree newop1, newop2;
1540 if (!COMPARISON_CLASS_P (t))
1541 continue;
1542 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1543 STRIP_USELESS_TYPE_CONVERSION (newop1);
1544 STRIP_USELESS_TYPE_CONVERSION (newop2);
1545 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1546 continue;
1549 if (dump_file && (dump_flags & TDF_DETAILS))
1551 fprintf (dump_file, "Equivalence: ");
1552 print_generic_expr (dump_file, curr->op, 0);
1553 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1554 print_generic_expr (dump_file, oe->op, 0);
1555 fprintf (dump_file, " -> ");
1556 print_generic_expr (dump_file, t, 0);
1557 fprintf (dump_file, "\n");
1560 /* Now we can delete oe, as it has been subsumed by the new combined
1561 expression t. */
1562 ops->ordered_remove (i);
1563 reassociate_stats.ops_eliminated ++;
1565 /* If t is the same as curr->op, we're done. Otherwise we must
1566 replace curr->op with t. A special case is when we get a constant
1567 back, in which case we add it to the end instead of in place of
1568 the current entry. */
1569 if (TREE_CODE (t) == INTEGER_CST)
1571 ops->ordered_remove (currindex);
1572 add_to_ops_vec (ops, t);
1574 else if (!operand_equal_p (t, curr->op, 0))
1576 gimple sum;
1577 enum tree_code subcode;
1578 tree newop1;
1579 tree newop2;
1580 gcc_assert (COMPARISON_CLASS_P (t));
1581 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1582 STRIP_USELESS_TYPE_CONVERSION (newop1);
1583 STRIP_USELESS_TYPE_CONVERSION (newop2);
1584 gcc_checking_assert (is_gimple_val (newop1)
1585 && is_gimple_val (newop2));
1586 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1587 curr->op = gimple_get_lhs (sum);
1589 return true;
1592 return false;
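/* Source-level sketch of the comparison merging done by
   eliminate_redundant_comparison above, using the example from its
   comment.  Illustrative only; function names are hypothetical.  */

int
redundant_cmp_sketch_before (int a, int b)
{
  return (a < b) | (a == b);
}

int
redundant_cmp_sketch_after (int a, int b)
{
  return a <= b;
}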
1595 /* Perform various identities and other optimizations on the list of
1596 operand entries, stored in OPS. The tree code for the binary
1597 operation between all the operands is OPCODE. */
1599 static void
1600 optimize_ops_list (enum tree_code opcode,
1601 vec<operand_entry_t> *ops)
1603 unsigned int length = ops->length ();
1604 unsigned int i;
1605 operand_entry_t oe;
1606 operand_entry_t oelast = NULL;
1607 bool iterate = false;
1609 if (length == 1)
1610 return;
1612 oelast = ops->last ();
1614 /* If the last two are constants, pop the constants off, merge them
1615 and try the next two. */
1616 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1618 operand_entry_t oelm1 = (*ops)[length - 2];
1620 if (oelm1->rank == 0
1621 && is_gimple_min_invariant (oelm1->op)
1622 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1623 TREE_TYPE (oelast->op)))
1625 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1626 oelm1->op, oelast->op);
1628 if (folded && is_gimple_min_invariant (folded))
1630 if (dump_file && (dump_flags & TDF_DETAILS))
1631 fprintf (dump_file, "Merging constants\n");
1633 ops->pop ();
1634 ops->pop ();
1636 add_to_ops_vec (ops, folded);
1637 reassociate_stats.constants_eliminated++;
1639 optimize_ops_list (opcode, ops);
1640 return;
1645 eliminate_using_constants (opcode, ops);
1646 oelast = NULL;
1648 for (i = 0; ops->iterate (i, &oe);)
1650 bool done = false;
1652 if (eliminate_not_pairs (opcode, ops, i, oe))
1653 return;
1654 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1655 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1656 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1658 if (done)
1659 return;
1660 iterate = true;
1661 oelast = NULL;
1662 continue;
1664 oelast = oe;
1665 i++;
1668 length = ops->length ();
1669 oelast = ops->last ();
1671 if (iterate)
1672 optimize_ops_list (opcode, ops);
1675 /* The following functions are subroutines to optimize_range_tests and allow
1676 it to try to change a logical combination of comparisons into a range
1677 test.
1679 For example, both
1680 X == 2 || X == 5 || X == 3 || X == 4
1681 and
1682 X >= 2 && X <= 5
1683 are converted to
1684 (unsigned) (X - 2) <= 3
1686 For more information see comments above fold_test_range in fold-const.c,
1687 this implementation is for GIMPLE. */
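/* Source-level sketch of the range-test rewrite described above.
   Illustrative only; function names are hypothetical.  Because the
   subtraction is done in an unsigned type, values below 2 wrap around
   and fail the single comparison.  */

int
range_test_sketch_before (unsigned int x)
{
  return x == 2 || x == 5 || x == 3 || x == 4;
}

int
range_test_sketch_after (unsigned int x)
{
  return (x - 2) <= 3;
}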
1689 struct range_entry
1691 tree exp;
1692 tree low;
1693 tree high;
1694 bool in_p;
1695 bool strict_overflow_p;
1696 unsigned int idx, next;
1699 /* This is similar to make_range in fold-const.c, but on top of
1700 GIMPLE instead of trees. If EXP is non-NULL, it should be
1701 an SSA_NAME and STMT argument is ignored, otherwise STMT
1702 argument should be a GIMPLE_COND. */
1704 static void
1705 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1707 int in_p;
1708 tree low, high;
1709 bool is_bool, strict_overflow_p;
1711 r->exp = NULL_TREE;
1712 r->in_p = false;
1713 r->strict_overflow_p = false;
1714 r->low = NULL_TREE;
1715 r->high = NULL_TREE;
1716 if (exp != NULL_TREE
1717 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1718 return;
1720 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1721 and see if we can refine the range. Some of the cases below may not
1722 happen, but it doesn't seem worth worrying about this. We "continue"
1723 the outer loop when we've changed something; otherwise we "break"
1724 the switch, which will "break" the while. */
1725 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1726 high = low;
1727 in_p = 0;
1728 strict_overflow_p = false;
1729 is_bool = false;
1730 if (exp == NULL_TREE)
1731 is_bool = true;
1732 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1734 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1735 is_bool = true;
1736 else
1737 return;
1739 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1740 is_bool = true;
1742 while (1)
1744 enum tree_code code;
1745 tree arg0, arg1, exp_type;
1746 tree nexp;
1747 location_t loc;
1749 if (exp != NULL_TREE)
1751 if (TREE_CODE (exp) != SSA_NAME)
1752 break;
1754 stmt = SSA_NAME_DEF_STMT (exp);
1755 if (!is_gimple_assign (stmt))
1756 break;
1758 code = gimple_assign_rhs_code (stmt);
1759 arg0 = gimple_assign_rhs1 (stmt);
1760 arg1 = gimple_assign_rhs2 (stmt);
1761 exp_type = TREE_TYPE (exp);
1763 else
1765 code = gimple_cond_code (stmt);
1766 arg0 = gimple_cond_lhs (stmt);
1767 arg1 = gimple_cond_rhs (stmt);
1768 exp_type = boolean_type_node;
1771 if (TREE_CODE (arg0) != SSA_NAME)
1772 break;
1773 loc = gimple_location (stmt);
1774 switch (code)
1776 case BIT_NOT_EXPR:
1777 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1779 in_p = !in_p;
1780 exp = arg0;
1781 continue;
1783 break;
1784 case SSA_NAME:
1785 exp = arg0;
1786 continue;
1787 CASE_CONVERT:
1788 if (is_bool)
1789 goto do_default;
1790 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1792 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1793 is_bool = true;
1794 else
1795 return;
1797 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1798 is_bool = true;
1799 goto do_default;
1800 case EQ_EXPR:
1801 case NE_EXPR:
1802 case LT_EXPR:
1803 case LE_EXPR:
1804 case GE_EXPR:
1805 case GT_EXPR:
1806 is_bool = true;
1807 /* FALLTHRU */
1808 default:
1809 if (!is_bool)
1810 return;
1811 do_default:
1812 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1813 &low, &high, &in_p,
1814 &strict_overflow_p);
1815 if (nexp != NULL_TREE)
1817 exp = nexp;
1818 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1819 continue;
1821 break;
1823 break;
1825 if (is_bool)
1827 r->exp = exp;
1828 r->in_p = in_p;
1829 r->low = low;
1830 r->high = high;
1831 r->strict_overflow_p = strict_overflow_p;
1835 /* Comparison function for qsort. Sort entries
1836 without SSA_NAME exp first, then with SSA_NAMEs sorted
1837 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1838 by increasing ->low and if ->low is the same, by increasing
1839 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1840 maximum. */
1842 static int
1843 range_entry_cmp (const void *a, const void *b)
1845 const struct range_entry *p = (const struct range_entry *) a;
1846 const struct range_entry *q = (const struct range_entry *) b;
1848 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1850 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1852 /* Group range_entries for the same SSA_NAME together. */
1853 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1854 return -1;
1855 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1856 return 1;
1857 /* If ->low is different, NULL low goes first, then by
1858 ascending low. */
1859 if (p->low != NULL_TREE)
1861 if (q->low != NULL_TREE)
1863 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1864 p->low, q->low);
1865 if (tem && integer_onep (tem))
1866 return -1;
1867 tem = fold_binary (GT_EXPR, boolean_type_node,
1868 p->low, q->low);
1869 if (tem && integer_onep (tem))
1870 return 1;
1872 else
1873 return 1;
1875 else if (q->low != NULL_TREE)
1876 return -1;
1877 /* If ->high is different, NULL high goes last, before that by
1878 ascending high. */
1879 if (p->high != NULL_TREE)
1881 if (q->high != NULL_TREE)
1883 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1884 p->high, q->high);
1885 if (tem && integer_onep (tem))
1886 return -1;
1887 tem = fold_binary (GT_EXPR, boolean_type_node,
1888 p->high, q->high);
1889 if (tem && integer_onep (tem))
1890 return 1;
1892 else
1893 return -1;
1895 else if (p->high != NULL_TREE)
1896 return 1;
1897 /* If both ranges are the same, sort below by ascending idx. */
1899 else
1900 return 1;
1902 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1903 return -1;
1905 if (p->idx < q->idx)
1906 return -1;
1907 else
1909 gcc_checking_assert (p->idx > q->idx);
1910 return 1;
1914 /* Helper routine of optimize_range_tests.
1915 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1916 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1917 OPCODE and OPS are arguments of optimize_range_tests. Return
1918 true if the range merge has been successful.
1919 If OPCODE is ERROR_MARK, this is called from within
1920 maybe_optimize_range_tests and is performing inter-bb range optimization.
1921 Changes should then be performed right away, and whether an op is
1922 BIT_AND_EXPR or BIT_IOR_EXPR is found in oe->rank. */
1924 static bool
1925 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1926 unsigned int count, enum tree_code opcode,
1927 vec<operand_entry_t> *ops, tree exp, bool in_p,
1928 tree low, tree high, bool strict_overflow_p)
1930 operand_entry_t oe = (*ops)[range->idx];
1931 tree op = oe->op;
1932 gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id));
1933 location_t loc = gimple_location (stmt);
1934 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
1935 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
1936 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1937 gimple_stmt_iterator gsi;
1939 if (tem == NULL_TREE)
1940 return false;
1942 if (strict_overflow_p && issue_strict_overflow_warning (wc))
1943 warning_at (loc, OPT_Wstrict_overflow,
1944 "assuming signed overflow does not occur "
1945 "when simplifying range test");
1947 if (dump_file && (dump_flags & TDF_DETAILS))
1949 struct range_entry *r;
1950 fprintf (dump_file, "Optimizing range tests ");
1951 print_generic_expr (dump_file, range->exp, 0);
1952 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1953 print_generic_expr (dump_file, range->low, 0);
1954 fprintf (dump_file, ", ");
1955 print_generic_expr (dump_file, range->high, 0);
1956 fprintf (dump_file, "]");
1957 for (r = otherrange; r < otherrange + count; r++)
1959 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
1960 print_generic_expr (dump_file, r->low, 0);
1961 fprintf (dump_file, ", ");
1962 print_generic_expr (dump_file, r->high, 0);
1963 fprintf (dump_file, "]");
1965 fprintf (dump_file, "\n into ");
1966 print_generic_expr (dump_file, tem, 0);
1967 fprintf (dump_file, "\n");
1970 if (opcode == BIT_IOR_EXPR
1971 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
1972 tem = invert_truthvalue_loc (loc, tem);
1974 tem = fold_convert_loc (loc, optype, tem);
1975 gsi = gsi_for_stmt (stmt);
1976 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
1977 GSI_SAME_STMT);
1979 /* If doing inter-bb range test optimization, update the
1980 stmts immediately. Start with changing the first range test
1981 immediate use to the new value (TEM), or, if the first range
1982 test is a GIMPLE_COND stmt, change that condition. */
1983 if (opcode == ERROR_MARK)
1985 if (op)
1987 imm_use_iterator iter;
1988 use_operand_p use_p;
1989 gimple use_stmt;
1991 FOR_EACH_IMM_USE_STMT (use_stmt, iter, op)
1993 if (is_gimple_debug (use_stmt))
1994 continue;
1995 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
1996 SET_USE (use_p, tem);
1997 update_stmt (use_stmt);
2000 else
2002 gimple_cond_set_code (stmt, NE_EXPR);
2003 gimple_cond_set_lhs (stmt, tem);
2004 gimple_cond_set_rhs (stmt, boolean_false_node);
2005 update_stmt (stmt);
2008 oe->op = tem;
2009 range->exp = exp;
2010 range->low = low;
2011 range->high = high;
2012 range->in_p = in_p;
2013 range->strict_overflow_p = false;
2015 for (range = otherrange; range < otherrange + count; range++)
2017 oe = (*ops)[range->idx];
2018 /* Now change all the other range test immediate uses, so that
2019 those tests will be optimized away. */
2020 if (opcode == ERROR_MARK)
2022 if (oe->op)
2024 imm_use_iterator iter;
2025 use_operand_p use_p;
2026 gimple use_stmt;
2028 FOR_EACH_IMM_USE_STMT (use_stmt, iter, oe->op)
2030 if (is_gimple_debug (use_stmt))
2031 continue;
2032 /* If imm use of _8 is a statement like _7 = _8 | _9;,
2033 adjust it into _7 = _9;. */
2034 if (is_gimple_assign (use_stmt)
2035 && gimple_assign_rhs_code (use_stmt) == oe->rank)
2037 tree expr = NULL_TREE;
2038 if (oe->op == gimple_assign_rhs1 (use_stmt))
2039 expr = gimple_assign_rhs2 (use_stmt);
2040 else if (oe->op == gimple_assign_rhs2 (use_stmt))
2041 expr = gimple_assign_rhs1 (use_stmt);
2042 if (expr
2043 && expr != oe->op
2044 && TREE_CODE (expr) == SSA_NAME)
2046 gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt);
2047 gimple_assign_set_rhs_with_ops (&gsi2, SSA_NAME,
2048 expr, NULL_TREE);
2049 update_stmt (use_stmt);
2050 continue;
2053 /* If imm use of _8 is a statement like _7 = (int) _8;,
2054 adjust it into _7 = 0; or _7 = 1;. */
2055 if (gimple_assign_cast_p (use_stmt)
2056 && oe->op == gimple_assign_rhs1 (use_stmt))
2058 tree lhs = gimple_assign_lhs (use_stmt);
2059 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
2061 gimple_stmt_iterator gsi2
2062 = gsi_for_stmt (use_stmt);
2063 tree expr = build_int_cst (TREE_TYPE (lhs),
2064 oe->rank == BIT_IOR_EXPR
2065 ? 0 : 1);
2066 gimple_assign_set_rhs_with_ops (&gsi2,
2067 INTEGER_CST,
2068 expr, NULL_TREE);
2069 update_stmt (use_stmt);
2070 continue;
2073 /* Otherwise replace the use with 0 or 1. */
2074 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2075 SET_USE (use_p,
2076 build_int_cst (TREE_TYPE (oe->op),
2077 oe->rank == BIT_IOR_EXPR
2078 ? 0 : 1));
2079 update_stmt (use_stmt);
2082 else
2084 /* If range test was a GIMPLE_COND, simply change it
2085 into an always false or always true condition. */
2086 stmt = last_stmt (BASIC_BLOCK (oe->id));
2087 if (oe->rank == BIT_IOR_EXPR)
2088 gimple_cond_make_false (stmt);
2089 else
2090 gimple_cond_make_true (stmt);
2091 update_stmt (stmt);
2094 oe->op = error_mark_node;
2095 range->exp = NULL_TREE;
2097 return true;
2100 /* Optimize range tests, similarly to how fold_range_test optimizes
2101 them on trees. The tree code for the binary
2102 operation between all the operands is OPCODE.
2103 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2104 maybe_optimize_range_tests for inter-bb range optimization.
2105 In that case, if oe->op is NULL, oe->id is the index of the bb whose
2106 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2107 the actual opcode. */
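/* For instance (purely illustrative, SSA names invented): the tests
   a_1 == 3 || a_1 == 4 can be merged by the loop below into a single
   range test for a_1 in [3, 4], which build_range_check typically
   emits as an unsigned subtract-and-compare.  */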
2109 static void
2110 optimize_range_tests (enum tree_code opcode,
2111 vec<operand_entry_t> *ops)
2113 unsigned int length = ops->length (), i, j, first;
2114 operand_entry_t oe;
2115 struct range_entry *ranges;
2116 bool any_changes = false;
2118 if (length == 1)
2119 return;
2121 ranges = XNEWVEC (struct range_entry, length);
2122 for (i = 0; i < length; i++)
2124 oe = (*ops)[i];
2125 ranges[i].idx = i;
2126 init_range_entry (ranges + i, oe->op,
2127 oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id)));
2128 /* For |, invert it now; we will invert it again before emitting
2129 the optimized expression. */
2130 if (opcode == BIT_IOR_EXPR
2131 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2132 ranges[i].in_p = !ranges[i].in_p;
2135 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2136 for (i = 0; i < length; i++)
2137 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2138 break;
2140 /* Try to merge ranges. */
2141 for (first = i; i < length; i++)
2143 tree low = ranges[i].low;
2144 tree high = ranges[i].high;
2145 int in_p = ranges[i].in_p;
2146 bool strict_overflow_p = ranges[i].strict_overflow_p;
2147 int update_fail_count = 0;
2149 for (j = i + 1; j < length; j++)
2151 if (ranges[i].exp != ranges[j].exp)
2152 break;
2153 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2154 ranges[j].in_p, ranges[j].low, ranges[j].high))
2155 break;
2156 strict_overflow_p |= ranges[j].strict_overflow_p;
2159 if (j == i + 1)
2160 continue;
2162 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2163 ops, ranges[i].exp, in_p, low, high,
2164 strict_overflow_p))
2166 i = j - 1;
2167 any_changes = true;
2169 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2170 while update_range_test would fail. */
2171 else if (update_fail_count == 64)
2172 i = j - 1;
2173 else
2174 ++update_fail_count;
2177 /* Optimize X == CST1 || X == CST2
2178 if popcount (CST1 ^ CST2) == 1 into
2179 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2180 Similarly for ranges. E.g.
2181 X != 2 && X != 3 && X != 10 && X != 11
2182 will be transformed by the above loop into
2183 (X - 2U) <= 1U && (X - 10U) <= 1U
2184 and this loop can transform that into
2185 ((X & ~8) - 2U) <= 1U. */
2186 for (i = first; i < length; i++)
2188 tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
2190 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2191 continue;
2192 type = TREE_TYPE (ranges[i].exp);
2193 if (!INTEGRAL_TYPE_P (type))
2194 continue;
2195 lowi = ranges[i].low;
2196 if (lowi == NULL_TREE)
2197 lowi = TYPE_MIN_VALUE (type);
2198 highi = ranges[i].high;
2199 if (highi == NULL_TREE)
2200 continue;
2201 for (j = i + 1; j < length && j < i + 64; j++)
2203 if (ranges[j].exp == NULL_TREE)
2204 continue;
2205 if (ranges[i].exp != ranges[j].exp)
2206 break;
2207 if (ranges[j].in_p)
2208 continue;
2209 lowj = ranges[j].low;
2210 if (lowj == NULL_TREE)
2211 continue;
2212 highj = ranges[j].high;
2213 if (highj == NULL_TREE)
2214 highj = TYPE_MAX_VALUE (type);
2215 tem = fold_binary (GT_EXPR, boolean_type_node,
2216 lowj, highi);
2217 if (tem == NULL_TREE || !integer_onep (tem))
2218 continue;
2219 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2220 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2221 continue;
2222 gcc_checking_assert (!integer_zerop (lowxor));
2223 tem = fold_binary (MINUS_EXPR, type, lowxor,
2224 build_int_cst (type, 1));
2225 if (tem == NULL_TREE)
2226 continue;
2227 tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
2228 if (tem == NULL_TREE || !integer_zerop (tem))
2229 continue;
2230 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2231 if (!tree_int_cst_equal (lowxor, highxor))
2232 continue;
2233 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2234 exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
2235 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2236 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2237 if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
2238 ranges[i].in_p, lowj, highj,
2239 ranges[i].strict_overflow_p
2240 || ranges[j].strict_overflow_p))
2242 any_changes = true;
2243 break;
2248 if (any_changes && opcode != ERROR_MARK)
2250 j = 0;
2251 FOR_EACH_VEC_ELT (*ops, i, oe)
2253 if (oe->op == error_mark_node)
2254 continue;
2255 else if (i != j)
2256 (*ops)[j] = oe;
2257 j++;
2259 ops->truncate (j);
2262 XDELETEVEC (ranges);
2265 /* Return true if STMT is a cast like:
2266 <bb N>:
2268 _123 = (int) _234;
2270 <bb M>:
2271 # _345 = PHI <_123(N), 1(...), 1(...)>
2272 where _234 has bool type, _123 has a single use and
2273 bb N has a single successor M. This is commonly used in
2274 the last block of a range test. */
2276 static bool
2277 final_range_test_p (gimple stmt)
2279 basic_block bb, rhs_bb;
2280 edge e;
2281 tree lhs, rhs;
2282 use_operand_p use_p;
2283 gimple use_stmt;
2285 if (!gimple_assign_cast_p (stmt))
2286 return false;
2287 bb = gimple_bb (stmt);
2288 if (!single_succ_p (bb))
2289 return false;
2290 e = single_succ_edge (bb);
2291 if (e->flags & EDGE_COMPLEX)
2292 return false;
2294 lhs = gimple_assign_lhs (stmt);
2295 rhs = gimple_assign_rhs1 (stmt);
2296 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2297 || TREE_CODE (rhs) != SSA_NAME
2298 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2299 return false;
2301 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2302 if (!single_imm_use (lhs, &use_p, &use_stmt))
2303 return false;
2305 if (gimple_code (use_stmt) != GIMPLE_PHI
2306 || gimple_bb (use_stmt) != e->dest)
2307 return false;
2309 /* And that the rhs is defined in the same loop. */
2310 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2311 if (rhs_bb == NULL
2312 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2313 return false;
2315 return true;
2318 /* Return true if BB is a suitable basic block for inter-bb range test
2319 optimization. If BACKWARD is true, BB should be the only predecessor
2320 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2321 or compared against to find a common basic block to which all conditions
2322 branch if true resp. false. If BACKWARD is false, TEST_BB should
2323 be the only predecessor of BB. */
2325 static bool
2326 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2327 bool backward)
2329 edge_iterator ei, ei2;
2330 edge e, e2;
2331 gimple stmt;
2332 gimple_stmt_iterator gsi;
2333 bool other_edge_seen = false;
2334 bool is_cond;
2336 if (test_bb == bb)
2337 return false;
2338 /* Check last stmt first. */
2339 stmt = last_stmt (bb);
2340 if (stmt == NULL
2341 || (gimple_code (stmt) != GIMPLE_COND
2342 && (backward || !final_range_test_p (stmt)))
2343 || gimple_visited_p (stmt)
2344 || stmt_could_throw_p (stmt)
2345 || *other_bb == bb)
2346 return false;
2347 is_cond = gimple_code (stmt) == GIMPLE_COND;
2348 if (is_cond)
2350 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2351 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2352 to *OTHER_BB (if not set yet, try to find it out). */
2353 if (EDGE_COUNT (bb->succs) != 2)
2354 return false;
2355 FOR_EACH_EDGE (e, ei, bb->succs)
2357 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2358 return false;
2359 if (e->dest == test_bb)
2361 if (backward)
2362 continue;
2363 else
2364 return false;
2366 if (e->dest == bb)
2367 return false;
2368 if (*other_bb == NULL)
2370 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2371 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2372 return false;
2373 else if (e->dest == e2->dest)
2374 *other_bb = e->dest;
2375 if (*other_bb == NULL)
2376 return false;
2378 if (e->dest == *other_bb)
2379 other_edge_seen = true;
2380 else if (backward)
2381 return false;
2383 if (*other_bb == NULL || !other_edge_seen)
2384 return false;
2386 else if (single_succ (bb) != *other_bb)
2387 return false;
2389 /* Now check all PHIs of *OTHER_BB. */
2390 e = find_edge (bb, *other_bb);
2391 e2 = find_edge (test_bb, *other_bb);
2392 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2394 gimple phi = gsi_stmt (gsi);
2395 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2396 corresponding to BB and TEST_BB predecessor must be the same. */
2397 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2398 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2400 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2401 one of the PHIs should have the lhs of the last stmt in
2402 that block as PHI arg and that PHI should have 0 or 1
2403 corresponding to it in all other range test basic blocks
2404 considered. */
2405 if (!is_cond)
2407 if (gimple_phi_arg_def (phi, e->dest_idx)
2408 == gimple_assign_lhs (stmt)
2409 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2410 || integer_onep (gimple_phi_arg_def (phi,
2411 e2->dest_idx))))
2412 continue;
2414 else
2416 gimple test_last = last_stmt (test_bb);
2417 if (gimple_code (test_last) != GIMPLE_COND
2418 && gimple_phi_arg_def (phi, e2->dest_idx)
2419 == gimple_assign_lhs (test_last)
2420 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2421 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2422 continue;
2425 return false;
2428 return true;
2431 /* Return true if BB doesn't have side-effects that would disallow
2432 range test optimization, all SSA_NAMEs set in the bb are consumed
2433 in the bb and there are no PHIs. */
2435 static bool
2436 no_side_effect_bb (basic_block bb)
2438 gimple_stmt_iterator gsi;
2439 gimple last;
2441 if (!gimple_seq_empty_p (phi_nodes (bb)))
2442 return false;
2443 last = last_stmt (bb);
2444 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2446 gimple stmt = gsi_stmt (gsi);
2447 tree lhs;
2448 imm_use_iterator imm_iter;
2449 use_operand_p use_p;
2451 if (is_gimple_debug (stmt))
2452 continue;
2453 if (gimple_has_side_effects (stmt))
2454 return false;
2455 if (stmt == last)
2456 return true;
2457 if (!is_gimple_assign (stmt))
2458 return false;
2459 lhs = gimple_assign_lhs (stmt);
2460 if (TREE_CODE (lhs) != SSA_NAME)
2461 return false;
2462 if (gimple_assign_rhs_could_trap_p (stmt))
2463 return false;
2464 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2466 gimple use_stmt = USE_STMT (use_p);
2467 if (is_gimple_debug (use_stmt))
2468 continue;
2469 if (gimple_bb (use_stmt) != bb)
2470 return false;
2473 return false;
2476 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2477 return true and fill in *OPS recursively. */
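/* Illustrative example (SSA names invented): if _6 is defined by
     _5 = _1 & _2;
     _6 = _5 & _3;
   then get_ops (_6, BIT_AND_EXPR, ...) recurses through _5 and pushes
   the leaves _1, _2 and _3 into *OPS, provided each leaf is single-use
   and not itself a reassociable BIT_AND_EXPR.  */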
2479 static bool
2480 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2481 struct loop *loop)
2483 gimple stmt = SSA_NAME_DEF_STMT (var);
2484 tree rhs[2];
2485 int i;
2487 if (!is_reassociable_op (stmt, code, loop))
2488 return false;
2490 rhs[0] = gimple_assign_rhs1 (stmt);
2491 rhs[1] = gimple_assign_rhs2 (stmt);
2492 gimple_set_visited (stmt, true);
2493 for (i = 0; i < 2; i++)
2494 if (TREE_CODE (rhs[i]) == SSA_NAME
2495 && !get_ops (rhs[i], code, ops, loop)
2496 && has_single_use (rhs[i]))
2498 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2500 oe->op = rhs[i];
2501 oe->rank = code;
2502 oe->id = 0;
2503 oe->count = 1;
2504 ops->safe_push (oe);
2506 return true;
2509 /* Inter-bb range test optimization. */
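/* A sketch of the shape this handles (illustrative): a chain of basic
   blocks each ending in a GIMPLE_COND such as if (a_1 == 3), if (a_1 == 5),
   ..., all branching to one common destination block; the individual
   conditions are gathered into an ops vector and passed to
   optimize_range_tests as if they formed a single range test.  */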
2511 static void
2512 maybe_optimize_range_tests (gimple stmt)
2514 basic_block first_bb = gimple_bb (stmt);
2515 basic_block last_bb = first_bb;
2516 basic_block other_bb = NULL;
2517 basic_block bb;
2518 edge_iterator ei;
2519 edge e;
2520 vec<operand_entry_t> ops = vNULL;
2522 /* Consider only basic blocks that end with GIMPLE_COND or
2523 a cast statement satisfying final_range_test_p. All
2524 but the last bb in the first_bb .. last_bb range
2525 should end with GIMPLE_COND. */
2526 if (gimple_code (stmt) == GIMPLE_COND)
2528 if (EDGE_COUNT (first_bb->succs) != 2)
2529 return;
2531 else if (final_range_test_p (stmt))
2532 other_bb = single_succ (first_bb);
2533 else
2534 return;
2536 if (stmt_could_throw_p (stmt))
2537 return;
2539 /* As relative ordering of post-dominator sons isn't fixed,
2540 maybe_optimize_range_tests can be called first on any
2541 bb in the range we want to optimize. So, start searching
2542 backwards, if first_bb can be set to a predecessor. */
2543 while (single_pred_p (first_bb))
2545 basic_block pred_bb = single_pred (first_bb);
2546 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2547 break;
2548 if (!no_side_effect_bb (first_bb))
2549 break;
2550 first_bb = pred_bb;
2552 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2553 Before starting forward search in last_bb successors, find
2554 out the other_bb. */
2555 if (first_bb == last_bb)
2557 other_bb = NULL;
2558 /* As non-GIMPLE_COND last stmt always terminates the range,
2559 if forward search didn't discover anything, just give up. */
2560 if (gimple_code (stmt) != GIMPLE_COND)
2561 return;
2562 /* Look at both successors. Either it ends with a GIMPLE_COND
2563 and satisfies suitable_cond_bb, or ends with a cast and
2564 other_bb is that cast's successor. */
2565 FOR_EACH_EDGE (e, ei, first_bb->succs)
2566 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2567 || e->dest == first_bb)
2568 return;
2569 else if (single_pred_p (e->dest))
2571 stmt = last_stmt (e->dest);
2572 if (stmt
2573 && gimple_code (stmt) == GIMPLE_COND
2574 && EDGE_COUNT (e->dest->succs) == 2)
2576 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2577 break;
2578 else
2579 other_bb = NULL;
2581 else if (stmt
2582 && final_range_test_p (stmt)
2583 && find_edge (first_bb, single_succ (e->dest)))
2585 other_bb = single_succ (e->dest);
2586 if (other_bb == first_bb)
2587 other_bb = NULL;
2590 if (other_bb == NULL)
2591 return;
2593 /* Now do the forward search, moving last_bb to successor bbs
2594 that aren't other_bb. */
2595 while (EDGE_COUNT (last_bb->succs) == 2)
2597 FOR_EACH_EDGE (e, ei, last_bb->succs)
2598 if (e->dest != other_bb)
2599 break;
2600 if (e == NULL)
2601 break;
2602 if (!single_pred_p (e->dest))
2603 break;
2604 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2605 break;
2606 if (!no_side_effect_bb (e->dest))
2607 break;
2608 last_bb = e->dest;
2610 if (first_bb == last_bb)
2611 return;
2612 /* Here basic blocks first_bb through last_bb's predecessor
2613 end with GIMPLE_COND, all of them have one of the edges to
2614 other_bb and another to another block in the range,
2615 all blocks except first_bb don't have side-effects and
2616 last_bb ends with either GIMPLE_COND, or cast satisfying
2617 final_range_test_p. */
2618 for (bb = last_bb; ; bb = single_pred (bb))
2620 enum tree_code code;
2621 tree lhs, rhs;
2623 e = find_edge (bb, other_bb);
2624 stmt = last_stmt (bb);
2625 gimple_set_visited (stmt, true);
2626 if (gimple_code (stmt) != GIMPLE_COND)
2628 use_operand_p use_p;
2629 gimple phi;
2630 edge e2;
2631 unsigned int d;
2633 lhs = gimple_assign_lhs (stmt);
2634 rhs = gimple_assign_rhs1 (stmt);
2635 gcc_assert (bb == last_bb);
2637 /* stmt is
2638 _123 = (int) _234;
2640 followed by:
2641 <bb M>:
2642 # _345 = PHI <_123(N), 1(...), 1(...)>
2644 or 0 instead of 1. If it is 0, the _234
2645 range test is anded together with all the
2646 other range tests, if it is 1, it is ored with
2647 them. */
2648 single_imm_use (lhs, &use_p, &phi);
2649 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2650 e2 = find_edge (first_bb, other_bb);
2651 d = e2->dest_idx;
2652 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2653 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2654 code = BIT_AND_EXPR;
2655 else
2657 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2658 code = BIT_IOR_EXPR;
2661 /* If _234 SSA_NAME_DEF_STMT is
2662 _234 = _567 | _789;
2663 (or &, corresponding to 1/0 in the phi arguments),
2664 push into ops the individual range test arguments
2665 of the bitwise or resp. and, recursively. */
2666 if (!get_ops (rhs, code, &ops,
2667 loop_containing_stmt (stmt))
2668 && has_single_use (rhs))
2670 /* Otherwise, push the _234 range test itself. */
2671 operand_entry_t oe
2672 = (operand_entry_t) pool_alloc (operand_entry_pool);
2674 oe->op = rhs;
2675 oe->rank = code;
2676 oe->id = 0;
2677 oe->count = 1;
2678 ops.safe_push (oe);
2680 continue;
2682 /* Otherwise stmt is GIMPLE_COND. */
2683 code = gimple_cond_code (stmt);
2684 lhs = gimple_cond_lhs (stmt);
2685 rhs = gimple_cond_rhs (stmt);
2686 if (TREE_CODE (lhs) == SSA_NAME
2687 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2688 && ((code != EQ_EXPR && code != NE_EXPR)
2689 || rhs != boolean_false_node
2690 /* Either push into ops the individual bitwise
2691 or resp. and operands, depending on which
2692 edge is other_bb. */
2693 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2694 ^ (code == EQ_EXPR))
2695 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2696 loop_containing_stmt (stmt))))
2698 /* Or push the GIMPLE_COND stmt itself. */
2699 operand_entry_t oe
2700 = (operand_entry_t) pool_alloc (operand_entry_pool);
2702 oe->op = NULL;
2703 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2704 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2705 /* oe->op = NULL signals that there is no SSA_NAME
2706 for the range test; oe->id instead is the
2707 number of the basic block at whose end the GIMPLE_COND
2708 is. */
2709 oe->id = bb->index;
2710 oe->count = 1;
2711 ops.safe_push (oe);
2713 if (bb == first_bb)
2714 break;
2716 if (ops.length () > 1)
2717 optimize_range_tests (ERROR_MARK, &ops);
2718 ops.release ();
2721 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2722 of STMT in its operands. This is also known as a "destructive
2723 update" operation. */
2725 static bool
2726 is_phi_for_stmt (gimple stmt, tree operand)
2728 gimple def_stmt;
2729 tree lhs;
2730 use_operand_p arg_p;
2731 ssa_op_iter i;
2733 if (TREE_CODE (operand) != SSA_NAME)
2734 return false;
2736 lhs = gimple_assign_lhs (stmt);
2738 def_stmt = SSA_NAME_DEF_STMT (operand);
2739 if (gimple_code (def_stmt) != GIMPLE_PHI)
2740 return false;
2742 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2743 if (lhs == USE_FROM_PTR (arg_p))
2744 return true;
2745 return false;
2748 /* Remove the def stmt of VAR if VAR has zero uses and, if so, recurse
2749 on its rhs1 operand. */
2751 static void
2752 remove_visited_stmt_chain (tree var)
2754 gimple stmt;
2755 gimple_stmt_iterator gsi;
2757 while (1)
2759 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2760 return;
2761 stmt = SSA_NAME_DEF_STMT (var);
2762 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
2764 var = gimple_assign_rhs1 (stmt);
2765 gsi = gsi_for_stmt (stmt);
2766 gsi_remove (&gsi, true);
2767 release_defs (stmt);
2769 else
2770 return;
2774 /* This function checks three consecutive operands in
2775 the passed operand vector OPS starting from OPINDEX and
2776 swaps two operands if it is profitable for the binary operation
2777 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
2779 We pair ops with the same rank if possible.
2781 The alternative we try is to see if STMT is a destructive
2782 update style statement, which is like:
2783 b = phi (a, ...)
2784 a = c + b;
2785 In that case, we want to use the destructive update form to
2786 expose the possible vectorizer sum reduction opportunity.
2787 In that case, the third operand will be the phi node. This
2788 check is not performed if STMT is null.
2790 We could, of course, try to be better as noted above, and do a
2791 lot of work to try to find these opportunities in >3 operand
2792 cases, but it is unlikely to be worth it. */
2794 static void
2795 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
2796 unsigned int opindex, gimple stmt)
2798 operand_entry_t oe1, oe2, oe3;
2800 oe1 = ops[opindex];
2801 oe2 = ops[opindex + 1];
2802 oe3 = ops[opindex + 2];
2804 if ((oe1->rank == oe2->rank
2805 && oe2->rank != oe3->rank)
2806 || (stmt && is_phi_for_stmt (stmt, oe3->op)
2807 && !is_phi_for_stmt (stmt, oe1->op)
2808 && !is_phi_for_stmt (stmt, oe2->op)))
2810 struct operand_entry temp = *oe3;
2811 oe3->op = oe1->op;
2812 oe3->rank = oe1->rank;
2813 oe1->op = temp.op;
2814 oe1->rank = temp.rank;
2816 else if ((oe1->rank == oe3->rank
2817 && oe2->rank != oe3->rank)
2818 || (stmt && is_phi_for_stmt (stmt, oe2->op)
2819 && !is_phi_for_stmt (stmt, oe1->op)
2820 && !is_phi_for_stmt (stmt, oe3->op)))
2822 struct operand_entry temp = *oe2;
2823 oe2->op = oe1->op;
2824 oe2->rank = oe1->rank;
2825 oe1->op = temp.op;
2826 oe1->rank = temp.rank;
2830 /* Recursively rewrite our linearized statements so that the operators
2831 match those in OPS[OPINDEX], putting the computation in rank
2832 order. */
2834 static void
2835 rewrite_expr_tree (gimple stmt, unsigned int opindex,
2836 vec<operand_entry_t> ops, bool moved)
2838 tree rhs1 = gimple_assign_rhs1 (stmt);
2839 tree rhs2 = gimple_assign_rhs2 (stmt);
2840 operand_entry_t oe;
2842 /* If we have three operands left, then we want to make sure the ones
2843 that get the double binary op are chosen wisely. */
2844 if (opindex + 3 == ops.length ())
2845 swap_ops_for_binary_stmt (ops, opindex, stmt);
2847 /* The final recursion case for this function is that you have
2848 exactly two operations left.
2849 If we had exactly one op in the entire list to start with, we
2850 would have never called this function, and the tail recursion
2851 rewrites them one at a time. */
2852 if (opindex + 2 == ops.length ())
2854 operand_entry_t oe1, oe2;
2856 oe1 = ops[opindex];
2857 oe2 = ops[opindex + 1];
2859 if (rhs1 != oe1->op || rhs2 != oe2->op)
2861 if (dump_file && (dump_flags & TDF_DETAILS))
2863 fprintf (dump_file, "Transforming ");
2864 print_gimple_stmt (dump_file, stmt, 0, 0);
2867 gimple_assign_set_rhs1 (stmt, oe1->op);
2868 gimple_assign_set_rhs2 (stmt, oe2->op);
2869 update_stmt (stmt);
2870 if (rhs1 != oe1->op && rhs1 != oe2->op)
2871 remove_visited_stmt_chain (rhs1);
2873 if (dump_file && (dump_flags & TDF_DETAILS))
2875 fprintf (dump_file, " into ");
2876 print_gimple_stmt (dump_file, stmt, 0, 0);
2879 return;
2882 /* If we hit here, we should have 3 or more ops left. */
2883 gcc_assert (opindex + 2 < ops.length ());
2885 /* Rewrite the next operator. */
2886 oe = ops[opindex];
2888 if (oe->op != rhs2)
2890 if (!moved)
2892 gimple_stmt_iterator gsinow, gsirhs1;
2893 gimple stmt1 = stmt, stmt2;
2894 unsigned int count;
2896 gsinow = gsi_for_stmt (stmt);
2897 count = ops.length () - opindex - 2;
2898 while (count-- != 0)
2900 stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
2901 gsirhs1 = gsi_for_stmt (stmt2);
2902 gsi_move_before (&gsirhs1, &gsinow);
2903 gsi_prev (&gsinow);
2904 stmt1 = stmt2;
2906 moved = true;
2909 if (dump_file && (dump_flags & TDF_DETAILS))
2911 fprintf (dump_file, "Transforming ");
2912 print_gimple_stmt (dump_file, stmt, 0, 0);
2915 gimple_assign_set_rhs2 (stmt, oe->op);
2916 update_stmt (stmt);
2918 if (dump_file && (dump_flags & TDF_DETAILS))
2920 fprintf (dump_file, " into ");
2921 print_gimple_stmt (dump_file, stmt, 0, 0);
2924 /* Recurse on the LHS of the binary operator, which is guaranteed to
2925 be the non-leaf side. */
2926 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
2929 /* Find out how many cycles we need to compute the statement chain.
2930 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
2931 maximum number of independent statements we may execute per cycle. */
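/* Worked example (illustrative): for ops_num = 10 and cpu_width = 2,
   the first phase takes 2 cycles (10 -> 8 -> 6 operands) and the
   remaining 6 operands need floor_log2 (6) + 1 = 3 further cycles,
   so the function returns 5.  */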
2933 static int
2934 get_required_cycles (int ops_num, int cpu_width)
2936 int res;
2937 int elog;
2938 unsigned int rest;
2940 /* While we have more than 2 * cpu_width operands
2941 we may reduce the number of operands by cpu_width
2942 per cycle. */
2943 res = ops_num / (2 * cpu_width);
2945 /* The remaining operand count can then be halved each cycle
2946 until we have only one operand. */
2947 rest = (unsigned)(ops_num - res * cpu_width);
2948 elog = exact_log2 (rest);
2949 if (elog >= 0)
2950 res += elog;
2951 else
2952 res += floor_log2 (rest) + 1;
2954 return res;
2957 /* Returns an optimal number of registers to use for computation of
2958 given statements. */
2960 static int
2961 get_reassociation_width (int ops_num, enum tree_code opc,
2962 enum machine_mode mode)
2964 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
2965 int width;
2966 int width_min;
2967 int cycles_best;
2969 if (param_width > 0)
2970 width = param_width;
2971 else
2972 width = targetm.sched.reassociation_width (opc, mode);
2974 if (width == 1)
2975 return width;
2977 /* Get the minimal time required for sequence computation. */
2978 cycles_best = get_required_cycles (ops_num, width);
2980 /* Check whether we can use a smaller width and still compute the
2981 sequence in the same number of cycles. That allows us to reduce
2982 register usage. get_required_cycles only grows as the width shrinks,
2983 so we can perform a binary search for the minimal width that still
2984 results in the optimal cycle count. */
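/* For example (illustrative): with 4 operands and a target-provided
   width of 4, a width of 2 already achieves the minimal 2 cycles, so
   the search below settles on 2 and frees registers.  */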
2985 width_min = 1;
2986 while (width > width_min)
2988 int width_mid = (width + width_min) / 2;
2990 if (get_required_cycles (ops_num, width_mid) == cycles_best)
2991 width = width_mid;
2992 else if (width_min < width_mid)
2993 width_min = width_mid;
2994 else
2995 break;
2998 return width;
3001 /* Recursively rewrite our linearized statements so that the operators
3002 match those in OPS[OPINDEX], putting the computation in rank
3003 order and trying to allow operations to be executed in
3004 parallel. */
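/* For instance (illustrative): with width 2, eight operands can be
   combined as a roughly balanced tree of depth three instead of a
   linear chain of seven dependent statements, so independent
   additions can issue in the same cycle.  */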
3006 static void
3007 rewrite_expr_tree_parallel (gimple stmt, int width,
3008 vec<operand_entry_t> ops)
3010 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3011 int op_num = ops.length ();
3012 int stmt_num = op_num - 1;
3013 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3014 int op_index = op_num - 1;
3015 int stmt_index = 0;
3016 int ready_stmts_end = 0;
3017 int i = 0;
3018 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3020 /* We start expression rewriting from the top statements.
3021 So, in this loop we create a full list of statements
3022 we will work with. */
3023 stmts[stmt_num - 1] = stmt;
3024 for (i = stmt_num - 2; i >= 0; i--)
3025 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3027 for (i = 0; i < stmt_num; i++)
3029 tree op1, op2;
3031 /* Determine whether we should use results of
3032 already handled statements or not. */
3033 if (ready_stmts_end == 0
3034 && (i - stmt_index >= width || op_index < 1))
3035 ready_stmts_end = i;
3037 /* Now we choose operands for the next statement. A nonzero
3038 value in ready_stmts_end means here that we should use
3039 the result of already generated statements as a new operand. */
3040 if (ready_stmts_end > 0)
3042 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3043 if (ready_stmts_end > stmt_index)
3044 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3045 else if (op_index >= 0)
3046 op2 = ops[op_index--]->op;
3047 else
3049 gcc_assert (stmt_index < i);
3050 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3053 if (stmt_index >= ready_stmts_end)
3054 ready_stmts_end = 0;
3056 else
3058 if (op_index > 1)
3059 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3060 op2 = ops[op_index--]->op;
3061 op1 = ops[op_index--]->op;
3064 /* If we emit the last statement then we should put
3065 operands into the last statement. It will also
3066 break the loop. */
3067 if (op_index < 0 && stmt_index == i)
3068 i = stmt_num - 1;
3070 if (dump_file && (dump_flags & TDF_DETAILS))
3072 fprintf (dump_file, "Transforming ");
3073 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3076 /* We keep original statement only for the last one. All
3077 others are recreated. */
3078 if (i == stmt_num - 1)
3080 gimple_assign_set_rhs1 (stmts[i], op1);
3081 gimple_assign_set_rhs2 (stmts[i], op2);
3082 update_stmt (stmts[i]);
3084 else
3085 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3087 if (dump_file && (dump_flags & TDF_DETAILS))
3089 fprintf (dump_file, " into ");
3090 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3094 remove_visited_stmt_chain (last_rhs1);
3097 /* Transform STMT, which is really (A + B) + (C + D), into the left
3098 linear form, ((A + B) + C) + D.
3099 Recurse on D if necessary. */
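/* Illustratively (temporaries invented): from
     t1 = A + B;  t2 = C + D;  s = t1 + t2;
   the code below produces
     t1 = A + B;  t2 = t1 + D;  s = t2 + C;
   i.e. the left-linear chain ((A + B) + D) + C; the exact operand
   order within the chain does not matter here.  */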
3101 static void
3102 linearize_expr (gimple stmt)
3104 gimple_stmt_iterator gsinow, gsirhs;
3105 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3106 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3107 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3108 gimple newbinrhs = NULL;
3109 struct loop *loop = loop_containing_stmt (stmt);
3111 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3112 && is_reassociable_op (binrhs, rhscode, loop));
3114 gsinow = gsi_for_stmt (stmt);
3115 gsirhs = gsi_for_stmt (binrhs);
3116 gsi_move_before (&gsirhs, &gsinow);
3118 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3119 gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
3120 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3122 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3123 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3125 if (dump_file && (dump_flags & TDF_DETAILS))
3127 fprintf (dump_file, "Linearized: ");
3128 print_gimple_stmt (dump_file, stmt, 0, 0);
3131 reassociate_stats.linearized++;
3132 update_stmt (binrhs);
3133 update_stmt (binlhs);
3134 update_stmt (stmt);
3136 gimple_set_visited (stmt, true);
3137 gimple_set_visited (binlhs, true);
3138 gimple_set_visited (binrhs, true);
3140 /* Tail recurse on the new rhs if it still needs reassociation. */
3141 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3142 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3143 want to change the algorithm while converting to tuples. */
3144 linearize_expr (stmt);
3147 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3148 it. Otherwise, return NULL. */
3150 static gimple
3151 get_single_immediate_use (tree lhs)
3153 use_operand_p immuse;
3154 gimple immusestmt;
3156 if (TREE_CODE (lhs) == SSA_NAME
3157 && single_imm_use (lhs, &immuse, &immusestmt)
3158 && is_gimple_assign (immusestmt))
3159 return immusestmt;
3161 return NULL;
3164 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3165 representing the negated value. Insertions of any necessary
3166 instructions go before GSI.
3167 This function is recursive in that, if you hand it "a_5" as the
3168 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3169 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3171 static tree
3172 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
3174 gimple negatedefstmt = NULL;
3175 tree resultofnegate;
3177 /* If we are trying to negate a name, defined by an add, negate the
3178 add operands instead. */
3179 if (TREE_CODE (tonegate) == SSA_NAME)
3180 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3181 if (TREE_CODE (tonegate) == SSA_NAME
3182 && is_gimple_assign (negatedefstmt)
3183 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3184 && has_single_use (gimple_assign_lhs (negatedefstmt))
3185 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3187 gimple_stmt_iterator gsi;
3188 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3189 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3191 gsi = gsi_for_stmt (negatedefstmt);
3192 rhs1 = negate_value (rhs1, &gsi);
3193 gimple_assign_set_rhs1 (negatedefstmt, rhs1);
3195 gsi = gsi_for_stmt (negatedefstmt);
3196 rhs2 = negate_value (rhs2, &gsi);
3197 gimple_assign_set_rhs2 (negatedefstmt, rhs2);
3199 update_stmt (negatedefstmt);
3200 return gimple_assign_lhs (negatedefstmt);
3203 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3204 resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
3205 NULL_TREE, true, GSI_SAME_STMT);
3206 return resultofnegate;
3209 /* Return true if we should break up the subtract in STMT into an add
3210 with negate. This is true when the subtract operands are really
3211 adds, or the subtract itself is used in an add expression. In
3212 either case, breaking up the subtract into an add with negate
3213 exposes the adds to reassociation. */
3215 static bool
3216 should_break_up_subtract (gimple stmt)
3218 tree lhs = gimple_assign_lhs (stmt);
3219 tree binlhs = gimple_assign_rhs1 (stmt);
3220 tree binrhs = gimple_assign_rhs2 (stmt);
3221 gimple immusestmt;
3222 struct loop *loop = loop_containing_stmt (stmt);
3224 if (TREE_CODE (binlhs) == SSA_NAME
3225 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3226 return true;
3228 if (TREE_CODE (binrhs) == SSA_NAME
3229 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3230 return true;
3232 if (TREE_CODE (lhs) == SSA_NAME
3233 && (immusestmt = get_single_immediate_use (lhs))
3234 && is_gimple_assign (immusestmt)
3235 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3236 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3237 return true;
3238 return false;
3241 /* Transform STMT from A - B into A + -B. */
3243 static void
3244 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3246 tree rhs1 = gimple_assign_rhs1 (stmt);
3247 tree rhs2 = gimple_assign_rhs2 (stmt);
3249 if (dump_file && (dump_flags & TDF_DETAILS))
3251 fprintf (dump_file, "Breaking up subtract ");
3252 print_gimple_stmt (dump_file, stmt, 0, 0);
3255 rhs2 = negate_value (rhs2, gsip);
3256 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3257 update_stmt (stmt);
3260 /* Determine whether STMT is a builtin call that raises an SSA name
3261 to an integer power and has only one use. If so, and this is early
3262 reassociation and unsafe math optimizations are permitted, place
3263 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3264 If any of these conditions does not hold, return FALSE. */
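/* For example (illustrative): t_1 = __builtin_pow (x_2, 4.0) yields
   *BASE = x_2 and *EXPONENT = 4, whereas __builtin_pow (x_2, 0.5) is
   rejected because 0.5 is not an exact integer, and exponents below
   two are rejected as not worth expanding.  */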
3266 static bool
3267 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3269 tree fndecl, arg1;
3270 REAL_VALUE_TYPE c, cint;
3272 if (!first_pass_instance
3273 || !flag_unsafe_math_optimizations
3274 || !is_gimple_call (stmt)
3275 || !has_single_use (gimple_call_lhs (stmt)))
3276 return false;
3278 fndecl = gimple_call_fndecl (stmt);
3280 if (!fndecl
3281 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3282 return false;
3284 switch (DECL_FUNCTION_CODE (fndecl))
3286 CASE_FLT_FN (BUILT_IN_POW):
3287 *base = gimple_call_arg (stmt, 0);
3288 arg1 = gimple_call_arg (stmt, 1);
3290 if (TREE_CODE (arg1) != REAL_CST)
3291 return false;
3293 c = TREE_REAL_CST (arg1);
3295 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3296 return false;
3298 *exponent = real_to_integer (&c);
3299 real_from_integer (&cint, VOIDmode, *exponent,
3300 *exponent < 0 ? -1 : 0, 0);
3301 if (!real_identical (&c, &cint))
3302 return false;
3304 break;
3306 CASE_FLT_FN (BUILT_IN_POWI):
3307 *base = gimple_call_arg (stmt, 0);
3308 arg1 = gimple_call_arg (stmt, 1);
3310 if (!host_integerp (arg1, 0))
3311 return false;
3313 *exponent = TREE_INT_CST_LOW (arg1);
3314 break;
3316 default:
3317 return false;
3320 /* Expanding negative exponents is generally unproductive, so we don't
3321 complicate matters with those. Exponents of zero and one should
3322 have been handled by expression folding. */
3323 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3324 return false;
3326 return true;
3329 /* Recursively linearize a binary expression that is the RHS of STMT.
3330 Place the operands of the expression tree in the vector named OPS. */
3332 static void
3333 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3334 bool is_associative, bool set_visited)
3336 tree binlhs = gimple_assign_rhs1 (stmt);
3337 tree binrhs = gimple_assign_rhs2 (stmt);
3338 gimple binlhsdef = NULL, binrhsdef = NULL;
3339 bool binlhsisreassoc = false;
3340 bool binrhsisreassoc = false;
3341 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3342 struct loop *loop = loop_containing_stmt (stmt);
3343 tree base = NULL_TREE;
3344 HOST_WIDE_INT exponent = 0;
3346 if (set_visited)
3347 gimple_set_visited (stmt, true);
3349 if (TREE_CODE (binlhs) == SSA_NAME)
3351 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3352 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3353 && !stmt_could_throw_p (binlhsdef));
3356 if (TREE_CODE (binrhs) == SSA_NAME)
3358 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3359 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3360 && !stmt_could_throw_p (binrhsdef));
3363 /* If the LHS is not reassociable, but the RHS is, we need to swap
3364 them. If neither is reassociable, there is nothing we can do, so
3365 just put them in the ops vector. If the LHS is reassociable,
3366 linearize it. If both are reassociable, then linearize the RHS
3367 and the LHS. */
3369 if (!binlhsisreassoc)
3371 tree temp;
3373 /* If this is not an associative operation, like division, give up. */
3374 if (!is_associative)
3376 add_to_ops_vec (ops, binrhs);
3377 return;
3380 if (!binrhsisreassoc)
3382 if (rhscode == MULT_EXPR
3383 && TREE_CODE (binrhs) == SSA_NAME
3384 && acceptable_pow_call (binrhsdef, &base, &exponent))
3386 add_repeat_to_ops_vec (ops, base, exponent);
3387 gimple_set_visited (binrhsdef, true);
3389 else
3390 add_to_ops_vec (ops, binrhs);
3392 if (rhscode == MULT_EXPR
3393 && TREE_CODE (binlhs) == SSA_NAME
3394 && acceptable_pow_call (binlhsdef, &base, &exponent))
3396 add_repeat_to_ops_vec (ops, base, exponent);
3397 gimple_set_visited (binlhsdef, true);
3399 else
3400 add_to_ops_vec (ops, binlhs);
3402 return;
3405 if (dump_file && (dump_flags & TDF_DETAILS))
3407 fprintf (dump_file, "swapping operands of ");
3408 print_gimple_stmt (dump_file, stmt, 0, 0);
3411 swap_tree_operands (stmt,
3412 gimple_assign_rhs1_ptr (stmt),
3413 gimple_assign_rhs2_ptr (stmt));
3414 update_stmt (stmt);
3416 if (dump_file && (dump_flags & TDF_DETAILS))
3418 fprintf (dump_file, " is now ");
3419 print_gimple_stmt (dump_file, stmt, 0, 0);
3422 /* We want to make it so the lhs is always the reassociative op,
3423 so swap. */
3424 temp = binlhs;
3425 binlhs = binrhs;
3426 binrhs = temp;
3428 else if (binrhsisreassoc)
3430 linearize_expr (stmt);
3431 binlhs = gimple_assign_rhs1 (stmt);
3432 binrhs = gimple_assign_rhs2 (stmt);
3435 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3436 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3437 rhscode, loop));
3438 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3439 is_associative, set_visited);
3441 if (rhscode == MULT_EXPR
3442 && TREE_CODE (binrhs) == SSA_NAME
3443 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3445 add_repeat_to_ops_vec (ops, base, exponent);
3446 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3448 else
3449 add_to_ops_vec (ops, binrhs);
3452 /* Repropagate the negates back into subtracts, since no other pass
3453 currently does it. */
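/* For example (illustrative): given x_1 = -a_2; y_3 = b_4 + x_1; the
   PLUS_EXPR case below rewrites the use into y_3 = b_4 - a_2, folding
   the negation away.  */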
3455 static void
3456 repropagate_negates (void)
3458 unsigned int i = 0;
3459 tree negate;
3461 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3463 gimple user = get_single_immediate_use (negate);
3465 if (!user || !is_gimple_assign (user))
3466 continue;
3468 /* The negate operand can be either operand of a PLUS_EXPR
3469 (it can be the LHS if the RHS is a constant for example).
3471 Force the negate operand to the RHS of the PLUS_EXPR, then
3472 transform the PLUS_EXPR into a MINUS_EXPR. */
3473 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3475 /* If the negated operand appears on the LHS of the
3476 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3477 to force the negated operand to the RHS of the PLUS_EXPR. */
3478 if (gimple_assign_rhs1 (user) == negate)
3480 swap_tree_operands (user,
3481 gimple_assign_rhs1_ptr (user),
3482 gimple_assign_rhs2_ptr (user));
3485 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3486 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3487 if (gimple_assign_rhs2 (user) == negate)
3489 tree rhs1 = gimple_assign_rhs1 (user);
3490 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3491 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3492 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3493 update_stmt (user);
3496 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3498 if (gimple_assign_rhs1 (user) == negate)
3500 /* We have
3501 x = -a
3502 y = x - b
3503 which we transform into
3504 x = a + b
3505 y = -x .
3506 This pushes down the negate which we possibly can merge
3507 into some other operation, hence insert it into the
3508 plus_negates vector. */
3509 gimple feed = SSA_NAME_DEF_STMT (negate);
3510 tree a = gimple_assign_rhs1 (feed);
3511 tree rhs2 = gimple_assign_rhs2 (user);
3512 gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
3513 gimple_replace_lhs (feed, negate);
3514 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
3515 update_stmt (gsi_stmt (gsi));
3516 gsi2 = gsi_for_stmt (user);
3517 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
3518 update_stmt (gsi_stmt (gsi2));
3519 gsi_move_before (&gsi, &gsi2);
3520 plus_negates.safe_push (gimple_assign_lhs (gsi_stmt (gsi2)));
3522 else
3524 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3525 rid of one operation. */
3526 gimple feed = SSA_NAME_DEF_STMT (negate);
3527 tree a = gimple_assign_rhs1 (feed);
3528 tree rhs1 = gimple_assign_rhs1 (user);
3529 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3530 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3531 update_stmt (gsi_stmt (gsi));
3537 /* Returns true if OP is of a type for which we can do reassociation.
3538 That is for integral or non-saturating fixed-point types, and for
3539 floating-point types when associative-math is enabled. */
3541 static bool
3542 can_reassociate_p (tree op)
3544 tree type = TREE_TYPE (op);
3545 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3546 || NON_SAT_FIXED_POINT_TYPE_P (type)
3547 || (flag_associative_math && FLOAT_TYPE_P (type)))
3548 return true;
3549 return false;
3552 /* Break up subtract operations in block BB.
3554 We do this top down because we don't know whether the subtract is
3555 part of a possible chain of reassociation except at the top.
3557 IE given
3558 d = f + g
3559 c = a + e
3560 b = c - d
3561 q = b - r
3562 k = t - q
3564 we want to break up k = t - q, but we won't until we've transformed q
3565 = b - r, which won't be broken up until we transform b = c - d.
3567 En passant, clear the GIMPLE visited flag on every statement. */
3569 static void
3570 break_up_subtract_bb (basic_block bb)
3572 gimple_stmt_iterator gsi;
3573 basic_block son;
3575 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3577 gimple stmt = gsi_stmt (gsi);
3578 gimple_set_visited (stmt, false);
3580 if (!is_gimple_assign (stmt)
3581 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3582 continue;
3584 /* Look for simple gimple subtract operations. */
3585 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3587 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3588 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3589 continue;
3591 /* Check for a subtract used only in an addition. If this
3592 is the case, transform it into add of a negate for better
3593 reassociation. IE transform C = A-B into C = A + -B if C
3594 is only used in an addition. */
3595 if (should_break_up_subtract (stmt))
3596 break_up_subtract (stmt, &gsi);
3598 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3599 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3600 plus_negates.safe_push (gimple_assign_lhs (stmt));
3602 for (son = first_dom_son (CDI_DOMINATORS, bb);
3603 son;
3604 son = next_dom_son (CDI_DOMINATORS, son))
3605 break_up_subtract_bb (son);
3608 /* Used for repeated factor analysis. */
3609 struct repeat_factor_d
3611 /* An SSA name that occurs in a multiply chain. */
3612 tree factor;
3614 /* Cached rank of the factor. */
3615 unsigned rank;
3617 /* Number of occurrences of the factor in the chain. */
3618 HOST_WIDE_INT count;
3620 /* An SSA name representing the product of this factor and
3621 all factors appearing later in the repeated factor vector. */
3622 tree repr;
3625 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3626 typedef const struct repeat_factor_d *const_repeat_factor_t;
3629 static vec<repeat_factor> repeat_factor_vec;
3631 /* Used for sorting the repeat factor vector. Sort primarily by
3632 ascending occurrence count, secondarily by descending rank. */
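/* Illustrative example: factors with (count, rank) pairs (1, 5), (3, 2)
   and (3, 7) sort as (1, 5), (3, 7), (3, 2): lower counts come first,
   and within equal counts the higher-ranked factor comes first.  */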
3634 static int
3635 compare_repeat_factors (const void *x1, const void *x2)
3637 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3638 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3640 if (rf1->count != rf2->count)
3641 return rf1->count - rf2->count;
3643 return rf2->rank - rf1->rank;
3646 /* Look for repeated operands in OPS in the multiply tree rooted at
3647 STMT. Replace them with an optimal sequence of multiplies and powi
3648 builtin calls, and remove the used operands from OPS. Return an
3649 SSA name representing the value of the replacement sequence. */
3651 static tree
3652 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
3654 unsigned i, j, vec_len;
3655 int ii;
3656 operand_entry_t oe;
3657 repeat_factor_t rf1, rf2;
3658 repeat_factor rfnew;
3659 tree result = NULL_TREE;
3660 tree target_ssa, iter_result;
3661 tree type = TREE_TYPE (gimple_get_lhs (stmt));
3662 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
3663 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3664 gimple mul_stmt, pow_stmt;
3666 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3667 target. */
3668 if (!powi_fndecl)
3669 return NULL_TREE;
3671 /* Allocate the repeated factor vector. */
3672 repeat_factor_vec.create (10);
3674 /* Scan the OPS vector for all SSA names in the product and build
3675 up a vector of occurrence counts for each factor. */
3676 FOR_EACH_VEC_ELT (*ops, i, oe)
3678 if (TREE_CODE (oe->op) == SSA_NAME)
3680 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3682 if (rf1->factor == oe->op)
3684 rf1->count += oe->count;
3685 break;
3689 if (j >= repeat_factor_vec.length ())
3691 rfnew.factor = oe->op;
3692 rfnew.rank = oe->rank;
3693 rfnew.count = oe->count;
3694 rfnew.repr = NULL_TREE;
3695 repeat_factor_vec.safe_push (rfnew);
3700 /* Sort the repeated factor vector by (a) increasing occurrence count,
3701 and (b) decreasing rank. */
3702 repeat_factor_vec.qsort (compare_repeat_factors);
3704 /* It is generally best to combine as many base factors as possible
3705 into a product before applying __builtin_powi to the result.
3706 However, the sort order chosen for the repeated factor vector
3707 allows us to cache partial results for the product of the base
3708 factors for subsequent use. When we already have a cached partial
3709 result from a previous iteration, it is best to make use of it
3710 before looking for another __builtin_pow opportunity.
3712 As an example, consider x * x * y * y * y * z * z * z * z.
3713 We want to first compose the product x * y * z, raise it to the
3714 second power, then multiply this by y * z, and finally multiply
3715 by z. This can be done in 5 multiplies provided we cache y * z
3716 for use in both expressions:
3718 t1 = y * z
3719 t2 = t1 * x
3720 t3 = t2 * t2
3721 t4 = t1 * t3
3722 result = t4 * z
3724 If we instead ignored the cached y * z and first multiplied by
3725 the __builtin_pow opportunity z * z, we would get the inferior:
3727 t1 = y * z
3728 t2 = t1 * x
3729 t3 = t2 * t2
3730 t4 = z * z
3731 t5 = t3 * t4
3732 result = t5 * y */
3734 vec_len = repeat_factor_vec.length ();
3736 /* Repeatedly look for opportunities to create a builtin_powi call. */
3737 while (true)
3739 HOST_WIDE_INT power;
3741 /* First look for the largest cached product of factors from
3742 preceding iterations. If found, create a builtin_powi for
3743 it if the minimum occurrence count for its factors is at
3744 least 2, or just use this cached product as our next
3745 multiplicand if the minimum occurrence count is 1. */
3746 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3748 if (rf1->repr && rf1->count > 0)
3749 break;
3752 if (j < vec_len)
3754 power = rf1->count;
3756 if (power == 1)
3758 iter_result = rf1->repr;
3760 if (dump_file && (dump_flags & TDF_DETAILS))
3762 unsigned elt;
3763 repeat_factor_t rf;
3764 fputs ("Multiplying by cached product ", dump_file);
3765 for (elt = j; elt < vec_len; elt++)
3767 rf = &repeat_factor_vec[elt];
3768 print_generic_expr (dump_file, rf->factor, 0);
3769 if (elt < vec_len - 1)
3770 fputs (" * ", dump_file);
3772 fputs ("\n", dump_file);
3775 else
3777 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3778 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3779 build_int_cst (integer_type_node,
3780 power));
3781 gimple_call_set_lhs (pow_stmt, iter_result);
3782 gimple_set_location (pow_stmt, gimple_location (stmt));
3783 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3785 if (dump_file && (dump_flags & TDF_DETAILS))
3787 unsigned elt;
3788 repeat_factor_t rf;
3789 fputs ("Building __builtin_pow call for cached product (",
3790 dump_file);
3791 for (elt = j; elt < vec_len; elt++)
3793 rf = &repeat_factor_vec[elt];
3794 print_generic_expr (dump_file, rf->factor, 0);
3795 if (elt < vec_len - 1)
3796 fputs (" * ", dump_file);
3798 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
3799 power);
3803 else
3805 /* Otherwise, find the first factor in the repeated factor
3806 vector whose occurrence count is at least 2. If no such
3807 factor exists, there are no builtin_powi opportunities
3808 remaining. */
3809 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3811 if (rf1->count >= 2)
3812 break;
3815 if (j >= vec_len)
3816 break;
3818 power = rf1->count;
3820 if (dump_file && (dump_flags & TDF_DETAILS))
3822 unsigned elt;
3823 repeat_factor_t rf;
3824 fputs ("Building __builtin_pow call for (", dump_file);
3825 for (elt = j; elt < vec_len; elt++)
3827 rf = &repeat_factor_vec[elt];
3828 print_generic_expr (dump_file, rf->factor, 0);
3829 if (elt < vec_len - 1)
3830 fputs (" * ", dump_file);
3832 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
3835 reassociate_stats.pows_created++;
3837 /* Visit each element of the vector in reverse order (so that
3838 high-occurrence elements are visited first, and within the
3839 same occurrence count, lower-ranked elements are visited
3840 first). Form a linear product of all elements in this order
3841 whose occurrence count is at least that of element J.
3842 Record the SSA name representing the product of each element
3843 with all subsequent elements in the vector. */
3844 if (j == vec_len - 1)
3845 rf1->repr = rf1->factor;
3846 else
3848 for (ii = vec_len - 2; ii >= (int)j; ii--)
3850 tree op1, op2;
3852 rf1 = &repeat_factor_vec[ii];
3853 rf2 = &repeat_factor_vec[ii + 1];
3855 /* Init the last factor's representative to be itself. */
3856 if (!rf2->repr)
3857 rf2->repr = rf2->factor;
3859 op1 = rf1->factor;
3860 op2 = rf2->repr;
3862 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
3863 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
3864 target_ssa,
3865 op1, op2);
3866 gimple_set_location (mul_stmt, gimple_location (stmt));
3867 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3868 rf1->repr = target_ssa;
3870 /* Don't reprocess the multiply we just introduced. */
3871 gimple_set_visited (mul_stmt, true);
3875 /* Form a call to __builtin_powi for the maximum product
3876 just formed, raised to the power obtained earlier. */
3877 rf1 = &repeat_factor_vec[j];
3878 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3879 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3880 build_int_cst (integer_type_node,
3881 power));
3882 gimple_call_set_lhs (pow_stmt, iter_result);
3883 gimple_set_location (pow_stmt, gimple_location (stmt));
3884 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3887 /* If we previously formed at least one other builtin_powi call,
3888 form the product of this one and those others. */
3889 if (result)
3891 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
3892 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
3893 result, iter_result);
3894 gimple_set_location (mul_stmt, gimple_location (stmt));
3895 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3896 gimple_set_visited (mul_stmt, true);
3897 result = new_result;
3899 else
3900 result = iter_result;
3902 /* Decrement the occurrence count of each element in the product
3903 by the count found above, and remove this many copies of each
3904 factor from OPS. */
3905 for (i = j; i < vec_len; i++)
3907 unsigned k = power;
3908 unsigned n;
3910 rf1 = &repeat_factor_vec[i];
3911 rf1->count -= power;
3913 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
3915 if (oe->op == rf1->factor)
3917 if (oe->count <= k)
3919 ops->ordered_remove (n);
3920 k -= oe->count;
3922 if (k == 0)
3923 break;
3925 else
3927 oe->count -= k;
3928 break;
3935 /* At this point all elements in the repeated factor vector have a
3936 remaining occurrence count of 0 or 1, and those with a count of 1
3937 don't have cached representatives. Re-sort the ops vector and
3938 clean up. */
3939 ops->qsort (sort_by_operand_rank);
3940 repeat_factor_vec.release ();
3942 /* Return the final product computed herein. Note that there may
3943 still be some elements with single occurrence count left in OPS;
3944 those will be handled by the normal reassociation logic. */
3945 return result;
3948 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
3950 static void
3951 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
3953 tree rhs1;
3955 if (dump_file && (dump_flags & TDF_DETAILS))
3957 fprintf (dump_file, "Transforming ");
3958 print_gimple_stmt (dump_file, stmt, 0, 0);
3961 rhs1 = gimple_assign_rhs1 (stmt);
3962 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
3963 update_stmt (stmt);
3964 remove_visited_stmt_chain (rhs1);
3966 if (dump_file && (dump_flags & TDF_DETAILS))
3968 fprintf (dump_file, " into ");
3969 print_gimple_stmt (dump_file, stmt, 0, 0);
3973 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
3975 static void
3976 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
3977 tree rhs1, tree rhs2)
3979 if (dump_file && (dump_flags & TDF_DETAILS))
3981 fprintf (dump_file, "Transforming ");
3982 print_gimple_stmt (dump_file, stmt, 0, 0);
3985 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
3986 update_stmt (gsi_stmt (*gsi));
3987 remove_visited_stmt_chain (rhs1);
3989 if (dump_file && (dump_flags & TDF_DETAILS))
3991 fprintf (dump_file, " into ");
3992 print_gimple_stmt (dump_file, stmt, 0, 0);
3996 /* Reassociate expressions in basic block BB and its post-dominator as
3997 children. */
3999 static void
4000 reassociate_bb (basic_block bb)
4002 gimple_stmt_iterator gsi;
4003 basic_block son;
4004 gimple stmt = last_stmt (bb);
4006 if (stmt && !gimple_visited_p (stmt))
4007 maybe_optimize_range_tests (stmt);
4009 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4011 stmt = gsi_stmt (gsi);
4013 if (is_gimple_assign (stmt)
4014 && !stmt_could_throw_p (stmt))
4016 tree lhs, rhs1, rhs2;
4017 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4019 /* If this is not a gimple binary expression, there is
4020 nothing for us to do with it. */
4021 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4022 continue;
4024 /* If this was part of an already processed statement,
4025 we don't need to touch it again. */
4026 if (gimple_visited_p (stmt))
4028 /* This statement might have become dead because of previous
4029 reassociations. */
4030 if (has_zero_uses (gimple_get_lhs (stmt)))
4032 gsi_remove (&gsi, true);
4033 release_defs (stmt);
4034 /* We might have removed the last statement above,
4035 which leaves the iterator at the end of the sequence.
4036 In that case reset it to the last statement of the
4037 block; if the block has become empty that is still
4038 the end of the sequence, and then we need to bail
4039 out. */
4040 if (gsi_end_p (gsi))
4042 gsi = gsi_last_bb (bb);
4043 if (gsi_end_p (gsi))
4044 break;
4047 continue;
4050 lhs = gimple_assign_lhs (stmt);
4051 rhs1 = gimple_assign_rhs1 (stmt);
4052 rhs2 = gimple_assign_rhs2 (stmt);
4054 /* For operations other than the bitwise and min/max codes we
4055 cannot reassociate every type. Verify that here. */
4056 if (rhs_code != BIT_IOR_EXPR
4057 && rhs_code != BIT_AND_EXPR
4058 && rhs_code != BIT_XOR_EXPR
4059 && rhs_code != MIN_EXPR
4060 && rhs_code != MAX_EXPR
4061 && (!can_reassociate_p (lhs)
4062 || !can_reassociate_p (rhs1)
4063 || !can_reassociate_p (rhs2)))
4064 continue;
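/* The bitwise and min/max codes above are associative for any type they
   apply to; for the remaining codes can_reassociate_p must agree that
   reordering is safe -- e.g. floating-point addition is presumably only
   accepted under -fassociative-math, and signed integer arithmetic only
   when overflow is defined to wrap.  */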
4066 if (associative_tree_code (rhs_code))
4068 vec<operand_entry_t> ops = vNULL;
4069 tree powi_result = NULL_TREE;
4071 /* There may be no immediate uses left by the time we
4072 get here because we may have eliminated them all. */
4073 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4074 continue;
4076 gimple_set_visited (stmt, true);
4077 linearize_expr_tree (&ops, stmt, true, true);
4078 ops.qsort (sort_by_operand_rank);
4079 optimize_ops_list (rhs_code, &ops);
4080 if (undistribute_ops_list (rhs_code, &ops,
4081 loop_containing_stmt (stmt)))
4083 ops.qsort (sort_by_operand_rank);
4084 optimize_ops_list (rhs_code, &ops);
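/* Illustrative example: undistribution can rewrite a_1*b_2 + a_1*c_3
   within OPS as a_1 * (b_2 + c_3); since that introduces a fresh
   operand, the list is sorted and optimized once more.  */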
4087 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4088 optimize_range_tests (rhs_code, &ops);
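/* Illustrative example (assumed, not checked against every case the
   helper handles): for a BIT_AND_EXPR chain such as a_1 != 0 & a_1 != 1
   the range tests can be merged into a single unsigned comparison of
   the form a_1 > 1.  */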
4090 if (first_pass_instance
4091 && rhs_code == MULT_EXPR
4092 && flag_unsafe_math_optimizations)
4093 powi_result = attempt_builtin_powi (stmt, &ops);
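/* When the powi attempt succeeds, POWI_RESULT names the factored
   product and OPS keeps only the operands it did not consume; the
   three cases below turn STMT into a plain copy of POWI_RESULT, into
   a multiply of the one leftover operand by POWI_RESULT, or rewrite
   the remaining operands and append one extra multiply.  */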
4095 /* If the operand vector is now empty, all operands were
4096 consumed by the __builtin_powi optimization. */
4097 if (ops.length () == 0)
4098 transform_stmt_to_copy (&gsi, stmt, powi_result);
4099 else if (ops.length () == 1)
4101 tree last_op = ops.last ()->op;
4103 if (powi_result)
4104 transform_stmt_to_multiply (&gsi, stmt, last_op,
4105 powi_result);
4106 else
4107 transform_stmt_to_copy (&gsi, stmt, last_op);
4109 else
4111 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4112 int ops_num = ops.length ();
4113 int width = get_reassociation_width (ops_num, rhs_code, mode);
4115 if (dump_file && (dump_flags & TDF_DETAILS))
4116 fprintf (dump_file,
4117 "Width = %d was chosen for reassociation\n", width);
4119 if (width > 1
4120 && ops.length () > 3)
4121 rewrite_expr_tree_parallel (stmt, width, ops);
4122 else
4123 rewrite_expr_tree (stmt, 0, ops, false);
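/* Illustrative example: with a width of 2, a + b + c + d is emitted as
   (a + b) + (c + d) rather than ((a + b) + c) + d, exposing two
   independent additions; the width is presumably derived from the
   target's reassociation-width scheduling hook by
   get_reassociation_width.  */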
4125 /* If we combined some repeated factors into a
4126 __builtin_powi call, multiply that result by the
4127 reassociated operands. */
4128 if (powi_result)
4130 gimple mul_stmt;
4131 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4132 tree target_ssa = make_temp_ssa_name (type, NULL,
4133 "reassocpow");
4134 gimple_set_lhs (stmt, target_ssa);
4135 update_stmt (stmt);
4136 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4137 powi_result,
4138 target_ssa);
4139 gimple_set_location (mul_stmt, gimple_location (stmt));
4140 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4144 ops.release ();
4148 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4149 son;
4150 son = next_dom_son (CDI_POST_DOMINATORS, son))
4151 reassociate_bb (son);
4154 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4155 void debug_ops_vector (vec<operand_entry_t> ops);
4157 /* Dump the operand entry vector OPS to FILE. */
4159 void
4160 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4162 operand_entry_t oe;
4163 unsigned int i;
4165 FOR_EACH_VEC_ELT (ops, i, oe)
4167 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4168 print_generic_expr (file, oe->op, 0);
4172 /* Dump the operand entry vector OPS to STDERR. */
4174 DEBUG_FUNCTION void
4175 debug_ops_vector (vec<operand_entry_t> ops)
4177 dump_ops_vector (stderr, ops);
4180 static void
4181 do_reassoc (void)
4183 break_up_subtract_bb (ENTRY_BLOCK_PTR);
4184 reassociate_bb (EXIT_BLOCK_PTR);
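/* break_up_subtract_bb recurses over the dominator tree from the entry
   block, turning subtractions into addition of a negate where that
   helps reassociation, while reassociate_bb recurses over the
   post-dominator tree from the exit block, so each block is handled
   before the blocks it post-dominates.  */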
4187 /* Initialize the reassociation pass. */
4189 static void
4190 init_reassoc (void)
4192 int i;
4193 long rank = 2;
4194 int *bbs = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
4196 /* Find the loops, so that we can prevent moving calculations in
4197 them. */
4198 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4200 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4202 operand_entry_pool = create_alloc_pool ("operand entry pool",
4203 sizeof (struct operand_entry), 30);
4204 next_operand_entry_id = 0;
4206 /* Reverse post order (RPO) gives an ordering in which blocks in
4207 deeper loops come later, so they receive higher ranks below. */
4208 pre_and_rev_post_order_compute (NULL, bbs, false);
4209 bb_rank = XCNEWVEC (long, last_basic_block);
4210 operand_rank = pointer_map_create ();
4212 /* Give each default definition a distinct rank. This includes
4213 parameters and the static chain. Walk backwards over all
4214 SSA names so that we get proper rank ordering according
4215 to tree_swap_operands_p. */
4216 for (i = num_ssa_names - 1; i > 0; --i)
4218 tree name = ssa_name (i);
4219 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4220 insert_operand_rank (name, ++rank);
4223 /* Set up rank for each BB */
4224 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
4225 bb_rank[bbs[i]] = ++rank << 16;
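/* Keeping the per-block base rank in the high 16 bits leaves room for
   the small increments handed out to individual SSA names within a
   block, so an operand's rank can never overtake the base rank of a
   block that comes later in the ordering computed above.  */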
4227 free (bbs);
4228 calculate_dominance_info (CDI_POST_DOMINATORS);
4229 plus_negates = vNULL;
4232 /* Cleanup after the reassociation pass, and print stats if
4233 requested. */
4235 static void
4236 fini_reassoc (void)
4238 statistics_counter_event (cfun, "Linearized",
4239 reassociate_stats.linearized);
4240 statistics_counter_event (cfun, "Constants eliminated",
4241 reassociate_stats.constants_eliminated);
4242 statistics_counter_event (cfun, "Ops eliminated",
4243 reassociate_stats.ops_eliminated);
4244 statistics_counter_event (cfun, "Statements rewritten",
4245 reassociate_stats.rewritten);
4246 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4247 reassociate_stats.pows_encountered);
4248 statistics_counter_event (cfun, "Built-in powi calls created",
4249 reassociate_stats.pows_created);
4251 pointer_map_destroy (operand_rank);
4252 free_alloc_pool (operand_entry_pool);
4253 free (bb_rank);
4254 plus_negates.release ();
4255 free_dominance_info (CDI_POST_DOMINATORS);
4256 loop_optimizer_finalize ();
4259 /* Gate and execute functions for Reassociation. */
4261 static unsigned int
4262 execute_reassoc (void)
4264 init_reassoc ();
4266 do_reassoc ();
4267 repropagate_negates ();
4269 fini_reassoc ();
4270 return 0;
4273 static bool
4274 gate_tree_ssa_reassoc (void)
4276 return flag_tree_reassoc != 0;
4279 struct gimple_opt_pass pass_reassoc =
4282 GIMPLE_PASS,
4283 "reassoc", /* name */
4284 OPTGROUP_NONE, /* optinfo_flags */
4285 gate_tree_ssa_reassoc, /* gate */
4286 execute_reassoc, /* execute */
4287 NULL, /* sub */
4288 NULL, /* next */
4289 0, /* static_pass_number */
4290 TV_TREE_REASSOC, /* tv_id */
4291 PROP_cfg | PROP_ssa, /* properties_required */
4292 0, /* properties_provided */
4293 0, /* properties_destroyed */
4294 0, /* todo_flags_start */
4295 TODO_verify_ssa
4296 | TODO_verify_flow
4297 | TODO_ggc_collect /* todo_flags_finish */