1 /* Reassociation for trees.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
24 #include "hash-table.h"
29 #include "basic-block.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-inline.h"
33 #include "gimple-iterator.h"
34 #include "gimplify-me.h"
35 #include "gimple-ssa.h"
37 #include "tree-phinodes.h"
38 #include "ssa-iterators.h"
39 #include "tree-ssanames.h"
40 #include "tree-ssa-loop-niter.h"
41 #include "tree-ssa-loop.h"
44 #include "tree-iterator.h"
45 #include "tree-pass.h"
46 #include "alloc-pool.h"
48 #include "langhooks.h"
49 #include "pointer-set.h"
54 #include "diagnostic-core.h"
56 /* This is a simple global reassociation pass. It is, in part, based
57 on the LLVM pass of the same name (They do some things more/less
58 than we do, in different orders, etc).
60 It consists of five steps:
62 1. Breaking up subtract operations into addition + negate, where
63 it would promote the reassociation of adds.
65 2. Left linearization of the expression trees, so that (A+B)+(C+D)
66 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
67 During linearization, we place the operands of the binary
68 expressions into a vector of operand_entry_t
70 3. Optimization of the operand lists, eliminating things like a +
73 3a. Combine repeated factors with the same occurrence counts
74 into a __builtin_powi call that will later be optimized into
75 an optimal number of multiplies.
77 4. Rewrite the expression trees we linearized and optimized so
78 they are in proper rank order.
80 5. Repropagate negates, as nothing else will clean it up ATM.
82 A bit of theory on #4, since nobody seems to write anything down
83 about why it makes sense to do it the way they do it:
85 We could do this much nicer theoretically, but don't (for reasons
86 explained after how to do it theoretically nice :P).
88 In order to promote the most redundancy elimination, you want
89 binary expressions whose operands are the same rank (or
90 preferably, the same value) exposed to the redundancy eliminator,
91 for possible elimination.
93 So the way to do this if we really cared, is to build the new op
94 tree from the leaves to the roots, merging as you go, and putting the
95 new op on the end of the worklist, until you are left with one
96 thing on the worklist.
98 IE if you have to rewrite the following set of operands (listed with
99 rank in parentheses), with opcode PLUS_EXPR:
101 a (1), b (1), c (1), d (2), e (2)
104 We start with our merge worklist empty, and the ops list with all of
107 You want to first merge all leaves of the same rank, as much as
110 So first build a binary op of
112 mergetmp = a + b, and put "mergetmp" on the merge worklist.
114 Because there is no three operand form of PLUS_EXPR, c is not going to
115 be exposed to redundancy elimination as a rank 1 operand.
117 So you might as well throw it on the merge worklist (you could also
118 consider it to now be a rank two operand, and merge it with d and e,
119 but in this case, you then have evicted e from a binary op. So at
120 least in this situation, you can't win.)
122 Then build a binary op of d + e
125 and put mergetmp2 on the merge worklist.
127 so merge worklist = {mergetmp, c, mergetmp2}
129 Continue building binary ops of these operations until you have only
130 one operation left on the worklist.
135 mergetmp3 = mergetmp + c
137 worklist = {mergetmp2, mergetmp3}
139 mergetmp4 = mergetmp2 + mergetmp3
141 worklist = {mergetmp4}
143 because we have one operation left, we can now just set the original
144 statement equal to the result of that operation.
146 This will at least expose a + b and d + e to redundancy elimination
147 as binary operations.
149 For extra points, you can reuse the old statements to build the
150 mergetmps, since you shouldn't run out.
152 So why don't we do this?
154 Because it's expensive, and rarely will help. Most trees we are
155 reassociating have 3 or less ops. If they have 2 ops, they already
156 will be written into a nice single binary op. If you have 3 ops, a
157 single simple check suffices to tell you whether the first two are of the
158 same rank. If so, you know to order it
161 newstmt = mergetmp + op3
165 newstmt = mergetmp + op1
167 If all three are of the same rank, you can't expose them all in a
168 single binary operator anyway, so the above is *still* the best you
171 Thus, this is what we do. When we have three ops left, we check to see
172 what order to put them in, and call it a day. As a nod to vector sum
173 reduction, we check if any of the ops are really a phi node that is a
174 destructive update for the associating op, and keep the destructive
175 update together for vector sum reduction recognition. */
182 int constants_eliminated
;
185 int pows_encountered
;
189 /* Operator, rank pair. */
190 typedef struct operand_entry
198 static alloc_pool operand_entry_pool
;
200 /* This is used to assign a unique ID to each struct operand_entry
201 so that qsort results are identical on different hosts. */
202 static int next_operand_entry_id
;
204 /* Starting rank number for a given basic block, so that we can rank
205 operations using unmovable instructions in that BB based on the bb
207 static long *bb_rank
;
209 /* Operand->rank hashtable. */
210 static struct pointer_map_t
*operand_rank
;
213 static long get_rank (tree
);
216 /* Bias amount for loop-carried phis. We want this to be larger than
217 the depth of any reassociation tree we can see, but not larger than
218 the rank difference between two blocks. */
219 #define PHI_LOOP_BIAS (1 << 15)
221 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
222 an innermost loop, and the phi has only a single use which is inside
223 the loop, then the rank is the block rank of the loop latch plus an
224 extra bias for the loop-carried dependence. This causes expressions
225 calculated into an accumulator variable to be independent for each
226 iteration of the loop. If STMT is some other phi, the rank is the
227 block rank of its containing block. */
229 phi_rank (gimple stmt
)
231 basic_block bb
= gimple_bb (stmt
);
232 struct loop
*father
= bb
->loop_father
;
238 /* We only care about real loops (those with a latch). */
240 return bb_rank
[bb
->index
];
242 /* Interesting phis must be in headers of innermost loops. */
243 if (bb
!= father
->header
245 return bb_rank
[bb
->index
];
247 /* Ignore virtual SSA_NAMEs. */
248 res
= gimple_phi_result (stmt
);
249 if (virtual_operand_p (res
))
250 return bb_rank
[bb
->index
];
252 /* The phi definition must have a single use, and that use must be
253 within the loop. Otherwise this isn't an accumulator pattern. */
254 if (!single_imm_use (res
, &use
, &use_stmt
)
255 || gimple_bb (use_stmt
)->loop_father
!= father
)
256 return bb_rank
[bb
->index
];
258 /* Look for phi arguments from within the loop. If found, bias this phi. */
259 for (i
= 0; i
< gimple_phi_num_args (stmt
); i
++)
261 tree arg
= gimple_phi_arg_def (stmt
, i
);
262 if (TREE_CODE (arg
) == SSA_NAME
263 && !SSA_NAME_IS_DEFAULT_DEF (arg
))
265 gimple def_stmt
= SSA_NAME_DEF_STMT (arg
);
266 if (gimple_bb (def_stmt
)->loop_father
== father
)
267 return bb_rank
[father
->latch
->index
] + PHI_LOOP_BIAS
;
271 /* Must be an uninteresting phi. */
272 return bb_rank
[bb
->index
];
275 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
276 loop-carried dependence of an innermost loop, return TRUE; else
279 loop_carried_phi (tree exp
)
284 if (TREE_CODE (exp
) != SSA_NAME
285 || SSA_NAME_IS_DEFAULT_DEF (exp
))
288 phi_stmt
= SSA_NAME_DEF_STMT (exp
);
290 if (gimple_code (SSA_NAME_DEF_STMT (exp
)) != GIMPLE_PHI
)
293 /* Non-loop-carried phis have block rank. Loop-carried phis have
294 an additional bias added in. If this phi doesn't have block rank,
295 it's biased and should not be propagated. */
296 block_rank
= bb_rank
[gimple_bb (phi_stmt
)->index
];
298 if (phi_rank (phi_stmt
) != block_rank
)
304 /* Return the maximum of RANK and the rank that should be propagated
305 from expression OP. For most operands, this is just the rank of OP.
306 For loop-carried phis, the value is zero to avoid undoing the bias
307 in favor of the phi. */
309 propagate_rank (long rank
, tree op
)
313 if (loop_carried_phi (op
))
316 op_rank
= get_rank (op
);
318 return MAX (rank
, op_rank
);
321 /* Look up the operand rank structure for expression E. */
324 find_operand_rank (tree e
)
326 void **slot
= pointer_map_contains (operand_rank
, e
);
327 return slot
? (long) (intptr_t) *slot
: -1;
330 /* Insert {E,RANK} into the operand rank hashtable. */
333 insert_operand_rank (tree e
, long rank
)
336 gcc_assert (rank
> 0);
337 slot
= pointer_map_insert (operand_rank
, e
);
339 *slot
= (void *) (intptr_t) rank
;
342 /* Given an expression E, return the rank of the expression. */
347 /* Constants have rank 0. */
348 if (is_gimple_min_invariant (e
))
351 /* SSA_NAME's have the rank of the expression they are the result
353 For globals and uninitialized values, the rank is 0.
354 For function arguments, use the pre-setup rank.
355 For PHI nodes, stores, asm statements, etc, we use the rank of
357 For simple operations, the rank is the maximum rank of any of
358 its operands, or the bb_rank, whichever is less.
359 I make no claims that this is optimal, however, it gives good
362 /* We make an exception to the normal ranking system to break
363 dependences of accumulator variables in loops. Suppose we
364 have a simple one-block loop containing:
371 As shown, each iteration of the calculation into x is fully
372 dependent upon the iteration before it. We would prefer to
373 see this in the form:
380 If the loop is unrolled, the calculations of b and c from
381 different iterations can be interleaved.
383 To obtain this result during reassociation, we bias the rank
384 of the phi definition x_1 upward, when it is recognized as an
385 accumulator pattern. The artificial rank causes it to be
386 added last, providing the desired independence. */
388 if (TREE_CODE (e
) == SSA_NAME
)
395 if (SSA_NAME_IS_DEFAULT_DEF (e
))
396 return find_operand_rank (e
);
398 stmt
= SSA_NAME_DEF_STMT (e
);
399 if (gimple_code (stmt
) == GIMPLE_PHI
)
400 return phi_rank (stmt
);
402 if (!is_gimple_assign (stmt
)
403 || gimple_vdef (stmt
))
404 return bb_rank
[gimple_bb (stmt
)->index
];
406 /* If we already have a rank for this expression, use that. */
407 rank
= find_operand_rank (e
);
411 /* Otherwise, find the maximum rank for the operands. As an
412 exception, remove the bias from loop-carried phis when propagating
413 the rank so that dependent operations are not also biased. */
415 if (gimple_assign_single_p (stmt
))
417 tree rhs
= gimple_assign_rhs1 (stmt
);
418 n
= TREE_OPERAND_LENGTH (rhs
);
420 rank
= propagate_rank (rank
, rhs
);
423 for (i
= 0; i
< n
; i
++)
425 op
= TREE_OPERAND (rhs
, i
);
428 rank
= propagate_rank (rank
, op
);
434 n
= gimple_num_ops (stmt
);
435 for (i
= 1; i
< n
; i
++)
437 op
= gimple_op (stmt
, i
);
439 rank
= propagate_rank (rank
, op
);
443 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
445 fprintf (dump_file
, "Rank for ");
446 print_generic_expr (dump_file
, e
, 0);
447 fprintf (dump_file
, " is %ld\n", (rank
+ 1));
450 /* Note the rank in the hashtable so we don't recompute it. */
451 insert_operand_rank (e
, (rank
+ 1));
455 /* Globals, etc, are rank 0 */
460 /* We want integer ones to end up last no matter what, since they are
461 the ones we can do the most with. */
462 #define INTEGER_CONST_TYPE 1 << 3
463 #define FLOAT_CONST_TYPE 1 << 2
464 #define OTHER_CONST_TYPE 1 << 1
466 /* Classify an invariant tree into integer, float, or other, so that
467 we can sort them to be near other constants of the same type. */
469 constant_type (tree t
)
471 if (INTEGRAL_TYPE_P (TREE_TYPE (t
)))
472 return INTEGER_CONST_TYPE
;
473 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t
)))
474 return FLOAT_CONST_TYPE
;
476 return OTHER_CONST_TYPE
;
479 /* qsort comparison function to sort operand entries PA and PB by rank
480 so that the sorted array is ordered by rank in decreasing order. */
482 sort_by_operand_rank (const void *pa
, const void *pb
)
484 const operand_entry_t oea
= *(const operand_entry_t
*)pa
;
485 const operand_entry_t oeb
= *(const operand_entry_t
*)pb
;
487 /* It's nicer for optimize_expression if constants that are likely
488 to fold when added/multiplied//whatever are put next to each
489 other. Since all constants have rank 0, order them by type. */
490 if (oeb
->rank
== 0 && oea
->rank
== 0)
492 if (constant_type (oeb
->op
) != constant_type (oea
->op
))
493 return constant_type (oeb
->op
) - constant_type (oea
->op
);
495 /* To make sorting result stable, we use unique IDs to determine
497 return oeb
->id
- oea
->id
;
500 /* Lastly, make sure the versions that are the same go next to each
501 other. We use SSA_NAME_VERSION because it's stable. */
502 if ((oeb
->rank
- oea
->rank
== 0)
503 && TREE_CODE (oea
->op
) == SSA_NAME
504 && TREE_CODE (oeb
->op
) == SSA_NAME
)
506 if (SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
507 return SSA_NAME_VERSION (oeb
->op
) - SSA_NAME_VERSION (oea
->op
);
509 return oeb
->id
- oea
->id
;
512 if (oeb
->rank
!= oea
->rank
)
513 return oeb
->rank
- oea
->rank
;
515 return oeb
->id
- oea
->id
;
518 /* Add an operand entry to *OPS for the tree operand OP. */
521 add_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
)
523 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
526 oe
->rank
= get_rank (op
);
527 oe
->id
= next_operand_entry_id
++;
532 /* Add an operand entry to *OPS for the tree operand OP with repeat
536 add_repeat_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
,
537 HOST_WIDE_INT repeat
)
539 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
542 oe
->rank
= get_rank (op
);
543 oe
->id
= next_operand_entry_id
++;
547 reassociate_stats
.pows_encountered
++;
550 /* Return true if STMT is reassociable operation containing a binary
551 operation with tree code CODE, and is inside LOOP. */
554 is_reassociable_op (gimple stmt
, enum tree_code code
, struct loop
*loop
)
556 basic_block bb
= gimple_bb (stmt
);
558 if (gimple_bb (stmt
) == NULL
)
561 if (!flow_bb_inside_loop_p (loop
, bb
))
564 if (is_gimple_assign (stmt
)
565 && gimple_assign_rhs_code (stmt
) == code
566 && has_single_use (gimple_assign_lhs (stmt
)))
573 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
574 operand of the negate operation. Otherwise, return NULL. */
577 get_unary_op (tree name
, enum tree_code opcode
)
579 gimple stmt
= SSA_NAME_DEF_STMT (name
);
581 if (!is_gimple_assign (stmt
))
584 if (gimple_assign_rhs_code (stmt
) == opcode
)
585 return gimple_assign_rhs1 (stmt
);
589 /* If CURR and LAST are a pair of ops that OPCODE allows us to
590 eliminate through equivalences, do so, remove them from OPS, and
591 return true. Otherwise, return false. */
594 eliminate_duplicate_pair (enum tree_code opcode
,
595 vec
<operand_entry_t
> *ops
,
598 operand_entry_t curr
,
599 operand_entry_t last
)
602 /* If we have two of the same op, and the opcode is & |, min, or max,
603 we can eliminate one of them.
604 If we have two of the same op, and the opcode is ^, we can
605 eliminate both of them. */
607 if (last
&& last
->op
== curr
->op
)
615 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
617 fprintf (dump_file
, "Equivalence: ");
618 print_generic_expr (dump_file
, curr
->op
, 0);
619 fprintf (dump_file
, " [&|minmax] ");
620 print_generic_expr (dump_file
, last
->op
, 0);
621 fprintf (dump_file
, " -> ");
622 print_generic_stmt (dump_file
, last
->op
, 0);
625 ops
->ordered_remove (i
);
626 reassociate_stats
.ops_eliminated
++;
631 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
633 fprintf (dump_file
, "Equivalence: ");
634 print_generic_expr (dump_file
, curr
->op
, 0);
635 fprintf (dump_file
, " ^ ");
636 print_generic_expr (dump_file
, last
->op
, 0);
637 fprintf (dump_file
, " -> nothing\n");
640 reassociate_stats
.ops_eliminated
+= 2;
642 if (ops
->length () == 2)
645 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (last
->op
)));
650 ops
->ordered_remove (i
-1);
651 ops
->ordered_remove (i
-1);
663 static vec
<tree
> plus_negates
;
665 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
666 expression, look in OPS for a corresponding positive operation to cancel
667 it out. If we find one, remove the other from OPS, replace
668 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
672 eliminate_plus_minus_pair (enum tree_code opcode
,
673 vec
<operand_entry_t
> *ops
,
674 unsigned int currindex
,
675 operand_entry_t curr
)
682 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
685 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
686 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
687 if (negateop
== NULL_TREE
&& notop
== NULL_TREE
)
690 /* Any non-negated version will have a rank that is one less than
691 the current rank. So once we hit those ranks, if we don't find
694 for (i
= currindex
+ 1;
695 ops
->iterate (i
, &oe
)
696 && oe
->rank
>= curr
->rank
- 1 ;
699 if (oe
->op
== negateop
)
702 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
704 fprintf (dump_file
, "Equivalence: ");
705 print_generic_expr (dump_file
, negateop
, 0);
706 fprintf (dump_file
, " + -");
707 print_generic_expr (dump_file
, oe
->op
, 0);
708 fprintf (dump_file
, " -> 0\n");
711 ops
->ordered_remove (i
);
712 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (oe
->op
)));
713 ops
->ordered_remove (currindex
);
714 reassociate_stats
.ops_eliminated
++;
718 else if (oe
->op
== notop
)
720 tree op_type
= TREE_TYPE (oe
->op
);
722 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
724 fprintf (dump_file
, "Equivalence: ");
725 print_generic_expr (dump_file
, notop
, 0);
726 fprintf (dump_file
, " + ~");
727 print_generic_expr (dump_file
, oe
->op
, 0);
728 fprintf (dump_file
, " -> -1\n");
731 ops
->ordered_remove (i
);
732 add_to_ops_vec (ops
, build_int_cst_type (op_type
, -1));
733 ops
->ordered_remove (currindex
);
734 reassociate_stats
.ops_eliminated
++;
740 /* CURR->OP is a negate expr in a plus expr: save it for later
741 inspection in repropagate_negates(). */
742 if (negateop
!= NULL_TREE
)
743 plus_negates
.safe_push (curr
->op
);
748 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
749 bitwise not expression, look in OPS for a corresponding operand to
750 cancel it out. If we find one, remove the other from OPS, replace
751 OPS[CURRINDEX] with 0, and return true. Otherwise, return
755 eliminate_not_pairs (enum tree_code opcode
,
756 vec
<operand_entry_t
> *ops
,
757 unsigned int currindex
,
758 operand_entry_t curr
)
764 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
765 || TREE_CODE (curr
->op
) != SSA_NAME
)
768 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
769 if (notop
== NULL_TREE
)
772 /* Any non-not version will have a rank that is one less than
773 the current rank. So once we hit those ranks, if we don't find
776 for (i
= currindex
+ 1;
777 ops
->iterate (i
, &oe
)
778 && oe
->rank
>= curr
->rank
- 1;
783 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
785 fprintf (dump_file
, "Equivalence: ");
786 print_generic_expr (dump_file
, notop
, 0);
787 if (opcode
== BIT_AND_EXPR
)
788 fprintf (dump_file
, " & ~");
789 else if (opcode
== BIT_IOR_EXPR
)
790 fprintf (dump_file
, " | ~");
791 print_generic_expr (dump_file
, oe
->op
, 0);
792 if (opcode
== BIT_AND_EXPR
)
793 fprintf (dump_file
, " -> 0\n");
794 else if (opcode
== BIT_IOR_EXPR
)
795 fprintf (dump_file
, " -> -1\n");
798 if (opcode
== BIT_AND_EXPR
)
799 oe
->op
= build_zero_cst (TREE_TYPE (oe
->op
));
800 else if (opcode
== BIT_IOR_EXPR
)
801 oe
->op
= build_low_bits_mask (TREE_TYPE (oe
->op
),
802 TYPE_PRECISION (TREE_TYPE (oe
->op
)));
804 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
806 ops
->quick_push (oe
);
814 /* Use constant value that may be present in OPS to try to eliminate
815 operands. Note that this function is only really used when we've
816 eliminated ops for other reasons, or merged constants. Across
817 single statements, fold already does all of this, plus more. There
818 is little point in duplicating logic, so I've only included the
819 identities that I could ever construct testcases to trigger. */
822 eliminate_using_constants (enum tree_code opcode
,
823 vec
<operand_entry_t
> *ops
)
825 operand_entry_t oelast
= ops
->last ();
826 tree type
= TREE_TYPE (oelast
->op
);
828 if (oelast
->rank
== 0
829 && (INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
834 if (integer_zerop (oelast
->op
))
836 if (ops
->length () != 1)
838 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
839 fprintf (dump_file
, "Found & 0, removing all other ops\n");
841 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
844 ops
->quick_push (oelast
);
848 else if (integer_all_onesp (oelast
->op
))
850 if (ops
->length () != 1)
852 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
853 fprintf (dump_file
, "Found & -1, removing\n");
855 reassociate_stats
.ops_eliminated
++;
860 if (integer_all_onesp (oelast
->op
))
862 if (ops
->length () != 1)
864 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
865 fprintf (dump_file
, "Found | -1, removing all other ops\n");
867 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
870 ops
->quick_push (oelast
);
874 else if (integer_zerop (oelast
->op
))
876 if (ops
->length () != 1)
878 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
879 fprintf (dump_file
, "Found | 0, removing\n");
881 reassociate_stats
.ops_eliminated
++;
886 if (integer_zerop (oelast
->op
)
887 || (FLOAT_TYPE_P (type
)
888 && !HONOR_NANS (TYPE_MODE (type
))
889 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type
))
890 && real_zerop (oelast
->op
)))
892 if (ops
->length () != 1)
894 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
895 fprintf (dump_file
, "Found * 0, removing all other ops\n");
897 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
899 ops
->quick_push (oelast
);
903 else if (integer_onep (oelast
->op
)
904 || (FLOAT_TYPE_P (type
)
905 && !HONOR_SNANS (TYPE_MODE (type
))
906 && real_onep (oelast
->op
)))
908 if (ops
->length () != 1)
910 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
911 fprintf (dump_file
, "Found * 1, removing\n");
913 reassociate_stats
.ops_eliminated
++;
921 if (integer_zerop (oelast
->op
)
922 || (FLOAT_TYPE_P (type
)
923 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
924 && fold_real_zero_addition_p (type
, oelast
->op
,
925 opcode
== MINUS_EXPR
)))
927 if (ops
->length () != 1)
929 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
930 fprintf (dump_file
, "Found [|^+] 0, removing\n");
932 reassociate_stats
.ops_eliminated
++;
944 static void linearize_expr_tree (vec
<operand_entry_t
> *, gimple
,
947 /* Structure for tracking and counting operands. */
948 typedef struct oecount_s
{
951 enum tree_code oecode
;
956 /* The heap for the oecount hashtable and the sorted list of operands. */
957 static vec
<oecount
> cvec
;
960 /* Oecount hashtable helpers. */
962 struct oecount_hasher
: typed_noop_remove
<void>
964 /* Note that this hash table stores integers, not pointers.
965 So, observe the casting in the member functions. */
966 typedef void value_type
;
967 typedef void compare_type
;
968 static inline hashval_t
hash (const value_type
*);
969 static inline bool equal (const value_type
*, const compare_type
*);
972 /* Hash function for oecount. */
975 oecount_hasher::hash (const value_type
*p
)
977 const oecount
*c
= &cvec
[(size_t)p
- 42];
978 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
981 /* Comparison function for oecount. */
984 oecount_hasher::equal (const value_type
*p1
, const compare_type
*p2
)
986 const oecount
*c1
= &cvec
[(size_t)p1
- 42];
987 const oecount
*c2
= &cvec
[(size_t)p2
- 42];
988 return (c1
->oecode
== c2
->oecode
989 && c1
->op
== c2
->op
);
992 /* Comparison function for qsort sorting oecount elements by count. */
995 oecount_cmp (const void *p1
, const void *p2
)
997 const oecount
*c1
= (const oecount
*)p1
;
998 const oecount
*c2
= (const oecount
*)p2
;
999 if (c1
->cnt
!= c2
->cnt
)
1000 return c1
->cnt
- c2
->cnt
;
1002 /* If counts are identical, use unique IDs to stabilize qsort. */
1003 return c1
->id
- c2
->id
;
1006 /* Return TRUE iff STMT represents a builtin call that raises OP
1007 to some exponent. */
1010 stmt_is_power_of_op (gimple stmt
, tree op
)
1014 if (!is_gimple_call (stmt
))
1017 fndecl
= gimple_call_fndecl (stmt
);
1020 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
1023 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1025 CASE_FLT_FN (BUILT_IN_POW
):
1026 CASE_FLT_FN (BUILT_IN_POWI
):
1027 return (operand_equal_p (gimple_call_arg (stmt
, 0), op
, 0));
1034 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1035 in place and return the result. Assumes that stmt_is_power_of_op
1036 was previously called for STMT and returned TRUE. */
1038 static HOST_WIDE_INT
1039 decrement_power (gimple stmt
)
1041 REAL_VALUE_TYPE c
, cint
;
1042 HOST_WIDE_INT power
;
1045 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1047 CASE_FLT_FN (BUILT_IN_POW
):
1048 arg1
= gimple_call_arg (stmt
, 1);
1049 c
= TREE_REAL_CST (arg1
);
1050 power
= real_to_integer (&c
) - 1;
1051 real_from_integer (&cint
, VOIDmode
, power
, 0, 0);
1052 gimple_call_set_arg (stmt
, 1, build_real (TREE_TYPE (arg1
), cint
));
1055 CASE_FLT_FN (BUILT_IN_POWI
):
1056 arg1
= gimple_call_arg (stmt
, 1);
1057 power
= TREE_INT_CST_LOW (arg1
) - 1;
1058 gimple_call_set_arg (stmt
, 1, build_int_cst (TREE_TYPE (arg1
), power
));
1066 /* Find the single immediate use of STMT's LHS, and replace it
1067 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1068 replace *DEF with OP as well. */
1071 propagate_op_to_single_use (tree op
, gimple stmt
, tree
*def
)
1076 gimple_stmt_iterator gsi
;
1078 if (is_gimple_call (stmt
))
1079 lhs
= gimple_call_lhs (stmt
);
1081 lhs
= gimple_assign_lhs (stmt
);
1083 gcc_assert (has_single_use (lhs
));
1084 single_imm_use (lhs
, &use
, &use_stmt
);
1088 if (TREE_CODE (op
) != SSA_NAME
)
1089 update_stmt (use_stmt
);
1090 gsi
= gsi_for_stmt (stmt
);
1091 unlink_stmt_vdef (stmt
);
1092 gsi_remove (&gsi
, true);
1093 release_defs (stmt
);
1096 /* Walks the linear chain with result *DEF searching for an operation
1097 with operand OP and code OPCODE removing that from the chain. *DEF
1098 is updated if there is only one operand but no operation left. */
1101 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
1103 gimple stmt
= SSA_NAME_DEF_STMT (*def
);
1109 if (opcode
== MULT_EXPR
1110 && stmt_is_power_of_op (stmt
, op
))
1112 if (decrement_power (stmt
) == 1)
1113 propagate_op_to_single_use (op
, stmt
, def
);
1117 name
= gimple_assign_rhs1 (stmt
);
1119 /* If this is the operation we look for and one of the operands
1120 is ours simply propagate the other operand into the stmts
1122 if (gimple_assign_rhs_code (stmt
) == opcode
1124 || gimple_assign_rhs2 (stmt
) == op
))
1127 name
= gimple_assign_rhs2 (stmt
);
1128 propagate_op_to_single_use (name
, stmt
, def
);
1132 /* We might have a multiply of two __builtin_pow* calls, and
1133 the operand might be hiding in the rightmost one. */
1134 if (opcode
== MULT_EXPR
1135 && gimple_assign_rhs_code (stmt
) == opcode
1136 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
1137 && has_single_use (gimple_assign_rhs2 (stmt
)))
1139 gimple stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1140 if (stmt_is_power_of_op (stmt2
, op
))
1142 if (decrement_power (stmt2
) == 1)
1143 propagate_op_to_single_use (op
, stmt2
, def
);
1148 /* Continue walking the chain. */
1149 gcc_assert (name
!= op
1150 && TREE_CODE (name
) == SSA_NAME
);
1151 stmt
= SSA_NAME_DEF_STMT (name
);
1156 /* Returns true if statement S1 dominates statement S2. Like
1157 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1160 reassoc_stmt_dominates_stmt_p (gimple s1
, gimple s2
)
1162 basic_block bb1
= gimple_bb (s1
), bb2
= gimple_bb (s2
);
1164 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1165 SSA_NAME. Assume it lives at the beginning of function and
1166 thus dominates everything. */
1167 if (!bb1
|| s1
== s2
)
1170 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1176 /* PHIs in the same basic block are assumed to be
1177 executed all in parallel, if only one stmt is a PHI,
1178 it dominates the other stmt in the same basic block. */
1179 if (gimple_code (s1
) == GIMPLE_PHI
)
1182 if (gimple_code (s2
) == GIMPLE_PHI
)
1185 gcc_assert (gimple_uid (s1
) && gimple_uid (s2
));
1187 if (gimple_uid (s1
) < gimple_uid (s2
))
1190 if (gimple_uid (s1
) > gimple_uid (s2
))
1193 gimple_stmt_iterator gsi
= gsi_for_stmt (s1
);
1194 unsigned int uid
= gimple_uid (s1
);
1195 for (gsi_next (&gsi
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1197 gimple s
= gsi_stmt (gsi
);
1198 if (gimple_uid (s
) != uid
)
1207 return dominated_by_p (CDI_DOMINATORS
, bb2
, bb1
);
1210 /* Insert STMT after INSERT_POINT. */
1213 insert_stmt_after (gimple stmt
, gimple insert_point
)
1215 gimple_stmt_iterator gsi
;
1218 if (gimple_code (insert_point
) == GIMPLE_PHI
)
1219 bb
= gimple_bb (insert_point
);
1220 else if (!stmt_ends_bb_p (insert_point
))
1222 gsi
= gsi_for_stmt (insert_point
);
1223 gimple_set_uid (stmt
, gimple_uid (insert_point
));
1224 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1228 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1229 thus if it must end a basic block, it should be a call that can
1230 throw, or some assignment that can throw. If it throws, the LHS
1231 of it will not be initialized though, so only valid places using
1232 the SSA_NAME should be dominated by the fallthru edge. */
1233 bb
= find_fallthru_edge (gimple_bb (insert_point
)->succs
)->dest
;
1234 gsi
= gsi_after_labels (bb
);
1235 if (gsi_end_p (gsi
))
1237 gimple_stmt_iterator gsi2
= gsi_last_bb (bb
);
1238 gimple_set_uid (stmt
,
1239 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1242 gimple_set_uid (stmt
, gimple_uid (gsi_stmt (gsi
)));
1243 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1246 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1247 the result. Places the statement after the definition of either
1248 OP1 or OP2. Returns the new statement. */
/* NOTE(review): this region is a lossy line-split extraction of GCC's
   tree-ssa-reassoc.c; statements are broken across lines and some
   original lines are absent (visible from gaps in the embedded line
   numbers).  Code is left byte-identical; only comments were added.  */
1251 build_and_add_sum (tree type
, tree op1
, tree op2
, enum tree_code opcode
)
/* Defining statements of OP1/OP2 when they are SSA names; NULL otherwise.  */
1253 gimple op1def
= NULL
, op2def
= NULL
;
1254 gimple_stmt_iterator gsi
;
1258 /* Create the addition statement. */
1259 op
= make_ssa_name (type
, NULL
);
1260 sum
= gimple_build_assign_with_ops (opcode
, op
, op1
, op2
);
1262 /* Find an insertion place and insert. */
1263 if (TREE_CODE (op1
) == SSA_NAME
)
1264 op1def
= SSA_NAME_DEF_STMT (op1
);
1265 if (TREE_CODE (op2
) == SSA_NAME
)
1266 op2def
= SSA_NAME_DEF_STMT (op2
);
/* Neither operand has a real defining statement: insert at the start of
   the function, after any labels in the entry block's single successor.  */
1267 if ((!op1def
|| gimple_nop_p (op1def
))
1268 && (!op2def
|| gimple_nop_p (op2def
)))
1270 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR
));
1271 if (gsi_end_p (gsi
))
1273 gimple_stmt_iterator gsi2
1274 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR
));
1275 gimple_set_uid (sum
,
1276 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1279 gimple_set_uid (sum
, gimple_uid (gsi_stmt (gsi
)));
1280 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
/* Otherwise pick the later of the two defining statements (the one
   dominated by the other) and insert SUM right after it.  */
1284 gimple insert_point
;
1285 if ((!op1def
|| gimple_nop_p (op1def
))
1286 || (op2def
&& !gimple_nop_p (op2def
)
1287 && reassoc_stmt_dominates_stmt_p (op1def
, op2def
)))
1288 insert_point
= op2def
;
1290 insert_point
= op1def
;
1291 insert_stmt_after (sum
, insert_point
);
1298 /* Perform un-distribution of divisions and multiplications.
1299 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1300 to (A + B) / X for real X.
1302 The algorithm is organized as follows.
1304 - First we walk the addition chain *OPS looking for summands that
1305 are defined by a multiplication or a real division. This results
1306 in the candidates bitmap with relevant indices into *OPS.
1308 - Second we build the chains of multiplications or divisions for
1309 these candidates, counting the number of occurrences of (operand, code)
1310 pairs in all of the candidates chains.
1312 - Third we sort the (operand, code) pairs by number of occurrence and
1313 process them starting with the pair with the most uses.
1315 * For each such pair we walk the candidates again to build a
1316 second candidate bitmap noting all multiplication/division chains
1317 that have at least one occurrence of (operand, code).
1319 * We build an alternate addition chain only covering these
1320 candidates with one (operand, code) operation removed from their
1321 multiplication/division chain.
1323 * The first candidate gets replaced by the alternate addition chain
1324 multiplied/divided by the operand.
1326 * All candidate chains get disabled for further processing and
1327 processing of (operand, code) pairs continues.
1329 The alternate addition chains built are re-processed by the main
1330 reassociation algorithm which allows optimizing a * x * y + b * y * x
1331 to (a + b ) * x * y in one invocation of the reassociation pass. */
/* NOTE(review): lossy line-split extraction -- many original lines are
   missing; code left byte-identical, comments only added.  */
1334 undistribute_ops_list (enum tree_code opcode
,
1335 vec
<operand_entry_t
> *ops
, struct loop
*loop
)
1337 unsigned int length
= ops
->length ();
1338 operand_entry_t oe1
;
1340 sbitmap candidates
, candidates2
;
1341 unsigned nr_candidates
, nr_candidates2
;
1342 sbitmap_iterator sbi0
;
1343 vec
<operand_entry_t
> *subops
;
1344 hash_table
<oecount_hasher
> ctable
;
1345 bool changed
= false;
1346 int next_oecount_id
= 0;
1349 || opcode
!= PLUS_EXPR
)
1352 /* Build a list of candidates to process. */
1353 candidates
= sbitmap_alloc (length
);
1354 bitmap_clear (candidates
);
/* Phase 1: mark every summand defined by a reassociable MULT/RDIV.  */
1356 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1358 enum tree_code dcode
;
1361 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1363 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1364 if (!is_gimple_assign (oe1def
))
1366 dcode
= gimple_assign_rhs_code (oe1def
);
1367 if ((dcode
!= MULT_EXPR
1368 && dcode
!= RDIV_EXPR
)
1369 || !is_reassociable_op (oe1def
, dcode
, loop
))
1372 bitmap_set_bit (candidates
, i
);
1376 if (nr_candidates
< 2)
1378 sbitmap_free (candidates
);
1382 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1384 fprintf (dump_file
, "searching for un-distribute opportunities ");
1385 print_generic_expr (dump_file
,
1386 (*ops
)[bitmap_first_set_bit (candidates
)]->op
, 0);
1387 fprintf (dump_file
, " %d\n", nr_candidates
);
1390 /* Build linearized sub-operand lists and the counting table. */
1393 /* ??? Macro arguments cannot have multi-argument template types in
1394 them. This typedef is needed to workaround that limitation. */
1395 typedef vec
<operand_entry_t
> vec_operand_entry_t_heap
;
1396 subops
= XCNEWVEC (vec_operand_entry_t_heap
, ops
->length ());
1397 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1400 enum tree_code oecode
;
1403 oedef
= SSA_NAME_DEF_STMT ((*ops
)[i
]->op
);
1404 oecode
= gimple_assign_rhs_code (oedef
);
1405 linearize_expr_tree (&subops
[i
], oedef
,
1406 associative_tree_code (oecode
), false);
1408 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1415 c
.id
= next_oecount_id
++;
/* NOTE(review): the +41 / -42 offsets below bias vector indices so they
   can be stored in the hash-table slot as non-null void* values.  */
1418 idx
= cvec
.length () + 41;
1419 slot
= ctable
.find_slot ((void *)idx
, INSERT
);
1422 *slot
= (void *)idx
;
1427 cvec
[(size_t)*slot
- 42].cnt
++;
1433 /* Sort the counting table. */
1434 cvec
.qsort (oecount_cmp
);
1436 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1439 fprintf (dump_file
, "Candidates:\n");
1440 FOR_EACH_VEC_ELT (cvec
, j
, c
)
1442 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1443 c
->oecode
== MULT_EXPR
1444 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1445 print_generic_expr (dump_file
, c
->op
, 0);
1446 fprintf (dump_file
, "\n");
1450 /* Process the (operand, code) pairs in order of most occurrence. */
1451 candidates2
= sbitmap_alloc (length
);
1452 while (!cvec
.is_empty ())
1454 oecount
*c
= &cvec
.last ();
1458 /* Now collect the operands in the outer chain that contain
1459 the common operand in their inner chain. */
1460 bitmap_clear (candidates2
);
1462 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1465 enum tree_code oecode
;
1467 tree op
= (*ops
)[i
]->op
;
1469 /* If we undistributed in this chain already this may be
1471 if (TREE_CODE (op
) != SSA_NAME
)
1474 oedef
= SSA_NAME_DEF_STMT (op
);
1475 oecode
= gimple_assign_rhs_code (oedef
);
1476 if (oecode
!= c
->oecode
)
1479 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1481 if (oe1
->op
== c
->op
)
1483 bitmap_set_bit (candidates2
, i
);
1490 if (nr_candidates2
>= 2)
1492 operand_entry_t oe1
, oe2
;
1494 int first
= bitmap_first_set_bit (candidates2
);
1496 /* Build the new addition chain. */
1497 oe1
= (*ops
)[first
];
1498 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1500 fprintf (dump_file
, "Building (");
1501 print_generic_expr (dump_file
, oe1
->op
, 0);
1503 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1504 EXECUTE_IF_SET_IN_BITMAP (candidates2
, first
+1, i
, sbi0
)
1508 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1510 fprintf (dump_file
, " + ");
1511 print_generic_expr (dump_file
, oe2
->op
, 0);
1513 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1514 sum
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1515 oe1
->op
, oe2
->op
, opcode
);
1516 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1518 oe1
->op
= gimple_get_lhs (sum
);
1521 /* Apply the multiplication/division. */
1522 prod
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1523 oe1
->op
, c
->op
, c
->oecode
);
1524 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1526 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1527 print_generic_expr (dump_file
, c
->op
, 0);
1528 fprintf (dump_file
, "\n");
1531 /* Record it in the addition chain and disable further
1532 undistribution with this op. */
1533 oe1
->op
= gimple_assign_lhs (prod
);
1534 oe1
->rank
= get_rank (oe1
->op
);
1535 subops
[first
].release ();
/* Cleanup: release all per-candidate sub-operand vectors and bitmaps.  */
1543 for (i
= 0; i
< ops
->length (); ++i
)
1544 subops
[i
].release ();
1547 sbitmap_free (candidates
);
1548 sbitmap_free (candidates2
);
1553 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1554 expression, examine the other OPS to see if any of them are comparisons
1555 of the same values, which we may be able to combine or eliminate.
1556 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
/* NOTE(review): lossy line-split extraction -- some original lines are
   missing; code left byte-identical, comments only added.  */
1559 eliminate_redundant_comparison (enum tree_code opcode
,
1560 vec
<operand_entry_t
> *ops
,
1561 unsigned int currindex
,
1562 operand_entry_t curr
)
1565 enum tree_code lcode
, rcode
;
/* Only bitwise AND/OR chains can combine comparisons.  */
1570 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
1573 /* Check that CURR is a comparison. */
1574 if (TREE_CODE (curr
->op
) != SSA_NAME
)
1576 def1
= SSA_NAME_DEF_STMT (curr
->op
);
1577 if (!is_gimple_assign (def1
))
1579 lcode
= gimple_assign_rhs_code (def1
);
1580 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
1582 op1
= gimple_assign_rhs1 (def1
);
1583 op2
= gimple_assign_rhs2 (def1
);
1585 /* Now look for a similar comparison in the remaining OPS. */
1586 for (i
= currindex
+ 1; ops
->iterate (i
, &oe
); i
++)
1590 if (TREE_CODE (oe
->op
) != SSA_NAME
)
1592 def2
= SSA_NAME_DEF_STMT (oe
->op
);
1593 if (!is_gimple_assign (def2
))
1595 rcode
= gimple_assign_rhs_code (def2
);
1596 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
1599 /* If we got here, we have a match. See if we can combine the
1601 if (opcode
== BIT_IOR_EXPR
)
1602 t
= maybe_fold_or_comparisons (lcode
, op1
, op2
,
1603 rcode
, gimple_assign_rhs1 (def2
),
1604 gimple_assign_rhs2 (def2
));
1606 t
= maybe_fold_and_comparisons (lcode
, op1
, op2
,
1607 rcode
, gimple_assign_rhs1 (def2
),
1608 gimple_assign_rhs2 (def2
));
1612 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1613 always give us a boolean_type_node value back. If the original
1614 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1615 we need to convert. */
1616 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
1617 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
/* The combined form must be a GIMPLE-valid comparison of simple values;
   otherwise give up on this pair.  */
1619 if (TREE_CODE (t
) != INTEGER_CST
1620 && !operand_equal_p (t
, curr
->op
, 0))
1622 enum tree_code subcode
;
1623 tree newop1
, newop2
;
1624 if (!COMPARISON_CLASS_P (t
))
1626 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1627 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1628 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1629 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
1633 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1635 fprintf (dump_file
, "Equivalence: ");
1636 print_generic_expr (dump_file
, curr
->op
, 0);
1637 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
1638 print_generic_expr (dump_file
, oe
->op
, 0);
1639 fprintf (dump_file
, " -> ");
1640 print_generic_expr (dump_file
, t
, 0);
1641 fprintf (dump_file
, "\n");
1644 /* Now we can delete oe, as it has been subsumed by the new combined
1646 ops
->ordered_remove (i
);
1647 reassociate_stats
.ops_eliminated
++;
1649 /* If t is the same as curr->op, we're done. Otherwise we must
1650 replace curr->op with t. Special case is if we got a constant
1651 back, in which case we add it to the end instead of in place of
1652 the current entry. */
1653 if (TREE_CODE (t
) == INTEGER_CST
)
1655 ops
->ordered_remove (currindex
);
1656 add_to_ops_vec (ops
, t
);
1658 else if (!operand_equal_p (t
, curr
->op
, 0))
1661 enum tree_code subcode
;
1664 gcc_assert (COMPARISON_CLASS_P (t
));
1665 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1666 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1667 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1668 gcc_checking_assert (is_gimple_val (newop1
)
1669 && is_gimple_val (newop2
));
1670 sum
= build_and_add_sum (TREE_TYPE (t
), newop1
, newop2
, subcode
);
1671 curr
->op
= gimple_get_lhs (sum
);
1679 /* Perform various identities and other optimizations on the list of
1680 operand entries, stored in OPS. The tree code for the binary
1681 operation between all the operands is OPCODE. */
/* NOTE(review): lossy line-split extraction -- some original lines are
   missing; code left byte-identical, comments only added.  */
1684 optimize_ops_list (enum tree_code opcode
,
1685 vec
<operand_entry_t
> *ops
)
1687 unsigned int length
= ops
->length ();
1690 operand_entry_t oelast
= NULL
;
1691 bool iterate
= false;
1696 oelast
= ops
->last ();
1698 /* If the last two are constants, pop the constants off, merge them
1699 and try the next two. */
1700 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
1702 operand_entry_t oelm1
= (*ops
)[length
- 2];
1704 if (oelm1
->rank
== 0
1705 && is_gimple_min_invariant (oelm1
->op
)
1706 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
1707 TREE_TYPE (oelast
->op
)))
1709 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
1710 oelm1
->op
, oelast
->op
);
1712 if (folded
&& is_gimple_min_invariant (folded
))
1714 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1715 fprintf (dump_file
, "Merging constants\n");
1720 add_to_ops_vec (ops
, folded
);
1721 reassociate_stats
.constants_eliminated
++;
/* Recurse: the merged constant may itself combine with the next one.  */
1723 optimize_ops_list (opcode
, ops
);
1729 eliminate_using_constants (opcode
, ops
);
/* Walk the operand list applying the pairwise elimination helpers;
   each may shrink OPS in place.  */
1732 for (i
= 0; ops
->iterate (i
, &oe
);)
1736 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
1738 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
1739 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
1740 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
1752 length
= ops
->length ();
1753 oelast
= ops
->last ();
/* If anything changed, run the whole optimization list again.  */
1756 optimize_ops_list (opcode
, ops
);
1759 /* The following functions are subroutines to optimize_range_tests and allow
1760 it to try to change a logical combination of comparisons into a range
1764 X == 2 || X == 5 || X == 3 || X == 4
1768 (unsigned) (X - 2) <= 3
1770 For more information see comments above fold_test_range in fold-const.c,
1771 this implementation is for GIMPLE. */
1779 bool strict_overflow_p
;
1780 unsigned int idx
, next
;
1783 /* This is similar to make_range in fold-const.c, but on top of
1784 GIMPLE instead of trees. If EXP is non-NULL, it should be
1785 an SSA_NAME and STMT argument is ignored, otherwise STMT
1786 argument should be a GIMPLE_COND. */
/* NOTE(review): lossy line-split extraction -- some original lines are
   missing; code left byte-identical, comments only added.  */
1789 init_range_entry (struct range_entry
*r
, tree exp
, gimple stmt
)
1793 bool is_bool
, strict_overflow_p
;
/* Initialize R to a conservative default before refinement.  */
1797 r
->strict_overflow_p
= false;
1799 r
->high
= NULL_TREE
;
1800 if (exp
!= NULL_TREE
1801 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
1804 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1805 and see if we can refine the range. Some of the cases below may not
1806 happen, but it doesn't seem worth worrying about this. We "continue"
1807 the outer loop when we've changed something; otherwise we "break"
1808 the switch, which will "break" the while. */
1809 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
1812 strict_overflow_p
= false;
1814 if (exp
== NULL_TREE
)
1816 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
1818 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
1823 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
1828 enum tree_code code
;
1829 tree arg0
, arg1
, exp_type
;
/* Fetch the defining comparison either from EXP's def stmt or from
   the GIMPLE_COND passed in as STMT.  */
1833 if (exp
!= NULL_TREE
)
1835 if (TREE_CODE (exp
) != SSA_NAME
)
1838 stmt
= SSA_NAME_DEF_STMT (exp
);
1839 if (!is_gimple_assign (stmt
))
1842 code
= gimple_assign_rhs_code (stmt
);
1843 arg0
= gimple_assign_rhs1 (stmt
);
1844 arg1
= gimple_assign_rhs2 (stmt
);
1845 exp_type
= TREE_TYPE (exp
);
1849 code
= gimple_cond_code (stmt
);
1850 arg0
= gimple_cond_lhs (stmt
);
1851 arg1
= gimple_cond_rhs (stmt
);
1852 exp_type
= boolean_type_node
;
1855 if (TREE_CODE (arg0
) != SSA_NAME
)
1857 loc
= gimple_location (stmt
);
1861 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
1862 /* Ensure the range is either +[-,0], +[0,0],
1863 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1864 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1865 or similar expression of unconditional true or
1866 false, it should not be negated. */
1867 && ((high
&& integer_zerop (high
))
1868 || (low
&& integer_onep (low
))))
1881 if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
1883 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
1888 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
/* Delegate one refinement step to the fold-const helper.  */
1903 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
1905 &strict_overflow_p
);
1906 if (nexp
!= NULL_TREE
)
1909 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
1922 r
->strict_overflow_p
= strict_overflow_p
;
1926 /* Comparison function for qsort. Sort entries
1927 without SSA_NAME exp first, then with SSA_NAMEs sorted
1928 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1929 by increasing ->low and if ->low is the same, by increasing
1930 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
/* NOTE(review): lossy line-split extraction -- some original lines
   (including return statements) are missing; code left byte-identical.  */
1934 range_entry_cmp (const void *a
, const void *b
)
1936 const struct range_entry
*p
= (const struct range_entry
*) a
;
1937 const struct range_entry
*q
= (const struct range_entry
*) b
;
1939 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
1941 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
1943 /* Group range_entries for the same SSA_NAME together. */
1944 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
1946 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
1948 /* If ->low is different, NULL low goes first, then by
1950 if (p
->low
!= NULL_TREE
)
1952 if (q
->low
!= NULL_TREE
)
1954 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
1956 if (tem
&& integer_onep (tem
))
1958 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
1960 if (tem
&& integer_onep (tem
))
1966 else if (q
->low
!= NULL_TREE
)
1968 /* If ->high is different, NULL high goes last, before that by
1970 if (p
->high
!= NULL_TREE
)
1972 if (q
->high
!= NULL_TREE
)
1974 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
1976 if (tem
&& integer_onep (tem
))
1978 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
1980 if (tem
&& integer_onep (tem
))
1986 else if (p
->high
!= NULL_TREE
)
1988 /* If both ranges are the same, sort below by ascending idx. */
1993 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
/* Final tie-break: ascending ->idx keeps the sort stable.  */
1996 if (p
->idx
< q
->idx
)
2000 gcc_checking_assert (p
->idx
> q
->idx
);
2005 /* Helper routine of optimize_range_test.
2006 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2007 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2008 OPCODE and OPS are arguments of optimize_range_tests. Return
2009 true if the range merge has been successful.
2010 If OPCODE is ERROR_MARK, this is called from within
2011 maybe_optimize_range_tests and is performing inter-bb range optimization.
2012 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
/* NOTE(review): lossy line-split extraction -- some original lines are
   missing; code left byte-identical, comments only added.  */
2016 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
2017 unsigned int count
, enum tree_code opcode
,
2018 vec
<operand_entry_t
> *ops
, tree exp
, bool in_p
,
2019 tree low
, tree high
, bool strict_overflow_p
)
2021 operand_entry_t oe
= (*ops
)[range
->idx
];
/* When OP is NULL the entry stands for a whole basic block's
   GIMPLE_COND (inter-bb mode); otherwise it is an SSA_NAME.  */
2023 gimple stmt
= op
? SSA_NAME_DEF_STMT (op
) : last_stmt (BASIC_BLOCK (oe
->id
));
2024 location_t loc
= gimple_location (stmt
);
2025 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2026 tree tem
= build_range_check (loc
, optype
, exp
, in_p
, low
, high
);
2027 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
2028 gimple_stmt_iterator gsi
;
/* build_range_check failing means the merge cannot be expressed.  */
2030 if (tem
== NULL_TREE
)
2033 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
2034 warning_at (loc
, OPT_Wstrict_overflow
,
2035 "assuming signed overflow does not occur "
2036 "when simplifying range test");
2038 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2040 struct range_entry
*r
;
2041 fprintf (dump_file
, "Optimizing range tests ");
2042 print_generic_expr (dump_file
, range
->exp
, 0);
2043 fprintf (dump_file
, " %c[", range
->in_p
? '+' : '-');
2044 print_generic_expr (dump_file
, range
->low
, 0);
2045 fprintf (dump_file
, ", ");
2046 print_generic_expr (dump_file
, range
->high
, 0);
2047 fprintf (dump_file
, "]");
2048 for (r
= otherrange
; r
< otherrange
+ count
; r
++)
2050 fprintf (dump_file
, " and %c[", r
->in_p
? '+' : '-');
2051 print_generic_expr (dump_file
, r
->low
, 0);
2052 fprintf (dump_file
, ", ");
2053 print_generic_expr (dump_file
, r
->high
, 0);
2054 fprintf (dump_file
, "]");
2056 fprintf (dump_file
, "\n into ");
2057 print_generic_expr (dump_file
, tem
, 0);
2058 fprintf (dump_file
, "\n");
/* Ranges were inverted for | on entry; invert the result back.  */
2061 if (opcode
== BIT_IOR_EXPR
2062 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2063 tem
= invert_truthvalue_loc (loc
, tem
);
2065 tem
= fold_convert_loc (loc
, optype
, tem
);
2066 gsi
= gsi_for_stmt (stmt
);
2067 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, true,
/* Propagate STMT's uid backwards over the freshly forced statements
   so the reassociation uid ordering stays consistent.  */
2069 for (gsi_prev (&gsi
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
2070 if (gimple_uid (gsi_stmt (gsi
)))
2073 gimple_set_uid (gsi_stmt (gsi
), gimple_uid (stmt
));
2080 range
->strict_overflow_p
= false;
/* Neutralize the subsumed entries so later passes drop them.  */
2082 for (range
= otherrange
; range
< otherrange
+ count
; range
++)
2084 oe
= (*ops
)[range
->idx
];
2085 /* Now change all the other range test immediate uses, so that
2086 those tests will be optimized away. */
2087 if (opcode
== ERROR_MARK
)
2090 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
2091 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
2093 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
2094 ? boolean_false_node
: boolean_true_node
);
2097 oe
->op
= error_mark_node
;
2098 range
->exp
= NULL_TREE
;
2103 /* Optimize X == CST1 || X == CST2
2104 if popcount (CST1 ^ CST2) == 1 into
2105 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2106 Similarly for ranges. E.g.
2107 X != 2 && X != 3 && X != 10 && X != 11
2108 will be transformed by the previous optimization into
2109 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2110 and this loop can transform that into
2111 !(((X & ~8) - 2U) <= 1U). */
/* NOTE(review): lossy line-split extraction -- some original lines are
   missing; code left byte-identical, comments only added.  */
2114 optimize_range_tests_xor (enum tree_code opcode
, tree type
,
2115 tree lowi
, tree lowj
, tree highi
, tree highj
,
2116 vec
<operand_entry_t
> *ops
,
2117 struct range_entry
*rangei
,
2118 struct range_entry
*rangej
)
2120 tree lowxor
, highxor
, tem
, exp
;
2121 /* Check highi ^ lowi == highj ^ lowj and
2122 popcount (highi ^ lowi) == 1. */
2123 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
2124 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
/* tree_log2 < 0 means LOWXOR is not a power of two (popcount != 1).  */
2126 if (tree_log2 (lowxor
) < 0)
2128 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
2129 if (!tree_int_cst_equal (lowxor
, highxor
))
/* Mask off the single differing bit and retest with merged bounds.  */
2132 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
2133 exp
= fold_build2 (BIT_AND_EXPR
, type
, rangei
->exp
, tem
);
2134 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
2135 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
2136 if (update_range_test (rangei
, rangej
, 1, opcode
, ops
, exp
,
2137 rangei
->in_p
, lowj
, highj
,
2138 rangei
->strict_overflow_p
2139 || rangej
->strict_overflow_p
))
2144 /* Optimize X == CST1 || X == CST2
2145 if popcount (CST2 - CST1) == 1 into
2146 ((X - CST1) & ~(CST2 - CST1)) == 0.
2147 Similarly for ranges. E.g.
2148 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2149 || X == 75 || X == 45
2150 will be transformed by the previous optimization into
2151 (X - 43U) <= 3U || (X - 75U) <= 3U
2152 and this loop can transform that into
2153 ((X - 43U) & ~(75U - 43U)) <= 3U. */
/* NOTE(review): lossy line-split extraction -- some original lines are
   missing; code left byte-identical, comments only added.  */
2155 optimize_range_tests_diff (enum tree_code opcode
, tree type
,
2156 tree lowi
, tree lowj
, tree highi
, tree highj
,
2157 vec
<operand_entry_t
> *ops
,
2158 struct range_entry
*rangei
,
2159 struct range_entry
*rangej
)
2161 tree tem1
, tem2
, mask
;
2162 /* Check highi - lowi == highj - lowj. */
2163 tem1
= fold_binary (MINUS_EXPR
, type
, highi
, lowi
);
2164 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2166 tem2
= fold_binary (MINUS_EXPR
, type
, highj
, lowj
);
2167 if (!tree_int_cst_equal (tem1
, tem2
))
2169 /* Check popcount (lowj - lowi) == 1. */
2170 tem1
= fold_binary (MINUS_EXPR
, type
, lowj
, lowi
);
2171 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
/* tree_log2 < 0 means the difference is not a power of two.  */
2173 if (tree_log2 (tem1
) < 0)
/* Fold both ranges into ((X - LOWI) & MASK) against [0, TEM2].  */
2176 mask
= fold_build1 (BIT_NOT_EXPR
, type
, tem1
);
2177 tem1
= fold_binary (MINUS_EXPR
, type
, rangei
->exp
, lowi
);
2178 tem1
= fold_build2 (BIT_AND_EXPR
, type
, tem1
, mask
);
2179 lowj
= build_int_cst (type
, 0);
2180 if (update_range_test (rangei
, rangej
, 1, opcode
, ops
, tem1
,
2181 rangei
->in_p
, lowj
, tem2
,
2182 rangei
->strict_overflow_p
2183 || rangej
->strict_overflow_p
))
2188 /* It does some common checks for function optimize_range_tests_xor and
2189 optimize_range_tests_diff.
2190 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2191 Else it calls optimize_range_tests_diff. */
/* NOTE(review): lossy line-split extraction -- some original lines are
   missing; code left byte-identical, comments only added.  */
2194 optimize_range_tests_1 (enum tree_code opcode
, int first
, int length
,
2195 bool optimize_xor
, vec
<operand_entry_t
> *ops
,
2196 struct range_entry
*ranges
)
2199 bool any_changes
= false;
2200 for (i
= first
; i
< length
; i
++)
2202 tree lowi
, highi
, lowj
, highj
, type
, tem
;
/* Only live, non-inverted integral ranges are candidates.  */
2204 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2206 type
= TREE_TYPE (ranges
[i
].exp
);
2207 if (!INTEGRAL_TYPE_P (type
))
2209 lowi
= ranges
[i
].low
;
2210 if (lowi
== NULL_TREE
)
2211 lowi
= TYPE_MIN_VALUE (type
);
2212 highi
= ranges
[i
].high
;
2213 if (highi
== NULL_TREE
)
/* Pair I with up to 63 following ranges on the same expression;
   the +64 cap bounds the quadratic pairing work.  */
2215 for (j
= i
+ 1; j
< length
&& j
< i
+ 64; j
++)
2218 if (ranges
[i
].exp
!= ranges
[j
].exp
|| ranges
[j
].in_p
)
2220 lowj
= ranges
[j
].low
;
2221 if (lowj
== NULL_TREE
)
2223 highj
= ranges
[j
].high
;
2224 if (highj
== NULL_TREE
)
2225 highj
= TYPE_MAX_VALUE (type
);
2226 /* Check lowj > highi. */
2227 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2229 if (tem
== NULL_TREE
|| !integer_onep (tem
))
2232 changes
= optimize_range_tests_xor (opcode
, type
, lowi
, lowj
,
2234 ranges
+ i
, ranges
+ j
);
2236 changes
= optimize_range_tests_diff (opcode
, type
, lowi
, lowj
,
2238 ranges
+ i
, ranges
+ j
);
2249 /* Optimize range tests, similarly how fold_range_test optimizes
2250 it on trees. The tree code for the binary
2251 operation between all the operands is OPCODE.
2252 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2253 maybe_optimize_range_tests for inter-bb range optimization.
2254 In that case if oe->op is NULL, oe->id is bb->index whose
2255 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2256 the actual opcode. */
/* NOTE(review): lossy line-split extraction -- some original lines are
   missing; code left byte-identical, comments only added.  */
2259 optimize_range_tests (enum tree_code opcode
,
2260 vec
<operand_entry_t
> *ops
)
2262 unsigned int length
= ops
->length (), i
, j
, first
;
2264 struct range_entry
*ranges
;
2265 bool any_changes
= false;
2270 ranges
= XNEWVEC (struct range_entry
, length
);
2271 for (i
= 0; i
< length
; i
++)
2275 init_range_entry (ranges
+ i
, oe
->op
,
2276 oe
->op
? NULL
: last_stmt (BASIC_BLOCK (oe
->id
)));
2277 /* For | invert it now, we will invert it again before emitting
2278 the optimized expression. */
2279 if (opcode
== BIT_IOR_EXPR
2280 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2281 ranges
[i
].in_p
= !ranges
[i
].in_p
;
/* Sort so ranges on the same SSA_NAME are adjacent (range_entry_cmp).  */
2284 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
2285 for (i
= 0; i
< length
; i
++)
2286 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
2289 /* Try to merge ranges. */
2290 for (first
= i
; i
< length
; i
++)
2292 tree low
= ranges
[i
].low
;
2293 tree high
= ranges
[i
].high
;
2294 int in_p
= ranges
[i
].in_p
;
2295 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
2296 int update_fail_count
= 0;
2298 for (j
= i
+ 1; j
< length
; j
++)
2300 if (ranges
[i
].exp
!= ranges
[j
].exp
)
2302 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
2303 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
2305 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
2311 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, j
- i
- 1, opcode
,
2312 ops
, ranges
[i
].exp
, in_p
, low
, high
,
2318 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2319 while update_range_test would fail. */
2320 else if (update_fail_count
== 64)
2323 ++update_fail_count
;
/* Second phase: try the xor and diff single-bit transformations.  */
2326 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, true,
2329 if (BRANCH_COST (optimize_function_for_speed_p (cfun
), false) >= 2)
2330 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, false,
2333 if (any_changes
&& opcode
!= ERROR_MARK
)
2336 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
2338 if (oe
->op
== error_mark_node
)
2347 XDELETEVEC (ranges
);
2351 /* Return true if STMT is a cast like:
2357 # _345 = PHI <_123(N), 1(...), 1(...)>
2358 where _234 has bool type, _123 has single use and
2359 bb N has a single successor M. This is commonly used in
2360 the last block of a range test. */
/* NOTE(review): lossy line-split extraction -- some original lines are
   missing; code left byte-identical, comments only added.  */
2363 final_range_test_p (gimple stmt
)
2365 basic_block bb
, rhs_bb
;
2368 use_operand_p use_p
;
/* Must be a cast assignment in a block with one non-complex successor.  */
2371 if (!gimple_assign_cast_p (stmt
))
2373 bb
= gimple_bb (stmt
);
2374 if (!single_succ_p (bb
))
2376 e
= single_succ_edge (bb
);
2377 if (e
->flags
& EDGE_COMPLEX
)
2380 lhs
= gimple_assign_lhs (stmt
);
2381 rhs
= gimple_assign_rhs1 (stmt
);
2382 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
2383 || TREE_CODE (rhs
) != SSA_NAME
2384 || TREE_CODE (TREE_TYPE (rhs
)) != BOOLEAN_TYPE
)
2387 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2388 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
2391 if (gimple_code (use_stmt
) != GIMPLE_PHI
2392 || gimple_bb (use_stmt
) != e
->dest
)
2395 /* And that the rhs is defined in the same loop. */
2396 rhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (rhs
));
2398 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), rhs_bb
))
2404 /* Return true if BB is suitable basic block for inter-bb range test
2405 optimization. If BACKWARD is true, BB should be the only predecessor
2406 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2407 or compared with to find a common basic block to which all conditions
2408 branch to if true resp. false. If BACKWARD is false, TEST_BB should
2409 be the only predecessor of BB. */
/* NOTE(review): lossy line-split extraction -- some original lines are
   missing; code left byte-identical, comments only added.  */
2412 suitable_cond_bb (basic_block bb
, basic_block test_bb
, basic_block
*other_bb
,
2415 edge_iterator ei
, ei2
;
2418 gimple_stmt_iterator gsi
;
2419 bool other_edge_seen
= false;
2424 /* Check last stmt first. */
2425 stmt
= last_stmt (bb
);
2427 || (gimple_code (stmt
) != GIMPLE_COND
2428 && (backward
|| !final_range_test_p (stmt
)))
2429 || gimple_visited_p (stmt
)
2430 || stmt_could_throw_p (stmt
)
2433 is_cond
= gimple_code (stmt
) == GIMPLE_COND
;
2436 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2437 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2438 to *OTHER_BB (if not set yet, try to find it out). */
2439 if (EDGE_COUNT (bb
->succs
) != 2)
2441 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2443 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2445 if (e
->dest
== test_bb
)
2454 if (*other_bb
== NULL
)
2456 FOR_EACH_EDGE (e2
, ei2
, test_bb
->succs
)
2457 if (!(e2
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2459 else if (e
->dest
== e2
->dest
)
2460 *other_bb
= e
->dest
;
2461 if (*other_bb
== NULL
)
2464 if (e
->dest
== *other_bb
)
2465 other_edge_seen
= true;
2469 if (*other_bb
== NULL
|| !other_edge_seen
)
2472 else if (single_succ (bb
) != *other_bb
)
2475 /* Now check all PHIs of *OTHER_BB. */
2476 e
= find_edge (bb
, *other_bb
);
2477 e2
= find_edge (test_bb
, *other_bb
);
2478 for (gsi
= gsi_start_phis (e
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2480 gimple phi
= gsi_stmt (gsi
);
2481 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2482 corresponding to BB and TEST_BB predecessor must be the same. */
2483 if (!operand_equal_p (gimple_phi_arg_def (phi
, e
->dest_idx
),
2484 gimple_phi_arg_def (phi
, e2
->dest_idx
), 0))
2486 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2487 one of the PHIs should have the lhs of the last stmt in
2488 that block as PHI arg and that PHI should have 0 or 1
2489 corresponding to it in all other range test basic blocks
2493 if (gimple_phi_arg_def (phi
, e
->dest_idx
)
2494 == gimple_assign_lhs (stmt
)
2495 && (integer_zerop (gimple_phi_arg_def (phi
, e2
->dest_idx
))
2496 || integer_onep (gimple_phi_arg_def (phi
,
2502 gimple test_last
= last_stmt (test_bb
);
2503 if (gimple_code (test_last
) != GIMPLE_COND
2504 && gimple_phi_arg_def (phi
, e2
->dest_idx
)
2505 == gimple_assign_lhs (test_last
)
2506 && (integer_zerop (gimple_phi_arg_def (phi
, e
->dest_idx
))
2507 || integer_onep (gimple_phi_arg_def (phi
, e
->dest_idx
))))
2517 /* Return true if BB doesn't have side-effects that would disallow
2518 range test optimization, all SSA_NAMEs set in the bb are consumed
2519 in the bb and there are no PHIs. */
/* NOTE(review): lossy line-split extraction -- some original lines are
   missing; code left byte-identical, comments only added.  */
2522 no_side_effect_bb (basic_block bb
)
2524 gimple_stmt_iterator gsi
;
/* Any PHI disqualifies the block immediately.  */
2527 if (!gimple_seq_empty_p (phi_nodes (bb
)))
2529 last
= last_stmt (bb
);
2530 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2532 gimple stmt
= gsi_stmt (gsi
);
2534 imm_use_iterator imm_iter
;
2535 use_operand_p use_p
;
2537 if (is_gimple_debug (stmt
))
2539 if (gimple_has_side_effects (stmt
))
2543 if (!is_gimple_assign (stmt
))
2545 lhs
= gimple_assign_lhs (stmt
);
2546 if (TREE_CODE (lhs
) != SSA_NAME
)
2548 if (gimple_assign_rhs_could_trap_p (stmt
))
/* Every non-debug use of LHS must stay within this block.  */
2550 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
2552 gimple use_stmt
= USE_STMT (use_p
);
2553 if (is_gimple_debug (use_stmt
))
2555 if (gimple_bb (use_stmt
) != bb
)
2562 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2563 return true and fill in *OPS recursively. */
/* NOTE(review): lossy line-split extraction -- some original lines are
   missing; code left byte-identical, comments only added.  */
2566 get_ops (tree var
, enum tree_code code
, vec
<operand_entry_t
> *ops
,
2569 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2573 if (!is_reassociable_op (stmt
, code
, loop
))
2576 rhs
[0] = gimple_assign_rhs1 (stmt
);
2577 rhs
[1] = gimple_assign_rhs2 (stmt
);
/* Mark the statement visited so the chain is not walked twice.  */
2578 gimple_set_visited (stmt
, true);
/* Recurse into each operand; leaves become operand entries in *OPS.  */
2579 for (i
= 0; i
< 2; i
++)
2580 if (TREE_CODE (rhs
[i
]) == SSA_NAME
2581 && !get_ops (rhs
[i
], code
, ops
, loop
)
2582 && has_single_use (rhs
[i
]))
2584 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
2590 ops
->safe_push (oe
);
2595 /* Find the ops that were added by get_ops starting from VAR, see if
2596 they were changed during update_range_test and if yes, create new
2600 update_ops (tree var
, enum tree_code code
, vec
<operand_entry_t
> ops
,
2601 unsigned int *pidx
, struct loop
*loop
)
2603 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2607 if (!is_reassociable_op (stmt
, code
, loop
))
2610 rhs
[0] = gimple_assign_rhs1 (stmt
);
2611 rhs
[1] = gimple_assign_rhs2 (stmt
);
2614 for (i
= 0; i
< 2; i
++)
2615 if (TREE_CODE (rhs
[i
]) == SSA_NAME
)
2617 rhs
[2 + i
] = update_ops (rhs
[i
], code
, ops
, pidx
, loop
);
2618 if (rhs
[2 + i
] == NULL_TREE
)
2620 if (has_single_use (rhs
[i
]))
2621 rhs
[2 + i
] = ops
[(*pidx
)++]->op
;
2623 rhs
[2 + i
] = rhs
[i
];
2626 if ((rhs
[2] != rhs
[0] || rhs
[3] != rhs
[1])
2627 && (rhs
[2] != rhs
[1] || rhs
[3] != rhs
[0]))
2629 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
2630 var
= make_ssa_name (TREE_TYPE (var
), NULL
);
2631 gimple g
= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt
),
2632 var
, rhs
[2], rhs
[3]);
2633 gimple_set_uid (g
, gimple_uid (stmt
));
2634 gimple_set_visited (g
, true);
2635 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
2640 /* Structure to track the initial value passed to get_ops and
2641 the range in the ops vector for each basic block. */
2643 struct inter_bb_range_test_entry
2646 unsigned int first_idx
, last_idx
;
2649 /* Inter-bb range test optimization. */
2652 maybe_optimize_range_tests (gimple stmt
)
2654 basic_block first_bb
= gimple_bb (stmt
);
2655 basic_block last_bb
= first_bb
;
2656 basic_block other_bb
= NULL
;
2660 vec
<operand_entry_t
> ops
= vNULL
;
2661 vec
<inter_bb_range_test_entry
> bbinfo
= vNULL
;
2662 bool any_changes
= false;
2664 /* Consider only basic blocks that end with GIMPLE_COND or
2665 a cast statement satisfying final_range_test_p. All
2666 but the last bb in the first_bb .. last_bb range
2667 should end with GIMPLE_COND. */
2668 if (gimple_code (stmt
) == GIMPLE_COND
)
2670 if (EDGE_COUNT (first_bb
->succs
) != 2)
2673 else if (final_range_test_p (stmt
))
2674 other_bb
= single_succ (first_bb
);
2678 if (stmt_could_throw_p (stmt
))
2681 /* As relative ordering of post-dominator sons isn't fixed,
2682 maybe_optimize_range_tests can be called first on any
2683 bb in the range we want to optimize. So, start searching
2684 backwards, if first_bb can be set to a predecessor. */
2685 while (single_pred_p (first_bb
))
2687 basic_block pred_bb
= single_pred (first_bb
);
2688 if (!suitable_cond_bb (pred_bb
, first_bb
, &other_bb
, true))
2690 if (!no_side_effect_bb (first_bb
))
2694 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2695 Before starting forward search in last_bb successors, find
2696 out the other_bb. */
2697 if (first_bb
== last_bb
)
2700 /* As non-GIMPLE_COND last stmt always terminates the range,
2701 if forward search didn't discover anything, just give up. */
2702 if (gimple_code (stmt
) != GIMPLE_COND
)
2704 /* Look at both successors. Either it ends with a GIMPLE_COND
2705 and satisfies suitable_cond_bb, or ends with a cast and
2706 other_bb is that cast's successor. */
2707 FOR_EACH_EDGE (e
, ei
, first_bb
->succs
)
2708 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
))
2709 || e
->dest
== first_bb
)
2711 else if (single_pred_p (e
->dest
))
2713 stmt
= last_stmt (e
->dest
);
2715 && gimple_code (stmt
) == GIMPLE_COND
2716 && EDGE_COUNT (e
->dest
->succs
) == 2)
2718 if (suitable_cond_bb (first_bb
, e
->dest
, &other_bb
, true))
2724 && final_range_test_p (stmt
)
2725 && find_edge (first_bb
, single_succ (e
->dest
)))
2727 other_bb
= single_succ (e
->dest
);
2728 if (other_bb
== first_bb
)
2732 if (other_bb
== NULL
)
2735 /* Now do the forward search, moving last_bb to successor bbs
2736 that aren't other_bb. */
2737 while (EDGE_COUNT (last_bb
->succs
) == 2)
2739 FOR_EACH_EDGE (e
, ei
, last_bb
->succs
)
2740 if (e
->dest
!= other_bb
)
2744 if (!single_pred_p (e
->dest
))
2746 if (!suitable_cond_bb (e
->dest
, last_bb
, &other_bb
, false))
2748 if (!no_side_effect_bb (e
->dest
))
2752 if (first_bb
== last_bb
)
2754 /* Here basic blocks first_bb through last_bb's predecessor
2755 end with GIMPLE_COND, all of them have one of the edges to
2756 other_bb and another to another block in the range,
2757 all blocks except first_bb don't have side-effects and
2758 last_bb ends with either GIMPLE_COND, or cast satisfying
2759 final_range_test_p. */
2760 for (bb
= last_bb
; ; bb
= single_pred (bb
))
2762 enum tree_code code
;
2764 inter_bb_range_test_entry bb_ent
;
2766 bb_ent
.op
= NULL_TREE
;
2767 bb_ent
.first_idx
= ops
.length ();
2768 bb_ent
.last_idx
= bb_ent
.first_idx
;
2769 e
= find_edge (bb
, other_bb
);
2770 stmt
= last_stmt (bb
);
2771 gimple_set_visited (stmt
, true);
2772 if (gimple_code (stmt
) != GIMPLE_COND
)
2774 use_operand_p use_p
;
2779 lhs
= gimple_assign_lhs (stmt
);
2780 rhs
= gimple_assign_rhs1 (stmt
);
2781 gcc_assert (bb
== last_bb
);
2788 # _345 = PHI <_123(N), 1(...), 1(...)>
2790 or 0 instead of 1. If it is 0, the _234
2791 range test is anded together with all the
2792 other range tests, if it is 1, it is ored with
2794 single_imm_use (lhs
, &use_p
, &phi
);
2795 gcc_assert (gimple_code (phi
) == GIMPLE_PHI
);
2796 e2
= find_edge (first_bb
, other_bb
);
2798 gcc_assert (gimple_phi_arg_def (phi
, e
->dest_idx
) == lhs
);
2799 if (integer_zerop (gimple_phi_arg_def (phi
, d
)))
2800 code
= BIT_AND_EXPR
;
2803 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi
, d
)));
2804 code
= BIT_IOR_EXPR
;
2807 /* If _234 SSA_NAME_DEF_STMT is
2809 (or &, corresponding to 1/0 in the phi arguments,
2810 push into ops the individual range test arguments
2811 of the bitwise or resp. and, recursively. */
2812 if (!get_ops (rhs
, code
, &ops
,
2813 loop_containing_stmt (stmt
))
2814 && has_single_use (rhs
))
2816 /* Otherwise, push the _234 range test itself. */
2818 = (operand_entry_t
) pool_alloc (operand_entry_pool
);
2828 bb_ent
.last_idx
= ops
.length ();
2830 bbinfo
.safe_push (bb_ent
);
2833 /* Otherwise stmt is GIMPLE_COND. */
2834 code
= gimple_cond_code (stmt
);
2835 lhs
= gimple_cond_lhs (stmt
);
2836 rhs
= gimple_cond_rhs (stmt
);
2837 if (TREE_CODE (lhs
) == SSA_NAME
2838 && INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
2839 && ((code
!= EQ_EXPR
&& code
!= NE_EXPR
)
2840 || rhs
!= boolean_false_node
2841 /* Either push into ops the individual bitwise
2842 or resp. and operands, depending on which
2843 edge is other_bb. */
2844 || !get_ops (lhs
, (((e
->flags
& EDGE_TRUE_VALUE
) == 0)
2845 ^ (code
== EQ_EXPR
))
2846 ? BIT_AND_EXPR
: BIT_IOR_EXPR
, &ops
,
2847 loop_containing_stmt (stmt
))))
2849 /* Or push the GIMPLE_COND stmt itself. */
2851 = (operand_entry_t
) pool_alloc (operand_entry_pool
);
2854 oe
->rank
= (e
->flags
& EDGE_TRUE_VALUE
)
2855 ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
2856 /* oe->op = NULL signs that there is no SSA_NAME
2857 for the range test, and oe->id instead is the
2858 basic block number, at which's end the GIMPLE_COND
2866 else if (ops
.length () > bb_ent
.first_idx
)
2869 bb_ent
.last_idx
= ops
.length ();
2871 bbinfo
.safe_push (bb_ent
);
2875 if (ops
.length () > 1)
2876 any_changes
= optimize_range_tests (ERROR_MARK
, &ops
);
2880 /* update_ops relies on has_single_use predicates returning the
2881 same values as it did during get_ops earlier. Additionally it
2882 never removes statements, only adds new ones and it should walk
2883 from the single imm use and check the predicate already before
2884 making those changes.
2885 On the other side, the handling of GIMPLE_COND directly can turn
2886 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
2887 it needs to be done in a separate loop afterwards. */
2888 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
2890 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
2891 && bbinfo
[idx
].op
!= NULL_TREE
)
2895 stmt
= last_stmt (bb
);
2896 new_op
= update_ops (bbinfo
[idx
].op
,
2898 ops
[bbinfo
[idx
].first_idx
]->rank
,
2899 ops
, &bbinfo
[idx
].first_idx
,
2900 loop_containing_stmt (stmt
));
2901 if (new_op
== NULL_TREE
)
2903 gcc_assert (bb
== last_bb
);
2904 new_op
= ops
[bbinfo
[idx
].first_idx
++]->op
;
2906 if (bbinfo
[idx
].op
!= new_op
)
2908 imm_use_iterator iter
;
2909 use_operand_p use_p
;
2910 gimple use_stmt
, cast_stmt
= NULL
;
2912 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, bbinfo
[idx
].op
)
2913 if (is_gimple_debug (use_stmt
))
2915 else if (gimple_code (use_stmt
) == GIMPLE_COND
2916 || gimple_code (use_stmt
) == GIMPLE_PHI
)
2917 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
2918 SET_USE (use_p
, new_op
);
2919 else if (gimple_assign_cast_p (use_stmt
))
2920 cast_stmt
= use_stmt
;
2925 gcc_assert (bb
== last_bb
);
2926 tree lhs
= gimple_assign_lhs (cast_stmt
);
2927 tree new_lhs
= make_ssa_name (TREE_TYPE (lhs
), NULL
);
2928 enum tree_code rhs_code
2929 = gimple_assign_rhs_code (cast_stmt
);
2931 = gimple_build_assign_with_ops (rhs_code
, new_lhs
,
2933 gimple_stmt_iterator gsi
= gsi_for_stmt (cast_stmt
);
2934 gimple_set_uid (g
, gimple_uid (cast_stmt
));
2935 gimple_set_visited (g
, true);
2936 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
2937 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
2938 if (is_gimple_debug (use_stmt
))
2940 else if (gimple_code (use_stmt
) == GIMPLE_COND
2941 || gimple_code (use_stmt
) == GIMPLE_PHI
)
2942 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
2943 SET_USE (use_p
, new_lhs
);
2952 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
2954 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
2955 && bbinfo
[idx
].op
== NULL_TREE
2956 && ops
[bbinfo
[idx
].first_idx
]->op
!= NULL_TREE
)
2958 stmt
= last_stmt (bb
);
2959 if (integer_zerop (ops
[bbinfo
[idx
].first_idx
]->op
))
2960 gimple_cond_make_false (stmt
);
2961 else if (integer_onep (ops
[bbinfo
[idx
].first_idx
]->op
))
2962 gimple_cond_make_true (stmt
);
2965 gimple_cond_set_code (stmt
, NE_EXPR
);
2966 gimple_cond_set_lhs (stmt
, ops
[bbinfo
[idx
].first_idx
]->op
);
2967 gimple_cond_set_rhs (stmt
, boolean_false_node
);
2979 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2980 of STMT in it's operands. This is also known as a "destructive
2981 update" operation. */
2984 is_phi_for_stmt (gimple stmt
, tree operand
)
2988 use_operand_p arg_p
;
2991 if (TREE_CODE (operand
) != SSA_NAME
)
2994 lhs
= gimple_assign_lhs (stmt
);
2996 def_stmt
= SSA_NAME_DEF_STMT (operand
);
2997 if (gimple_code (def_stmt
) != GIMPLE_PHI
)
3000 FOR_EACH_PHI_ARG (arg_p
, def_stmt
, i
, SSA_OP_USE
)
3001 if (lhs
== USE_FROM_PTR (arg_p
))
3006 /* Remove def stmt of VAR if VAR has zero uses and recurse
3007 on rhs1 operand if so. */
3010 remove_visited_stmt_chain (tree var
)
3013 gimple_stmt_iterator gsi
;
3017 if (TREE_CODE (var
) != SSA_NAME
|| !has_zero_uses (var
))
3019 stmt
= SSA_NAME_DEF_STMT (var
);
3020 if (is_gimple_assign (stmt
) && gimple_visited_p (stmt
))
3022 var
= gimple_assign_rhs1 (stmt
);
3023 gsi
= gsi_for_stmt (stmt
);
3024 gsi_remove (&gsi
, true);
3025 release_defs (stmt
);
3032 /* This function checks three consequtive operands in
3033 passed operands vector OPS starting from OPINDEX and
3034 swaps two operands if it is profitable for binary operation
3035 consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
3037 We pair ops with the same rank if possible.
3039 The alternative we try is to see if STMT is a destructive
3040 update style statement, which is like:
3043 In that case, we want to use the destructive update form to
3044 expose the possible vectorizer sum reduction opportunity.
3045 In that case, the third operand will be the phi node. This
3046 check is not performed if STMT is null.
3048 We could, of course, try to be better as noted above, and do a
3049 lot of work to try to find these opportunities in >3 operand
3050 cases, but it is unlikely to be worth it. */
3053 swap_ops_for_binary_stmt (vec
<operand_entry_t
> ops
,
3054 unsigned int opindex
, gimple stmt
)
3056 operand_entry_t oe1
, oe2
, oe3
;
3059 oe2
= ops
[opindex
+ 1];
3060 oe3
= ops
[opindex
+ 2];
3062 if ((oe1
->rank
== oe2
->rank
3063 && oe2
->rank
!= oe3
->rank
)
3064 || (stmt
&& is_phi_for_stmt (stmt
, oe3
->op
)
3065 && !is_phi_for_stmt (stmt
, oe1
->op
)
3066 && !is_phi_for_stmt (stmt
, oe2
->op
)))
3068 struct operand_entry temp
= *oe3
;
3070 oe3
->rank
= oe1
->rank
;
3072 oe1
->rank
= temp
.rank
;
3074 else if ((oe1
->rank
== oe3
->rank
3075 && oe2
->rank
!= oe3
->rank
)
3076 || (stmt
&& is_phi_for_stmt (stmt
, oe2
->op
)
3077 && !is_phi_for_stmt (stmt
, oe1
->op
)
3078 && !is_phi_for_stmt (stmt
, oe3
->op
)))
3080 struct operand_entry temp
= *oe2
;
3082 oe2
->rank
= oe1
->rank
;
3084 oe1
->rank
= temp
.rank
;
3088 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3089 two definitions, otherwise return STMT. */
3091 static inline gimple
3092 find_insert_point (gimple stmt
, tree rhs1
, tree rhs2
)
3094 if (TREE_CODE (rhs1
) == SSA_NAME
3095 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs1
)))
3096 stmt
= SSA_NAME_DEF_STMT (rhs1
);
3097 if (TREE_CODE (rhs2
) == SSA_NAME
3098 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs2
)))
3099 stmt
= SSA_NAME_DEF_STMT (rhs2
);
3103 /* Recursively rewrite our linearized statements so that the operators
3104 match those in OPS[OPINDEX], putting the computation in rank
3105 order. Return new lhs. */
3108 rewrite_expr_tree (gimple stmt
, unsigned int opindex
,
3109 vec
<operand_entry_t
> ops
, bool changed
)
3111 tree rhs1
= gimple_assign_rhs1 (stmt
);
3112 tree rhs2
= gimple_assign_rhs2 (stmt
);
3113 tree lhs
= gimple_assign_lhs (stmt
);
3116 /* The final recursion case for this function is that you have
3117 exactly two operations left.
3118 If we had one exactly one op in the entire list to start with, we
3119 would have never called this function, and the tail recursion
3120 rewrites them one at a time. */
3121 if (opindex
+ 2 == ops
.length ())
3123 operand_entry_t oe1
, oe2
;
3126 oe2
= ops
[opindex
+ 1];
3128 if (rhs1
!= oe1
->op
|| rhs2
!= oe2
->op
)
3130 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3131 unsigned int uid
= gimple_uid (stmt
);
3133 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3135 fprintf (dump_file
, "Transforming ");
3136 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3141 gimple insert_point
= find_insert_point (stmt
, oe1
->op
, oe2
->op
);
3142 lhs
= make_ssa_name (TREE_TYPE (lhs
), NULL
);
3144 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt
),
3145 lhs
, oe1
->op
, oe2
->op
);
3146 gimple_set_uid (stmt
, uid
);
3147 gimple_set_visited (stmt
, true);
3148 if (insert_point
== gsi_stmt (gsi
))
3149 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
3151 insert_stmt_after (stmt
, insert_point
);
3155 gcc_checking_assert (find_insert_point (stmt
, oe1
->op
, oe2
->op
)
3157 gimple_assign_set_rhs1 (stmt
, oe1
->op
);
3158 gimple_assign_set_rhs2 (stmt
, oe2
->op
);
3162 if (rhs1
!= oe1
->op
&& rhs1
!= oe2
->op
)
3163 remove_visited_stmt_chain (rhs1
);
3165 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3167 fprintf (dump_file
, " into ");
3168 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3174 /* If we hit here, we should have 3 or more ops left. */
3175 gcc_assert (opindex
+ 2 < ops
.length ());
3177 /* Rewrite the next operator. */
3180 /* Recurse on the LHS of the binary operator, which is guaranteed to
3181 be the non-leaf side. */
3183 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1
), opindex
+ 1, ops
,
3184 changed
|| oe
->op
!= rhs2
);
3186 if (oe
->op
!= rhs2
|| new_rhs1
!= rhs1
)
3188 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3190 fprintf (dump_file
, "Transforming ");
3191 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3194 /* If changed is false, this is either opindex == 0
3195 or all outer rhs2's were equal to corresponding oe->op,
3196 and powi_result is NULL.
3197 That means lhs is equivalent before and after reassociation.
3198 Otherwise ensure the old lhs SSA_NAME is not reused and
3199 create a new stmt as well, so that any debug stmts will be
3200 properly adjusted. */
3203 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3204 unsigned int uid
= gimple_uid (stmt
);
3205 gimple insert_point
= find_insert_point (stmt
, new_rhs1
, oe
->op
);
3207 lhs
= make_ssa_name (TREE_TYPE (lhs
), NULL
);
3208 stmt
= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt
),
3209 lhs
, new_rhs1
, oe
->op
);
3210 gimple_set_uid (stmt
, uid
);
3211 gimple_set_visited (stmt
, true);
3212 if (insert_point
== gsi_stmt (gsi
))
3213 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
3215 insert_stmt_after (stmt
, insert_point
);
3219 gcc_checking_assert (find_insert_point (stmt
, new_rhs1
, oe
->op
)
3221 gimple_assign_set_rhs1 (stmt
, new_rhs1
);
3222 gimple_assign_set_rhs2 (stmt
, oe
->op
);
3226 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3228 fprintf (dump_file
, " into ");
3229 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3235 /* Find out how many cycles we need to compute statements chain.
3236 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a
3237 maximum number of independent statements we may execute per cycle. */
3240 get_required_cycles (int ops_num
, int cpu_width
)
3246 /* While we have more than 2 * cpu_width operands
3247 we may reduce number of operands by cpu_width
3249 res
= ops_num
/ (2 * cpu_width
);
3251 /* Remained operands count may be reduced twice per cycle
3252 until we have only one operand. */
3253 rest
= (unsigned)(ops_num
- res
* cpu_width
);
3254 elog
= exact_log2 (rest
);
3258 res
+= floor_log2 (rest
) + 1;
3263 /* Returns an optimal number of registers to use for computation of
3264 given statements. */
3267 get_reassociation_width (int ops_num
, enum tree_code opc
,
3268 enum machine_mode mode
)
3270 int param_width
= PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH
);
3275 if (param_width
> 0)
3276 width
= param_width
;
3278 width
= targetm
.sched
.reassociation_width (opc
, mode
);
3283 /* Get the minimal time required for sequence computation. */
3284 cycles_best
= get_required_cycles (ops_num
, width
);
3286 /* Check if we may use less width and still compute sequence for
3287 the same time. It will allow us to reduce registers usage.
3288 get_required_cycles is monotonically increasing with lower width
3289 so we can perform a binary search for the minimal width that still
3290 results in the optimal cycle count. */
3292 while (width
> width_min
)
3294 int width_mid
= (width
+ width_min
) / 2;
3296 if (get_required_cycles (ops_num
, width_mid
) == cycles_best
)
3298 else if (width_min
< width_mid
)
3299 width_min
= width_mid
;
3307 /* Recursively rewrite our linearized statements so that the operators
3308 match those in OPS[OPINDEX], putting the computation in rank
3309 order and trying to allow operations to be executed in
3313 rewrite_expr_tree_parallel (gimple stmt
, int width
,
3314 vec
<operand_entry_t
> ops
)
3316 enum tree_code opcode
= gimple_assign_rhs_code (stmt
);
3317 int op_num
= ops
.length ();
3318 int stmt_num
= op_num
- 1;
3319 gimple
*stmts
= XALLOCAVEC (gimple
, stmt_num
);
3320 int op_index
= op_num
- 1;
3322 int ready_stmts_end
= 0;
3324 tree last_rhs1
= gimple_assign_rhs1 (stmt
);
3326 /* We start expression rewriting from the top statements.
3327 So, in this loop we create a full list of statements
3328 we will work with. */
3329 stmts
[stmt_num
- 1] = stmt
;
3330 for (i
= stmt_num
- 2; i
>= 0; i
--)
3331 stmts
[i
] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts
[i
+1]));
3333 for (i
= 0; i
< stmt_num
; i
++)
3337 /* Determine whether we should use results of
3338 already handled statements or not. */
3339 if (ready_stmts_end
== 0
3340 && (i
- stmt_index
>= width
|| op_index
< 1))
3341 ready_stmts_end
= i
;
3343 /* Now we choose operands for the next statement. Non zero
3344 value in ready_stmts_end means here that we should use
3345 the result of already generated statements as new operand. */
3346 if (ready_stmts_end
> 0)
3348 op1
= gimple_assign_lhs (stmts
[stmt_index
++]);
3349 if (ready_stmts_end
> stmt_index
)
3350 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3351 else if (op_index
>= 0)
3352 op2
= ops
[op_index
--]->op
;
3355 gcc_assert (stmt_index
< i
);
3356 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3359 if (stmt_index
>= ready_stmts_end
)
3360 ready_stmts_end
= 0;
3365 swap_ops_for_binary_stmt (ops
, op_index
- 2, NULL
);
3366 op2
= ops
[op_index
--]->op
;
3367 op1
= ops
[op_index
--]->op
;
3370 /* If we emit the last statement then we should put
3371 operands into the last statement. It will also
3373 if (op_index
< 0 && stmt_index
== i
)
3376 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3378 fprintf (dump_file
, "Transforming ");
3379 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3382 /* We keep original statement only for the last one. All
3383 others are recreated. */
3384 if (i
== stmt_num
- 1)
3386 gimple_assign_set_rhs1 (stmts
[i
], op1
);
3387 gimple_assign_set_rhs2 (stmts
[i
], op2
);
3388 update_stmt (stmts
[i
]);
3391 stmts
[i
] = build_and_add_sum (TREE_TYPE (last_rhs1
), op1
, op2
, opcode
);
3393 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3395 fprintf (dump_file
, " into ");
3396 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3400 remove_visited_stmt_chain (last_rhs1
);
3403 /* Transform STMT, which is really (A +B) + (C + D) into the left
3404 linear form, ((A+B)+C)+D.
3405 Recurse on D if necessary. */
3408 linearize_expr (gimple stmt
)
3410 gimple_stmt_iterator gsi
;
3411 gimple binlhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3412 gimple binrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3413 gimple oldbinrhs
= binrhs
;
3414 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
3415 gimple newbinrhs
= NULL
;
3416 struct loop
*loop
= loop_containing_stmt (stmt
);
3417 tree lhs
= gimple_assign_lhs (stmt
);
3419 gcc_assert (is_reassociable_op (binlhs
, rhscode
, loop
)
3420 && is_reassociable_op (binrhs
, rhscode
, loop
));
3422 gsi
= gsi_for_stmt (stmt
);
3424 gimple_assign_set_rhs2 (stmt
, gimple_assign_rhs1 (binrhs
));
3425 binrhs
= gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs
),
3426 make_ssa_name (TREE_TYPE (lhs
), NULL
),
3427 gimple_assign_lhs (binlhs
),
3428 gimple_assign_rhs2 (binrhs
));
3429 gimple_assign_set_rhs1 (stmt
, gimple_assign_lhs (binrhs
));
3430 gsi_insert_before (&gsi
, binrhs
, GSI_SAME_STMT
);
3431 gimple_set_uid (binrhs
, gimple_uid (stmt
));
3433 if (TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
3434 newbinrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3436 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3438 fprintf (dump_file
, "Linearized: ");
3439 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3442 reassociate_stats
.linearized
++;
3445 gsi
= gsi_for_stmt (oldbinrhs
);
3446 gsi_remove (&gsi
, true);
3447 release_defs (oldbinrhs
);
3449 gimple_set_visited (stmt
, true);
3450 gimple_set_visited (binlhs
, true);
3451 gimple_set_visited (binrhs
, true);
3453 /* Tail recurse on the new rhs if it still needs reassociation. */
3454 if (newbinrhs
&& is_reassociable_op (newbinrhs
, rhscode
, loop
))
3455 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3456 want to change the algorithm while converting to tuples. */
3457 linearize_expr (stmt
);
3460 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3461 it. Otherwise, return NULL. */
3464 get_single_immediate_use (tree lhs
)
3466 use_operand_p immuse
;
3469 if (TREE_CODE (lhs
) == SSA_NAME
3470 && single_imm_use (lhs
, &immuse
, &immusestmt
)
3471 && is_gimple_assign (immusestmt
))
3477 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3478 representing the negated value. Insertions of any necessary
3479 instructions go before GSI.
3480 This function is recursive in that, if you hand it "a_5" as the
3481 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3482 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3485 negate_value (tree tonegate
, gimple_stmt_iterator
*gsip
)
3487 gimple negatedefstmt
= NULL
;
3488 tree resultofnegate
;
3489 gimple_stmt_iterator gsi
;
3492 /* If we are trying to negate a name, defined by an add, negate the
3493 add operands instead. */
3494 if (TREE_CODE (tonegate
) == SSA_NAME
)
3495 negatedefstmt
= SSA_NAME_DEF_STMT (tonegate
);
3496 if (TREE_CODE (tonegate
) == SSA_NAME
3497 && is_gimple_assign (negatedefstmt
)
3498 && TREE_CODE (gimple_assign_lhs (negatedefstmt
)) == SSA_NAME
3499 && has_single_use (gimple_assign_lhs (negatedefstmt
))
3500 && gimple_assign_rhs_code (negatedefstmt
) == PLUS_EXPR
)
3502 tree rhs1
= gimple_assign_rhs1 (negatedefstmt
);
3503 tree rhs2
= gimple_assign_rhs2 (negatedefstmt
);
3504 tree lhs
= gimple_assign_lhs (negatedefstmt
);
3507 gsi
= gsi_for_stmt (negatedefstmt
);
3508 rhs1
= negate_value (rhs1
, &gsi
);
3510 gsi
= gsi_for_stmt (negatedefstmt
);
3511 rhs2
= negate_value (rhs2
, &gsi
);
3513 gsi
= gsi_for_stmt (negatedefstmt
);
3514 lhs
= make_ssa_name (TREE_TYPE (lhs
), NULL
);
3515 gimple_set_visited (negatedefstmt
, true);
3516 g
= gimple_build_assign_with_ops (PLUS_EXPR
, lhs
, rhs1
, rhs2
);
3517 gimple_set_uid (g
, gimple_uid (negatedefstmt
));
3518 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3522 tonegate
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (tonegate
), tonegate
);
3523 resultofnegate
= force_gimple_operand_gsi (gsip
, tonegate
, true,
3524 NULL_TREE
, true, GSI_SAME_STMT
);
3526 uid
= gimple_uid (gsi_stmt (gsi
));
3527 for (gsi_prev (&gsi
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
3529 gimple stmt
= gsi_stmt (gsi
);
3530 if (gimple_uid (stmt
) != 0)
3532 gimple_set_uid (stmt
, uid
);
3534 return resultofnegate
;
3537 /* Return true if we should break up the subtract in STMT into an add
3538 with negate. This is true when we the subtract operands are really
3539 adds, or the subtract itself is used in an add expression. In
3540 either case, breaking up the subtract into an add with negate
3541 exposes the adds to reassociation. */
3544 should_break_up_subtract (gimple stmt
)
3546 tree lhs
= gimple_assign_lhs (stmt
);
3547 tree binlhs
= gimple_assign_rhs1 (stmt
);
3548 tree binrhs
= gimple_assign_rhs2 (stmt
);
3550 struct loop
*loop
= loop_containing_stmt (stmt
);
3552 if (TREE_CODE (binlhs
) == SSA_NAME
3553 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs
), PLUS_EXPR
, loop
))
3556 if (TREE_CODE (binrhs
) == SSA_NAME
3557 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
), PLUS_EXPR
, loop
))
3560 if (TREE_CODE (lhs
) == SSA_NAME
3561 && (immusestmt
= get_single_immediate_use (lhs
))
3562 && is_gimple_assign (immusestmt
)
3563 && (gimple_assign_rhs_code (immusestmt
) == PLUS_EXPR
3564 || gimple_assign_rhs_code (immusestmt
) == MULT_EXPR
))
3569 /* Transform STMT from A - B into A + -B. */
3572 break_up_subtract (gimple stmt
, gimple_stmt_iterator
*gsip
)
3574 tree rhs1
= gimple_assign_rhs1 (stmt
);
3575 tree rhs2
= gimple_assign_rhs2 (stmt
);
3577 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3579 fprintf (dump_file
, "Breaking up subtract ");
3580 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3583 rhs2
= negate_value (rhs2
, gsip
);
3584 gimple_assign_set_rhs_with_ops (gsip
, PLUS_EXPR
, rhs1
, rhs2
);
3588 /* Determine whether STMT is a builtin call that raises an SSA name
3589 to an integer power and has only one use. If so, and this is early
3590 reassociation and unsafe math optimizations are permitted, place
3591 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3592 If any of these conditions does not hold, return FALSE. */
3595 acceptable_pow_call (gimple stmt
, tree
*base
, HOST_WIDE_INT
*exponent
)
3598 REAL_VALUE_TYPE c
, cint
;
3600 if (!first_pass_instance
3601 || !flag_unsafe_math_optimizations
3602 || !is_gimple_call (stmt
)
3603 || !has_single_use (gimple_call_lhs (stmt
)))
3606 fndecl
= gimple_call_fndecl (stmt
);
3609 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
3612 switch (DECL_FUNCTION_CODE (fndecl
))
3614 CASE_FLT_FN (BUILT_IN_POW
):
3615 *base
= gimple_call_arg (stmt
, 0);
3616 arg1
= gimple_call_arg (stmt
, 1);
3618 if (TREE_CODE (arg1
) != REAL_CST
)
3621 c
= TREE_REAL_CST (arg1
);
3623 if (REAL_EXP (&c
) > HOST_BITS_PER_WIDE_INT
)
3626 *exponent
= real_to_integer (&c
);
3627 real_from_integer (&cint
, VOIDmode
, *exponent
,
3628 *exponent
< 0 ? -1 : 0, 0);
3629 if (!real_identical (&c
, &cint
))
3634 CASE_FLT_FN (BUILT_IN_POWI
):
3635 *base
= gimple_call_arg (stmt
, 0);
3636 arg1
= gimple_call_arg (stmt
, 1);
3638 if (!tree_fits_shwi_p (arg1
))
3641 *exponent
= TREE_INT_CST_LOW (arg1
);
3648 /* Expanding negative exponents is generally unproductive, so we don't
3649 complicate matters with those. Exponents of zero and one should
3650 have been handled by expression folding. */
3651 if (*exponent
< 2 || TREE_CODE (*base
) != SSA_NAME
)
3657 /* Recursively linearize a binary expression that is the RHS of STMT.
3658 Place the operands of the expression tree in the vector named OPS. */
3661 linearize_expr_tree (vec
<operand_entry_t
> *ops
, gimple stmt
,
3662 bool is_associative
, bool set_visited
)
3664 tree binlhs
= gimple_assign_rhs1 (stmt
);
3665 tree binrhs
= gimple_assign_rhs2 (stmt
);
3666 gimple binlhsdef
= NULL
, binrhsdef
= NULL
;
3667 bool binlhsisreassoc
= false;
3668 bool binrhsisreassoc
= false;
3669 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
3670 struct loop
*loop
= loop_containing_stmt (stmt
);
3671 tree base
= NULL_TREE
;
3672 HOST_WIDE_INT exponent
= 0;
3675 gimple_set_visited (stmt
, true);
3677 if (TREE_CODE (binlhs
) == SSA_NAME
)
3679 binlhsdef
= SSA_NAME_DEF_STMT (binlhs
);
3680 binlhsisreassoc
= (is_reassociable_op (binlhsdef
, rhscode
, loop
)
3681 && !stmt_could_throw_p (binlhsdef
));
3684 if (TREE_CODE (binrhs
) == SSA_NAME
)
3686 binrhsdef
= SSA_NAME_DEF_STMT (binrhs
);
3687 binrhsisreassoc
= (is_reassociable_op (binrhsdef
, rhscode
, loop
)
3688 && !stmt_could_throw_p (binrhsdef
));
3691 /* If the LHS is not reassociable, but the RHS is, we need to swap
3692 them. If neither is reassociable, there is nothing we can do, so
3693 just put them in the ops vector. If the LHS is reassociable,
3694 linearize it. If both are reassociable, then linearize the RHS
3697 if (!binlhsisreassoc
)
3701 /* If this is not a associative operation like division, give up. */
3702 if (!is_associative
)
3704 add_to_ops_vec (ops
, binrhs
);
3708 if (!binrhsisreassoc
)
3710 if (rhscode
== MULT_EXPR
3711 && TREE_CODE (binrhs
) == SSA_NAME
3712 && acceptable_pow_call (binrhsdef
, &base
, &exponent
))
3714 add_repeat_to_ops_vec (ops
, base
, exponent
);
3715 gimple_set_visited (binrhsdef
, true);
3718 add_to_ops_vec (ops
, binrhs
);
3720 if (rhscode
== MULT_EXPR
3721 && TREE_CODE (binlhs
) == SSA_NAME
3722 && acceptable_pow_call (binlhsdef
, &base
, &exponent
))
3724 add_repeat_to_ops_vec (ops
, base
, exponent
);
3725 gimple_set_visited (binlhsdef
, true);
3728 add_to_ops_vec (ops
, binlhs
);
3733 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3735 fprintf (dump_file
, "swapping operands of ");
3736 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3739 swap_ssa_operands (stmt
,
3740 gimple_assign_rhs1_ptr (stmt
),
3741 gimple_assign_rhs2_ptr (stmt
));
3744 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3746 fprintf (dump_file
, " is now ");
3747 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3750 /* We want to make it so the lhs is always the reassociative op,
3756 else if (binrhsisreassoc
)
3758 linearize_expr (stmt
);
3759 binlhs
= gimple_assign_rhs1 (stmt
);
3760 binrhs
= gimple_assign_rhs2 (stmt
);
3763 gcc_assert (TREE_CODE (binrhs
) != SSA_NAME
3764 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
),
3766 linearize_expr_tree (ops
, SSA_NAME_DEF_STMT (binlhs
),
3767 is_associative
, set_visited
);
3769 if (rhscode
== MULT_EXPR
3770 && TREE_CODE (binrhs
) == SSA_NAME
3771 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs
), &base
, &exponent
))
3773 add_repeat_to_ops_vec (ops
, base
, exponent
);
3774 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs
), true);
3777 add_to_ops_vec (ops
, binrhs
);
3780 /* Repropagate the negates back into subtracts, since no other pass
3781 currently does it. */
3784 repropagate_negates (void)
3789 FOR_EACH_VEC_ELT (plus_negates
, i
, negate
)
3791 gimple user
= get_single_immediate_use (negate
);
3793 if (!user
|| !is_gimple_assign (user
))
3796 /* The negate operand can be either operand of a PLUS_EXPR
3797 (it can be the LHS if the RHS is a constant for example).
3799 Force the negate operand to the RHS of the PLUS_EXPR, then
3800 transform the PLUS_EXPR into a MINUS_EXPR. */
3801 if (gimple_assign_rhs_code (user
) == PLUS_EXPR
)
3803 /* If the negated operand appears on the LHS of the
3804 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3805 to force the negated operand to the RHS of the PLUS_EXPR. */
3806 if (gimple_assign_rhs1 (user
) == negate
)
3808 swap_ssa_operands (user
,
3809 gimple_assign_rhs1_ptr (user
),
3810 gimple_assign_rhs2_ptr (user
));
3813 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3814 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3815 if (gimple_assign_rhs2 (user
) == negate
)
3817 tree rhs1
= gimple_assign_rhs1 (user
);
3818 tree rhs2
= get_unary_op (negate
, NEGATE_EXPR
);
3819 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
3820 gimple_assign_set_rhs_with_ops (&gsi
, MINUS_EXPR
, rhs1
, rhs2
);
3824 else if (gimple_assign_rhs_code (user
) == MINUS_EXPR
)
3826 if (gimple_assign_rhs1 (user
) == negate
)
3831 which we transform into
3834 This pushes down the negate which we possibly can merge
3835 into some other operation, hence insert it into the
3836 plus_negates vector. */
3837 gimple feed
= SSA_NAME_DEF_STMT (negate
);
3838 tree a
= gimple_assign_rhs1 (feed
);
3839 tree b
= gimple_assign_rhs2 (user
);
3840 gimple_stmt_iterator gsi
= gsi_for_stmt (feed
);
3841 gimple_stmt_iterator gsi2
= gsi_for_stmt (user
);
3842 tree x
= make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed
)), NULL
);
3843 gimple g
= gimple_build_assign_with_ops (PLUS_EXPR
, x
, a
, b
);
3844 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
3845 gimple_assign_set_rhs_with_ops (&gsi2
, NEGATE_EXPR
, x
, NULL
);
3846 user
= gsi_stmt (gsi2
);
3848 gsi_remove (&gsi
, true);
3849 release_defs (feed
);
3850 plus_negates
.safe_push (gimple_assign_lhs (user
));
3854 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3855 rid of one operation. */
3856 gimple feed
= SSA_NAME_DEF_STMT (negate
);
3857 tree a
= gimple_assign_rhs1 (feed
);
3858 tree rhs1
= gimple_assign_rhs1 (user
);
3859 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
3860 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, rhs1
, a
);
3861 update_stmt (gsi_stmt (gsi
));
3867 /* Returns true if OP is of a type for which we can do reassociation.
3868 That is for integral or non-saturating fixed-point types, and for
3869 floating point type when associative-math is enabled. */
3872 can_reassociate_p (tree op
)
3874 tree type
= TREE_TYPE (op
);
3875 if ((INTEGRAL_TYPE_P (type
) && TYPE_OVERFLOW_WRAPS (type
))
3876 || NON_SAT_FIXED_POINT_TYPE_P (type
)
3877 || (flag_associative_math
&& FLOAT_TYPE_P (type
)))
3882 /* Break up subtract operations in block BB.
3884 We do this top down because we don't know whether the subtract is
3885 part of a possible chain of reassociation except at the top.
3894 we want to break up k = t - q, but we won't until we've transformed q
3895 = b - r, which won't be broken up until we transform b = c - d.
3897 En passant, clear the GIMPLE visited flag on every statement
3898 and set UIDs within each basic block. */
3901 break_up_subtract_bb (basic_block bb
)
3903 gimple_stmt_iterator gsi
;
3905 unsigned int uid
= 1;
3907 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3909 gimple stmt
= gsi_stmt (gsi
);
3910 gimple_set_visited (stmt
, false);
3911 gimple_set_uid (stmt
, uid
++);
3913 if (!is_gimple_assign (stmt
)
3914 || !can_reassociate_p (gimple_assign_lhs (stmt
)))
3917 /* Look for simple gimple subtract operations. */
3918 if (gimple_assign_rhs_code (stmt
) == MINUS_EXPR
)
3920 if (!can_reassociate_p (gimple_assign_rhs1 (stmt
))
3921 || !can_reassociate_p (gimple_assign_rhs2 (stmt
)))
3924 /* Check for a subtract used only in an addition. If this
3925 is the case, transform it into add of a negate for better
3926 reassociation. IE transform C = A-B into C = A + -B if C
3927 is only used in an addition. */
3928 if (should_break_up_subtract (stmt
))
3929 break_up_subtract (stmt
, &gsi
);
3931 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
3932 && can_reassociate_p (gimple_assign_rhs1 (stmt
)))
3933 plus_negates
.safe_push (gimple_assign_lhs (stmt
));
3935 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
3937 son
= next_dom_son (CDI_DOMINATORS
, son
))
3938 break_up_subtract_bb (son
);
3941 /* Used for repeated factor analysis. */
3942 struct repeat_factor_d
3944 /* An SSA name that occurs in a multiply chain. */
3947 /* Cached rank of the factor. */
3950 /* Number of occurrences of the factor in the chain. */
3951 HOST_WIDE_INT count
;
3953 /* An SSA name representing the product of this factor and
3954 all factors appearing later in the repeated factor vector. */
3958 typedef struct repeat_factor_d repeat_factor
, *repeat_factor_t
;
3959 typedef const struct repeat_factor_d
*const_repeat_factor_t
;
3962 static vec
<repeat_factor
> repeat_factor_vec
;
3964 /* Used for sorting the repeat factor vector. Sort primarily by
3965 ascending occurrence count, secondarily by descending rank. */
3968 compare_repeat_factors (const void *x1
, const void *x2
)
3970 const_repeat_factor_t rf1
= (const_repeat_factor_t
) x1
;
3971 const_repeat_factor_t rf2
= (const_repeat_factor_t
) x2
;
3973 if (rf1
->count
!= rf2
->count
)
3974 return rf1
->count
- rf2
->count
;
3976 return rf2
->rank
- rf1
->rank
;
3979 /* Look for repeated operands in OPS in the multiply tree rooted at
3980 STMT. Replace them with an optimal sequence of multiplies and powi
3981 builtin calls, and remove the used operands from OPS. Return an
3982 SSA name representing the value of the replacement sequence. */
3985 attempt_builtin_powi (gimple stmt
, vec
<operand_entry_t
> *ops
)
3987 unsigned i
, j
, vec_len
;
3990 repeat_factor_t rf1
, rf2
;
3991 repeat_factor rfnew
;
3992 tree result
= NULL_TREE
;
3993 tree target_ssa
, iter_result
;
3994 tree type
= TREE_TYPE (gimple_get_lhs (stmt
));
3995 tree powi_fndecl
= mathfn_built_in (type
, BUILT_IN_POWI
);
3996 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3997 gimple mul_stmt
, pow_stmt
;
3999 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4004 /* Allocate the repeated factor vector. */
4005 repeat_factor_vec
.create (10);
4007 /* Scan the OPS vector for all SSA names in the product and build
4008 up a vector of occurrence counts for each factor. */
4009 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
4011 if (TREE_CODE (oe
->op
) == SSA_NAME
)
4013 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4015 if (rf1
->factor
== oe
->op
)
4017 rf1
->count
+= oe
->count
;
4022 if (j
>= repeat_factor_vec
.length ())
4024 rfnew
.factor
= oe
->op
;
4025 rfnew
.rank
= oe
->rank
;
4026 rfnew
.count
= oe
->count
;
4027 rfnew
.repr
= NULL_TREE
;
4028 repeat_factor_vec
.safe_push (rfnew
);
4033 /* Sort the repeated factor vector by (a) increasing occurrence count,
4034 and (b) decreasing rank. */
4035 repeat_factor_vec
.qsort (compare_repeat_factors
);
4037 /* It is generally best to combine as many base factors as possible
4038 into a product before applying __builtin_powi to the result.
4039 However, the sort order chosen for the repeated factor vector
4040 allows us to cache partial results for the product of the base
4041 factors for subsequent use. When we already have a cached partial
4042 result from a previous iteration, it is best to make use of it
4043 before looking for another __builtin_pow opportunity.
4045 As an example, consider x * x * y * y * y * z * z * z * z.
4046 We want to first compose the product x * y * z, raise it to the
4047 second power, then multiply this by y * z, and finally multiply
4048 by z. This can be done in 5 multiplies provided we cache y * z
4049 for use in both expressions:
4057 If we instead ignored the cached y * z and first multiplied by
4058 the __builtin_pow opportunity z * z, we would get the inferior:
4067 vec_len
= repeat_factor_vec
.length ();
4069 /* Repeatedly look for opportunities to create a builtin_powi call. */
4072 HOST_WIDE_INT power
;
4074 /* First look for the largest cached product of factors from
4075 preceding iterations. If found, create a builtin_powi for
4076 it if the minimum occurrence count for its factors is at
4077 least 2, or just use this cached product as our next
4078 multiplicand if the minimum occurrence count is 1. */
4079 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4081 if (rf1
->repr
&& rf1
->count
> 0)
4091 iter_result
= rf1
->repr
;
4093 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4097 fputs ("Multiplying by cached product ", dump_file
);
4098 for (elt
= j
; elt
< vec_len
; elt
++)
4100 rf
= &repeat_factor_vec
[elt
];
4101 print_generic_expr (dump_file
, rf
->factor
, 0);
4102 if (elt
< vec_len
- 1)
4103 fputs (" * ", dump_file
);
4105 fputs ("\n", dump_file
);
4110 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4111 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
4112 build_int_cst (integer_type_node
,
4114 gimple_call_set_lhs (pow_stmt
, iter_result
);
4115 gimple_set_location (pow_stmt
, gimple_location (stmt
));
4116 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
4118 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4122 fputs ("Building __builtin_pow call for cached product (",
4124 for (elt
= j
; elt
< vec_len
; elt
++)
4126 rf
= &repeat_factor_vec
[elt
];
4127 print_generic_expr (dump_file
, rf
->factor
, 0);
4128 if (elt
< vec_len
- 1)
4129 fputs (" * ", dump_file
);
4131 fprintf (dump_file
, ")^"HOST_WIDE_INT_PRINT_DEC
"\n",
4138 /* Otherwise, find the first factor in the repeated factor
4139 vector whose occurrence count is at least 2. If no such
4140 factor exists, there are no builtin_powi opportunities
4142 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4144 if (rf1
->count
>= 2)
4153 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4157 fputs ("Building __builtin_pow call for (", dump_file
);
4158 for (elt
= j
; elt
< vec_len
; elt
++)
4160 rf
= &repeat_factor_vec
[elt
];
4161 print_generic_expr (dump_file
, rf
->factor
, 0);
4162 if (elt
< vec_len
- 1)
4163 fputs (" * ", dump_file
);
4165 fprintf (dump_file
, ")^"HOST_WIDE_INT_PRINT_DEC
"\n", power
);
4168 reassociate_stats
.pows_created
++;
4170 /* Visit each element of the vector in reverse order (so that
4171 high-occurrence elements are visited first, and within the
4172 same occurrence count, lower-ranked elements are visited
4173 first). Form a linear product of all elements in this order
4174 whose occurrencce count is at least that of element J.
4175 Record the SSA name representing the product of each element
4176 with all subsequent elements in the vector. */
4177 if (j
== vec_len
- 1)
4178 rf1
->repr
= rf1
->factor
;
4181 for (ii
= vec_len
- 2; ii
>= (int)j
; ii
--)
4185 rf1
= &repeat_factor_vec
[ii
];
4186 rf2
= &repeat_factor_vec
[ii
+ 1];
4188 /* Init the last factor's representative to be itself. */
4190 rf2
->repr
= rf2
->factor
;
4195 target_ssa
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4196 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
,
4199 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4200 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
4201 rf1
->repr
= target_ssa
;
4203 /* Don't reprocess the multiply we just introduced. */
4204 gimple_set_visited (mul_stmt
, true);
4208 /* Form a call to __builtin_powi for the maximum product
4209 just formed, raised to the power obtained earlier. */
4210 rf1
= &repeat_factor_vec
[j
];
4211 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4212 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
4213 build_int_cst (integer_type_node
,
4215 gimple_call_set_lhs (pow_stmt
, iter_result
);
4216 gimple_set_location (pow_stmt
, gimple_location (stmt
));
4217 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
4220 /* If we previously formed at least one other builtin_powi call,
4221 form the product of this one and those others. */
4224 tree new_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4225 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
, new_result
,
4226 result
, iter_result
);
4227 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4228 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
4229 gimple_set_visited (mul_stmt
, true);
4230 result
= new_result
;
4233 result
= iter_result
;
4235 /* Decrement the occurrence count of each element in the product
4236 by the count found above, and remove this many copies of each
4238 for (i
= j
; i
< vec_len
; i
++)
4243 rf1
= &repeat_factor_vec
[i
];
4244 rf1
->count
-= power
;
4246 FOR_EACH_VEC_ELT_REVERSE (*ops
, n
, oe
)
4248 if (oe
->op
== rf1
->factor
)
4252 ops
->ordered_remove (n
);
4268 /* At this point all elements in the repeated factor vector have a
4269 remaining occurrence count of 0 or 1, and those with a count of 1
4270 don't have cached representatives. Re-sort the ops vector and
4272 ops
->qsort (sort_by_operand_rank
);
4273 repeat_factor_vec
.release ();
4275 /* Return the final product computed herein. Note that there may
4276 still be some elements with single occurrence count left in OPS;
4277 those will be handled by the normal reassociation logic. */
4281 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4284 transform_stmt_to_copy (gimple_stmt_iterator
*gsi
, gimple stmt
, tree new_rhs
)
4288 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4290 fprintf (dump_file
, "Transforming ");
4291 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4294 rhs1
= gimple_assign_rhs1 (stmt
);
4295 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
4297 remove_visited_stmt_chain (rhs1
);
4299 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4301 fprintf (dump_file
, " into ");
4302 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4306 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4309 transform_stmt_to_multiply (gimple_stmt_iterator
*gsi
, gimple stmt
,
4310 tree rhs1
, tree rhs2
)
4312 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4314 fprintf (dump_file
, "Transforming ");
4315 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4318 gimple_assign_set_rhs_with_ops (gsi
, MULT_EXPR
, rhs1
, rhs2
);
4319 update_stmt (gsi_stmt (*gsi
));
4320 remove_visited_stmt_chain (rhs1
);
4322 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4324 fprintf (dump_file
, " into ");
4325 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4329 /* Reassociate expressions in basic block BB and its post-dominator as
4333 reassociate_bb (basic_block bb
)
4335 gimple_stmt_iterator gsi
;
4337 gimple stmt
= last_stmt (bb
);
4339 if (stmt
&& !gimple_visited_p (stmt
))
4340 maybe_optimize_range_tests (stmt
);
4342 for (gsi
= gsi_last_bb (bb
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
4344 stmt
= gsi_stmt (gsi
);
4346 if (is_gimple_assign (stmt
)
4347 && !stmt_could_throw_p (stmt
))
4349 tree lhs
, rhs1
, rhs2
;
4350 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
4352 /* If this is not a gimple binary expression, there is
4353 nothing for us to do with it. */
4354 if (get_gimple_rhs_class (rhs_code
) != GIMPLE_BINARY_RHS
)
4357 /* If this was part of an already processed statement,
4358 we don't need to touch it again. */
4359 if (gimple_visited_p (stmt
))
4361 /* This statement might have become dead because of previous
4363 if (has_zero_uses (gimple_get_lhs (stmt
)))
4365 gsi_remove (&gsi
, true);
4366 release_defs (stmt
);
4367 /* We might end up removing the last stmt above which
4368 places the iterator to the end of the sequence.
4369 Reset it to the last stmt in this case which might
4370 be the end of the sequence as well if we removed
4371 the last statement of the sequence. In which case
4372 we need to bail out. */
4373 if (gsi_end_p (gsi
))
4375 gsi
= gsi_last_bb (bb
);
4376 if (gsi_end_p (gsi
))
4383 lhs
= gimple_assign_lhs (stmt
);
4384 rhs1
= gimple_assign_rhs1 (stmt
);
4385 rhs2
= gimple_assign_rhs2 (stmt
);
4387 /* For non-bit or min/max operations we can't associate
4388 all types. Verify that here. */
4389 if (rhs_code
!= BIT_IOR_EXPR
4390 && rhs_code
!= BIT_AND_EXPR
4391 && rhs_code
!= BIT_XOR_EXPR
4392 && rhs_code
!= MIN_EXPR
4393 && rhs_code
!= MAX_EXPR
4394 && (!can_reassociate_p (lhs
)
4395 || !can_reassociate_p (rhs1
)
4396 || !can_reassociate_p (rhs2
)))
4399 if (associative_tree_code (rhs_code
))
4401 vec
<operand_entry_t
> ops
= vNULL
;
4402 tree powi_result
= NULL_TREE
;
4404 /* There may be no immediate uses left by the time we
4405 get here because we may have eliminated them all. */
4406 if (TREE_CODE (lhs
) == SSA_NAME
&& has_zero_uses (lhs
))
4409 gimple_set_visited (stmt
, true);
4410 linearize_expr_tree (&ops
, stmt
, true, true);
4411 ops
.qsort (sort_by_operand_rank
);
4412 optimize_ops_list (rhs_code
, &ops
);
4413 if (undistribute_ops_list (rhs_code
, &ops
,
4414 loop_containing_stmt (stmt
)))
4416 ops
.qsort (sort_by_operand_rank
);
4417 optimize_ops_list (rhs_code
, &ops
);
4420 if (rhs_code
== BIT_IOR_EXPR
|| rhs_code
== BIT_AND_EXPR
)
4421 optimize_range_tests (rhs_code
, &ops
);
4423 if (first_pass_instance
4424 && rhs_code
== MULT_EXPR
4425 && flag_unsafe_math_optimizations
)
4426 powi_result
= attempt_builtin_powi (stmt
, &ops
);
4428 /* If the operand vector is now empty, all operands were
4429 consumed by the __builtin_powi optimization. */
4430 if (ops
.length () == 0)
4431 transform_stmt_to_copy (&gsi
, stmt
, powi_result
);
4432 else if (ops
.length () == 1)
4434 tree last_op
= ops
.last ()->op
;
4437 transform_stmt_to_multiply (&gsi
, stmt
, last_op
,
4440 transform_stmt_to_copy (&gsi
, stmt
, last_op
);
4444 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (lhs
));
4445 int ops_num
= ops
.length ();
4446 int width
= get_reassociation_width (ops_num
, rhs_code
, mode
);
4449 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4451 "Width = %d was chosen for reassociation\n", width
);
4454 && ops
.length () > 3)
4455 rewrite_expr_tree_parallel (stmt
, width
, ops
);
4458 /* When there are three operands left, we want
4459 to make sure the ones that get the double
4460 binary op are chosen wisely. */
4461 int len
= ops
.length ();
4463 swap_ops_for_binary_stmt (ops
, len
- 3, stmt
);
4465 new_lhs
= rewrite_expr_tree (stmt
, 0, ops
,
4466 powi_result
!= NULL
);
4469 /* If we combined some repeated factors into a
4470 __builtin_powi call, multiply that result by the
4471 reassociated operands. */
4474 gimple mul_stmt
, lhs_stmt
= SSA_NAME_DEF_STMT (lhs
);
4475 tree type
= TREE_TYPE (lhs
);
4476 tree target_ssa
= make_temp_ssa_name (type
, NULL
,
4478 gimple_set_lhs (lhs_stmt
, target_ssa
);
4479 update_stmt (lhs_stmt
);
4481 target_ssa
= new_lhs
;
4482 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
, lhs
,
4485 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4486 gsi_insert_after (&gsi
, mul_stmt
, GSI_NEW_STMT
);
4494 for (son
= first_dom_son (CDI_POST_DOMINATORS
, bb
);
4496 son
= next_dom_son (CDI_POST_DOMINATORS
, son
))
4497 reassociate_bb (son
);
4500 void dump_ops_vector (FILE *file
, vec
<operand_entry_t
> ops
);
4501 void debug_ops_vector (vec
<operand_entry_t
> ops
);
4503 /* Dump the operand entry vector OPS to FILE. */
4506 dump_ops_vector (FILE *file
, vec
<operand_entry_t
> ops
)
4511 FOR_EACH_VEC_ELT (ops
, i
, oe
)
4513 fprintf (file
, "Op %d -> rank: %d, tree: ", i
, oe
->rank
);
4514 print_generic_expr (file
, oe
->op
, 0);
4518 /* Dump the operand entry vector OPS to STDERR. */
4521 debug_ops_vector (vec
<operand_entry_t
> ops
)
4523 dump_ops_vector (stderr
, ops
);
4529 break_up_subtract_bb (ENTRY_BLOCK_PTR
);
4530 reassociate_bb (EXIT_BLOCK_PTR
);
4533 /* Initialize the reassociation pass. */
4540 int *bbs
= XNEWVEC (int, n_basic_blocks
- NUM_FIXED_BLOCKS
);
4542 /* Find the loops, so that we can prevent moving calculations in
4544 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
4546 memset (&reassociate_stats
, 0, sizeof (reassociate_stats
));
4548 operand_entry_pool
= create_alloc_pool ("operand entry pool",
4549 sizeof (struct operand_entry
), 30);
4550 next_operand_entry_id
= 0;
4552 /* Reverse RPO (Reverse Post Order) will give us something where
4553 deeper loops come later. */
4554 pre_and_rev_post_order_compute (NULL
, bbs
, false);
4555 bb_rank
= XCNEWVEC (long, last_basic_block
);
4556 operand_rank
= pointer_map_create ();
4558 /* Give each default definition a distinct rank. This includes
4559 parameters and the static chain. Walk backwards over all
4560 SSA names so that we get proper rank ordering according
4561 to tree_swap_operands_p. */
4562 for (i
= num_ssa_names
- 1; i
> 0; --i
)
4564 tree name
= ssa_name (i
);
4565 if (name
&& SSA_NAME_IS_DEFAULT_DEF (name
))
4566 insert_operand_rank (name
, ++rank
);
4569 /* Set up rank for each BB */
4570 for (i
= 0; i
< n_basic_blocks
- NUM_FIXED_BLOCKS
; i
++)
4571 bb_rank
[bbs
[i
]] = ++rank
<< 16;
4574 calculate_dominance_info (CDI_POST_DOMINATORS
);
4575 plus_negates
= vNULL
;
4578 /* Cleanup after the reassociation pass, and print stats if
4584 statistics_counter_event (cfun
, "Linearized",
4585 reassociate_stats
.linearized
);
4586 statistics_counter_event (cfun
, "Constants eliminated",
4587 reassociate_stats
.constants_eliminated
);
4588 statistics_counter_event (cfun
, "Ops eliminated",
4589 reassociate_stats
.ops_eliminated
);
4590 statistics_counter_event (cfun
, "Statements rewritten",
4591 reassociate_stats
.rewritten
);
4592 statistics_counter_event (cfun
, "Built-in pow[i] calls encountered",
4593 reassociate_stats
.pows_encountered
);
4594 statistics_counter_event (cfun
, "Built-in powi calls created",
4595 reassociate_stats
.pows_created
);
4597 pointer_map_destroy (operand_rank
);
4598 free_alloc_pool (operand_entry_pool
);
4600 plus_negates
.release ();
4601 free_dominance_info (CDI_POST_DOMINATORS
);
4602 loop_optimizer_finalize ();
4605 /* Gate and execute functions for Reassociation. */
4608 execute_reassoc (void)
4613 repropagate_negates ();
4620 gate_tree_ssa_reassoc (void)
4622 return flag_tree_reassoc
!= 0;
4627 const pass_data pass_data_reassoc
=
4629 GIMPLE_PASS
, /* type */
4630 "reassoc", /* name */
4631 OPTGROUP_NONE
, /* optinfo_flags */
4632 true, /* has_gate */
4633 true, /* has_execute */
4634 TV_TREE_REASSOC
, /* tv_id */
4635 ( PROP_cfg
| PROP_ssa
), /* properties_required */
4636 0, /* properties_provided */
4637 0, /* properties_destroyed */
4638 0, /* todo_flags_start */
4640 | TODO_update_ssa_only_virtuals
4641 | TODO_verify_flow
), /* todo_flags_finish */
4644 class pass_reassoc
: public gimple_opt_pass
4647 pass_reassoc (gcc::context
*ctxt
)
4648 : gimple_opt_pass (pass_data_reassoc
, ctxt
)
4651 /* opt_pass methods: */
4652 opt_pass
* clone () { return new pass_reassoc (m_ctxt
); }
4653 bool gate () { return gate_tree_ssa_reassoc (); }
4654 unsigned int execute () { return execute_reassoc (); }
4656 }; // class pass_reassoc
4661 make_pass_reassoc (gcc::context
*ctxt
)
4663 return new pass_reassoc (ctxt
);