1 /* Reassociation for trees.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "tree.h"
26 #include "basic-block.h"
27 #include "gimple-pretty-print.h"
28 #include "tree-inline.h"
29 #include "tree-flow.h"
30 #include "gimple.h"
31 #include "tree-iterator.h"
32 #include "tree-pass.h"
33 #include "alloc-pool.h"
34 #include "vec.h"
35 #include "langhooks.h"
36 #include "pointer-set.h"
37 #include "cfgloop.h"
38 #include "flags.h"
39 #include "target.h"
40 #include "params.h"
41 #include "diagnostic-core.h"
43 /* This is a simple global reassociation pass. It is, in part, based
44 on the LLVM pass of the same name (They do some things more/less
45 than we do, in different orders, etc).
47 It consists of five steps:
49 1. Breaking up subtract operations into addition + negate, where
50 it would promote the reassociation of adds.
52 2. Left linearization of the expression trees, so that (A+B)+(C+D)
53 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
54 During linearization, we place the operands of the binary
55 expressions into a vector of operand_entry_t
57 3. Optimization of the operand lists, eliminating things like a +
58 -a, a & a, etc.
60 3a. Combine repeated factors with the same occurrence counts
61 into a __builtin_powi call that will later be optimized into
62 an optimal number of multiplies.
64 4. Rewrite the expression trees we linearized and optimized so
65 they are in proper rank order.
67 5. Repropagate negates, as nothing else will clean it up ATM.
69 A bit of theory on #4, since nobody seems to write anything down
70 about why it makes sense to do it the way they do it:
72 We could do this much nicer theoretically, but don't (for reasons
73 explained after how to do it theoretically nice :P).
75 In order to promote the most redundancy elimination, you want
76 binary expressions whose operands are the same rank (or
77 preferably, the same value) exposed to the redundancy eliminator,
78 for possible elimination.
80 So the way to do this if we really cared, is to build the new op
81 tree from the leaves to the roots, merging as you go, and putting the
82 new op on the end of the worklist, until you are left with one
83 thing on the worklist.
85 IE if you have to rewrite the following set of operands (listed with
86 rank in parentheses), with opcode PLUS_EXPR:
88 a (1), b (1), c (1), d (2), e (2)
91 We start with our merge worklist empty, and the ops list with all of
92 those on it.
94 You want to first merge all leaves of the same rank, as much as
95 possible.
97 So first build a binary op of
99 mergetmp = a + b, and put "mergetmp" on the merge worklist.
101 Because there is no three operand form of PLUS_EXPR, c is not going to
102 be exposed to redundancy elimination as a rank 1 operand.
104 So you might as well throw it on the merge worklist (you could also
105 consider it to now be a rank two operand, and merge it with d and e,
106 but in this case, you then have evicted e from a binary op. So at
107 least in this situation, you can't win.)
109 Then build a binary op of d + e
110 mergetmp2 = d + e
112 and put mergetmp2 on the merge worklist.
114 so merge worklist = {mergetmp, c, mergetmp2}
116 Continue building binary ops of these operations until you have only
117 one operation left on the worklist.
119 So we have
121 build binary op
122 mergetmp3 = mergetmp + c
124 worklist = {mergetmp2, mergetmp3}
126 mergetmp4 = mergetmp2 + mergetmp3
128 worklist = {mergetmp4}
130 because we have one operation left, we can now just set the original
131 statement equal to the result of that operation.
133 This will at least expose a + b and d + e to redundancy elimination
134 as binary operations.
136 For extra points, you can reuse the old statements to build the
137 mergetmps, since you shouldn't run out.
139 So why don't we do this?
141 Because it's expensive, and rarely will help. Most trees we are
142 reassociating have 3 or fewer ops. If they have 2 ops, they already
143 will be written into a nice single binary op. If you have 3 ops, a
144 single simple check suffices to tell you whether the first two are of the
145 same rank. If so, you know to order it
147 mergetmp = op1 + op2
148 newstmt = mergetmp + op3
150 instead of
151 mergetmp = op2 + op3
152 newstmt = mergetmp + op1
154 If all three are of the same rank, you can't expose them all in a
155 single binary operator anyway, so the above is *still* the best you
156 can do.
158 Thus, this is what we do. When we have three ops left, we check to see
159 what order to put them in, and call it a day. As a nod to vector sum
160 reduction, we check if any of the ops are really a phi node that is a
161 destructive update for the associating op, and keep the destructive
162 update together for vector sum reduction recognition. */
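/* An illustrative sketch of the whole pipeline (SSA names invented for
   exposition):

     _1 = a + b;
     _2 = c + d;
     _3 = _1 + _2;          (A+B)+(C+D)

   Step 2 linearizes the operands of _3 into the list {a, b, c, d},
   step 3 optimizes and sorts the list by rank, and step 4 rewrites it
   back as the left-linear chain

     _1 = a + b;
     _2 = _1 + c;
     _3 = _2 + d;           ((A+B)+C)+D

   which is the shape described above.  */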
165 /* Statistics */
166 static struct
168 int linearized;
169 int constants_eliminated;
170 int ops_eliminated;
171 int rewritten;
172 int pows_encountered;
173 int pows_created;
174 } reassociate_stats;
176 /* Operator, rank pair. */
177 typedef struct operand_entry
179 unsigned int rank;
180 int id;
181 tree op;
182 unsigned int count;
183 } *operand_entry_t;
185 static alloc_pool operand_entry_pool;
187 /* This is used to assign a unique ID to each struct operand_entry
188 so that qsort results are identical on different hosts. */
189 static int next_operand_entry_id;
191 /* Starting rank number for a given basic block, so that we can rank
192 operations using unmovable instructions in that BB based on the bb
193 depth. */
194 static long *bb_rank;
196 /* Operand->rank hashtable. */
197 static struct pointer_map_t *operand_rank;
199 /* Forward decls. */
200 static long get_rank (tree);
203 /* Bias amount for loop-carried phis. We want this to be larger than
204 the depth of any reassociation tree we can see, but not larger than
205 the rank difference between two blocks. */
206 #define PHI_LOOP_BIAS (1 << 15)
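/* Illustrative note: given the definitions below, an accumulator phi in
   the header of an innermost loop receives rank
   bb_rank[latch] + PHI_LOOP_BIAS rather than the rank of its own block,
   so when operand lists are sorted by rank the accumulator compares
   higher than the loop-body operands and ends up added last.  */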
208 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
209 an innermost loop, and the phi has only a single use which is inside
210 the loop, then the rank is the block rank of the loop latch plus an
211 extra bias for the loop-carried dependence. This causes expressions
212 calculated into an accumulator variable to be independent for each
213 iteration of the loop. If STMT is some other phi, the rank is the
214 block rank of its containing block. */
215 static long
216 phi_rank (gimple stmt)
218 basic_block bb = gimple_bb (stmt);
219 struct loop *father = bb->loop_father;
220 tree res;
221 unsigned i;
222 use_operand_p use;
223 gimple use_stmt;
225 /* We only care about real loops (those with a latch). */
226 if (!father->latch)
227 return bb_rank[bb->index];
229 /* Interesting phis must be in headers of innermost loops. */
230 if (bb != father->header
231 || father->inner)
232 return bb_rank[bb->index];
234 /* Ignore virtual SSA_NAMEs. */
235 res = gimple_phi_result (stmt);
236 if (virtual_operand_p (res))
237 return bb_rank[bb->index];
239 /* The phi definition must have a single use, and that use must be
240 within the loop. Otherwise this isn't an accumulator pattern. */
241 if (!single_imm_use (res, &use, &use_stmt)
242 || gimple_bb (use_stmt)->loop_father != father)
243 return bb_rank[bb->index];
245 /* Look for phi arguments from within the loop. If found, bias this phi. */
246 for (i = 0; i < gimple_phi_num_args (stmt); i++)
248 tree arg = gimple_phi_arg_def (stmt, i);
249 if (TREE_CODE (arg) == SSA_NAME
250 && !SSA_NAME_IS_DEFAULT_DEF (arg))
252 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
253 if (gimple_bb (def_stmt)->loop_father == father)
254 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
258 /* Must be an uninteresting phi. */
259 return bb_rank[bb->index];
262 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
263 loop-carried dependence of an innermost loop, return TRUE; else
264 return FALSE. */
265 static bool
266 loop_carried_phi (tree exp)
268 gimple phi_stmt;
269 long block_rank;
271 if (TREE_CODE (exp) != SSA_NAME
272 || SSA_NAME_IS_DEFAULT_DEF (exp))
273 return false;
275 phi_stmt = SSA_NAME_DEF_STMT (exp);
277 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
278 return false;
280 /* Non-loop-carried phis have block rank. Loop-carried phis have
281 an additional bias added in. If this phi doesn't have block rank,
282 it's biased and should not be propagated. */
283 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
285 if (phi_rank (phi_stmt) != block_rank)
286 return true;
288 return false;
291 /* Return the maximum of RANK and the rank that should be propagated
292 from expression OP. For most operands, this is just the rank of OP.
293 For loop-carried phis, the value is zero to avoid undoing the bias
294 in favor of the phi. */
295 static long
296 propagate_rank (long rank, tree op)
298 long op_rank;
300 if (loop_carried_phi (op))
301 return rank;
303 op_rank = get_rank (op);
305 return MAX (rank, op_rank);
308 /* Look up the operand rank structure for expression E. */
310 static inline long
311 find_operand_rank (tree e)
313 void **slot = pointer_map_contains (operand_rank, e);
314 return slot ? (long) (intptr_t) *slot : -1;
317 /* Insert {E,RANK} into the operand rank hashtable. */
319 static inline void
320 insert_operand_rank (tree e, long rank)
322 void **slot;
323 gcc_assert (rank > 0);
324 slot = pointer_map_insert (operand_rank, e);
325 gcc_assert (!*slot);
326 *slot = (void *) (intptr_t) rank;
329 /* Given an expression E, return the rank of the expression. */
331 static long
332 get_rank (tree e)
334 /* Constants have rank 0. */
335 if (is_gimple_min_invariant (e))
336 return 0;
338 /* SSA_NAME's have the rank of the expression they are the result of.
340 For globals and uninitialized values, the rank is 0.
341 For function arguments, use the pre-setup rank.
342 For PHI nodes, stores, asm statements, etc, we use the rank of
343 the BB.
344 For simple operations, the rank is the maximum rank of any of
345 its operands, or the bb_rank, whichever is less.
346 I make no claims that this is optimal; however, it gives good
347 results. */
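/* A small worked example (ranks invented): if operand a has rank 2 and
   operand b has rank 3, then for

     c = a + b;

   the code below computes MAX (2, 3) + 1 == 4 as the rank of c, one more
   than its highest-ranked operand; a call or a statement with a store
   would instead simply get the rank of its basic block.  */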
349 /* We make an exception to the normal ranking system to break
350 dependences of accumulator variables in loops. Suppose we
351 have a simple one-block loop containing:
353 x_1 = phi(x_0, x_2)
354 b = a + x_1
355 c = b + d
356 x_2 = c + e
358 As shown, each iteration of the calculation into x is fully
359 dependent upon the iteration before it. We would prefer to
360 see this in the form:
362 x_1 = phi(x_0, x_2)
363 b = a + d
364 c = b + e
365 x_2 = c + x_1
367 If the loop is unrolled, the calculations of b and c from
368 different iterations can be interleaved.
370 To obtain this result during reassociation, we bias the rank
371 of the phi definition x_1 upward, when it is recognized as an
372 accumulator pattern. The artificial rank causes it to be
373 added last, providing the desired independence. */
375 if (TREE_CODE (e) == SSA_NAME)
377 gimple stmt;
378 long rank;
379 int i, n;
380 tree op;
382 if (SSA_NAME_IS_DEFAULT_DEF (e))
383 return find_operand_rank (e);
385 stmt = SSA_NAME_DEF_STMT (e);
386 if (gimple_code (stmt) == GIMPLE_PHI)
387 return phi_rank (stmt);
389 if (!is_gimple_assign (stmt)
390 || gimple_vdef (stmt))
391 return bb_rank[gimple_bb (stmt)->index];
393 /* If we already have a rank for this expression, use that. */
394 rank = find_operand_rank (e);
395 if (rank != -1)
396 return rank;
398 /* Otherwise, find the maximum rank for the operands. As an
399 exception, remove the bias from loop-carried phis when propagating
400 the rank so that dependent operations are not also biased. */
401 rank = 0;
402 if (gimple_assign_single_p (stmt))
404 tree rhs = gimple_assign_rhs1 (stmt);
405 n = TREE_OPERAND_LENGTH (rhs);
406 if (n == 0)
407 rank = propagate_rank (rank, rhs);
408 else
410 for (i = 0; i < n; i++)
412 op = TREE_OPERAND (rhs, i);
414 if (op != NULL_TREE)
415 rank = propagate_rank (rank, op);
419 else
421 n = gimple_num_ops (stmt);
422 for (i = 1; i < n; i++)
424 op = gimple_op (stmt, i);
425 gcc_assert (op);
426 rank = propagate_rank (rank, op);
430 if (dump_file && (dump_flags & TDF_DETAILS))
432 fprintf (dump_file, "Rank for ");
433 print_generic_expr (dump_file, e, 0);
434 fprintf (dump_file, " is %ld\n", (rank + 1));
437 /* Note the rank in the hashtable so we don't recompute it. */
438 insert_operand_rank (e, (rank + 1));
439 return (rank + 1);
442 /* Globals, etc, are rank 0 */
443 return 0;
447 /* We want integer ones to end up last no matter what, since they are
448 the ones we can do the most with. */
449 #define INTEGER_CONST_TYPE 1 << 3
450 #define FLOAT_CONST_TYPE 1 << 2
451 #define OTHER_CONST_TYPE 1 << 1
453 /* Classify an invariant tree into integer, float, or other, so that
454 we can sort them to be near other constants of the same type. */
455 static inline int
456 constant_type (tree t)
458 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
459 return INTEGER_CONST_TYPE;
460 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
461 return FLOAT_CONST_TYPE;
462 else
463 return OTHER_CONST_TYPE;
466 /* qsort comparison function to sort operand entries PA and PB by rank
467 so that the sorted array is ordered by rank in decreasing order. */
468 static int
469 sort_by_operand_rank (const void *pa, const void *pb)
471 const operand_entry_t oea = *(const operand_entry_t *)pa;
472 const operand_entry_t oeb = *(const operand_entry_t *)pb;
474 /* It's nicer for optimize_expression if constants that are likely
475 to fold when added/multiplied/whatever are put next to each
476 other. Since all constants have rank 0, order them by type. */
477 if (oeb->rank == 0 && oea->rank == 0)
479 if (constant_type (oeb->op) != constant_type (oea->op))
480 return constant_type (oeb->op) - constant_type (oea->op);
481 else
482 /* To make sorting result stable, we use unique IDs to determine
483 order. */
484 return oeb->id - oea->id;
487 /* Lastly, make sure the versions that are the same go next to each
488 other. We use SSA_NAME_VERSION because it's stable. */
489 if ((oeb->rank - oea->rank == 0)
490 && TREE_CODE (oea->op) == SSA_NAME
491 && TREE_CODE (oeb->op) == SSA_NAME)
493 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
494 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
495 else
496 return oeb->id - oea->id;
499 if (oeb->rank != oea->rank)
500 return oeb->rank - oea->rank;
501 else
502 return oeb->id - oea->id;
505 /* Add an operand entry to *OPS for the tree operand OP. */
507 static void
508 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
510 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
512 oe->op = op;
513 oe->rank = get_rank (op);
514 oe->id = next_operand_entry_id++;
515 oe->count = 1;
516 ops->safe_push (oe);
519 /* Add an operand entry to *OPS for the tree operand OP with repeat
520 count REPEAT. */
522 static void
523 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
524 HOST_WIDE_INT repeat)
526 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
528 oe->op = op;
529 oe->rank = get_rank (op);
530 oe->id = next_operand_entry_id++;
531 oe->count = repeat;
532 ops->safe_push (oe);
534 reassociate_stats.pows_encountered++;
537 /* Return true if STMT is a reassociable operation containing a binary
538 operation with tree code CODE, and is inside LOOP. */
540 static bool
541 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
543 basic_block bb = gimple_bb (stmt);
545 if (gimple_bb (stmt) == NULL)
546 return false;
548 if (!flow_bb_inside_loop_p (loop, bb))
549 return false;
551 if (is_gimple_assign (stmt)
552 && gimple_assign_rhs_code (stmt) == code
553 && has_single_use (gimple_assign_lhs (stmt)))
554 return true;
556 return false;
560 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
561 operand of that operation. Otherwise, return NULL. */
563 static tree
564 get_unary_op (tree name, enum tree_code opcode)
566 gimple stmt = SSA_NAME_DEF_STMT (name);
568 if (!is_gimple_assign (stmt))
569 return NULL_TREE;
571 if (gimple_assign_rhs_code (stmt) == opcode)
572 return gimple_assign_rhs1 (stmt);
573 return NULL_TREE;
576 /* If CURR and LAST are a pair of ops that OPCODE allows us to
577 eliminate through equivalences, do so, remove them from OPS, and
578 return true. Otherwise, return false. */
580 static bool
581 eliminate_duplicate_pair (enum tree_code opcode,
582 vec<operand_entry_t> *ops,
583 bool *all_done,
584 unsigned int i,
585 operand_entry_t curr,
586 operand_entry_t last)
589 /* If we have two of the same op, and the opcode is &, |, min, or max,
590 we can eliminate one of them.
591 If we have two of the same op, and the opcode is ^, we can
592 eliminate both of them. */
594 if (last && last->op == curr->op)
596 switch (opcode)
598 case MAX_EXPR:
599 case MIN_EXPR:
600 case BIT_IOR_EXPR:
601 case BIT_AND_EXPR:
602 if (dump_file && (dump_flags & TDF_DETAILS))
604 fprintf (dump_file, "Equivalence: ");
605 print_generic_expr (dump_file, curr->op, 0);
606 fprintf (dump_file, " [&|minmax] ");
607 print_generic_expr (dump_file, last->op, 0);
608 fprintf (dump_file, " -> ");
609 print_generic_stmt (dump_file, last->op, 0);
612 ops->ordered_remove (i);
613 reassociate_stats.ops_eliminated ++;
615 return true;
617 case BIT_XOR_EXPR:
618 if (dump_file && (dump_flags & TDF_DETAILS))
620 fprintf (dump_file, "Equivalence: ");
621 print_generic_expr (dump_file, curr->op, 0);
622 fprintf (dump_file, " ^ ");
623 print_generic_expr (dump_file, last->op, 0);
624 fprintf (dump_file, " -> nothing\n");
627 reassociate_stats.ops_eliminated += 2;
629 if (ops->length () == 2)
631 ops->create (0);
632 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
633 *all_done = true;
635 else
637 ops->ordered_remove (i-1);
638 ops->ordered_remove (i-1);
641 return true;
643 default:
644 break;
647 return false;
650 static vec<tree> plus_negates;
652 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
653 expression, look in OPS for a corresponding positive operation to cancel
654 it out. If we find one, remove the other from OPS, replace
655 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
656 return false. */
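/* Concrete cases of the above (operands invented): within a PLUS_EXPR
   operand list,

     a + (-a) + b   ->   0 + b
     a + ~a + b     ->   -1 + b

   i.e. the cancelling pair is dropped and replaced by the constant 0 or
   -1 respectively.  */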
658 static bool
659 eliminate_plus_minus_pair (enum tree_code opcode,
660 vec<operand_entry_t> *ops,
661 unsigned int currindex,
662 operand_entry_t curr)
664 tree negateop;
665 tree notop;
666 unsigned int i;
667 operand_entry_t oe;
669 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
670 return false;
672 negateop = get_unary_op (curr->op, NEGATE_EXPR);
673 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
674 if (negateop == NULL_TREE && notop == NULL_TREE)
675 return false;
677 /* Any non-negated version will have a rank that is one less than
678 the current rank. So once we hit those ranks, if we don't find
679 one, we can stop. */
681 for (i = currindex + 1;
682 ops->iterate (i, &oe)
683 && oe->rank >= curr->rank - 1 ;
684 i++)
686 if (oe->op == negateop)
689 if (dump_file && (dump_flags & TDF_DETAILS))
691 fprintf (dump_file, "Equivalence: ");
692 print_generic_expr (dump_file, negateop, 0);
693 fprintf (dump_file, " + -");
694 print_generic_expr (dump_file, oe->op, 0);
695 fprintf (dump_file, " -> 0\n");
698 ops->ordered_remove (i);
699 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
700 ops->ordered_remove (currindex);
701 reassociate_stats.ops_eliminated ++;
703 return true;
705 else if (oe->op == notop)
707 tree op_type = TREE_TYPE (oe->op);
709 if (dump_file && (dump_flags & TDF_DETAILS))
711 fprintf (dump_file, "Equivalence: ");
712 print_generic_expr (dump_file, notop, 0);
713 fprintf (dump_file, " + ~");
714 print_generic_expr (dump_file, oe->op, 0);
715 fprintf (dump_file, " -> -1\n");
718 ops->ordered_remove (i);
719 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
720 ops->ordered_remove (currindex);
721 reassociate_stats.ops_eliminated ++;
723 return true;
727 /* CURR->OP is a negate expr in a plus expr: save it for later
728 inspection in repropagate_negates(). */
729 if (negateop != NULL_TREE)
730 plus_negates.safe_push (curr->op);
732 return false;
735 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
736 bitwise not expression, look in OPS for a corresponding operand to
737 cancel it out. If we find one, remove the other from OPS, replace
738 OPS[CURRINDEX] with 0, and return true. Otherwise, return
739 false. */
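/* Concrete cases of the above (operands invented):

     x & ~x   ->   0
     x | ~x   ->   all-ones mask

   matching the build_zero_cst / build_low_bits_mask replacements in the
   body below.  */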
741 static bool
742 eliminate_not_pairs (enum tree_code opcode,
743 vec<operand_entry_t> *ops,
744 unsigned int currindex,
745 operand_entry_t curr)
747 tree notop;
748 unsigned int i;
749 operand_entry_t oe;
751 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
752 || TREE_CODE (curr->op) != SSA_NAME)
753 return false;
755 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
756 if (notop == NULL_TREE)
757 return false;
759 /* Any non-not version will have a rank that is one less than
760 the current rank. So once we hit those ranks, if we don't find
761 one, we can stop. */
763 for (i = currindex + 1;
764 ops->iterate (i, &oe)
765 && oe->rank >= curr->rank - 1;
766 i++)
768 if (oe->op == notop)
770 if (dump_file && (dump_flags & TDF_DETAILS))
772 fprintf (dump_file, "Equivalence: ");
773 print_generic_expr (dump_file, notop, 0);
774 if (opcode == BIT_AND_EXPR)
775 fprintf (dump_file, " & ~");
776 else if (opcode == BIT_IOR_EXPR)
777 fprintf (dump_file, " | ~");
778 print_generic_expr (dump_file, oe->op, 0);
779 if (opcode == BIT_AND_EXPR)
780 fprintf (dump_file, " -> 0\n");
781 else if (opcode == BIT_IOR_EXPR)
782 fprintf (dump_file, " -> -1\n");
785 if (opcode == BIT_AND_EXPR)
786 oe->op = build_zero_cst (TREE_TYPE (oe->op));
787 else if (opcode == BIT_IOR_EXPR)
788 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
789 TYPE_PRECISION (TREE_TYPE (oe->op)));
791 reassociate_stats.ops_eliminated += ops->length () - 1;
792 ops->truncate (0);
793 ops->quick_push (oe);
794 return true;
798 return false;
801 /* Use constant value that may be present in OPS to try to eliminate
802 operands. Note that this function is only really used when we've
803 eliminated ops for other reasons, or merged constants. Across
804 single statements, fold already does all of this, plus more. There
805 is little point in duplicating logic, so I've only included the
806 identities that I could ever construct testcases to trigger. */
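/* The identities handled below, shown on invented operands:

     a & 0  -> 0        a & -1 -> a
     a | -1 -> -1       a | 0  -> a
     a * 0  -> 0        a * 1  -> a
     a + 0  -> a        a ^ 0  -> a

   with the floating-point cases guarded by the HONOR_* and
   fold_real_zero_addition_p checks in the code.  */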
808 static void
809 eliminate_using_constants (enum tree_code opcode,
810 vec<operand_entry_t> *ops)
812 operand_entry_t oelast = ops->last ();
813 tree type = TREE_TYPE (oelast->op);
815 if (oelast->rank == 0
816 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
818 switch (opcode)
820 case BIT_AND_EXPR:
821 if (integer_zerop (oelast->op))
823 if (ops->length () != 1)
825 if (dump_file && (dump_flags & TDF_DETAILS))
826 fprintf (dump_file, "Found & 0, removing all other ops\n");
828 reassociate_stats.ops_eliminated += ops->length () - 1;
830 ops->truncate (0);
831 ops->quick_push (oelast);
832 return;
835 else if (integer_all_onesp (oelast->op))
837 if (ops->length () != 1)
839 if (dump_file && (dump_flags & TDF_DETAILS))
840 fprintf (dump_file, "Found & -1, removing\n");
841 ops->pop ();
842 reassociate_stats.ops_eliminated++;
845 break;
846 case BIT_IOR_EXPR:
847 if (integer_all_onesp (oelast->op))
849 if (ops->length () != 1)
851 if (dump_file && (dump_flags & TDF_DETAILS))
852 fprintf (dump_file, "Found | -1, removing all other ops\n");
854 reassociate_stats.ops_eliminated += ops->length () - 1;
856 ops->truncate (0);
857 ops->quick_push (oelast);
858 return;
861 else if (integer_zerop (oelast->op))
863 if (ops->length () != 1)
865 if (dump_file && (dump_flags & TDF_DETAILS))
866 fprintf (dump_file, "Found | 0, removing\n");
867 ops->pop ();
868 reassociate_stats.ops_eliminated++;
871 break;
872 case MULT_EXPR:
873 if (integer_zerop (oelast->op)
874 || (FLOAT_TYPE_P (type)
875 && !HONOR_NANS (TYPE_MODE (type))
876 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
877 && real_zerop (oelast->op)))
879 if (ops->length () != 1)
881 if (dump_file && (dump_flags & TDF_DETAILS))
882 fprintf (dump_file, "Found * 0, removing all other ops\n");
884 reassociate_stats.ops_eliminated += ops->length () - 1;
885 ops->truncate (1);
886 ops->quick_push (oelast);
887 return;
890 else if (integer_onep (oelast->op)
891 || (FLOAT_TYPE_P (type)
892 && !HONOR_SNANS (TYPE_MODE (type))
893 && real_onep (oelast->op)))
895 if (ops->length () != 1)
897 if (dump_file && (dump_flags & TDF_DETAILS))
898 fprintf (dump_file, "Found * 1, removing\n");
899 ops->pop ();
900 reassociate_stats.ops_eliminated++;
901 return;
904 break;
905 case BIT_XOR_EXPR:
906 case PLUS_EXPR:
907 case MINUS_EXPR:
908 if (integer_zerop (oelast->op)
909 || (FLOAT_TYPE_P (type)
910 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
911 && fold_real_zero_addition_p (type, oelast->op,
912 opcode == MINUS_EXPR)))
914 if (ops->length () != 1)
916 if (dump_file && (dump_flags & TDF_DETAILS))
917 fprintf (dump_file, "Found [|^+] 0, removing\n");
918 ops->pop ();
919 reassociate_stats.ops_eliminated++;
920 return;
923 break;
924 default:
925 break;
931 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
932 bool, bool);
934 /* Structure for tracking and counting operands. */
935 typedef struct oecount_s {
936 int cnt;
937 int id;
938 enum tree_code oecode;
939 tree op;
940 } oecount;
943 /* The heap for the oecount hashtable and the sorted list of operands. */
944 static vec<oecount> cvec;
946 /* Hash function for oecount. */
948 static hashval_t
949 oecount_hash (const void *p)
951 const oecount *c = &cvec[(size_t)p - 42];
952 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
955 /* Comparison function for oecount. */
957 static int
958 oecount_eq (const void *p1, const void *p2)
960 const oecount *c1 = &cvec[(size_t)p1 - 42];
961 const oecount *c2 = &cvec[(size_t)p2 - 42];
962 return (c1->oecode == c2->oecode
963 && c1->op == c2->op);
966 /* Comparison function for qsort sorting oecount elements by count. */
968 static int
969 oecount_cmp (const void *p1, const void *p2)
971 const oecount *c1 = (const oecount *)p1;
972 const oecount *c2 = (const oecount *)p2;
973 if (c1->cnt != c2->cnt)
974 return c1->cnt - c2->cnt;
975 else
976 /* If counts are identical, use unique IDs to stabilize qsort. */
977 return c1->id - c2->id;
980 /* Return TRUE iff STMT represents a builtin call that raises OP
981 to some exponent. */
983 static bool
984 stmt_is_power_of_op (gimple stmt, tree op)
986 tree fndecl;
988 if (!is_gimple_call (stmt))
989 return false;
991 fndecl = gimple_call_fndecl (stmt);
993 if (!fndecl
994 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
995 return false;
997 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
999 CASE_FLT_FN (BUILT_IN_POW):
1000 CASE_FLT_FN (BUILT_IN_POWI):
1001 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1003 default:
1004 return false;
1008 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1009 in place and return the result. Assumes that stmt_is_power_of_op
1010 was previously called for STMT and returned TRUE. */
1012 static HOST_WIDE_INT
1013 decrement_power (gimple stmt)
1015 REAL_VALUE_TYPE c, cint;
1016 HOST_WIDE_INT power;
1017 tree arg1;
1019 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1021 CASE_FLT_FN (BUILT_IN_POW):
1022 arg1 = gimple_call_arg (stmt, 1);
1023 c = TREE_REAL_CST (arg1);
1024 power = real_to_integer (&c) - 1;
1025 real_from_integer (&cint, VOIDmode, power, 0, 0);
1026 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1027 return power;
1029 CASE_FLT_FN (BUILT_IN_POWI):
1030 arg1 = gimple_call_arg (stmt, 1);
1031 power = TREE_INT_CST_LOW (arg1) - 1;
1032 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1033 return power;
1035 default:
1036 gcc_unreachable ();
1040 /* Find the single immediate use of STMT's LHS, and replace it
1041 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1042 replace *DEF with OP as well. */
1044 static void
1045 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1047 tree lhs;
1048 gimple use_stmt;
1049 use_operand_p use;
1050 gimple_stmt_iterator gsi;
1052 if (is_gimple_call (stmt))
1053 lhs = gimple_call_lhs (stmt);
1054 else
1055 lhs = gimple_assign_lhs (stmt);
1057 gcc_assert (has_single_use (lhs));
1058 single_imm_use (lhs, &use, &use_stmt);
1059 if (lhs == *def)
1060 *def = op;
1061 SET_USE (use, op);
1062 if (TREE_CODE (op) != SSA_NAME)
1063 update_stmt (use_stmt);
1064 gsi = gsi_for_stmt (stmt);
1065 unlink_stmt_vdef (stmt);
1066 gsi_remove (&gsi, true);
1067 release_defs (stmt);
1070 /* Walks the linear chain with result *DEF searching for an operation
1071 with operand OP and code OPCODE removing that from the chain. *DEF
1072 is updated if there is only one operand but no operation left. */
1074 static void
1075 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1077 gimple stmt = SSA_NAME_DEF_STMT (*def);
1081 tree name;
1083 if (opcode == MULT_EXPR
1084 && stmt_is_power_of_op (stmt, op))
1086 if (decrement_power (stmt) == 1)
1087 propagate_op_to_single_use (op, stmt, def);
1088 return;
1091 name = gimple_assign_rhs1 (stmt);
1093 /* If this is the operation we look for and one of the operands
1094 is ours simply propagate the other operand into the stmts
1095 single use. */
1096 if (gimple_assign_rhs_code (stmt) == opcode
1097 && (name == op
1098 || gimple_assign_rhs2 (stmt) == op))
1100 if (name == op)
1101 name = gimple_assign_rhs2 (stmt);
1102 propagate_op_to_single_use (name, stmt, def);
1103 return;
1106 /* We might have a multiply of two __builtin_pow* calls, and
1107 the operand might be hiding in the rightmost one. */
1108 if (opcode == MULT_EXPR
1109 && gimple_assign_rhs_code (stmt) == opcode
1110 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
1112 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1113 if (stmt_is_power_of_op (stmt2, op))
1115 if (decrement_power (stmt2) == 1)
1116 propagate_op_to_single_use (op, stmt2, def);
1117 return;
1121 /* Continue walking the chain. */
1122 gcc_assert (name != op
1123 && TREE_CODE (name) == SSA_NAME);
1124 stmt = SSA_NAME_DEF_STMT (name);
1126 while (1);
1129 /* Builds one statement performing OP1 OPCODE OP2, making a new SSA name of type TYPE for
1130 the result. Places the statement after the definition of either
1131 OP1 or OP2. Returns the new statement. */
1133 static gimple
1134 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1136 gimple op1def = NULL, op2def = NULL;
1137 gimple_stmt_iterator gsi;
1138 tree op;
1139 gimple sum;
1141 /* Create the addition statement. */
1142 op = make_ssa_name (type, NULL);
1143 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1145 /* Find an insertion place and insert. */
1146 if (TREE_CODE (op1) == SSA_NAME)
1147 op1def = SSA_NAME_DEF_STMT (op1);
1148 if (TREE_CODE (op2) == SSA_NAME)
1149 op2def = SSA_NAME_DEF_STMT (op2);
1150 if ((!op1def || gimple_nop_p (op1def))
1151 && (!op2def || gimple_nop_p (op2def)))
1153 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1154 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1156 else if ((!op1def || gimple_nop_p (op1def))
1157 || (op2def && !gimple_nop_p (op2def)
1158 && stmt_dominates_stmt_p (op1def, op2def)))
1160 if (gimple_code (op2def) == GIMPLE_PHI)
1162 gsi = gsi_after_labels (gimple_bb (op2def));
1163 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1165 else
1167 if (!stmt_ends_bb_p (op2def))
1169 gsi = gsi_for_stmt (op2def);
1170 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1172 else
1174 edge e;
1175 edge_iterator ei;
1177 FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1178 if (e->flags & EDGE_FALLTHRU)
1179 gsi_insert_on_edge_immediate (e, sum);
1183 else
1185 if (gimple_code (op1def) == GIMPLE_PHI)
1187 gsi = gsi_after_labels (gimple_bb (op1def));
1188 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1190 else
1192 if (!stmt_ends_bb_p (op1def))
1194 gsi = gsi_for_stmt (op1def);
1195 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1197 else
1199 edge e;
1200 edge_iterator ei;
1202 FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1203 if (e->flags & EDGE_FALLTHRU)
1204 gsi_insert_on_edge_immediate (e, sum);
1208 update_stmt (sum);
1210 return sum;
1213 /* Perform un-distribution of divisions and multiplications.
1214 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1215 to (A + B) / X for real X.
1217 The algorithm is organized as follows.
1219 - First we walk the addition chain *OPS looking for summands that
1220 are defined by a multiplication or a real division. This results
1221 in the candidates bitmap with relevant indices into *OPS.
1223 - Second we build the chains of multiplications or divisions for
1224 these candidates, counting the number of occurrences of (operand, code)
1225 pairs in all of the candidates chains.
1227 - Third we sort the (operand, code) pairs by number of occurrence and
1228 process them starting with the pair with the most uses.
1230 * For each such pair we walk the candidates again to build a
1231 second candidate bitmap noting all multiplication/division chains
1232 that have at least one occurrence of (operand, code).
1234 * We build an alternate addition chain only covering these
1235 candidates with one (operand, code) operation removed from their
1236 multiplication/division chain.
1238 * The first candidate gets replaced by the alternate addition chain
1239 multiplied/divided by the operand.
1241 * All candidate chains get disabled for further processing and
1242 processing of (operand, code) pairs continues.
1244 The alternate addition chains built are re-processed by the main
1245 reassociation algorithm which allows optimizing a * x * y + b * y * x
1246 to (a + b ) * x * y in one invocation of the reassociation pass. */
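/* A worked example (SSA names invented): for the addition chain

     _5 = a * x;
     _6 = b * x;
     _7 = _5 + _6;

   the pair (x, MULT_EXPR) is counted twice, so x is removed from both
   multiplication chains, an alternate addition a + b is built, the first
   candidate is replaced by (a + b) * x, and the second candidate becomes
   the constant 0 in the operand list.  */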
1248 static bool
1249 undistribute_ops_list (enum tree_code opcode,
1250 vec<operand_entry_t> *ops, struct loop *loop)
1252 unsigned int length = ops->length ();
1253 operand_entry_t oe1;
1254 unsigned i, j;
1255 sbitmap candidates, candidates2;
1256 unsigned nr_candidates, nr_candidates2;
1257 sbitmap_iterator sbi0;
1258 vec<operand_entry_t> *subops;
1259 htab_t ctable;
1260 bool changed = false;
1261 int next_oecount_id = 0;
1263 if (length <= 1
1264 || opcode != PLUS_EXPR)
1265 return false;
1267 /* Build a list of candidates to process. */
1268 candidates = sbitmap_alloc (length);
1269 bitmap_clear (candidates);
1270 nr_candidates = 0;
1271 FOR_EACH_VEC_ELT (*ops, i, oe1)
1273 enum tree_code dcode;
1274 gimple oe1def;
1276 if (TREE_CODE (oe1->op) != SSA_NAME)
1277 continue;
1278 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1279 if (!is_gimple_assign (oe1def))
1280 continue;
1281 dcode = gimple_assign_rhs_code (oe1def);
1282 if ((dcode != MULT_EXPR
1283 && dcode != RDIV_EXPR)
1284 || !is_reassociable_op (oe1def, dcode, loop))
1285 continue;
1287 bitmap_set_bit (candidates, i);
1288 nr_candidates++;
1291 if (nr_candidates < 2)
1293 sbitmap_free (candidates);
1294 return false;
1297 if (dump_file && (dump_flags & TDF_DETAILS))
1299 fprintf (dump_file, "searching for un-distribute opportunities ");
1300 print_generic_expr (dump_file,
1301 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1302 fprintf (dump_file, " %d\n", nr_candidates);
1305 /* Build linearized sub-operand lists and the counting table. */
1306 cvec.create (0);
1307 ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
1308 /* ??? Macro arguments cannot have multi-argument template types in
1309 them. This typedef is needed to workaround that limitation. */
1310 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1311 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1312 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1314 gimple oedef;
1315 enum tree_code oecode;
1316 unsigned j;
1318 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1319 oecode = gimple_assign_rhs_code (oedef);
1320 linearize_expr_tree (&subops[i], oedef,
1321 associative_tree_code (oecode), false);
1323 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1325 oecount c;
1326 void **slot;
1327 size_t idx;
1328 c.oecode = oecode;
1329 c.cnt = 1;
1330 c.id = next_oecount_id++;
1331 c.op = oe1->op;
1332 cvec.safe_push (c);
1333 idx = cvec.length () + 41;
1334 slot = htab_find_slot (ctable, (void *)idx, INSERT);
1335 if (!*slot)
1337 *slot = (void *)idx;
1339 else
1341 cvec.pop ();
1342 cvec[(size_t)*slot - 42].cnt++;
1346 htab_delete (ctable);
1348 /* Sort the counting table. */
1349 cvec.qsort (oecount_cmp);
1351 if (dump_file && (dump_flags & TDF_DETAILS))
1353 oecount *c;
1354 fprintf (dump_file, "Candidates:\n");
1355 FOR_EACH_VEC_ELT (cvec, j, c)
1357 fprintf (dump_file, " %u %s: ", c->cnt,
1358 c->oecode == MULT_EXPR
1359 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1360 print_generic_expr (dump_file, c->op, 0);
1361 fprintf (dump_file, "\n");
1365 /* Process the (operand, code) pairs in order of most occurrence. */
1366 candidates2 = sbitmap_alloc (length);
1367 while (!cvec.is_empty ())
1369 oecount *c = &cvec.last ();
1370 if (c->cnt < 2)
1371 break;
1373 /* Now collect the operands in the outer chain that contain
1374 the common operand in their inner chain. */
1375 bitmap_clear (candidates2);
1376 nr_candidates2 = 0;
1377 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1379 gimple oedef;
1380 enum tree_code oecode;
1381 unsigned j;
1382 tree op = (*ops)[i]->op;
1384 /* If we undistributed in this chain already this may be
1385 a constant. */
1386 if (TREE_CODE (op) != SSA_NAME)
1387 continue;
1389 oedef = SSA_NAME_DEF_STMT (op);
1390 oecode = gimple_assign_rhs_code (oedef);
1391 if (oecode != c->oecode)
1392 continue;
1394 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1396 if (oe1->op == c->op)
1398 bitmap_set_bit (candidates2, i);
1399 ++nr_candidates2;
1400 break;
1405 if (nr_candidates2 >= 2)
1407 operand_entry_t oe1, oe2;
1408 gimple prod;
1409 int first = bitmap_first_set_bit (candidates2);
1411 /* Build the new addition chain. */
1412 oe1 = (*ops)[first];
1413 if (dump_file && (dump_flags & TDF_DETAILS))
1415 fprintf (dump_file, "Building (");
1416 print_generic_expr (dump_file, oe1->op, 0);
1418 zero_one_operation (&oe1->op, c->oecode, c->op);
1419 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1421 gimple sum;
1422 oe2 = (*ops)[i];
1423 if (dump_file && (dump_flags & TDF_DETAILS))
1425 fprintf (dump_file, " + ");
1426 print_generic_expr (dump_file, oe2->op, 0);
1428 zero_one_operation (&oe2->op, c->oecode, c->op);
1429 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1430 oe1->op, oe2->op, opcode);
1431 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1432 oe2->rank = 0;
1433 oe1->op = gimple_get_lhs (sum);
1436 /* Apply the multiplication/division. */
1437 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1438 oe1->op, c->op, c->oecode);
1439 if (dump_file && (dump_flags & TDF_DETAILS))
1441 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1442 print_generic_expr (dump_file, c->op, 0);
1443 fprintf (dump_file, "\n");
1446 /* Record it in the addition chain and disable further
1447 undistribution with this op. */
1448 oe1->op = gimple_assign_lhs (prod);
1449 oe1->rank = get_rank (oe1->op);
1450 subops[first].release ();
1452 changed = true;
1455 cvec.pop ();
1458 for (i = 0; i < ops->length (); ++i)
1459 subops[i].release ();
1460 free (subops);
1461 cvec.release ();
1462 sbitmap_free (candidates);
1463 sbitmap_free (candidates2);
1465 return changed;
1468 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1469 expression, examine the other OPS to see if any of them are comparisons
1470 of the same values, which we may be able to combine or eliminate.
1471 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
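/* A further illustrative case, this time for the BIT_AND_EXPR direction:
   when maybe_fold_and_comparisons recognizes a pair such as
   (a <= b) & (a < b), the combined result (a < b) replaces one operand
   entry and the other entry is removed, just as in the BIT_IOR_EXPR
   example above.  */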
1473 static bool
1474 eliminate_redundant_comparison (enum tree_code opcode,
1475 vec<operand_entry_t> *ops,
1476 unsigned int currindex,
1477 operand_entry_t curr)
1479 tree op1, op2;
1480 enum tree_code lcode, rcode;
1481 gimple def1, def2;
1482 int i;
1483 operand_entry_t oe;
1485 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1486 return false;
1488 /* Check that CURR is a comparison. */
1489 if (TREE_CODE (curr->op) != SSA_NAME)
1490 return false;
1491 def1 = SSA_NAME_DEF_STMT (curr->op);
1492 if (!is_gimple_assign (def1))
1493 return false;
1494 lcode = gimple_assign_rhs_code (def1);
1495 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1496 return false;
1497 op1 = gimple_assign_rhs1 (def1);
1498 op2 = gimple_assign_rhs2 (def1);
1500 /* Now look for a similar comparison in the remaining OPS. */
1501 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1503 tree t;
1505 if (TREE_CODE (oe->op) != SSA_NAME)
1506 continue;
1507 def2 = SSA_NAME_DEF_STMT (oe->op);
1508 if (!is_gimple_assign (def2))
1509 continue;
1510 rcode = gimple_assign_rhs_code (def2);
1511 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1512 continue;
1514 /* If we got here, we have a match. See if we can combine the
1515 two comparisons. */
1516 if (opcode == BIT_IOR_EXPR)
1517 t = maybe_fold_or_comparisons (lcode, op1, op2,
1518 rcode, gimple_assign_rhs1 (def2),
1519 gimple_assign_rhs2 (def2));
1520 else
1521 t = maybe_fold_and_comparisons (lcode, op1, op2,
1522 rcode, gimple_assign_rhs1 (def2),
1523 gimple_assign_rhs2 (def2));
1524 if (!t)
1525 continue;
1527 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1528 always give us a boolean_type_node value back. If the original
1529 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1530 we need to convert. */
1531 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1532 t = fold_convert (TREE_TYPE (curr->op), t);
1534 if (TREE_CODE (t) != INTEGER_CST
1535 && !operand_equal_p (t, curr->op, 0))
1537 enum tree_code subcode;
1538 tree newop1, newop2;
1539 if (!COMPARISON_CLASS_P (t))
1540 continue;
1541 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1542 STRIP_USELESS_TYPE_CONVERSION (newop1);
1543 STRIP_USELESS_TYPE_CONVERSION (newop2);
1544 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1545 continue;
1548 if (dump_file && (dump_flags & TDF_DETAILS))
1550 fprintf (dump_file, "Equivalence: ");
1551 print_generic_expr (dump_file, curr->op, 0);
1552 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1553 print_generic_expr (dump_file, oe->op, 0);
1554 fprintf (dump_file, " -> ");
1555 print_generic_expr (dump_file, t, 0);
1556 fprintf (dump_file, "\n");
1559 /* Now we can delete oe, as it has been subsumed by the new combined
1560 expression t. */
1561 ops->ordered_remove (i);
1562 reassociate_stats.ops_eliminated ++;
1564 /* If t is the same as curr->op, we're done. Otherwise we must
1565 replace curr->op with t. Special case is if we got a constant
1566 back, in which case we add it to the end instead of in place of
1567 the current entry. */
1568 if (TREE_CODE (t) == INTEGER_CST)
1570 ops->ordered_remove (currindex);
1571 add_to_ops_vec (ops, t);
1573 else if (!operand_equal_p (t, curr->op, 0))
1575 gimple sum;
1576 enum tree_code subcode;
1577 tree newop1;
1578 tree newop2;
1579 gcc_assert (COMPARISON_CLASS_P (t));
1580 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1581 STRIP_USELESS_TYPE_CONVERSION (newop1);
1582 STRIP_USELESS_TYPE_CONVERSION (newop2);
1583 gcc_checking_assert (is_gimple_val (newop1)
1584 && is_gimple_val (newop2));
1585 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1586 curr->op = gimple_get_lhs (sum);
1588 return true;
1591 return false;
1594 /* Perform various identities and other optimizations on the list of
1595 operand entries, stored in OPS. The tree code for the binary
1596 operation between all the operands is OPCODE. */
1598 static void
1599 optimize_ops_list (enum tree_code opcode,
1600 vec<operand_entry_t> *ops)
1602 unsigned int length = ops->length ();
1603 unsigned int i;
1604 operand_entry_t oe;
1605 operand_entry_t oelast = NULL;
1606 bool iterate = false;
1608 if (length == 1)
1609 return;
1611 oelast = ops->last ();
1613 /* If the last two are constants, pop the constants off, merge them
1614 and try the next two. */
1615 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1617 operand_entry_t oelm1 = (*ops)[length - 2];
1619 if (oelm1->rank == 0
1620 && is_gimple_min_invariant (oelm1->op)
1621 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1622 TREE_TYPE (oelast->op)))
1624 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1625 oelm1->op, oelast->op);
1627 if (folded && is_gimple_min_invariant (folded))
1629 if (dump_file && (dump_flags & TDF_DETAILS))
1630 fprintf (dump_file, "Merging constants\n");
1632 ops->pop ();
1633 ops->pop ();
1635 add_to_ops_vec (ops, folded);
1636 reassociate_stats.constants_eliminated++;
1638 optimize_ops_list (opcode, ops);
1639 return;
1644 eliminate_using_constants (opcode, ops);
1645 oelast = NULL;
1647 for (i = 0; ops->iterate (i, &oe);)
1649 bool done = false;
1651 if (eliminate_not_pairs (opcode, ops, i, oe))
1652 return;
1653 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1654 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1655 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1657 if (done)
1658 return;
1659 iterate = true;
1660 oelast = NULL;
1661 continue;
1663 oelast = oe;
1664 i++;
1667 length = ops->length ();
1668 oelast = ops->last ();
1670 if (iterate)
1671 optimize_ops_list (opcode, ops);
1674 /* The following functions are subroutines to optimize_range_tests and allow
1675 it to try to change a logical combination of comparisons into a range
1676 test.
1678 For example, both
1679 X == 2 || X == 5 || X == 3 || X == 4
and
1681 X >= 2 && X <= 5
1682 are converted to
1683 (unsigned) (X - 2) <= 3
1685 For more information see comments above fold_test_range in fold-const.c;
1686 this implementation is for GIMPLE. */
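/* Roughly, the combined check above ends up as straight-line GIMPLE such
   as (illustrative only; the exact sequence is whatever build_range_check
   and force_gimple_operand_gsi produce):

     _1 = (unsigned int) x_2;
     _2 = _1 + 4294967294;     <-- x - 2 computed in the unsigned type
     _3 = _2 <= 3;

   which replaces the original chain of comparisons.  */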
1688 struct range_entry
1690 tree exp;
1691 tree low;
1692 tree high;
1693 bool in_p;
1694 bool strict_overflow_p;
1695 unsigned int idx, next;
1698 /* This is similar to make_range in fold-const.c, but on top of
1699 GIMPLE instead of trees. If EXP is non-NULL, it should be
1700 an SSA_NAME and STMT argument is ignored, otherwise STMT
1701 argument should be a GIMPLE_COND. */
1703 static void
1704 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1706 int in_p;
1707 tree low, high;
1708 bool is_bool, strict_overflow_p;
1710 r->exp = NULL_TREE;
1711 r->in_p = false;
1712 r->strict_overflow_p = false;
1713 r->low = NULL_TREE;
1714 r->high = NULL_TREE;
1715 if (exp != NULL_TREE
1716 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1717 return;
1719 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1720 and see if we can refine the range. Some of the cases below may not
1721 happen, but it doesn't seem worth worrying about this. We "continue"
1722 the outer loop when we've changed something; otherwise we "break"
1723 the switch, which will "break" the while. */
1724 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1725 high = low;
1726 in_p = 0;
1727 strict_overflow_p = false;
1728 is_bool = false;
1729 if (exp == NULL_TREE)
1730 is_bool = true;
1731 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1733 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1734 is_bool = true;
1735 else
1736 return;
1738 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1739 is_bool = true;
1741 while (1)
1743 enum tree_code code;
1744 tree arg0, arg1, exp_type;
1745 tree nexp;
1746 location_t loc;
1748 if (exp != NULL_TREE)
1750 if (TREE_CODE (exp) != SSA_NAME)
1751 break;
1753 stmt = SSA_NAME_DEF_STMT (exp);
1754 if (!is_gimple_assign (stmt))
1755 break;
1757 code = gimple_assign_rhs_code (stmt);
1758 arg0 = gimple_assign_rhs1 (stmt);
1759 arg1 = gimple_assign_rhs2 (stmt);
1760 exp_type = TREE_TYPE (exp);
1762 else
1764 code = gimple_cond_code (stmt);
1765 arg0 = gimple_cond_lhs (stmt);
1766 arg1 = gimple_cond_rhs (stmt);
1767 exp_type = boolean_type_node;
1770 if (TREE_CODE (arg0) != SSA_NAME)
1771 break;
1772 loc = gimple_location (stmt);
1773 switch (code)
1775 case BIT_NOT_EXPR:
1776 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1778 in_p = !in_p;
1779 exp = arg0;
1780 continue;
1782 break;
1783 case SSA_NAME:
1784 exp = arg0;
1785 continue;
1786 CASE_CONVERT:
1787 if (is_bool)
1788 goto do_default;
1789 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1791 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1792 is_bool = true;
1793 else
1794 return;
1796 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1797 is_bool = true;
1798 goto do_default;
1799 case EQ_EXPR:
1800 case NE_EXPR:
1801 case LT_EXPR:
1802 case LE_EXPR:
1803 case GE_EXPR:
1804 case GT_EXPR:
1805 is_bool = true;
1806 /* FALLTHRU */
1807 default:
1808 if (!is_bool)
1809 return;
1810 do_default:
1811 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1812 &low, &high, &in_p,
1813 &strict_overflow_p);
1814 if (nexp != NULL_TREE)
1816 exp = nexp;
1817 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1818 continue;
1820 break;
1822 break;
1824 if (is_bool)
1826 r->exp = exp;
1827 r->in_p = in_p;
1828 r->low = low;
1829 r->high = high;
1830 r->strict_overflow_p = strict_overflow_p;
1834 /* Comparison function for qsort. Sort entries
1835 without SSA_NAME exp first, then with SSA_NAMEs sorted
1836 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1837 by increasing ->low and if ->low is the same, by increasing
1838 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1839 maximum. */
1841 static int
1842 range_entry_cmp (const void *a, const void *b)
1844 const struct range_entry *p = (const struct range_entry *) a;
1845 const struct range_entry *q = (const struct range_entry *) b;
1847 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1849 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1851 /* Group range_entries for the same SSA_NAME together. */
1852 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1853 return -1;
1854 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1855 return 1;
1856 /* If ->low is different, NULL low goes first, then by
1857 ascending low. */
1858 if (p->low != NULL_TREE)
1860 if (q->low != NULL_TREE)
1862 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1863 p->low, q->low);
1864 if (tem && integer_onep (tem))
1865 return -1;
1866 tem = fold_binary (GT_EXPR, boolean_type_node,
1867 p->low, q->low);
1868 if (tem && integer_onep (tem))
1869 return 1;
1871 else
1872 return 1;
1874 else if (q->low != NULL_TREE)
1875 return -1;
1876 /* If ->high is different, NULL high goes last, before that by
1877 ascending high. */
1878 if (p->high != NULL_TREE)
1880 if (q->high != NULL_TREE)
1882 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1883 p->high, q->high);
1884 if (tem && integer_onep (tem))
1885 return -1;
1886 tem = fold_binary (GT_EXPR, boolean_type_node,
1887 p->high, q->high);
1888 if (tem && integer_onep (tem))
1889 return 1;
1891 else
1892 return -1;
1894 else if (p->high != NULL_TREE)
1895 return 1;
1896 /* If both ranges are the same, sort below by ascending idx. */
1898 else
1899 return 1;
1901 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1902 return -1;
1904 if (p->idx < q->idx)
1905 return -1;
1906 else
1908 gcc_checking_assert (p->idx > q->idx);
1909 return 1;
1913 /* Helper routine of optimize_range_test.
1914 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1915 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1916 OPCODE and OPS are arguments of optimize_range_tests. Return
1917 true if the range merge has been successful.
1918 If OPCODE is ERROR_MARK, this is called from within
1919 maybe_optimize_range_tests and is performing inter-bb range optimization.
1920 Changes should be then performed right away, and whether an op is
1921 BIT_AND_EXPR or BIT_IOR_EXPR is found in oe->rank. */
1923 static bool
1924 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1925 unsigned int count, enum tree_code opcode,
1926 vec<operand_entry_t> *ops, tree exp, bool in_p,
1927 tree low, tree high, bool strict_overflow_p)
1929 operand_entry_t oe = (*ops)[range->idx];
1930 tree op = oe->op;
1931 gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id));
1932 location_t loc = gimple_location (stmt);
1933 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
1934 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
1935 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1936 gimple_stmt_iterator gsi;
1938 if (tem == NULL_TREE)
1939 return false;
1941 if (strict_overflow_p && issue_strict_overflow_warning (wc))
1942 warning_at (loc, OPT_Wstrict_overflow,
1943 "assuming signed overflow does not occur "
1944 "when simplifying range test");
1946 if (dump_file && (dump_flags & TDF_DETAILS))
1948 struct range_entry *r;
1949 fprintf (dump_file, "Optimizing range tests ");
1950 print_generic_expr (dump_file, range->exp, 0);
1951 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1952 print_generic_expr (dump_file, range->low, 0);
1953 fprintf (dump_file, ", ");
1954 print_generic_expr (dump_file, range->high, 0);
1955 fprintf (dump_file, "]");
1956 for (r = otherrange; r < otherrange + count; r++)
1958 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
1959 print_generic_expr (dump_file, r->low, 0);
1960 fprintf (dump_file, ", ");
1961 print_generic_expr (dump_file, r->high, 0);
1962 fprintf (dump_file, "]");
1964 fprintf (dump_file, "\n into ");
1965 print_generic_expr (dump_file, tem, 0);
1966 fprintf (dump_file, "\n");
1969 if (opcode == BIT_IOR_EXPR
1970 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
1971 tem = invert_truthvalue_loc (loc, tem);
1973 tem = fold_convert_loc (loc, optype, tem);
1974 gsi = gsi_for_stmt (stmt);
1975 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
1976 GSI_SAME_STMT);
1978 /* If doing inter-bb range test optimization, update the
1979 stmts immediately. Start with changing the first range test
1980 immediate use to the new value (TEM), or, if the first range
1981 test is a GIMPLE_COND stmt, change that condition. */
1982 if (opcode == ERROR_MARK)
1984 if (op)
1986 imm_use_iterator iter;
1987 use_operand_p use_p;
1988 gimple use_stmt;
1990 FOR_EACH_IMM_USE_STMT (use_stmt, iter, op)
1992 if (is_gimple_debug (use_stmt))
1993 continue;
1994 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
1995 SET_USE (use_p, tem);
1996 update_stmt (use_stmt);
1999 else
2001 gimple_cond_set_code (stmt, NE_EXPR);
2002 gimple_cond_set_lhs (stmt, tem);
2003 gimple_cond_set_rhs (stmt, boolean_false_node);
2004 update_stmt (stmt);
2007 oe->op = tem;
2008 range->exp = exp;
2009 range->low = low;
2010 range->high = high;
2011 range->in_p = in_p;
2012 range->strict_overflow_p = false;
2014 for (range = otherrange; range < otherrange + count; range++)
2016 oe = (*ops)[range->idx];
2017 /* Now change all the other range test immediate uses, so that
2018 those tests will be optimized away. */
2019 if (opcode == ERROR_MARK)
2021 if (oe->op)
2023 imm_use_iterator iter;
2024 use_operand_p use_p;
2025 gimple use_stmt;
2027 FOR_EACH_IMM_USE_STMT (use_stmt, iter, oe->op)
2029 if (is_gimple_debug (use_stmt))
2030 continue;
2031 /* If imm use of _8 is a statement like _7 = _8 | _9;,
2032 adjust it into _7 = _9;. */
2033 if (is_gimple_assign (use_stmt)
2034 && gimple_assign_rhs_code (use_stmt) == oe->rank)
2036 tree expr = NULL_TREE;
2037 if (oe->op == gimple_assign_rhs1 (use_stmt))
2038 expr = gimple_assign_rhs2 (use_stmt);
2039 else if (oe->op == gimple_assign_rhs2 (use_stmt))
2040 expr = gimple_assign_rhs1 (use_stmt);
2041 if (expr
2042 && expr != oe->op
2043 && TREE_CODE (expr) == SSA_NAME)
2045 gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt);
2046 gimple_assign_set_rhs_with_ops (&gsi2, SSA_NAME,
2047 expr, NULL_TREE);
2048 update_stmt (use_stmt);
2049 continue;
2052 /* If imm use of _8 is a statement like _7 = (int) _8;,
2053 adjust it into _7 = 0; or _7 = 1;. */
2054 if (gimple_assign_cast_p (use_stmt)
2055 && oe->op == gimple_assign_rhs1 (use_stmt))
2057 tree lhs = gimple_assign_lhs (use_stmt);
2058 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
2060 gimple_stmt_iterator gsi2
2061 = gsi_for_stmt (use_stmt);
2062 tree expr = build_int_cst (TREE_TYPE (lhs),
2063 oe->rank == BIT_IOR_EXPR
2064 ? 0 : 1);
2065 gimple_assign_set_rhs_with_ops (&gsi2,
2066 INTEGER_CST,
2067 expr, NULL_TREE);
2068 update_stmt (use_stmt);
2069 continue;
2072 /* Otherwise replace the use with 0 or 1. */
2073 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2074 SET_USE (use_p,
2075 build_int_cst (TREE_TYPE (oe->op),
2076 oe->rank == BIT_IOR_EXPR
2077 ? 0 : 1));
2078 update_stmt (use_stmt);
2081 else
2083 /* If range test was a GIMPLE_COND, simply change it
2084 into an always false or always true condition. */
2085 stmt = last_stmt (BASIC_BLOCK (oe->id));
2086 if (oe->rank == BIT_IOR_EXPR)
2087 gimple_cond_make_false (stmt);
2088 else
2089 gimple_cond_make_true (stmt);
2090 update_stmt (stmt);
2093 oe->op = error_mark_node;
2094 range->exp = NULL_TREE;
2096 return true;
2099 /* Optimize range tests, similarly to how fold_range_test optimizes
2100 them on trees. The tree code for the binary
2101 operation between all the operands is OPCODE.
2102 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2103 maybe_optimize_range_tests for inter-bb range optimization.
2104 In that case if oe->op is NULL, oe->id is bb->index whose
2105 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2106 the actual opcode. */
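/* For illustration only (hypothetical SSA names): with OPCODE
   BIT_IOR_EXPR and operands representing x_1 == 1, x_1 == 2 and
   x_1 == 3, the merging loop below can collapse the three tests
   into a single range check roughly equivalent to
   (unsigned) (x_1 - 1) <= 2, i.e. 1 <= x_1 && x_1 <= 3.  */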
2108 static void
2109 optimize_range_tests (enum tree_code opcode,
2110 vec<operand_entry_t> *ops)
2112 unsigned int length = ops->length (), i, j, first;
2113 operand_entry_t oe;
2114 struct range_entry *ranges;
2115 bool any_changes = false;
2117 if (length == 1)
2118 return;
2120 ranges = XNEWVEC (struct range_entry, length);
2121 for (i = 0; i < length; i++)
2123 oe = (*ops)[i];
2124 ranges[i].idx = i;
2125 init_range_entry (ranges + i, oe->op,
2126 oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id)));
2127 /* For |, invert it now; we will invert it again before emitting
2128 the optimized expression. */
2129 if (opcode == BIT_IOR_EXPR
2130 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2131 ranges[i].in_p = !ranges[i].in_p;
2134 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2135 for (i = 0; i < length; i++)
2136 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2137 break;
2139 /* Try to merge ranges. */
2140 for (first = i; i < length; i++)
2142 tree low = ranges[i].low;
2143 tree high = ranges[i].high;
2144 int in_p = ranges[i].in_p;
2145 bool strict_overflow_p = ranges[i].strict_overflow_p;
2146 int update_fail_count = 0;
2148 for (j = i + 1; j < length; j++)
2150 if (ranges[i].exp != ranges[j].exp)
2151 break;
2152 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2153 ranges[j].in_p, ranges[j].low, ranges[j].high))
2154 break;
2155 strict_overflow_p |= ranges[j].strict_overflow_p;
2158 if (j == i + 1)
2159 continue;
2161 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2162 ops, ranges[i].exp, in_p, low, high,
2163 strict_overflow_p))
2165 i = j - 1;
2166 any_changes = true;
2168 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2169 while update_range_test would fail. */
2170 else if (update_fail_count == 64)
2171 i = j - 1;
2172 else
2173 ++update_fail_count;
2176 /* Optimize X == CST1 || X == CST2
2177 if popcount (CST1 ^ CST2) == 1 into
2178 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2179 Similarly for ranges. E.g.
2180 X != 2 && X != 3 && X != 10 && X != 11
2181 will be transformed by the above loop into
2182 (X - 2U) <= 1U && (X - 10U) <= 1U
2183 and this loop can transform that into
2184 ((X & ~8) - 2U) <= 1U. */
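/* A worked instance of the above, using hypothetical constants:
   for X == 4 || X == 6 we have CST1 ^ CST2 = 4 ^ 6 = 2, whose
   popcount is 1, so the pair becomes (X & ~2) == 4; both 4 and 6
   map to 4 under the mask while any other value does not.  */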
2185 for (i = first; i < length; i++)
2187 tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
2189 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2190 continue;
2191 type = TREE_TYPE (ranges[i].exp);
2192 if (!INTEGRAL_TYPE_P (type))
2193 continue;
2194 lowi = ranges[i].low;
2195 if (lowi == NULL_TREE)
2196 lowi = TYPE_MIN_VALUE (type);
2197 highi = ranges[i].high;
2198 if (highi == NULL_TREE)
2199 continue;
2200 for (j = i + 1; j < length && j < i + 64; j++)
2202 if (ranges[j].exp == NULL_TREE)
2203 continue;
2204 if (ranges[i].exp != ranges[j].exp)
2205 break;
2206 if (ranges[j].in_p)
2207 continue;
2208 lowj = ranges[j].low;
2209 if (lowj == NULL_TREE)
2210 continue;
2211 highj = ranges[j].high;
2212 if (highj == NULL_TREE)
2213 highj = TYPE_MAX_VALUE (type);
2214 tem = fold_binary (GT_EXPR, boolean_type_node,
2215 lowj, highi);
2216 if (tem == NULL_TREE || !integer_onep (tem))
2217 continue;
2218 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2219 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2220 continue;
2221 gcc_checking_assert (!integer_zerop (lowxor));
2222 tem = fold_binary (MINUS_EXPR, type, lowxor,
2223 build_int_cst (type, 1));
2224 if (tem == NULL_TREE)
2225 continue;
2226 tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
2227 if (tem == NULL_TREE || !integer_zerop (tem))
2228 continue;
2229 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2230 if (!tree_int_cst_equal (lowxor, highxor))
2231 continue;
2232 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2233 exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
2234 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2235 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2236 if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
2237 ranges[i].in_p, lowj, highj,
2238 ranges[i].strict_overflow_p
2239 || ranges[j].strict_overflow_p))
2241 any_changes = true;
2242 break;
2247 if (any_changes && opcode != ERROR_MARK)
2249 j = 0;
2250 FOR_EACH_VEC_ELT (*ops, i, oe)
2252 if (oe->op == error_mark_node)
2253 continue;
2254 else if (i != j)
2255 (*ops)[j] = oe;
2256 j++;
2258 ops->truncate (j);
2261 XDELETEVEC (ranges);
2264 /* Return true if STMT is a cast like:
2265 <bb N>:
2267 _123 = (int) _234;
2269 <bb M>:
2270 # _345 = PHI <_123(N), 1(...), 1(...)>
2271 where _234 has bool type, _123 has single use and
2272 bb N has a single successor M. This is commonly used in
2273 the last block of a range test. */
2275 static bool
2276 final_range_test_p (gimple stmt)
2278 basic_block bb, rhs_bb;
2279 edge e;
2280 tree lhs, rhs;
2281 use_operand_p use_p;
2282 gimple use_stmt;
2284 if (!gimple_assign_cast_p (stmt))
2285 return false;
2286 bb = gimple_bb (stmt);
2287 if (!single_succ_p (bb))
2288 return false;
2289 e = single_succ_edge (bb);
2290 if (e->flags & EDGE_COMPLEX)
2291 return false;
2293 lhs = gimple_assign_lhs (stmt);
2294 rhs = gimple_assign_rhs1 (stmt);
2295 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2296 || TREE_CODE (rhs) != SSA_NAME
2297 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2298 return false;
2300 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2301 if (!single_imm_use (lhs, &use_p, &use_stmt))
2302 return false;
2304 if (gimple_code (use_stmt) != GIMPLE_PHI
2305 || gimple_bb (use_stmt) != e->dest)
2306 return false;
2308 /* And that the rhs is defined in the same loop. */
2309 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2310 if (rhs_bb == NULL
2311 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2312 return false;
2314 return true;
2317 /* Return true if BB is a suitable basic block for inter-bb range test
2318 optimization. If BACKWARD is true, BB should be the only predecessor
2319 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2320 or checked against the common basic block to which all conditions
2321 branch when true resp. false. If BACKWARD is false, TEST_BB should
2322 be the only predecessor of BB. */
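/* A hypothetical shape this routine accepts (BACKWARD case), for
   illustration only:

     bb1:  if (a_1 != 0) goto other_bb; else goto bb2;
     bb2:  if (b_2 != 0) goto other_bb; else goto bb3;
     bb3:  _5 = (int) c_3;                  <- final_range_test_p
     other_bb:  # t_6 = PHI <1(bb1), 1(bb2), _5(bb3)>

   Each condition bb has one edge to other_bb and one to the next
   bb in the chain, and the PHI arguments agree as checked below.  */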
2324 static bool
2325 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2326 bool backward)
2328 edge_iterator ei, ei2;
2329 edge e, e2;
2330 gimple stmt;
2331 gimple_stmt_iterator gsi;
2332 bool other_edge_seen = false;
2333 bool is_cond;
2335 if (test_bb == bb)
2336 return false;
2337 /* Check last stmt first. */
2338 stmt = last_stmt (bb);
2339 if (stmt == NULL
2340 || (gimple_code (stmt) != GIMPLE_COND
2341 && (backward || !final_range_test_p (stmt)))
2342 || gimple_visited_p (stmt)
2343 || stmt_could_throw_p (stmt)
2344 || *other_bb == bb)
2345 return false;
2346 is_cond = gimple_code (stmt) == GIMPLE_COND;
2347 if (is_cond)
2349 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2350 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2351 to *OTHER_BB (if not set yet, try to find it out). */
2352 if (EDGE_COUNT (bb->succs) != 2)
2353 return false;
2354 FOR_EACH_EDGE (e, ei, bb->succs)
2356 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2357 return false;
2358 if (e->dest == test_bb)
2360 if (backward)
2361 continue;
2362 else
2363 return false;
2365 if (e->dest == bb)
2366 return false;
2367 if (*other_bb == NULL)
2369 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2370 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2371 return false;
2372 else if (e->dest == e2->dest)
2373 *other_bb = e->dest;
2374 if (*other_bb == NULL)
2375 return false;
2377 if (e->dest == *other_bb)
2378 other_edge_seen = true;
2379 else if (backward)
2380 return false;
2382 if (*other_bb == NULL || !other_edge_seen)
2383 return false;
2385 else if (single_succ (bb) != *other_bb)
2386 return false;
2388 /* Now check all PHIs of *OTHER_BB. */
2389 e = find_edge (bb, *other_bb);
2390 e2 = find_edge (test_bb, *other_bb);
2391 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2393 gimple phi = gsi_stmt (gsi);
2394 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2395 corresponding to BB and TEST_BB predecessor must be the same. */
2396 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2397 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2399 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2400 one of the PHIs should have the lhs of the last stmt in
2401 that block as PHI arg and that PHI should have 0 or 1
2402 corresponding to it in all other range test basic blocks
2403 considered. */
2404 if (!is_cond)
2406 if (gimple_phi_arg_def (phi, e->dest_idx)
2407 == gimple_assign_lhs (stmt)
2408 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2409 || integer_onep (gimple_phi_arg_def (phi,
2410 e2->dest_idx))))
2411 continue;
2413 else
2415 gimple test_last = last_stmt (test_bb);
2416 if (gimple_code (test_last) != GIMPLE_COND
2417 && gimple_phi_arg_def (phi, e2->dest_idx)
2418 == gimple_assign_lhs (test_last)
2419 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2420 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2421 continue;
2424 return false;
2427 return true;
2430 /* Return true if BB doesn't have side-effects that would disallow
2431 range test optimization, all SSA_NAMEs set in the bb are consumed
2432 in the bb and there are no PHIs. */
2434 static bool
2435 no_side_effect_bb (basic_block bb)
2437 gimple_stmt_iterator gsi;
2438 gimple last;
2440 if (!gimple_seq_empty_p (phi_nodes (bb)))
2441 return false;
2442 last = last_stmt (bb);
2443 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2445 gimple stmt = gsi_stmt (gsi);
2446 tree lhs;
2447 imm_use_iterator imm_iter;
2448 use_operand_p use_p;
2450 if (is_gimple_debug (stmt))
2451 continue;
2452 if (gimple_has_side_effects (stmt))
2453 return false;
2454 if (stmt == last)
2455 return true;
2456 if (!is_gimple_assign (stmt))
2457 return false;
2458 lhs = gimple_assign_lhs (stmt);
2459 if (TREE_CODE (lhs) != SSA_NAME)
2460 return false;
2461 if (gimple_assign_rhs_could_trap_p (stmt))
2462 return false;
2463 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2465 gimple use_stmt = USE_STMT (use_p);
2466 if (is_gimple_debug (use_stmt))
2467 continue;
2468 if (gimple_bb (use_stmt) != bb)
2469 return false;
2472 return false;
2475 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2476 return true and fill in *OPS recursively. */
2478 static bool
2479 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2480 struct loop *loop)
2482 gimple stmt = SSA_NAME_DEF_STMT (var);
2483 tree rhs[2];
2484 int i;
2486 if (!is_reassociable_op (stmt, code, loop))
2487 return false;
2489 rhs[0] = gimple_assign_rhs1 (stmt);
2490 rhs[1] = gimple_assign_rhs2 (stmt);
2491 gimple_set_visited (stmt, true);
2492 for (i = 0; i < 2; i++)
2493 if (TREE_CODE (rhs[i]) == SSA_NAME
2494 && !get_ops (rhs[i], code, ops, loop)
2495 && has_single_use (rhs[i]))
2497 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2499 oe->op = rhs[i];
2500 oe->rank = code;
2501 oe->id = 0;
2502 oe->count = 1;
2503 ops->safe_push (oe);
2505 return true;
2508 /* Inter-bb range test optimization. */
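/* For illustration (hypothetical source): a condition such as

     if (a == 3 || a == 5 || a == 7)
       foo ();

   is usually gimplified into a chain of GIMPLE_CONDs in separate
   basic blocks.  This routine walks that chain, collects the
   individual comparisons into an operand vector and hands them to
   optimize_range_tests above, which may merge them into fewer
   range checks.  */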
2510 static void
2511 maybe_optimize_range_tests (gimple stmt)
2513 basic_block first_bb = gimple_bb (stmt);
2514 basic_block last_bb = first_bb;
2515 basic_block other_bb = NULL;
2516 basic_block bb;
2517 edge_iterator ei;
2518 edge e;
2519 vec<operand_entry_t> ops = vNULL;
2521 /* Consider only basic blocks that end with GIMPLE_COND or
2522 a cast statement satisfying final_range_test_p. All
2523 but the last bb in the first_bb .. last_bb range
2524 should end with GIMPLE_COND. */
2525 if (gimple_code (stmt) == GIMPLE_COND)
2527 if (EDGE_COUNT (first_bb->succs) != 2)
2528 return;
2530 else if (final_range_test_p (stmt))
2531 other_bb = single_succ (first_bb);
2532 else
2533 return;
2535 if (stmt_could_throw_p (stmt))
2536 return;
2538 /* As relative ordering of post-dominator sons isn't fixed,
2539 maybe_optimize_range_tests can be called first on any
2540 bb in the range we want to optimize. So, start searching
2541 backwards, if first_bb can be set to a predecessor. */
2542 while (single_pred_p (first_bb))
2544 basic_block pred_bb = single_pred (first_bb);
2545 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2546 break;
2547 if (!no_side_effect_bb (first_bb))
2548 break;
2549 first_bb = pred_bb;
2551 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2552 Before starting forward search in last_bb successors, find
2553 out the other_bb. */
2554 if (first_bb == last_bb)
2556 other_bb = NULL;
2557 /* As non-GIMPLE_COND last stmt always terminates the range,
2558 if forward search didn't discover anything, just give up. */
2559 if (gimple_code (stmt) != GIMPLE_COND)
2560 return;
2561 /* Look at both successors. Either the successor ends with a GIMPLE_COND
2562 and satisfies suitable_cond_bb, or it ends with a cast and
2563 other_bb is that cast's successor. */
2564 FOR_EACH_EDGE (e, ei, first_bb->succs)
2565 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2566 || e->dest == first_bb)
2567 return;
2568 else if (single_pred_p (e->dest))
2570 stmt = last_stmt (e->dest);
2571 if (stmt
2572 && gimple_code (stmt) == GIMPLE_COND
2573 && EDGE_COUNT (e->dest->succs) == 2)
2575 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2576 break;
2577 else
2578 other_bb = NULL;
2580 else if (stmt
2581 && final_range_test_p (stmt)
2582 && find_edge (first_bb, single_succ (e->dest)))
2584 other_bb = single_succ (e->dest);
2585 if (other_bb == first_bb)
2586 other_bb = NULL;
2589 if (other_bb == NULL)
2590 return;
2592 /* Now do the forward search, moving last_bb to successor bbs
2593 that aren't other_bb. */
2594 while (EDGE_COUNT (last_bb->succs) == 2)
2596 FOR_EACH_EDGE (e, ei, last_bb->succs)
2597 if (e->dest != other_bb)
2598 break;
2599 if (e == NULL)
2600 break;
2601 if (!single_pred_p (e->dest))
2602 break;
2603 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2604 break;
2605 if (!no_side_effect_bb (e->dest))
2606 break;
2607 last_bb = e->dest;
2609 if (first_bb == last_bb)
2610 return;
2611 /* Here basic blocks first_bb through last_bb's predecessor
2612 end with GIMPLE_COND, all of them have one of the edges to
2613 other_bb and another to another block in the range,
2614 all blocks except first_bb don't have side-effects and
2615 last_bb ends with either GIMPLE_COND, or cast satisfying
2616 final_range_test_p. */
2617 for (bb = last_bb; ; bb = single_pred (bb))
2619 enum tree_code code;
2620 tree lhs, rhs;
2622 e = find_edge (bb, other_bb);
2623 stmt = last_stmt (bb);
2624 gimple_set_visited (stmt, true);
2625 if (gimple_code (stmt) != GIMPLE_COND)
2627 use_operand_p use_p;
2628 gimple phi;
2629 edge e2;
2630 unsigned int d;
2632 lhs = gimple_assign_lhs (stmt);
2633 rhs = gimple_assign_rhs1 (stmt);
2634 gcc_assert (bb == last_bb);
2636 /* stmt is
2637 _123 = (int) _234;
2639 followed by:
2640 <bb M>:
2641 # _345 = PHI <_123(N), 1(...), 1(...)>
2643 or 0 instead of 1. If it is 0, the _234
2644 range test is anded together with all the
2645 other range tests, if it is 1, it is ored with
2646 them. */
2647 single_imm_use (lhs, &use_p, &phi);
2648 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2649 e2 = find_edge (first_bb, other_bb);
2650 d = e2->dest_idx;
2651 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2652 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2653 code = BIT_AND_EXPR;
2654 else
2656 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2657 code = BIT_IOR_EXPR;
2660 /* If _234 SSA_NAME_DEF_STMT is
2661 _234 = _567 | _789;
2662 (or &, corresponding to 1/0 in the phi arguments),
2663 push into ops the individual range test arguments
2664 of the bitwise or resp. and, recursively. */
2665 if (!get_ops (rhs, code, &ops,
2666 loop_containing_stmt (stmt))
2667 && has_single_use (rhs))
2669 /* Otherwise, push the _234 range test itself. */
2670 operand_entry_t oe
2671 = (operand_entry_t) pool_alloc (operand_entry_pool);
2673 oe->op = rhs;
2674 oe->rank = code;
2675 oe->id = 0;
2676 oe->count = 1;
2677 ops.safe_push (oe);
2679 continue;
2681 /* Otherwise stmt is GIMPLE_COND. */
2682 code = gimple_cond_code (stmt);
2683 lhs = gimple_cond_lhs (stmt);
2684 rhs = gimple_cond_rhs (stmt);
2685 if (TREE_CODE (lhs) == SSA_NAME
2686 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2687 && ((code != EQ_EXPR && code != NE_EXPR)
2688 || rhs != boolean_false_node
2689 /* Either push into ops the individual bitwise
2690 or resp. and operands, depending on which
2691 edge is other_bb. */
2692 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2693 ^ (code == EQ_EXPR))
2694 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2695 loop_containing_stmt (stmt))))
2697 /* Or push the GIMPLE_COND stmt itself. */
2698 operand_entry_t oe
2699 = (operand_entry_t) pool_alloc (operand_entry_pool);
2701 oe->op = NULL;
2702 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2703 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2704 /* oe->op = NULL signals that there is no SSA_NAME
2705 for the range test, and oe->id instead is the
2706 basic block number at whose end the GIMPLE_COND
2707 is. */
2708 oe->id = bb->index;
2709 oe->count = 1;
2710 ops.safe_push (oe);
2712 if (bb == first_bb)
2713 break;
2715 if (ops.length () > 1)
2716 optimize_range_tests (ERROR_MARK, &ops);
2717 ops.release ();
2720 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2721 of STMT in its operands. This is also known as a "destructive
2722 update" operation. */
2724 static bool
2725 is_phi_for_stmt (gimple stmt, tree operand)
2727 gimple def_stmt;
2728 tree lhs;
2729 use_operand_p arg_p;
2730 ssa_op_iter i;
2732 if (TREE_CODE (operand) != SSA_NAME)
2733 return false;
2735 lhs = gimple_assign_lhs (stmt);
2737 def_stmt = SSA_NAME_DEF_STMT (operand);
2738 if (gimple_code (def_stmt) != GIMPLE_PHI)
2739 return false;
2741 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2742 if (lhs == USE_FROM_PTR (arg_p))
2743 return true;
2744 return false;
2747 /* Remove the def stmt of VAR if VAR has zero uses, and recurse
2748 on its rhs1 operand if so. */
2750 static void
2751 remove_visited_stmt_chain (tree var)
2753 gimple stmt;
2754 gimple_stmt_iterator gsi;
2756 while (1)
2758 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2759 return;
2760 stmt = SSA_NAME_DEF_STMT (var);
2761 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
2763 var = gimple_assign_rhs1 (stmt);
2764 gsi = gsi_for_stmt (stmt);
2765 gsi_remove (&gsi, true);
2766 release_defs (stmt);
2768 else
2769 return;
2773 /* This function checks three consecutive operands in
2774 the passed operands vector OPS starting from OPINDEX and
2775 swaps two operands if it is profitable for the binary operation
2776 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
2778 We pair ops with the same rank if possible.
2780 The alternative we try is to see if STMT is a destructive
2781 update style statement, which is like:
2782 b = phi (a, ...)
2783 a = c + b;
2784 In that case, we want to use the destructive update form to
2785 expose the possible vectorizer sum reduction opportunity.
2786 In that case, the third operand will be the phi node. This
2787 check is not performed if STMT is null.
2789 We could, of course, try to be better as noted above, and do a
2790 lot of work to try to find these opportunities in >3 operand
2791 cases, but it is unlikely to be worth it. */
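/* For illustration (hypothetical operands and ranks): with
   OPS[OPINDEX..] = { a_1 (rank 3), b_2 (rank 3), c_3 (rank 2) },
   oe1 and oe2 share a rank while oe3 differs, so a_1 and c_3 are
   swapped and the inner binary op built from OPINDEX + 1 and
   OPINDEX + 2 consumes the two rank-3 operands.  */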
2793 static void
2794 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
2795 unsigned int opindex, gimple stmt)
2797 operand_entry_t oe1, oe2, oe3;
2799 oe1 = ops[opindex];
2800 oe2 = ops[opindex + 1];
2801 oe3 = ops[opindex + 2];
2803 if ((oe1->rank == oe2->rank
2804 && oe2->rank != oe3->rank)
2805 || (stmt && is_phi_for_stmt (stmt, oe3->op)
2806 && !is_phi_for_stmt (stmt, oe1->op)
2807 && !is_phi_for_stmt (stmt, oe2->op)))
2809 struct operand_entry temp = *oe3;
2810 oe3->op = oe1->op;
2811 oe3->rank = oe1->rank;
2812 oe1->op = temp.op;
2813 oe1->rank = temp.rank;
2815 else if ((oe1->rank == oe3->rank
2816 && oe2->rank != oe3->rank)
2817 || (stmt && is_phi_for_stmt (stmt, oe2->op)
2818 && !is_phi_for_stmt (stmt, oe1->op)
2819 && !is_phi_for_stmt (stmt, oe3->op)))
2821 struct operand_entry temp = *oe2;
2822 oe2->op = oe1->op;
2823 oe2->rank = oe1->rank;
2824 oe1->op = temp.op;
2825 oe1->rank = temp.rank;
2829 /* Recursively rewrite our linearized statements so that the operators
2830 match those in OPS[OPINDEX], putting the computation in rank
2831 order. */
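/* For illustration (hypothetical names): given the linearized chain

     _1 = x_4 + y_5;
     _2 = _1 + z_6;

   and OPS sorted by rank as { a_7, b_8, c_9 }, the recursion below
   rewrites the chain into

     _1 = a_7 + b_8;
     _2 = _1 + c_9;

   reusing the existing statements rather than emitting new ones.  */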
2833 static void
2834 rewrite_expr_tree (gimple stmt, unsigned int opindex,
2835 vec<operand_entry_t> ops, bool moved)
2837 tree rhs1 = gimple_assign_rhs1 (stmt);
2838 tree rhs2 = gimple_assign_rhs2 (stmt);
2839 operand_entry_t oe;
2841 /* If we have three operands left, then we want to make sure the ones
2842 that get the double binary op are chosen wisely. */
2843 if (opindex + 3 == ops.length ())
2844 swap_ops_for_binary_stmt (ops, opindex, stmt);
2846 /* The final recursion case for this function is that you have
2847 exactly two operations left.
2848 If we had exactly one op in the entire list to start with, we
2849 would have never called this function, and the tail recursion
2850 rewrites them one at a time. */
2851 if (opindex + 2 == ops.length ())
2853 operand_entry_t oe1, oe2;
2855 oe1 = ops[opindex];
2856 oe2 = ops[opindex + 1];
2858 if (rhs1 != oe1->op || rhs2 != oe2->op)
2860 if (dump_file && (dump_flags & TDF_DETAILS))
2862 fprintf (dump_file, "Transforming ");
2863 print_gimple_stmt (dump_file, stmt, 0, 0);
2866 gimple_assign_set_rhs1 (stmt, oe1->op);
2867 gimple_assign_set_rhs2 (stmt, oe2->op);
2868 update_stmt (stmt);
2869 if (rhs1 != oe1->op && rhs1 != oe2->op)
2870 remove_visited_stmt_chain (rhs1);
2872 if (dump_file && (dump_flags & TDF_DETAILS))
2874 fprintf (dump_file, " into ");
2875 print_gimple_stmt (dump_file, stmt, 0, 0);
2878 return;
2881 /* If we hit here, we should have 3 or more ops left. */
2882 gcc_assert (opindex + 2 < ops.length ());
2884 /* Rewrite the next operator. */
2885 oe = ops[opindex];
2887 if (oe->op != rhs2)
2889 if (!moved)
2891 gimple_stmt_iterator gsinow, gsirhs1;
2892 gimple stmt1 = stmt, stmt2;
2893 unsigned int count;
2895 gsinow = gsi_for_stmt (stmt);
2896 count = ops.length () - opindex - 2;
2897 while (count-- != 0)
2899 stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
2900 gsirhs1 = gsi_for_stmt (stmt2);
2901 gsi_move_before (&gsirhs1, &gsinow);
2902 gsi_prev (&gsinow);
2903 stmt1 = stmt2;
2905 moved = true;
2908 if (dump_file && (dump_flags & TDF_DETAILS))
2910 fprintf (dump_file, "Transforming ");
2911 print_gimple_stmt (dump_file, stmt, 0, 0);
2914 gimple_assign_set_rhs2 (stmt, oe->op);
2915 update_stmt (stmt);
2917 if (dump_file && (dump_flags & TDF_DETAILS))
2919 fprintf (dump_file, " into ");
2920 print_gimple_stmt (dump_file, stmt, 0, 0);
2923 /* Recurse on the LHS of the binary operator, which is guaranteed to
2924 be the non-leaf side. */
2925 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
2928 /* Find out how many cycles we need to compute the statement chain.
2929 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
2930 maximum number of independent statements we may execute per cycle. */
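/* A worked example of the formula below, for illustration only:
   with OPS_NUM = 10 and CPU_WIDTH = 2 we get res = 10 / 4 = 2
   cycles spent reducing by cpu_width, rest = 10 - 2 * 2 = 6, and
   since 6 is not a power of two, floor_log2 (6) + 1 = 3 more
   cycles, i.e. 5 cycles total.  With OPS_NUM = 8 the remainder 4
   is an exact power of two and the result is 2 + 2 = 4 cycles.  */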
2932 static int
2933 get_required_cycles (int ops_num, int cpu_width)
2935 int res;
2936 int elog;
2937 unsigned int rest;
2939 /* While we have more than 2 * cpu_width operands
2940 we may reduce the number of operands by cpu_width
2941 per cycle. */
2942 res = ops_num / (2 * cpu_width);
2944 /* The remaining operand count may be halved each cycle
2945 until we have only one operand. */
2946 rest = (unsigned)(ops_num - res * cpu_width);
2947 elog = exact_log2 (rest);
2948 if (elog >= 0)
2949 res += elog;
2950 else
2951 res += floor_log2 (rest) + 1;
2953 return res;
2956 /* Returns an optimal width (number of parallel operation chains) to
2957 use for computation of the given statements. */
2959 static int
2960 get_reassociation_width (int ops_num, enum tree_code opc,
2961 enum machine_mode mode)
2963 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
2964 int width;
2965 int width_min;
2966 int cycles_best;
2968 if (param_width > 0)
2969 width = param_width;
2970 else
2971 width = targetm.sched.reassociation_width (opc, mode);
2973 if (width == 1)
2974 return width;
2976 /* Get the minimal time required for sequence computation. */
2977 cycles_best = get_required_cycles (ops_num, width);
2979 /* Check if we may use less width and still compute the sequence in
2980 the same time. It will allow us to reduce register usage.
2981 get_required_cycles is monotonically increasing with lower width
2982 so we can perform a binary search for the minimal width that still
2983 results in the optimal cycle count. */
2984 width_min = 1;
2985 while (width > width_min)
2987 int width_mid = (width + width_min) / 2;
2989 if (get_required_cycles (ops_num, width_mid) == cycles_best)
2990 width = width_mid;
2991 else if (width_min < width_mid)
2992 width_min = width_mid;
2993 else
2994 break;
2997 return width;
3000 /* Recursively rewrite our linearized statements so that the operators
3001 match those in OPS[OPINDEX], putting the computation in rank
3002 order and trying to allow operations to be executed in
3003 parallel. */
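/* For illustration (hypothetical names): with width 2 and
   OPS = { a, b, c, d }, instead of the serial chain
   ((a + b) + c) + d this builds

     _1 = a + b;   _2 = c + d;   (independent, may issue together)
     _3 = _1 + _2;

   so two of the additions can execute in the same cycle.  */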
3005 static void
3006 rewrite_expr_tree_parallel (gimple stmt, int width,
3007 vec<operand_entry_t> ops)
3009 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3010 int op_num = ops.length ();
3011 int stmt_num = op_num - 1;
3012 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3013 int op_index = op_num - 1;
3014 int stmt_index = 0;
3015 int ready_stmts_end = 0;
3016 int i = 0;
3017 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3019 /* We start expression rewriting from the top statements.
3020 So, in this loop we create a full list of statements
3021 we will work with. */
3022 stmts[stmt_num - 1] = stmt;
3023 for (i = stmt_num - 2; i >= 0; i--)
3024 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3026 for (i = 0; i < stmt_num; i++)
3028 tree op1, op2;
3030 /* Determine whether we should use results of
3031 already handled statements or not. */
3032 if (ready_stmts_end == 0
3033 && (i - stmt_index >= width || op_index < 1))
3034 ready_stmts_end = i;
3036 /* Now we choose operands for the next statement. A non-zero
3037 value in ready_stmts_end means that we should use
3038 the results of already generated statements as new operands. */
3039 if (ready_stmts_end > 0)
3041 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3042 if (ready_stmts_end > stmt_index)
3043 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3044 else if (op_index >= 0)
3045 op2 = ops[op_index--]->op;
3046 else
3048 gcc_assert (stmt_index < i);
3049 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3052 if (stmt_index >= ready_stmts_end)
3053 ready_stmts_end = 0;
3055 else
3057 if (op_index > 1)
3058 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3059 op2 = ops[op_index--]->op;
3060 op1 = ops[op_index--]->op;
3063 /* If we emit the last statement then we should put
3064 operands into the last statement. It will also
3065 break the loop. */
3066 if (op_index < 0 && stmt_index == i)
3067 i = stmt_num - 1;
3069 if (dump_file && (dump_flags & TDF_DETAILS))
3071 fprintf (dump_file, "Transforming ");
3072 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3075 /* We keep the original statement only for the last one. All
3076 others are recreated. */
3077 if (i == stmt_num - 1)
3079 gimple_assign_set_rhs1 (stmts[i], op1);
3080 gimple_assign_set_rhs2 (stmts[i], op2);
3081 update_stmt (stmts[i]);
3083 else
3084 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3086 if (dump_file && (dump_flags & TDF_DETAILS))
3088 fprintf (dump_file, " into ");
3089 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3093 remove_visited_stmt_chain (last_rhs1);
3096 /* Transform STMT, which is really (A + B) + (C + D), into the left
3097 linear form, ((A+B)+C)+D.
3098 Recurse on D if necessary. */
3100 static void
3101 linearize_expr (gimple stmt)
3103 gimple_stmt_iterator gsinow, gsirhs;
3104 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3105 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3106 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3107 gimple newbinrhs = NULL;
3108 struct loop *loop = loop_containing_stmt (stmt);
3110 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3111 && is_reassociable_op (binrhs, rhscode, loop));
3113 gsinow = gsi_for_stmt (stmt);
3114 gsirhs = gsi_for_stmt (binrhs);
3115 gsi_move_before (&gsirhs, &gsinow);
3117 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3118 gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
3119 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3121 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3122 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3124 if (dump_file && (dump_flags & TDF_DETAILS))
3126 fprintf (dump_file, "Linearized: ");
3127 print_gimple_stmt (dump_file, stmt, 0, 0);
3130 reassociate_stats.linearized++;
3131 update_stmt (binrhs);
3132 update_stmt (binlhs);
3133 update_stmt (stmt);
3135 gimple_set_visited (stmt, true);
3136 gimple_set_visited (binlhs, true);
3137 gimple_set_visited (binrhs, true);
3139 /* Tail recurse on the new rhs if it still needs reassociation. */
3140 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3141 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3142 want to change the algorithm while converting to tuples. */
3143 linearize_expr (stmt);
3146 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3147 it. Otherwise, return NULL. */
3149 static gimple
3150 get_single_immediate_use (tree lhs)
3152 use_operand_p immuse;
3153 gimple immusestmt;
3155 if (TREE_CODE (lhs) == SSA_NAME
3156 && single_imm_use (lhs, &immuse, &immusestmt)
3157 && is_gimple_assign (immusestmt))
3158 return immusestmt;
3160 return NULL;
3163 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3164 representing the negated value. Insertions of any necessary
3165 instructions go before GSI.
3166 This function is recursive in that, if you hand it "a_5" as the
3167 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3168 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3170 static tree
3171 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
3173 gimple negatedefstmt = NULL;
3174 tree resultofnegate;
3176 /* If we are trying to negate a name, defined by an add, negate the
3177 add operands instead. */
3178 if (TREE_CODE (tonegate) == SSA_NAME)
3179 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3180 if (TREE_CODE (tonegate) == SSA_NAME
3181 && is_gimple_assign (negatedefstmt)
3182 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3183 && has_single_use (gimple_assign_lhs (negatedefstmt))
3184 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3186 gimple_stmt_iterator gsi;
3187 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3188 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3190 gsi = gsi_for_stmt (negatedefstmt);
3191 rhs1 = negate_value (rhs1, &gsi);
3192 gimple_assign_set_rhs1 (negatedefstmt, rhs1);
3194 gsi = gsi_for_stmt (negatedefstmt);
3195 rhs2 = negate_value (rhs2, &gsi);
3196 gimple_assign_set_rhs2 (negatedefstmt, rhs2);
3198 update_stmt (negatedefstmt);
3199 return gimple_assign_lhs (negatedefstmt);
3202 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3203 resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
3204 NULL_TREE, true, GSI_SAME_STMT);
3205 return resultofnegate;
3208 /* Return true if we should break up the subtract in STMT into an add
3209 with negate. This is true when the subtract operands are really
3210 adds, or the subtract itself is used in an add expression. In
3211 either case, breaking up the subtract into an add with negate
3212 exposes the adds to reassociation. */
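/* For illustration (hypothetical names): given

     a_1 = b_2 + c_3;
     d_4 = a_1 - e_5;
     f_6 = d_4 + g_7;

   the subtract both consumes an add and feeds an add, so it is
   rewritten as d_4 = a_1 + (-e_5), letting the whole chain be
   reassociated as one addition list.  */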
3214 static bool
3215 should_break_up_subtract (gimple stmt)
3217 tree lhs = gimple_assign_lhs (stmt);
3218 tree binlhs = gimple_assign_rhs1 (stmt);
3219 tree binrhs = gimple_assign_rhs2 (stmt);
3220 gimple immusestmt;
3221 struct loop *loop = loop_containing_stmt (stmt);
3223 if (TREE_CODE (binlhs) == SSA_NAME
3224 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3225 return true;
3227 if (TREE_CODE (binrhs) == SSA_NAME
3228 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3229 return true;
3231 if (TREE_CODE (lhs) == SSA_NAME
3232 && (immusestmt = get_single_immediate_use (lhs))
3233 && is_gimple_assign (immusestmt)
3234 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3235 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3236 return true;
3237 return false;
3240 /* Transform STMT from A - B into A + -B. */
3242 static void
3243 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3245 tree rhs1 = gimple_assign_rhs1 (stmt);
3246 tree rhs2 = gimple_assign_rhs2 (stmt);
3248 if (dump_file && (dump_flags & TDF_DETAILS))
3250 fprintf (dump_file, "Breaking up subtract ");
3251 print_gimple_stmt (dump_file, stmt, 0, 0);
3254 rhs2 = negate_value (rhs2, gsip);
3255 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3256 update_stmt (stmt);
3259 /* Determine whether STMT is a builtin call that raises an SSA name
3260 to an integer power and has only one use. If so, and this is early
3261 reassociation and unsafe math optimizations are permitted, place
3262 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3263 If any of these conditions does not hold, return FALSE. */
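/* For illustration (hypothetical calls): __builtin_pow (x_1, 3.0)
   yields *BASE = x_1 and *EXPONENT = 3, and __builtin_powi (x_1, 4)
   yields exponent 4, while __builtin_pow (x_1, 2.5) is rejected
   because 2.5 is not exactly an integer, and any call with an
   exponent below 2 is rejected as well.  */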
3265 static bool
3266 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3268 tree fndecl, arg1;
3269 REAL_VALUE_TYPE c, cint;
3271 if (!first_pass_instance
3272 || !flag_unsafe_math_optimizations
3273 || !is_gimple_call (stmt)
3274 || !has_single_use (gimple_call_lhs (stmt)))
3275 return false;
3277 fndecl = gimple_call_fndecl (stmt);
3279 if (!fndecl
3280 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3281 return false;
3283 switch (DECL_FUNCTION_CODE (fndecl))
3285 CASE_FLT_FN (BUILT_IN_POW):
3286 *base = gimple_call_arg (stmt, 0);
3287 arg1 = gimple_call_arg (stmt, 1);
3289 if (TREE_CODE (arg1) != REAL_CST)
3290 return false;
3292 c = TREE_REAL_CST (arg1);
3294 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3295 return false;
3297 *exponent = real_to_integer (&c);
3298 real_from_integer (&cint, VOIDmode, *exponent,
3299 *exponent < 0 ? -1 : 0, 0);
3300 if (!real_identical (&c, &cint))
3301 return false;
3303 break;
3305 CASE_FLT_FN (BUILT_IN_POWI):
3306 *base = gimple_call_arg (stmt, 0);
3307 arg1 = gimple_call_arg (stmt, 1);
3309 if (!host_integerp (arg1, 0))
3310 return false;
3312 *exponent = TREE_INT_CST_LOW (arg1);
3313 break;
3315 default:
3316 return false;
3319 /* Expanding negative exponents is generally unproductive, so we don't
3320 complicate matters with those. Exponents of zero and one should
3321 have been handled by expression folding. */
3322 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3323 return false;
3325 return true;
3328 /* Recursively linearize a binary expression that is the RHS of STMT.
3329 Place the operands of the expression tree in the vector named OPS. */
3331 static void
3332 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3333 bool is_associative, bool set_visited)
3335 tree binlhs = gimple_assign_rhs1 (stmt);
3336 tree binrhs = gimple_assign_rhs2 (stmt);
3337 gimple binlhsdef = NULL, binrhsdef = NULL;
3338 bool binlhsisreassoc = false;
3339 bool binrhsisreassoc = false;
3340 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3341 struct loop *loop = loop_containing_stmt (stmt);
3342 tree base = NULL_TREE;
3343 HOST_WIDE_INT exponent = 0;
3345 if (set_visited)
3346 gimple_set_visited (stmt, true);
3348 if (TREE_CODE (binlhs) == SSA_NAME)
3350 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3351 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3352 && !stmt_could_throw_p (binlhsdef));
3355 if (TREE_CODE (binrhs) == SSA_NAME)
3357 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3358 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3359 && !stmt_could_throw_p (binrhsdef));
3362 /* If the LHS is not reassociable, but the RHS is, we need to swap
3363 them. If neither is reassociable, there is nothing we can do, so
3364 just put them in the ops vector. If the LHS is reassociable,
3365 linearize it. If both are reassociable, then linearize the RHS
3366 and the LHS. */
3368 if (!binlhsisreassoc)
3370 tree temp;
3372 /* If this is not an associative operation like division, give up. */
3373 if (!is_associative)
3375 add_to_ops_vec (ops, binrhs);
3376 return;
3379 if (!binrhsisreassoc)
3381 if (rhscode == MULT_EXPR
3382 && TREE_CODE (binrhs) == SSA_NAME
3383 && acceptable_pow_call (binrhsdef, &base, &exponent))
3385 add_repeat_to_ops_vec (ops, base, exponent);
3386 gimple_set_visited (binrhsdef, true);
3388 else
3389 add_to_ops_vec (ops, binrhs);
3391 if (rhscode == MULT_EXPR
3392 && TREE_CODE (binlhs) == SSA_NAME
3393 && acceptable_pow_call (binlhsdef, &base, &exponent))
3395 add_repeat_to_ops_vec (ops, base, exponent);
3396 gimple_set_visited (binlhsdef, true);
3398 else
3399 add_to_ops_vec (ops, binlhs);
3401 return;
3404 if (dump_file && (dump_flags & TDF_DETAILS))
3406 fprintf (dump_file, "swapping operands of ");
3407 print_gimple_stmt (dump_file, stmt, 0, 0);
3410 swap_tree_operands (stmt,
3411 gimple_assign_rhs1_ptr (stmt),
3412 gimple_assign_rhs2_ptr (stmt));
3413 update_stmt (stmt);
3415 if (dump_file && (dump_flags & TDF_DETAILS))
3417 fprintf (dump_file, " is now ");
3418 print_gimple_stmt (dump_file, stmt, 0, 0);
3421 /* We want to make it so the lhs is always the reassociative op,
3422 so swap. */
3423 temp = binlhs;
3424 binlhs = binrhs;
3425 binrhs = temp;
3427 else if (binrhsisreassoc)
3429 linearize_expr (stmt);
3430 binlhs = gimple_assign_rhs1 (stmt);
3431 binrhs = gimple_assign_rhs2 (stmt);
3434 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3435 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3436 rhscode, loop));
3437 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3438 is_associative, set_visited);
3440 if (rhscode == MULT_EXPR
3441 && TREE_CODE (binrhs) == SSA_NAME
3442 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3444 add_repeat_to_ops_vec (ops, base, exponent);
3445 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3447 else
3448 add_to_ops_vec (ops, binrhs);
3451 /* Repropagate the negates back into subtracts, since no other pass
3452 currently does it. */
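/* Summarizing the cases handled below, with x = -a recorded in
   plus_negates (hypothetical names):

     y = b + x   becomes   y = b - a
     y = x - b   becomes   x' = a + b;  y = -x'   (negate pushed down)
     y = b - x   becomes   y = b + a  */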
3454 static void
3455 repropagate_negates (void)
3457 unsigned int i = 0;
3458 tree negate;
3460 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3462 gimple user = get_single_immediate_use (negate);
3464 if (!user || !is_gimple_assign (user))
3465 continue;
3467 /* The negate operand can be either operand of a PLUS_EXPR
3468 (it can be the LHS if the RHS is a constant for example).
3470 Force the negate operand to the RHS of the PLUS_EXPR, then
3471 transform the PLUS_EXPR into a MINUS_EXPR. */
3472 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3474 /* If the negated operand appears on the LHS of the
3475 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3476 to force the negated operand to the RHS of the PLUS_EXPR. */
3477 if (gimple_assign_rhs1 (user) == negate)
3479 swap_tree_operands (user,
3480 gimple_assign_rhs1_ptr (user),
3481 gimple_assign_rhs2_ptr (user));
3484 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3485 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3486 if (gimple_assign_rhs2 (user) == negate)
3488 tree rhs1 = gimple_assign_rhs1 (user);
3489 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3490 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3491 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3492 update_stmt (user);
3495 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3497 if (gimple_assign_rhs1 (user) == negate)
3499 /* We have
3500 x = -a
3501 y = x - b
3502 which we transform into
3503 x = a + b
3504 y = -x .
3505 This pushes down the negate which we possibly can merge
3506 into some other operation, hence insert it into the
3507 plus_negates vector. */
3508 gimple feed = SSA_NAME_DEF_STMT (negate);
3509 tree a = gimple_assign_rhs1 (feed);
3510 tree rhs2 = gimple_assign_rhs2 (user);
3511 gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
3512 gimple_replace_lhs (feed, negate);
3513 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
3514 update_stmt (gsi_stmt (gsi));
3515 gsi2 = gsi_for_stmt (user);
3516 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
3517 update_stmt (gsi_stmt (gsi2));
3518 gsi_move_before (&gsi, &gsi2);
3519 plus_negates.safe_push (gimple_assign_lhs (gsi_stmt (gsi2)));
3521 else
3523 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3524 rid of one operation. */
3525 gimple feed = SSA_NAME_DEF_STMT (negate);
3526 tree a = gimple_assign_rhs1 (feed);
3527 tree rhs1 = gimple_assign_rhs1 (user);
3528 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3529 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3530 update_stmt (gsi_stmt (gsi));
3536 /* Returns true if OP is of a type for which we can do reassociation.
3537 That is for integral or non-saturating fixed-point types, and for
3538 floating point types when -fassociative-math is enabled. */
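/* For illustration (assuming default flags): an unsigned int
   operand qualifies because its overflow wraps, a plain signed int
   does not unless -fwrapv is in effect, and a double operand
   qualifies only when -fassociative-math is enabled.  */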
3540 static bool
3541 can_reassociate_p (tree op)
3543 tree type = TREE_TYPE (op);
3544 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3545 || NON_SAT_FIXED_POINT_TYPE_P (type)
3546 || (flag_associative_math && FLOAT_TYPE_P (type)))
3547 return true;
3548 return false;
3551 /* Break up subtract operations in block BB.
3553 We do this top down because we don't know whether the subtract is
3554 part of a possible chain of reassociation except at the top.
3556 IE given
3557 d = f + g
3558 c = a + e
3559 b = c - d
3560 q = b - r
3561 k = t - q
3563 we want to break up k = t - q, but we won't until we've transformed q
3564 = b - r, which won't be broken up until we transform b = c - d.
3566 En passant, clear the GIMPLE visited flag on every statement. */
3568 static void
3569 break_up_subtract_bb (basic_block bb)
3571 gimple_stmt_iterator gsi;
3572 basic_block son;
3574 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3576 gimple stmt = gsi_stmt (gsi);
3577 gimple_set_visited (stmt, false);
3579 if (!is_gimple_assign (stmt)
3580 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3581 continue;
3583 /* Look for simple gimple subtract operations. */
3584 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3586 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3587 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3588 continue;
3590 /* Check for a subtract used only in an addition. If this
3591 is the case, transform it into add of a negate for better
3592 reassociation. IE transform C = A-B into C = A + -B if C
3593 is only used in an addition. */
3594 if (should_break_up_subtract (stmt))
3595 break_up_subtract (stmt, &gsi);
3597 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3598 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3599 plus_negates.safe_push (gimple_assign_lhs (stmt));
3601 for (son = first_dom_son (CDI_DOMINATORS, bb);
3602 son;
3603 son = next_dom_son (CDI_DOMINATORS, son))
3604 break_up_subtract_bb (son);
3607 /* Used for repeated factor analysis. */
3608 struct repeat_factor_d
3610 /* An SSA name that occurs in a multiply chain. */
3611 tree factor;
3613 /* Cached rank of the factor. */
3614 unsigned rank;
3616 /* Number of occurrences of the factor in the chain. */
3617 HOST_WIDE_INT count;
3619 /* An SSA name representing the product of this factor and
3620 all factors appearing later in the repeated factor vector. */
3621 tree repr;
3624 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3625 typedef const struct repeat_factor_d *const_repeat_factor_t;
3628 static vec<repeat_factor> repeat_factor_vec;
3630 /* Used for sorting the repeat factor vector. Sort primarily by
3631 ascending occurrence count, secondarily by descending rank. */
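/* For illustration (hypothetical factors): { x: count 2, rank 5;
   y: count 3, rank 4; z: count 2, rank 7 } sorts to z, x, y --
   the two count-2 entries come first, ordered among themselves by
   descending rank.  */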
3633 static int
3634 compare_repeat_factors (const void *x1, const void *x2)
3636 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3637 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3639 if (rf1->count != rf2->count)
3640 return rf1->count - rf2->count;
3642 return rf2->rank - rf1->rank;
3645 /* Look for repeated operands in OPS in the multiply tree rooted at
3646 STMT. Replace them with an optimal sequence of multiplies and powi
3647 builtin calls, and remove the used operands from OPS. Return an
3648 SSA name representing the value of the replacement sequence. */
3650 static tree
3651 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
3653 unsigned i, j, vec_len;
3654 int ii;
3655 operand_entry_t oe;
3656 repeat_factor_t rf1, rf2;
3657 repeat_factor rfnew;
3658 tree result = NULL_TREE;
3659 tree target_ssa, iter_result;
3660 tree type = TREE_TYPE (gimple_get_lhs (stmt));
3661 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
3662 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3663 gimple mul_stmt, pow_stmt;
3665 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3666 target. */
3667 if (!powi_fndecl)
3668 return NULL_TREE;
3670 /* Allocate the repeated factor vector. */
3671 repeat_factor_vec.create (10);
3673 /* Scan the OPS vector for all SSA names in the product and build
3674 up a vector of occurrence counts for each factor. */
3675 FOR_EACH_VEC_ELT (*ops, i, oe)
3677 if (TREE_CODE (oe->op) == SSA_NAME)
3679 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3681 if (rf1->factor == oe->op)
3683 rf1->count += oe->count;
3684 break;
3688 if (j >= repeat_factor_vec.length ())
3690 rfnew.factor = oe->op;
3691 rfnew.rank = oe->rank;
3692 rfnew.count = oe->count;
3693 rfnew.repr = NULL_TREE;
3694 repeat_factor_vec.safe_push (rfnew);
3699 /* Sort the repeated factor vector by (a) increasing occurrence count,
3700 and (b) decreasing rank. */
3701 repeat_factor_vec.qsort (compare_repeat_factors);
3703 /* It is generally best to combine as many base factors as possible
3704 into a product before applying __builtin_powi to the result.
3705 However, the sort order chosen for the repeated factor vector
3706 allows us to cache partial results for the product of the base
3707 factors for subsequent use. When we already have a cached partial
3708 result from a previous iteration, it is best to make use of it
3709 before looking for another __builtin_pow opportunity.
3711 As an example, consider x * x * y * y * y * z * z * z * z.
3712 We want to first compose the product x * y * z, raise it to the
3713 second power, then multiply this by y * z, and finally multiply
3714 by z. This can be done in 5 multiplies provided we cache y * z
3715 for use in both expressions:
3717 t1 = y * z
3718 t2 = t1 * x
3719 t3 = t2 * t2
3720 t4 = t1 * t3
3721 result = t4 * z
3723 If we instead ignored the cached y * z and first multiplied by
3724 the __builtin_pow opportunity z * z, we would get the inferior:
3726 t1 = y * z
3727 t2 = t1 * x
3728 t3 = t2 * t2
3729 t4 = z * z
3730 t5 = t3 * t4
3731 result = t5 * y */
3733 vec_len = repeat_factor_vec.length ();
3735 /* Repeatedly look for opportunities to create a builtin_powi call. */
3736 while (true)
3738 HOST_WIDE_INT power;
3740 /* First look for the largest cached product of factors from
3741 preceding iterations. If found, create a builtin_powi for
3742 it if the minimum occurrence count for its factors is at
3743 least 2, or just use this cached product as our next
3744 multiplicand if the minimum occurrence count is 1. */
3745 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3747 if (rf1->repr && rf1->count > 0)
3748 break;
3751 if (j < vec_len)
3753 power = rf1->count;
3755 if (power == 1)
3757 iter_result = rf1->repr;
3759 if (dump_file && (dump_flags & TDF_DETAILS))
3761 unsigned elt;
3762 repeat_factor_t rf;
3763 fputs ("Multiplying by cached product ", dump_file);
3764 for (elt = j; elt < vec_len; elt++)
3766 rf = &repeat_factor_vec[elt];
3767 print_generic_expr (dump_file, rf->factor, 0);
3768 if (elt < vec_len - 1)
3769 fputs (" * ", dump_file);
3771 fputs ("\n", dump_file);
3774 else
3776 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3777 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3778 build_int_cst (integer_type_node,
3779 power));
3780 gimple_call_set_lhs (pow_stmt, iter_result);
3781 gimple_set_location (pow_stmt, gimple_location (stmt));
3782 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3784 if (dump_file && (dump_flags & TDF_DETAILS))
3786 unsigned elt;
3787 repeat_factor_t rf;
3788 fputs ("Building __builtin_pow call for cached product (",
3789 dump_file);
3790 for (elt = j; elt < vec_len; elt++)
3792 rf = &repeat_factor_vec[elt];
3793 print_generic_expr (dump_file, rf->factor, 0);
3794 if (elt < vec_len - 1)
3795 fputs (" * ", dump_file);
3797 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
3798 power);
3802 else
3804 /* Otherwise, find the first factor in the repeated factor
3805 vector whose occurrence count is at least 2. If no such
3806 factor exists, there are no builtin_powi opportunities
3807 remaining. */
3808 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3810 if (rf1->count >= 2)
3811 break;
3814 if (j >= vec_len)
3815 break;
3817 power = rf1->count;
3819 if (dump_file && (dump_flags & TDF_DETAILS))
3821 unsigned elt;
3822 repeat_factor_t rf;
3823 fputs ("Building __builtin_pow call for (", dump_file);
3824 for (elt = j; elt < vec_len; elt++)
3826 rf = &repeat_factor_vec[elt];
3827 print_generic_expr (dump_file, rf->factor, 0);
3828 if (elt < vec_len - 1)
3829 fputs (" * ", dump_file);
3831 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
3834 reassociate_stats.pows_created++;
3836 /* Visit each element of the vector in reverse order (so that
3837 high-occurrence elements are visited first, and within the
3838 same occurrence count, lower-ranked elements are visited
3839 first). Form a linear product of all elements in this order
3840 whose occurrence count is at least that of element J.
3841 Record the SSA name representing the product of each element
3842 with all subsequent elements in the vector. */
3843 if (j == vec_len - 1)
3844 rf1->repr = rf1->factor;
3845 else
3847 for (ii = vec_len - 2; ii >= (int)j; ii--)
3849 tree op1, op2;
3851 rf1 = &repeat_factor_vec[ii];
3852 rf2 = &repeat_factor_vec[ii + 1];
3854 /* Init the last factor's representative to be itself. */
3855 if (!rf2->repr)
3856 rf2->repr = rf2->factor;
3858 op1 = rf1->factor;
3859 op2 = rf2->repr;
3861 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
3862 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
3863 target_ssa,
3864 op1, op2);
3865 gimple_set_location (mul_stmt, gimple_location (stmt));
3866 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3867 rf1->repr = target_ssa;
3869 /* Don't reprocess the multiply we just introduced. */
3870 gimple_set_visited (mul_stmt, true);
3874 /* Form a call to __builtin_powi for the maximum product
3875 just formed, raised to the power obtained earlier. */
3876 rf1 = &repeat_factor_vec[j];
3877 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3878 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3879 build_int_cst (integer_type_node,
3880 power));
3881 gimple_call_set_lhs (pow_stmt, iter_result);
3882 gimple_set_location (pow_stmt, gimple_location (stmt));
3883 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3886 /* If we previously formed at least one other builtin_powi call,
3887 form the product of this one and those others. */
3888 if (result)
3890 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
3891 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
3892 result, iter_result);
3893 gimple_set_location (mul_stmt, gimple_location (stmt));
3894 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3895 gimple_set_visited (mul_stmt, true);
3896 result = new_result;
3898 else
3899 result = iter_result;
3901 /* Decrement the occurrence count of each element in the product
3902 by the count found above, and remove this many copies of each
3903 factor from OPS. */
3904 for (i = j; i < vec_len; i++)
3906 unsigned k = power;
3907 unsigned n;
3909 rf1 = &repeat_factor_vec[i];
3910 rf1->count -= power;
3912 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
3914 if (oe->op == rf1->factor)
3916 if (oe->count <= k)
3918 ops->ordered_remove (n);
3919 k -= oe->count;
3921 if (k == 0)
3922 break;
3924 else
3926 oe->count -= k;
3927 break;
3934 /* At this point all elements in the repeated factor vector have a
3935 remaining occurrence count of 0 or 1, and those with a count of 1
3936 don't have cached representatives. Re-sort the ops vector and
3937 clean up. */
3938 ops->qsort (sort_by_operand_rank);
3939 repeat_factor_vec.release ();
3941 /* Return the final product computed herein. Note that there may
3942 still be some elements with single occurrence count left in OPS;
3943 those will be handled by the normal reassociation logic. */
3944 return result;
3947 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
3949 static void
3950 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
3952 tree rhs1;
3954 if (dump_file && (dump_flags & TDF_DETAILS))
3956 fprintf (dump_file, "Transforming ");
3957 print_gimple_stmt (dump_file, stmt, 0, 0);
3960 rhs1 = gimple_assign_rhs1 (stmt);
3961 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
3962 update_stmt (stmt);
3963 remove_visited_stmt_chain (rhs1);
3965 if (dump_file && (dump_flags & TDF_DETAILS))
3967 fprintf (dump_file, " into ");
3968 print_gimple_stmt (dump_file, stmt, 0, 0);
3972 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
3974 static void
3975 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
3976 tree rhs1, tree rhs2)
3978 if (dump_file && (dump_flags & TDF_DETAILS))
3980 fprintf (dump_file, "Transforming ");
3981 print_gimple_stmt (dump_file, stmt, 0, 0);
3984 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
3985 update_stmt (gsi_stmt (*gsi));
3986 remove_visited_stmt_chain (rhs1);
3988 if (dump_file && (dump_flags & TDF_DETAILS))
3990 fprintf (dump_file, " into ");
3991 print_gimple_stmt (dump_file, stmt, 0, 0);
3995 /* Reassociate expressions in basic block BB and its post-dominator as
3996 children. */
3998 static void
3999 reassociate_bb (basic_block bb)
4001 gimple_stmt_iterator gsi;
4002 basic_block son;
4003 gimple stmt = last_stmt (bb);
4005 if (stmt && !gimple_visited_p (stmt))
4006 maybe_optimize_range_tests (stmt);
4008 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4010 stmt = gsi_stmt (gsi);
4012 if (is_gimple_assign (stmt)
4013 && !stmt_could_throw_p (stmt))
4015 tree lhs, rhs1, rhs2;
4016 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4018 /* If this is not a gimple binary expression, there is
4019 nothing for us to do with it. */
4020 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4021 continue;
4023 /* If this was part of an already processed statement,
4024 we don't need to touch it again. */
4025 if (gimple_visited_p (stmt))
4026 {
4027 /* This statement might have become dead because of previous
4028 reassociations. */
4029 if (has_zero_uses (gimple_get_lhs (stmt)))
4030 {
4031 gsi_remove (&gsi, true);
4032 release_defs (stmt);
4033 /* We might end up removing the last stmt above, which
4034 leaves the iterator at the end of the sequence.
4035 Reset it to the last stmt of the block in that case.
4036 If the block is now empty because we removed its
4037 last statement, the iterator is still at the end of
4038 the sequence and we need to bail out. */
4039 if (gsi_end_p (gsi))
4040 {
4041 gsi = gsi_last_bb (bb);
4042 if (gsi_end_p (gsi))
4043 break;
4044 }
4045 }
4046 continue;
4047 }
4049 lhs = gimple_assign_lhs (stmt);
4050 rhs1 = gimple_assign_rhs1 (stmt);
4051 rhs2 = gimple_assign_rhs2 (stmt);
4053 /* For non-bit or min/max operations we can't associate
4054 all types. Verify that here. */
4055 if (rhs_code != BIT_IOR_EXPR
4056 && rhs_code != BIT_AND_EXPR
4057 && rhs_code != BIT_XOR_EXPR
4058 && rhs_code != MIN_EXPR
4059 && rhs_code != MAX_EXPR
4060 && (!can_reassociate_p (lhs)
4061 || !can_reassociate_p (rhs1)
4062 || !can_reassociate_p (rhs2)))
4063 continue;
4065 if (associative_tree_code (rhs_code))
4066 {
4067 vec<operand_entry_t> ops = vNULL;
4068 tree powi_result = NULL_TREE;
4070 /* There may be no immediate uses left by the time we
4071 get here because we may have eliminated them all. */
4072 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4073 continue;
4075 gimple_set_visited (stmt, true);
4076 linearize_expr_tree (&ops, stmt, true, true);
4077 ops.qsort (sort_by_operand_rank);
4078 optimize_ops_list (rhs_code, &ops);
4079 if (undistribute_ops_list (rhs_code, &ops,
4080 loop_containing_stmt (stmt)))
4081 {
4082 ops.qsort (sort_by_operand_rank);
4083 optimize_ops_list (rhs_code, &ops);
4084 }
4086 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4087 optimize_range_tests (rhs_code, &ops);
4089 if (first_pass_instance
4090 && rhs_code == MULT_EXPR
4091 && flag_unsafe_math_optimizations)
4092 powi_result = attempt_builtin_powi (stmt, &ops);
4094 /* If the operand vector is now empty, all operands were
4095 consumed by the __builtin_powi optimization. */
4096 if (ops.length () == 0)
4097 transform_stmt_to_copy (&gsi, stmt, powi_result);
4098 else if (ops.length () == 1)
4099 {
4100 tree last_op = ops.last ()->op;
4102 if (powi_result)
4103 transform_stmt_to_multiply (&gsi, stmt, last_op,
4104 powi_result);
4105 else
4106 transform_stmt_to_copy (&gsi, stmt, last_op);
4107 }
4108 else
4109 {
4110 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4111 int ops_num = ops.length ();
4112 int width = get_reassociation_width (ops_num, rhs_code, mode);
4114 if (dump_file && (dump_flags & TDF_DETAILS))
4115 fprintf (dump_file,
4116 "Width = %d was chosen for reassociation\n", width);
4118 if (width > 1
4119 && ops.length () > 3)
4120 rewrite_expr_tree_parallel (stmt, width, ops);
4121 else
4122 rewrite_expr_tree (stmt, 0, ops, false);
4124 /* If we combined some repeated factors into a
4125 __builtin_powi call, multiply that result by the
4126 reassociated operands. */
4127 if (powi_result)
4128 {
4129 gimple mul_stmt;
4130 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4131 tree target_ssa = make_temp_ssa_name (type, NULL,
4132 "reassocpow");
4133 gimple_set_lhs (stmt, target_ssa);
4134 update_stmt (stmt);
4135 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4136 powi_result,
4137 target_ssa);
4138 gimple_set_location (mul_stmt, gimple_location (stmt));
4139 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4140 }
4141 }
4143 ops.release ();
4144 }
4145 }
4146 }
4147 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4148 son;
4149 son = next_dom_son (CDI_POST_DOMINATORS, son))
4150 reassociate_bb (son);
4151 }
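/* Editorial sketch, not part of the original source: for an expression
   tree computing a + b + c + d through a chain of single additions
   rooted at STMT, the loop above collects the operand vector
   { a, b, c, d }, sorts it by rank, and runs optimize_ops_list and
   (when profitable) undistribute_ops_list over it.  If
   get_reassociation_width returns, say, 2, rewrite_expr_tree_parallel
   rebuilds the computation roughly as

     t1 = a + b;  t2 = c + d;  lhs = t1 + t2;

   exposing two independent additions; otherwise rewrite_expr_tree
   emits the usual left-linear chain.  */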
4153 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4154 void debug_ops_vector (vec<operand_entry_t> ops);
4156 /* Dump the operand entry vector OPS to FILE. */
4158 void
4159 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4160 {
4161 operand_entry_t oe;
4162 unsigned int i;
4164 FOR_EACH_VEC_ELT (ops, i, oe)
4165 {
4166 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4167 print_generic_expr (file, oe->op, 0);
4168 }
4169 }
4171 /* Dump the operand entry vector OPS to STDERR. */
4173 DEBUG_FUNCTION void
4174 debug_ops_vector (vec<operand_entry_t> ops)
4175 {
4176 dump_ops_vector (stderr, ops);
4177 }
4179 static void
4180 do_reassoc (void)
4181 {
4182 break_up_subtract_bb (ENTRY_BLOCK_PTR);
4183 reassociate_bb (EXIT_BLOCK_PTR);
4184 }
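/* Editorial note: both phases walk the CFG through dominance trees
   rather than a linear list of blocks.  reassociate_bb, as defined
   above, starts at the exit block and recurses over its
   CDI_POST_DOMINATORS children; break_up_subtract_bb (defined earlier
   in this file) is presumably driven the same way from the entry block
   over the dominator tree.  */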
4186 /* Initialize the reassociation pass. */
4188 static void
4189 init_reassoc (void)
4190 {
4191 int i;
4192 long rank = 2;
4193 int *bbs = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
4195 /* Find the loops, so that we can prevent moving calculations in
4196 them. */
4197 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4199 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4201 operand_entry_pool = create_alloc_pool ("operand entry pool",
4202 sizeof (struct operand_entry), 30);
4203 next_operand_entry_id = 0;
4205 /* A reverse post order (RPO) walk gives us an ordering in which
4206 deeper loops come later. */
4207 pre_and_rev_post_order_compute (NULL, bbs, false);
4208 bb_rank = XCNEWVEC (long, last_basic_block);
4209 operand_rank = pointer_map_create ();
4211 /* Give each default definition a distinct rank. This includes
4212 parameters and the static chain. Walk backwards over all
4213 SSA names so that we get proper rank ordering according
4214 to tree_swap_operands_p. */
4215 for (i = num_ssa_names - 1; i > 0; --i)
4216 {
4217 tree name = ssa_name (i);
4218 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4219 insert_operand_rank (name, ++rank);
4220 }
4222 /* Set up rank for each BB */
4223 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
4224 bb_rank[bbs[i]] = ++rank << 16;
4226 free (bbs);
4227 calculate_dominance_info (CDI_POST_DOMINATORS);
4228 plus_negates = vNULL;
4229 }
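/* Editorial example of the rank scheme set up above: RANK starts at 2,
   so two default definitions (say a parameter and the static chain)
   receive ranks 3 and 4, and the basic blocks then receive bb_rank
   values (5 << 16), (6 << 16), ... in reverse post order.  Each block
   thus owns a 65536-wide rank band above all default definitions and
   all earlier blocks, so values computed in later (deeper) blocks tend
   to outrank values from earlier ones in the rank computation defined
   earlier in this file.  */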
4231 /* Cleanup after the reassociation pass, and print stats if
4232 requested. */
4234 static void
4235 fini_reassoc (void)
4236 {
4237 statistics_counter_event (cfun, "Linearized",
4238 reassociate_stats.linearized);
4239 statistics_counter_event (cfun, "Constants eliminated",
4240 reassociate_stats.constants_eliminated);
4241 statistics_counter_event (cfun, "Ops eliminated",
4242 reassociate_stats.ops_eliminated);
4243 statistics_counter_event (cfun, "Statements rewritten",
4244 reassociate_stats.rewritten);
4245 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4246 reassociate_stats.pows_encountered);
4247 statistics_counter_event (cfun, "Built-in powi calls created",
4248 reassociate_stats.pows_created);
4250 pointer_map_destroy (operand_rank);
4251 free_alloc_pool (operand_entry_pool);
4252 free (bb_rank);
4253 plus_negates.release ();
4254 free_dominance_info (CDI_POST_DOMINATORS);
4255 loop_optimizer_finalize ();
4256 }
4258 /* Gate and execute functions for Reassociation. */
4260 static unsigned int
4261 execute_reassoc (void)
4262 {
4263 init_reassoc ();
4265 do_reassoc ();
4266 repropagate_negates ();
4268 fini_reassoc ();
4269 return 0;
4270 }
4272 static bool
4273 gate_tree_ssa_reassoc (void)
4274 {
4275 return flag_tree_reassoc != 0;
4276 }
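/* Editorial note: flag_tree_reassoc is the variable behind the
   -ftree-reassoc / -fno-tree-reassoc command-line option, so this gate
   simply lets the user disable the whole pass.  */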
4278 struct gimple_opt_pass pass_reassoc =
4279 {
4280 {
4281 GIMPLE_PASS,
4282 "reassoc", /* name */
4283 OPTGROUP_NONE, /* optinfo_flags */
4284 gate_tree_ssa_reassoc, /* gate */
4285 execute_reassoc, /* execute */
4286 NULL, /* sub */
4287 NULL, /* next */
4288 0, /* static_pass_number */
4289 TV_TREE_REASSOC, /* tv_id */
4290 PROP_cfg | PROP_ssa, /* properties_required */
4291 0, /* properties_provided */
4292 0, /* properties_destroyed */
4293 0, /* todo_flags_start */
4294 TODO_verify_ssa
4295 | TODO_verify_flow
4296 | TODO_ggc_collect /* todo_flags_finish */