gcc/tree-ssa-reassoc.c

   1 /* Reassociation for trees.
   2    Copyright (C) 2005-2013 Free Software Foundation, Inc.
   3    Contributed by Daniel Berlin <dan@dberlin.org>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 3, or (at your option)
  10 any later version.
  11
  12 GCC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "tree.h"
  26 #include "basic-block.h"
  27 #include "gimple-pretty-print.h"
  28 #include "tree-inline.h"
  29 #include "tree-flow.h"
  30 #include "gimple.h"
  31 #include "tree-iterator.h"
  32 #include "tree-pass.h"
  33 #include "alloc-pool.h"
  34 #include "vec.h"
  35 #include "langhooks.h"
  36 #include "pointer-set.h"
  37 #include "cfgloop.h"
  38 #include "flags.h"
  39 #include "target.h"
  40 #include "params.h"
  41 #include "diagnostic-core.h"
  42
  43 /*  This is a simple global reassociation pass.  It is, in part, based
  44     on the LLVM pass of the same name (They do some things more/less
  45     than we do, in different orders, etc).
  46
  47     It consists of five steps:
  48
  49     1. Breaking up subtract operations into addition + negate, where
  50     it would promote the reassociation of adds.
  51
  52     2. Left linearization of the expression trees, so that (A+B)+(C+D)
  53     becomes (((A+B)+C)+D), which is easier for us to rewrite later.
  54     During linearization, we place the operands of the binary
  55     expressions into a vector of operand_entry_t
  56
  57     3. Optimization of the operand lists, eliminating things like a +
  58     -a, a & a, etc.
  59
  60     3a. Combine repeated factors with the same occurrence counts
  61     into a __builtin_powi call that will later be optimized into
  62     an optimal number of multiplies.
  63
  64     4. Rewrite the expression trees we linearized and optimized so
  65     they are in proper rank order.
  66
  67     5. Repropagate negates, as nothing else will clean it up ATM.
  68
  69     A bit of theory on #4, since nobody seems to write anything down
  70     about why it makes sense to do it the way they do it:
  71
  72     We could do this much nicer theoretically, but don't (for reasons
  73     explained after how to do it theoretically nice :P).
  74
  75     In order to promote the most redundancy elimination, you want
  76     binary expressions whose operands are the same rank (or
  77     preferably, the same value) exposed to the redundancy eliminator,
  78     for possible elimination.
  79
  80     So the way to do this if we really cared, is to build the new op
  81     tree from the leaves to the roots, merging as you go, and putting the
  82     new op on the end of the worklist, until you are left with one
  83     thing on the worklist.
  84
  85     IE if you have to rewrite the following set of operands (listed with
  86     rank in parentheses), with opcode PLUS_EXPR:
  87
  88     a (1),  b (1),  c (1),  d (2), e (2)
  89
  90
  91     We start with our merge worklist empty, and the ops list with all of
  92     those on it.
  93
  94     You want to first merge all leaves of the same rank, as much as
  95     possible.
  96
  97     So first build a binary op of
  98
  99     mergetmp = a + b, and put "mergetmp" on the merge worklist.
 100
 101     Because there is no three operand form of PLUS_EXPR, c is not going to
 102     be exposed to redundancy elimination as a rank 1 operand.
 103
 104     So you might as well throw it on the merge worklist (you could also
 105     consider it to now be a rank two operand, and merge it with d and e,
 106     but in this case, you then have evicted e from a binary op. So at
 107     least in this situation, you can't win.)
 108
 109     Then build a binary op of d + e
 110     mergetmp2 = d + e
 111
 112     and put mergetmp2 on the merge worklist.
 113
 114     so merge worklist = {mergetmp, c, mergetmp2}
 115
 116     Continue building binary ops of these operations until you have only
 117     one operation left on the worklist.
 118
 119     So we have
 120
 121     build binary op
 122     mergetmp3 = mergetmp + c
 123
 124     worklist = {mergetmp2, mergetmp3}
 125
 126     mergetmp4 = mergetmp2 + mergetmp3
 127
 128     worklist = {mergetmp4}
 129
 130     because we have one operation left, we can now just set the original
 131     statement equal to the result of that operation.
 132
 133     This will at least expose a + b  and d + e to redundancy elimination
 134     as binary operations.
 135
 136     For extra points, you can reuse the old statements to build the
 137     mergetmps, since you shouldn't run out.
 138
 139     So why don't we do this?
 140
 141     Because it's expensive, and rarely will help.  Most trees we are
 142     reassociating have 3 or less ops.  If they have 2 ops, they already
 143     will be written into a nice single binary op.  If you have 3 ops, a
 144     single simple check suffices to tell you whether the first two are of the
 145     same rank.  If so, you know to order it
 146
 147     mergetmp = op1 + op2
 148     newstmt = mergetmp + op3
 149
 150     instead of
 151     mergetmp = op2 + op3
 152     newstmt = mergetmp + op1
 153
 154     If all three are of the same rank, you can't expose them all in a
 155     single binary operator anyway, so the above is *still* the best you
 156     can do.
 157
 158     Thus, this is what we do.  When we have three ops left, we check to see
 159     what order to put them in, and call it a day.  As a nod to vector sum
 160     reduction, we check if any of the ops are really a phi node that is a
 161     destructive update for the associating op, and keep the destructive
 162     update together for vector sum reduction recognition.  */
 163
 164
 165 /* Statistics */
 166 static struct
 167 {
 168   int linearized;
 169   int constants_eliminated;
 170   int ops_eliminated;
 171   int rewritten;
 172   int pows_encountered;
 173   int pows_created;
 174 } reassociate_stats;
 175
 176 /* Operator, rank pair.  */
 177 typedef struct operand_entry
 178 {
 179   unsigned int rank;
 180   int id;
 181   tree op;
 182   unsigned int count;
 183 } *operand_entry_t;
 184
 185 static alloc_pool operand_entry_pool;
 186
 187 /* This is used to assign a unique ID to each struct operand_entry
 188    so that qsort results are identical on different hosts.  */
 189 static int next_operand_entry_id;
 190
 191 /* Starting rank number for a given basic block, so that we can rank
 192    operations using unmovable instructions in that BB based on the bb
 193    depth.  */
 194 static long *bb_rank;
 195
 196 /* Operand->rank hashtable.  */
 197 static struct pointer_map_t *operand_rank;
 198
 199 /* Forward decls.  */
 200 static long get_rank (tree);
 201
 202
 203 /* Bias amount for loop-carried phis.  We want this to be larger than
 204    the depth of any reassociation tree we can see, but not larger than
 205    the rank difference between two blocks.  */
 206 #define PHI_LOOP_BIAS (1 << 15)
 207
 208 /* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
 209    an innermost loop, and the phi has only a single use which is inside
 210    the loop, then the rank is the block rank of the loop latch plus an
 211    extra bias for the loop-carried dependence.  This causes expressions
 212    calculated into an accumulator variable to be independent for each
 213    iteration of the loop.  If STMT is some other phi, the rank is the
 214    block rank of its containing block.  */
 215 static long
 216 phi_rank (gimple stmt)
 217 {
 218   basic_block bb = gimple_bb (stmt);
 219   struct loop *father = bb->loop_father;
 220   tree res;
 221   unsigned i;
 222   use_operand_p use;
 223   gimple use_stmt;
 224
 225   /* We only care about real loops (those with a latch).  */
 226   if (!father->latch)
 227     return bb_rank[bb->index];
 228
 229   /* Interesting phis must be in headers of innermost loops.  */
 230   if (bb != father->header
 231       || father->inner)
 232     return bb_rank[bb->index];
 233
 234   /* Ignore virtual SSA_NAMEs.  */
 235   res = gimple_phi_result (stmt);
 236   if (virtual_operand_p (res))
 237     return bb_rank[bb->index];
 238
 239   /* The phi definition must have a single use, and that use must be
 240      within the loop.  Otherwise this isn't an accumulator pattern.  */
 241   if (!single_imm_use (res, &use, &use_stmt)
 242       || gimple_bb (use_stmt)->loop_father != father)
 243     return bb_rank[bb->index];
 244
 245   /* Look for phi arguments from within the loop.  If found, bias this phi.  */
 246   for (i = 0; i < gimple_phi_num_args (stmt); i++)
 247     {
 248       tree arg = gimple_phi_arg_def (stmt, i);
 249       if (TREE_CODE (arg) == SSA_NAME
 250           && !SSA_NAME_IS_DEFAULT_DEF (arg))
 251         {
 252           gimple def_stmt = SSA_NAME_DEF_STMT (arg);
 253           if (gimple_bb (def_stmt)->loop_father == father)
 254             return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
 255         }
 256     }
 257
 258   /* Must be an uninteresting phi.  */
 259   return bb_rank[bb->index];
 260 }
 261
 262 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
 263    loop-carried dependence of an innermost loop, return TRUE; else
 264    return FALSE.  */
 265 static bool
 266 loop_carried_phi (tree exp)
 267 {
 268   gimple phi_stmt;
 269   long block_rank;
 270
 271   if (TREE_CODE (exp) != SSA_NAME
 272       || SSA_NAME_IS_DEFAULT_DEF (exp))
 273     return false;
 274
 275   phi_stmt = SSA_NAME_DEF_STMT (exp);
 276
 277   if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
 278     return false;
 279
 280   /* Non-loop-carried phis have block rank.  Loop-carried phis have
 281      an additional bias added in.  If this phi doesn't have block rank,
 282      it's biased and should not be propagated.  */
 283   block_rank = bb_rank[gimple_bb (phi_stmt)->index];
 284
 285   if (phi_rank (phi_stmt) != block_rank)
 286     return true;
 287
 288   return false;
 289 }
 290
 291 /* Return the maximum of RANK and the rank that should be propagated
 292    from expression OP.  For most operands, this is just the rank of OP.
 293    For loop-carried phis, the value is zero to avoid undoing the bias
 294    in favor of the phi.  */
 295 static long
 296 propagate_rank (long rank, tree op)
 297 {
 298   long op_rank;
 299
 300   if (loop_carried_phi (op))
 301     return rank;
 302
 303   op_rank = get_rank (op);
 304
 305   return MAX (rank, op_rank);
 306 }
 307
 308 /* Look up the operand rank structure for expression E.  */
 309
 310 static inline long
 311 find_operand_rank (tree e)
 312 {
 313   void **slot = pointer_map_contains (operand_rank, e);
 314   return slot ? (long) (intptr_t) *slot : -1;
 315 }
 316
 317 /* Insert {E,RANK} into the operand rank hashtable.  */
 318
 319 static inline void
 320 insert_operand_rank (tree e, long rank)
 321 {
 322   void **slot;
 323   gcc_assert (rank > 0);
 324   slot = pointer_map_insert (operand_rank, e);
 325   gcc_assert (!*slot);
 326   *slot = (void *) (intptr_t) rank;
 327 }
 328
 329 /* Given an expression E, return the rank of the expression.  */
 330
 331 static long
 332 get_rank (tree e)
 333 {
 334   /* Constants have rank 0.  */
 335   if (is_gimple_min_invariant (e))
 336     return 0;
 337
 338   /* SSA_NAME's have the rank of the expression they are the result
 339      of.
 340      For globals and uninitialized values, the rank is 0.
 341      For function arguments, use the pre-setup rank.
 342      For PHI nodes, stores, asm statements, etc, we use the rank of
 343      the BB.
 344      For simple operations, the rank is the maximum rank of any of
 345      its operands, or the bb_rank, whichever is less.
 346      I make no claims that this is optimal, however, it gives good
 347      results.  */
 348
 349   /* We make an exception to the normal ranking system to break
 350      dependences of accumulator variables in loops.  Suppose we
 351      have a simple one-block loop containing:
 352
 353        x_1 = phi(x_0, x_2)
 354        b = a + x_1
 355        c = b + d
 356        x_2 = c + e
 357
 358      As shown, each iteration of the calculation into x is fully
 359      dependent upon the iteration before it.  We would prefer to
 360      see this in the form:
 361
 362        x_1 = phi(x_0, x_2)
 363        b = a + d
 364        c = b + e
 365        x_2 = c + x_1
 366
 367      If the loop is unrolled, the calculations of b and c from
 368      different iterations can be interleaved.
 369
 370      To obtain this result during reassociation, we bias the rank
 371      of the phi definition x_1 upward, when it is recognized as an
 372      accumulator pattern.  The artificial rank causes it to be
 373      added last, providing the desired independence.  */
 374
 375   if (TREE_CODE (e) == SSA_NAME)
 376     {
 377       gimple stmt;
 378       long rank;
 379       int i, n;
 380       tree op;
 381
 382       if (SSA_NAME_IS_DEFAULT_DEF (e))
 383         return find_operand_rank (e);
 384
 385       stmt = SSA_NAME_DEF_STMT (e);
 386       if (gimple_code (stmt) == GIMPLE_PHI)
 387         return phi_rank (stmt);
 388
 389       if (!is_gimple_assign (stmt)
 390           || gimple_vdef (stmt))
 391         return bb_rank[gimple_bb (stmt)->index];
 392
 393       /* If we already have a rank for this expression, use that.  */
 394       rank = find_operand_rank (e);
 395       if (rank != -1)
 396         return rank;
 397
 398       /* Otherwise, find the maximum rank for the operands.  As an
 399          exception, remove the bias from loop-carried phis when propagating
 400          the rank so that dependent operations are not also biased.  */
 401       rank = 0;
 402       if (gimple_assign_single_p (stmt))
 403         {
 404           tree rhs = gimple_assign_rhs1 (stmt);
 405           n = TREE_OPERAND_LENGTH (rhs);
 406           if (n == 0)
 407             rank = propagate_rank (rank, rhs);
 408           else
 409             {
 410               for (i = 0; i < n; i++)
 411                 {
 412                   op = TREE_OPERAND (rhs, i);
 413
 414                   if (op != NULL_TREE)
 415                     rank = propagate_rank (rank, op);
 416                 }
 417             }
 418         }
 419       else
 420         {
 421           n = gimple_num_ops (stmt);
 422           for (i = 1; i < n; i++)
 423             {
 424               op = gimple_op (stmt, i);
 425               gcc_assert (op);
 426               rank = propagate_rank (rank, op);
 427             }
 428         }
 429
 430       if (dump_file && (dump_flags & TDF_DETAILS))
 431         {
 432           fprintf (dump_file, "Rank for ");
 433           print_generic_expr (dump_file, e, 0);
 434           fprintf (dump_file, " is %ld\n", (rank + 1));
 435         }
 436
 437       /* Note the rank in the hashtable so we don't recompute it.  */
 438       insert_operand_rank (e, (rank + 1));
 439       return (rank + 1);
 440     }
 441
 442   /* Globals, etc,  are rank 0 */
 443   return 0;
 444 }
 445
 446
 447 /* We want integer ones to end up last no matter what, since they are
 448    the ones we can do the most with.  */
 449 #define INTEGER_CONST_TYPE 1 << 3
 450 #define FLOAT_CONST_TYPE 1 << 2
 451 #define OTHER_CONST_TYPE 1 << 1
 452
 453 /* Classify an invariant tree into integer, float, or other, so that
 454    we can sort them to be near other constants of the same type.  */
 455 static inline int
 456 constant_type (tree t)
 457 {
 458   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
 459     return INTEGER_CONST_TYPE;
 460   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
 461     return FLOAT_CONST_TYPE;
 462   else
 463     return OTHER_CONST_TYPE;
 464 }
 465
 466 /* qsort comparison function to sort operand entries PA and PB by rank
 467    so that the sorted array is ordered by rank in decreasing order.  */
 468 static int
 469 sort_by_operand_rank (const void *pa, const void *pb)
 470 {
 471   const operand_entry_t oea = *(const operand_entry_t *)pa;
 472   const operand_entry_t oeb = *(const operand_entry_t *)pb;
 473
 474   /* It's nicer for optimize_expression if constants that are likely
 475      to fold when added/multiplied//whatever are put next to each
 476      other.  Since all constants have rank 0, order them by type.  */
 477   if (oeb->rank == 0 && oea->rank == 0)
 478     {
 479       if (constant_type (oeb->op) != constant_type (oea->op))
 480         return constant_type (oeb->op) - constant_type (oea->op);
 481       else
 482         /* To make sorting result stable, we use unique IDs to determine
 483            order.  */
 484         return oeb->id - oea->id;
 485     }
 486
 487   /* Lastly, make sure the versions that are the same go next to each
 488      other.  We use SSA_NAME_VERSION because it's stable.  */
 489   if ((oeb->rank - oea->rank == 0)
 490       && TREE_CODE (oea->op) == SSA_NAME
 491       && TREE_CODE (oeb->op) == SSA_NAME)
 492     {
 493       if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
 494         return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
 495       else
 496         return oeb->id - oea->id;
 497     }
 498
 499   if (oeb->rank != oea->rank)
 500     return oeb->rank - oea->rank;
 501   else
 502     return oeb->id - oea->id;
 503 }
 504
 505 /* Add an operand entry to *OPS for the tree operand OP.  */
 506
 507 static void
 508 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
 509 {
 510   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
 511
 512   oe->op = op;
 513   oe->rank = get_rank (op);
 514   oe->id = next_operand_entry_id++;
 515   oe->count = 1;
 516   ops->safe_push (oe);
 517 }
 518
 519 /* Add an operand entry to *OPS for the tree operand OP with repeat
 520    count REPEAT.  */
 521
 522 static void
 523 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
 524                        HOST_WIDE_INT repeat)
 525 {
 526   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
 527
 528   oe->op = op;
 529   oe->rank = get_rank (op);
 530   oe->id = next_operand_entry_id++;
 531   oe->count = repeat;
 532   ops->safe_push (oe);
 533
 534   reassociate_stats.pows_encountered++;
 535 }
 536
 537 /* Return true if STMT is reassociable operation containing a binary
 538    operation with tree code CODE, and is inside LOOP.  */
 539
 540 static bool
 541 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
 542 {
 543   basic_block bb = gimple_bb (stmt);
 544
 545   if (gimple_bb (stmt) == NULL)
 546     return false;
 547
 548   if (!flow_bb_inside_loop_p (loop, bb))
 549     return false;
 550
 551   if (is_gimple_assign (stmt)
 552       && gimple_assign_rhs_code (stmt) == code
 553       && has_single_use (gimple_assign_lhs (stmt)))
 554     return true;
 555
 556   return false;
 557 }
 558
 559
 560 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
 561    operand of the negate operation.  Otherwise, return NULL.  */
 562
 563 static tree
 564 get_unary_op (tree name, enum tree_code opcode)
 565 {
 566   gimple stmt = SSA_NAME_DEF_STMT (name);
 567
 568   if (!is_gimple_assign (stmt))
 569     return NULL_TREE;
 570
 571   if (gimple_assign_rhs_code (stmt) == opcode)
 572     return gimple_assign_rhs1 (stmt);
 573   return NULL_TREE;
 574 }
 575
 576 /* If CURR and LAST are a pair of ops that OPCODE allows us to
 577    eliminate through equivalences, do so, remove them from OPS, and
 578    return true.  Otherwise, return false.  */
 579
 580 static bool
 581 eliminate_duplicate_pair (enum tree_code opcode,
 582                           vec<operand_entry_t> *ops,
 583                           bool *all_done,
 584                           unsigned int i,
 585                           operand_entry_t curr,
 586                           operand_entry_t last)
 587 {
 588
 589   /* If we have two of the same op, and the opcode is & |, min, or max,
 590      we can eliminate one of them.
 591      If we have two of the same op, and the opcode is ^, we can
 592      eliminate both of them.  */
 593
 594   if (last && last->op == curr->op)
 595     {
 596       switch (opcode)
 597         {
 598         case MAX_EXPR:
 599         case MIN_EXPR:
 600         case BIT_IOR_EXPR:
 601         case BIT_AND_EXPR:
 602           if (dump_file && (dump_flags & TDF_DETAILS))
 603             {
 604               fprintf (dump_file, "Equivalence: ");
 605               print_generic_expr (dump_file, curr->op, 0);
 606               fprintf (dump_file, " [&|minmax] ");
 607               print_generic_expr (dump_file, last->op, 0);
 608               fprintf (dump_file, " -> ");
 609               print_generic_stmt (dump_file, last->op, 0);
 610             }
 611
 612           ops->ordered_remove (i);
 613           reassociate_stats.ops_eliminated ++;
 614
 615           return true;
 616
 617         case BIT_XOR_EXPR:
 618           if (dump_file && (dump_flags & TDF_DETAILS))
 619             {
 620               fprintf (dump_file, "Equivalence: ");
 621               print_generic_expr (dump_file, curr->op, 0);
 622               fprintf (dump_file, " ^ ");
 623               print_generic_expr (dump_file, last->op, 0);
 624               fprintf (dump_file, " -> nothing\n");
 625             }
 626
 627           reassociate_stats.ops_eliminated += 2;
 628
 629           if (ops->length () == 2)
 630             {
 631               ops->create (0);
 632               add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
 633               *all_done = true;
 634             }
 635           else
 636             {
 637               ops->ordered_remove (i-1);
 638               ops->ordered_remove (i-1);
 639             }
 640
 641           return true;
 642
 643         default:
 644           break;
 645         }
 646     }
 647   return false;
 648 }
 649
 650 static vec<tree> plus_negates;
 651
 652 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
 653    expression, look in OPS for a corresponding positive operation to cancel
 654    it out.  If we find one, remove the other from OPS, replace
 655    OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
 656    return false. */
 657
 658 static bool
 659 eliminate_plus_minus_pair (enum tree_code opcode,
 660                            vec<operand_entry_t> *ops,
 661                            unsigned int currindex,
 662                            operand_entry_t curr)
 663 {
 664   tree negateop;
 665   tree notop;
 666   unsigned int i;
 667   operand_entry_t oe;
 668
 669   if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
 670     return false;
 671
 672   negateop = get_unary_op (curr->op, NEGATE_EXPR);
 673   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 674   if (negateop == NULL_TREE && notop == NULL_TREE)
 675     return false;
 676
 677   /* Any non-negated version will have a rank that is one less than
 678      the current rank.  So once we hit those ranks, if we don't find
 679      one, we can stop.  */
 680
 681   for (i = currindex + 1;
 682        ops->iterate (i, &oe)
 683        && oe->rank >= curr->rank - 1 ;
 684        i++)
 685     {
 686       if (oe->op == negateop)
 687         {
 688
 689           if (dump_file && (dump_flags & TDF_DETAILS))
 690             {
 691               fprintf (dump_file, "Equivalence: ");
 692               print_generic_expr (dump_file, negateop, 0);
 693               fprintf (dump_file, " + -");
 694               print_generic_expr (dump_file, oe->op, 0);
 695               fprintf (dump_file, " -> 0\n");
 696             }
 697
 698           ops->ordered_remove (i);
 699           add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
 700           ops->ordered_remove (currindex);
 701           reassociate_stats.ops_eliminated ++;
 702
 703           return true;
 704         }
 705       else if (oe->op == notop)
 706         {
 707           tree op_type = TREE_TYPE (oe->op);
 708
 709           if (dump_file && (dump_flags & TDF_DETAILS))
 710             {
 711               fprintf (dump_file, "Equivalence: ");
 712               print_generic_expr (dump_file, notop, 0);
 713               fprintf (dump_file, " + ~");
 714               print_generic_expr (dump_file, oe->op, 0);
 715               fprintf (dump_file, " -> -1\n");
 716             }
 717
 718           ops->ordered_remove (i);
 719           add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
 720           ops->ordered_remove (currindex);
 721           reassociate_stats.ops_eliminated ++;
 722
 723           return true;
 724         }
 725     }
 726
 727   /* CURR->OP is a negate expr in a plus expr: save it for later
 728      inspection in repropagate_negates().  */
 729   if (negateop != NULL_TREE)
 730     plus_negates.safe_push (curr->op);
 731
 732   return false;
 733 }
 734
 735 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
 736    bitwise not expression, look in OPS for a corresponding operand to
 737    cancel it out.  If we find one, remove the other from OPS, replace
 738    OPS[CURRINDEX] with 0, and return true.  Otherwise, return
 739    false. */
 740
 741 static bool
 742 eliminate_not_pairs (enum tree_code opcode,
 743                      vec<operand_entry_t> *ops,
 744                      unsigned int currindex,
 745                      operand_entry_t curr)
 746 {
 747   tree notop;
 748   unsigned int i;
 749   operand_entry_t oe;
 750
 751   if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
 752       || TREE_CODE (curr->op) != SSA_NAME)
 753     return false;
 754
 755   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 756   if (notop == NULL_TREE)
 757     return false;
 758
 759   /* Any non-not version will have a rank that is one less than
 760      the current rank.  So once we hit those ranks, if we don't find
 761      one, we can stop.  */
 762
 763   for (i = currindex + 1;
 764        ops->iterate (i, &oe)
 765        && oe->rank >= curr->rank - 1;
 766        i++)
 767     {
 768       if (oe->op == notop)
 769         {
 770           if (dump_file && (dump_flags & TDF_DETAILS))
 771             {
 772               fprintf (dump_file, "Equivalence: ");
 773               print_generic_expr (dump_file, notop, 0);
 774               if (opcode == BIT_AND_EXPR)
 775                 fprintf (dump_file, " & ~");
 776               else if (opcode == BIT_IOR_EXPR)
 777                 fprintf (dump_file, " | ~");
 778               print_generic_expr (dump_file, oe->op, 0);
 779               if (opcode == BIT_AND_EXPR)
 780                 fprintf (dump_file, " -> 0\n");
 781               else if (opcode == BIT_IOR_EXPR)
 782                 fprintf (dump_file, " -> -1\n");
 783             }
 784
 785           if (opcode == BIT_AND_EXPR)
 786             oe->op = build_zero_cst (TREE_TYPE (oe->op));
 787           else if (opcode == BIT_IOR_EXPR)
 788             oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
 789
 790           reassociate_stats.ops_eliminated += ops->length () - 1;
 791           ops->truncate (0);
 792           ops->quick_push (oe);
 793           return true;
 794         }
 795     }
 796
 797   return false;
 798 }
 799
 800 /* Use constant value that may be present in OPS to try to eliminate
 801    operands.  Note that this function is only really used when we've
 802    eliminated ops for other reasons, or merged constants.  Across
 803    single statements, fold already does all of this, plus more.  There
 804    is little point in duplicating logic, so I've only included the
 805    identities that I could ever construct testcases to trigger.  */
 806
 807 static void
 808 eliminate_using_constants (enum tree_code opcode,
 809                            vec<operand_entry_t> *ops)
 810 {
 811   operand_entry_t oelast = ops->last ();
 812   tree type = TREE_TYPE (oelast->op);
 813
 814   if (oelast->rank == 0
 815       && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
 816     {
 817       switch (opcode)
 818         {
 819         case BIT_AND_EXPR:
 820           if (integer_zerop (oelast->op))
 821             {
 822               if (ops->length () != 1)
 823                 {
 824                   if (dump_file && (dump_flags & TDF_DETAILS))
 825                     fprintf (dump_file, "Found & 0, removing all other ops\n");
 826
 827                   reassociate_stats.ops_eliminated += ops->length () - 1;
 828
 829                   ops->truncate (0);
 830                   ops->quick_push (oelast);
 831                   return;
 832                 }
 833             }
 834           else if (integer_all_onesp (oelast->op))
 835             {
 836               if (ops->length () != 1)
 837                 {
 838                   if (dump_file && (dump_flags & TDF_DETAILS))
 839                     fprintf (dump_file, "Found & -1, removing\n");
 840                   ops->pop ();
 841                   reassociate_stats.ops_eliminated++;
 842                 }
 843             }
 844           break;
 845         case BIT_IOR_EXPR:
 846           if (integer_all_onesp (oelast->op))
 847             {
 848               if (ops->length () != 1)
 849                 {
 850                   if (dump_file && (dump_flags & TDF_DETAILS))
 851                     fprintf (dump_file, "Found | -1, removing all other ops\n");
 852
 853                   reassociate_stats.ops_eliminated += ops->length () - 1;
 854
 855                   ops->truncate (0);
 856                   ops->quick_push (oelast);
 857                   return;
 858                 }
 859             }
 860           else if (integer_zerop (oelast->op))
 861             {
 862               if (ops->length () != 1)
 863                 {
 864                   if (dump_file && (dump_flags & TDF_DETAILS))
 865                     fprintf (dump_file, "Found | 0, removing\n");
 866                   ops->pop ();
 867                   reassociate_stats.ops_eliminated++;
 868                 }
 869             }
 870           break;
 871         case MULT_EXPR:
 872           if (integer_zerop (oelast->op)
 873               || (FLOAT_TYPE_P (type)
 874                   && !HONOR_NANS (TYPE_MODE (type))
 875                   && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
 876                   && real_zerop (oelast->op)))
 877             {
 878               if (ops->length () != 1)
 879                 {
 880                   if (dump_file && (dump_flags & TDF_DETAILS))
 881                     fprintf (dump_file, "Found * 0, removing all other ops\n");
 882
 883                   reassociate_stats.ops_eliminated += ops->length () - 1;
 884                   ops->truncate (1);
 885                   ops->quick_push (oelast);
 886                   return;
 887                 }
 888             }
 889           else if (integer_onep (oelast->op)
 890                    || (FLOAT_TYPE_P (type)
 891                        && !HONOR_SNANS (TYPE_MODE (type))
 892                        && real_onep (oelast->op)))
 893             {
 894               if (ops->length () != 1)
 895                 {
 896                   if (dump_file && (dump_flags & TDF_DETAILS))
 897                     fprintf (dump_file, "Found * 1, removing\n");
 898                   ops->pop ();
 899                   reassociate_stats.ops_eliminated++;
 900                   return;
 901                 }
 902             }
 903           break;
 904         case BIT_XOR_EXPR:
 905         case PLUS_EXPR:
 906         case MINUS_EXPR:
 907           if (integer_zerop (oelast->op)
 908               || (FLOAT_TYPE_P (type)
 909                   && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
 910                   && fold_real_zero_addition_p (type, oelast->op,
 911                                                 opcode == MINUS_EXPR)))
 912             {
 913               if (ops->length () != 1)
 914                 {
 915                   if (dump_file && (dump_flags & TDF_DETAILS))
 916                     fprintf (dump_file, "Found [|^+] 0, removing\n");
 917                   ops->pop ();
 918                   reassociate_stats.ops_eliminated++;
 919                   return;
 920                 }
 921             }
 922           break;
 923         default:
 924           break;
 925         }
 926     }
 927 }
 928
 929
 930 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
 931                                  bool, bool);
 932
 933 /* Structure for tracking and counting operands.  */
 934 typedef struct oecount_s {
 935   int cnt;
 936   int id;
 937   enum tree_code oecode;
 938   tree op;
 939 } oecount;
 940
 941
 942 /* The heap for the oecount hashtable and the sorted list of operands.  */
 943 static vec<oecount> cvec;
 944
 945 /* Hash function for oecount.  */
 946
 947 static hashval_t
 948 oecount_hash (const void *p)
 949 {
 950   const oecount *c = &cvec[(size_t)p - 42];
 951   return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
 952 }
 953
 954 /* Comparison function for oecount.  */
 955
 956 static int
 957 oecount_eq (const void *p1, const void *p2)
 958 {
 959   const oecount *c1 = &cvec[(size_t)p1 - 42];
 960   const oecount *c2 = &cvec[(size_t)p2 - 42];
 961   return (c1->oecode == c2->oecode
 962           && c1->op == c2->op);
 963 }
 964
 965 /* Comparison function for qsort sorting oecount elements by count.  */
 966
 967 static int
 968 oecount_cmp (const void *p1, const void *p2)
 969 {
 970   const oecount *c1 = (const oecount *)p1;
 971   const oecount *c2 = (const oecount *)p2;
 972   if (c1->cnt != c2->cnt)
 973     return c1->cnt - c2->cnt;
 974   else
 975     /* If counts are identical, use unique IDs to stabilize qsort.  */
 976     return c1->id - c2->id;
 977 }
 978
 979 /* Return TRUE iff STMT represents a builtin call that raises OP
 980    to some exponent.  */
 981
 982 static bool
 983 stmt_is_power_of_op (gimple stmt, tree op)
 984 {
 985   tree fndecl;
 986
 987   if (!is_gimple_call (stmt))
 988     return false;
 989
 990   fndecl = gimple_call_fndecl (stmt);
 991
 992   if (!fndecl
 993       || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
 994     return false;
 995
 996   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
 997     {
 998     CASE_FLT_FN (BUILT_IN_POW):
 999     CASE_FLT_FN (BUILT_IN_POWI):
1000       return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1001
1002     default:
1003       return false;
1004     }
1005 }
1006
1007 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1008    in place and return the result.  Assumes that stmt_is_power_of_op
1009    was previously called for STMT and returned TRUE.  */
1010
1011 static HOST_WIDE_INT
1012 decrement_power (gimple stmt)
1013 {
1014   REAL_VALUE_TYPE c, cint;
1015   HOST_WIDE_INT power;
1016   tree arg1;
1017
1018   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1019     {
1020     CASE_FLT_FN (BUILT_IN_POW):
1021       arg1 = gimple_call_arg (stmt, 1);
1022       c = TREE_REAL_CST (arg1);
1023       power = real_to_integer (&c) - 1;
1024       real_from_integer (&cint, VOIDmode, power, 0, 0);
1025       gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1026       return power;
1027
1028     CASE_FLT_FN (BUILT_IN_POWI):
1029       arg1 = gimple_call_arg (stmt, 1);
1030       power = TREE_INT_CST_LOW (arg1) - 1;
1031       gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1032       return power;
1033
1034     default:
1035       gcc_unreachable ();
1036     }
1037 }
1038
1039 /* Find the single immediate use of STMT's LHS, and replace it
1040    with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
1041    replace *DEF with OP as well.  */
1042
1043 static void
1044 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1045 {
1046   tree lhs;
1047   gimple use_stmt;
1048   use_operand_p use;
1049   gimple_stmt_iterator gsi;
1050
1051   if (is_gimple_call (stmt))
1052     lhs = gimple_call_lhs (stmt);
1053   else
1054     lhs = gimple_assign_lhs (stmt);
1055
1056   gcc_assert (has_single_use (lhs));
1057   single_imm_use (lhs, &use, &use_stmt);
1058   if (lhs == *def)
1059     *def = op;
1060   SET_USE (use, op);
1061   if (TREE_CODE (op) != SSA_NAME)
1062     update_stmt (use_stmt);
1063   gsi = gsi_for_stmt (stmt);
1064   unlink_stmt_vdef (stmt);
1065   gsi_remove (&gsi, true);
1066   release_defs (stmt);
1067 }
1068
1069 /* Walks the linear chain with result *DEF searching for an operation
1070    with operand OP and code OPCODE removing that from the chain.  *DEF
1071    is updated if there is only one operand but no operation left.  */
1072
1073 static void
1074 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1075 {
1076   gimple stmt = SSA_NAME_DEF_STMT (*def);
1077
1078   do
1079     {
1080       tree name;
1081
1082       if (opcode == MULT_EXPR
1083           && stmt_is_power_of_op (stmt, op))
1084         {
1085           if (decrement_power (stmt) == 1)
1086             propagate_op_to_single_use (op, stmt, def);
1087           return;
1088         }
1089
1090       name = gimple_assign_rhs1 (stmt);
1091
1092       /* If this is the operation we look for and one of the operands
1093          is ours simply propagate the other operand into the stmts
1094          single use.  */
1095       if (gimple_assign_rhs_code (stmt) == opcode
1096           && (name == op
1097               || gimple_assign_rhs2 (stmt) == op))
1098         {
1099           if (name == op)
1100             name = gimple_assign_rhs2 (stmt);
1101           propagate_op_to_single_use (name, stmt, def);
1102           return;
1103         }
1104
1105       /* We might have a multiply of two __builtin_pow* calls, and
1106          the operand might be hiding in the rightmost one.  */
1107       if (opcode == MULT_EXPR
1108           && gimple_assign_rhs_code (stmt) == opcode
1109           && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1110           && has_single_use (gimple_assign_rhs2 (stmt)))
1111         {
1112           gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1113           if (stmt_is_power_of_op (stmt2, op))
1114             {
1115               if (decrement_power (stmt2) == 1)
1116                 propagate_op_to_single_use (op, stmt2, def);
1117               return;
1118             }
1119         }
1120
1121       /* Continue walking the chain.  */
1122       gcc_assert (name != op
1123                   && TREE_CODE (name) == SSA_NAME);
1124       stmt = SSA_NAME_DEF_STMT (name);
1125     }
1126   while (1);
1127 }
1128
1129 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1130    the result.  Places the statement after the definition of either
1131    OP1 or OP2.  Returns the new statement.  */
1132
1133 static gimple
1134 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1135 {
1136   gimple op1def = NULL, op2def = NULL;
1137   gimple_stmt_iterator gsi;
1138   tree op;
1139   gimple sum;
1140
1141   /* Create the addition statement.  */
1142   op = make_ssa_name (type, NULL);
1143   sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1144
1145   /* Find an insertion place and insert.  */
1146   if (TREE_CODE (op1) == SSA_NAME)
1147     op1def = SSA_NAME_DEF_STMT (op1);
1148   if (TREE_CODE (op2) == SSA_NAME)
1149     op2def = SSA_NAME_DEF_STMT (op2);
1150   if ((!op1def || gimple_nop_p (op1def))
1151       && (!op2def || gimple_nop_p (op2def)))
1152     {
1153       gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1154       gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1155     }
1156   else if ((!op1def || gimple_nop_p (op1def))
1157            || (op2def && !gimple_nop_p (op2def)
1158                && stmt_dominates_stmt_p (op1def, op2def)))
1159     {
1160       if (gimple_code (op2def) == GIMPLE_PHI)
1161         {
1162           gsi = gsi_after_labels (gimple_bb (op2def));
1163           gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1164         }
1165       else
1166         {
1167           if (!stmt_ends_bb_p (op2def))
1168             {
1169               gsi = gsi_for_stmt (op2def);
1170               gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1171             }
1172           else
1173             {
1174               edge e;
1175               edge_iterator ei;
1176
1177               FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1178                 if (e->flags & EDGE_FALLTHRU)
1179                   gsi_insert_on_edge_immediate (e, sum);
1180             }
1181         }
1182     }
1183   else
1184     {
1185       if (gimple_code (op1def) == GIMPLE_PHI)
1186         {
1187           gsi = gsi_after_labels (gimple_bb (op1def));
1188           gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1189         }
1190       else
1191         {
1192           if (!stmt_ends_bb_p (op1def))
1193             {
1194               gsi = gsi_for_stmt (op1def);
1195               gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1196             }
1197           else
1198             {
1199               edge e;
1200               edge_iterator ei;
1201
1202               FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1203                 if (e->flags & EDGE_FALLTHRU)
1204                   gsi_insert_on_edge_immediate (e, sum);
1205             }
1206         }
1207     }
1208   update_stmt (sum);
1209
1210   return sum;
1211 }
1212
1213 /* Perform un-distribution of divisions and multiplications.
1214    A * X + B * X is transformed into (A + B) * X and A / X + B / X
1215    to (A + B) / X for real X.
1216
1217    The algorithm is organized as follows.
1218
1219     - First we walk the addition chain *OPS looking for summands that
1220       are defined by a multiplication or a real division.  This results
1221       in the candidates bitmap with relevant indices into *OPS.
1222
1223     - Second we build the chains of multiplications or divisions for
1224       these candidates, counting the number of occurrences of (operand, code)
1225       pairs in all of the candidates chains.
1226
1227     - Third we sort the (operand, code) pairs by number of occurrence and
1228       process them starting with the pair with the most uses.
1229
1230       * For each such pair we walk the candidates again to build a
1231         second candidate bitmap noting all multiplication/division chains
1232         that have at least one occurrence of (operand, code).
1233
1234       * We build an alternate addition chain only covering these
1235         candidates with one (operand, code) operation removed from their
1236         multiplication/division chain.
1237
1238       * The first candidate gets replaced by the alternate addition chain
1239         multiplied/divided by the operand.
1240
1241       * All candidate chains get disabled for further processing and
1242         processing of (operand, code) pairs continues.
1243
1244   The alternate addition chains built are re-processed by the main
1245   reassociation algorithm which allows optimizing a * x * y + b * y * x
1246   to (a + b ) * x * y in one invocation of the reassociation pass.  */
1247
1248 static bool
1249 undistribute_ops_list (enum tree_code opcode,
1250                        vec<operand_entry_t> *ops, struct loop *loop)
1251 {
1252   unsigned int length = ops->length ();
1253   operand_entry_t oe1;
1254   unsigned i, j;
1255   sbitmap candidates, candidates2;
1256   unsigned nr_candidates, nr_candidates2;
1257   sbitmap_iterator sbi0;
1258   vec<operand_entry_t> *subops;
1259   htab_t ctable;
1260   bool changed = false;
1261   int next_oecount_id = 0;
1262
1263   if (length <= 1
1264       || opcode != PLUS_EXPR)
1265     return false;
1266
1267   /* Build a list of candidates to process.  */
1268   candidates = sbitmap_alloc (length);
1269   bitmap_clear (candidates);
1270   nr_candidates = 0;
1271   FOR_EACH_VEC_ELT (*ops, i, oe1)
1272     {
1273       enum tree_code dcode;
1274       gimple oe1def;
1275
1276       if (TREE_CODE (oe1->op) != SSA_NAME)
1277         continue;
1278       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1279       if (!is_gimple_assign (oe1def))
1280         continue;
1281       dcode = gimple_assign_rhs_code (oe1def);
1282       if ((dcode != MULT_EXPR
1283            && dcode != RDIV_EXPR)
1284           || !is_reassociable_op (oe1def, dcode, loop))
1285         continue;
1286
1287       bitmap_set_bit (candidates, i);
1288       nr_candidates++;
1289     }
1290
1291   if (nr_candidates < 2)
1292     {
1293       sbitmap_free (candidates);
1294       return false;
1295     }
1296
1297   if (dump_file && (dump_flags & TDF_DETAILS))
1298     {
1299       fprintf (dump_file, "searching for un-distribute opportunities ");
1300       print_generic_expr (dump_file,
1301         (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1302       fprintf (dump_file, " %d\n", nr_candidates);
1303     }
1304
1305   /* Build linearized sub-operand lists and the counting table.  */
1306   cvec.create (0);
1307   ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
1308   /* ??? Macro arguments cannot have multi-argument template types in
1309      them.  This typedef is needed to workaround that limitation.  */
1310   typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1311   subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1312   EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1313     {
1314       gimple oedef;
1315       enum tree_code oecode;
1316       unsigned j;
1317
1318       oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1319       oecode = gimple_assign_rhs_code (oedef);
1320       linearize_expr_tree (&subops[i], oedef,
1321                            associative_tree_code (oecode), false);
1322
1323       FOR_EACH_VEC_ELT (subops[i], j, oe1)
1324         {
1325           oecount c;
1326           void **slot;
1327           size_t idx;
1328           c.oecode = oecode;
1329           c.cnt = 1;
1330           c.id = next_oecount_id++;
1331           c.op = oe1->op;
1332           cvec.safe_push (c);
1333           idx = cvec.length () + 41;
1334           slot = htab_find_slot (ctable, (void *)idx, INSERT);
1335           if (!*slot)
1336             {
1337               *slot = (void *)idx;
1338             }
1339           else
1340             {
1341               cvec.pop ();
1342               cvec[(size_t)*slot - 42].cnt++;
1343             }
1344         }
1345     }
1346   htab_delete (ctable);
1347
1348   /* Sort the counting table.  */
1349   cvec.qsort (oecount_cmp);
1350
1351   if (dump_file && (dump_flags & TDF_DETAILS))
1352     {
1353       oecount *c;
1354       fprintf (dump_file, "Candidates:\n");
1355       FOR_EACH_VEC_ELT (cvec, j, c)
1356         {
1357           fprintf (dump_file, "  %u %s: ", c->cnt,
1358                    c->oecode == MULT_EXPR
1359                    ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1360           print_generic_expr (dump_file, c->op, 0);
1361           fprintf (dump_file, "\n");
1362         }
1363     }
1364
1365   /* Process the (operand, code) pairs in order of most occurence.  */
1366   candidates2 = sbitmap_alloc (length);
1367   while (!cvec.is_empty ())
1368     {
1369       oecount *c = &cvec.last ();
1370       if (c->cnt < 2)
1371         break;
1372
1373       /* Now collect the operands in the outer chain that contain
1374          the common operand in their inner chain.  */
1375       bitmap_clear (candidates2);
1376       nr_candidates2 = 0;
1377       EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1378         {
1379           gimple oedef;
1380           enum tree_code oecode;
1381           unsigned j;
1382           tree op = (*ops)[i]->op;
1383
1384           /* If we undistributed in this chain already this may be
1385              a constant.  */
1386           if (TREE_CODE (op) != SSA_NAME)
1387             continue;
1388
1389           oedef = SSA_NAME_DEF_STMT (op);
1390           oecode = gimple_assign_rhs_code (oedef);
1391           if (oecode != c->oecode)
1392             continue;
1393
1394           FOR_EACH_VEC_ELT (subops[i], j, oe1)
1395             {
1396               if (oe1->op == c->op)
1397                 {
1398                   bitmap_set_bit (candidates2, i);
1399                   ++nr_candidates2;
1400                   break;
1401                 }
1402             }
1403         }
1404
1405       if (nr_candidates2 >= 2)
1406         {
1407           operand_entry_t oe1, oe2;
1408           gimple prod;
1409           int first = bitmap_first_set_bit (candidates2);
1410
1411           /* Build the new addition chain.  */
1412           oe1 = (*ops)[first];
1413           if (dump_file && (dump_flags & TDF_DETAILS))
1414             {
1415               fprintf (dump_file, "Building (");
1416               print_generic_expr (dump_file, oe1->op, 0);
1417             }
1418           zero_one_operation (&oe1->op, c->oecode, c->op);
1419           EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1420             {
1421               gimple sum;
1422               oe2 = (*ops)[i];
1423               if (dump_file && (dump_flags & TDF_DETAILS))
1424                 {
1425                   fprintf (dump_file, " + ");
1426                   print_generic_expr (dump_file, oe2->op, 0);
1427                 }
1428               zero_one_operation (&oe2->op, c->oecode, c->op);
1429               sum = build_and_add_sum (TREE_TYPE (oe1->op),
1430                                        oe1->op, oe2->op, opcode);
1431               oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1432               oe2->rank = 0;
1433               oe1->op = gimple_get_lhs (sum);
1434             }
1435
1436           /* Apply the multiplication/division.  */
1437           prod = build_and_add_sum (TREE_TYPE (oe1->op),
1438                                     oe1->op, c->op, c->oecode);
1439           if (dump_file && (dump_flags & TDF_DETAILS))
1440             {
1441               fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1442               print_generic_expr (dump_file, c->op, 0);
1443               fprintf (dump_file, "\n");
1444             }
1445
1446           /* Record it in the addition chain and disable further
1447              undistribution with this op.  */
1448           oe1->op = gimple_assign_lhs (prod);
1449           oe1->rank = get_rank (oe1->op);
1450           subops[first].release ();
1451
1452           changed = true;
1453         }
1454
1455       cvec.pop ();
1456     }
1457
1458   for (i = 0; i < ops->length (); ++i)
1459     subops[i].release ();
1460   free (subops);
1461   cvec.release ();
1462   sbitmap_free (candidates);
1463   sbitmap_free (candidates2);
1464
1465   return changed;
1466 }
1467
1468 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1469    expression, examine the other OPS to see if any of them are comparisons
1470    of the same values, which we may be able to combine or eliminate.
1471    For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
1472
1473 static bool
1474 eliminate_redundant_comparison (enum tree_code opcode,
1475                                 vec<operand_entry_t> *ops,
1476                                 unsigned int currindex,
1477                                 operand_entry_t curr)
1478 {
1479   tree op1, op2;
1480   enum tree_code lcode, rcode;
1481   gimple def1, def2;
1482   int i;
1483   operand_entry_t oe;
1484
1485   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1486     return false;
1487
1488   /* Check that CURR is a comparison.  */
1489   if (TREE_CODE (curr->op) != SSA_NAME)
1490     return false;
1491   def1 = SSA_NAME_DEF_STMT (curr->op);
1492   if (!is_gimple_assign (def1))
1493     return false;
1494   lcode = gimple_assign_rhs_code (def1);
1495   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1496     return false;
1497   op1 = gimple_assign_rhs1 (def1);
1498   op2 = gimple_assign_rhs2 (def1);
1499
1500   /* Now look for a similar comparison in the remaining OPS.  */
1501   for (i = currindex + 1; ops->iterate (i, &oe); i++)
1502     {
1503       tree t;
1504
1505       if (TREE_CODE (oe->op) != SSA_NAME)
1506         continue;
1507       def2 = SSA_NAME_DEF_STMT (oe->op);
1508       if (!is_gimple_assign (def2))
1509         continue;
1510       rcode = gimple_assign_rhs_code (def2);
1511       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1512         continue;
1513
1514       /* If we got here, we have a match.  See if we can combine the
1515          two comparisons.  */
1516       if (opcode == BIT_IOR_EXPR)
1517         t = maybe_fold_or_comparisons (lcode, op1, op2,
1518                                        rcode, gimple_assign_rhs1 (def2),
1519                                        gimple_assign_rhs2 (def2));
1520       else
1521         t = maybe_fold_and_comparisons (lcode, op1, op2,
1522                                         rcode, gimple_assign_rhs1 (def2),
1523                                         gimple_assign_rhs2 (def2));
1524       if (!t)
1525         continue;
1526
1527       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1528          always give us a boolean_type_node value back.  If the original
1529          BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1530          we need to convert.  */
1531       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1532         t = fold_convert (TREE_TYPE (curr->op), t);
1533
1534       if (TREE_CODE (t) != INTEGER_CST
1535           && !operand_equal_p (t, curr->op, 0))
1536         {
1537           enum tree_code subcode;
1538           tree newop1, newop2;
1539           if (!COMPARISON_CLASS_P (t))
1540             continue;
1541           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1542           STRIP_USELESS_TYPE_CONVERSION (newop1);
1543           STRIP_USELESS_TYPE_CONVERSION (newop2);
1544           if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1545             continue;
1546         }
1547
1548       if (dump_file && (dump_flags & TDF_DETAILS))
1549         {
1550           fprintf (dump_file, "Equivalence: ");
1551           print_generic_expr (dump_file, curr->op, 0);
1552           fprintf (dump_file, " %s ", op_symbol_code (opcode));
1553           print_generic_expr (dump_file, oe->op, 0);
1554           fprintf (dump_file, " -> ");
1555           print_generic_expr (dump_file, t, 0);
1556           fprintf (dump_file, "\n");
1557         }
1558
1559       /* Now we can delete oe, as it has been subsumed by the new combined
1560          expression t.  */
1561       ops->ordered_remove (i);
1562       reassociate_stats.ops_eliminated ++;
1563
1564       /* If t is the same as curr->op, we're done.  Otherwise we must
1565          replace curr->op with t.  Special case is if we got a constant
1566          back, in which case we add it to the end instead of in place of
1567          the current entry.  */
1568       if (TREE_CODE (t) == INTEGER_CST)
1569         {
1570           ops->ordered_remove (currindex);
1571           add_to_ops_vec (ops, t);
1572         }
1573       else if (!operand_equal_p (t, curr->op, 0))
1574         {
1575           gimple sum;
1576           enum tree_code subcode;
1577           tree newop1;
1578           tree newop2;
1579           gcc_assert (COMPARISON_CLASS_P (t));
1580           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1581           STRIP_USELESS_TYPE_CONVERSION (newop1);
1582           STRIP_USELESS_TYPE_CONVERSION (newop2);
1583           gcc_checking_assert (is_gimple_val (newop1)
1584                                && is_gimple_val (newop2));
1585           sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1586           curr->op = gimple_get_lhs (sum);
1587         }
1588       return true;
1589     }
1590
1591   return false;
1592 }
1593
1594 /* Perform various identities and other optimizations on the list of
1595    operand entries, stored in OPS.  The tree code for the binary
1596    operation between all the operands is OPCODE.  */
1597
1598 static void
1599 optimize_ops_list (enum tree_code opcode,
1600                    vec<operand_entry_t> *ops)
1601 {
1602   unsigned int length = ops->length ();
1603   unsigned int i;
1604   operand_entry_t oe;
1605   operand_entry_t oelast = NULL;
1606   bool iterate = false;
1607
1608   if (length == 1)
1609     return;
1610
1611   oelast = ops->last ();
1612
1613   /* If the last two are constants, pop the constants off, merge them
1614      and try the next two.  */
1615   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1616     {
1617       operand_entry_t oelm1 = (*ops)[length - 2];
1618
1619       if (oelm1->rank == 0
1620           && is_gimple_min_invariant (oelm1->op)
1621           && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1622                                        TREE_TYPE (oelast->op)))
1623         {
1624           tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1625                                      oelm1->op, oelast->op);
1626
1627           if (folded && is_gimple_min_invariant (folded))
1628             {
1629               if (dump_file && (dump_flags & TDF_DETAILS))
1630                 fprintf (dump_file, "Merging constants\n");
1631
1632               ops->pop ();
1633               ops->pop ();
1634
1635               add_to_ops_vec (ops, folded);
1636               reassociate_stats.constants_eliminated++;
1637
1638               optimize_ops_list (opcode, ops);
1639               return;
1640             }
1641         }
1642     }
1643
1644   eliminate_using_constants (opcode, ops);
1645   oelast = NULL;
1646
1647   for (i = 0; ops->iterate (i, &oe);)
1648     {
1649       bool done = false;
1650
1651       if (eliminate_not_pairs (opcode, ops, i, oe))
1652         return;
1653       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1654           || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1655           || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1656         {
1657           if (done)
1658             return;
1659           iterate = true;
1660           oelast = NULL;
1661           continue;
1662         }
1663       oelast = oe;
1664       i++;
1665     }
1666
1667   length = ops->length ();
1668   oelast = ops->last ();
1669
1670   if (iterate)
1671     optimize_ops_list (opcode, ops);
1672 }
1673
1674 /* The following functions are subroutines to optimize_range_tests and allow
1675    it to try to change a logical combination of comparisons into a range
1676    test.
1677
1678    For example, both
1679         X == 2 || X == 5 || X == 3 || X == 4
1680    and
1681         X >= 2 && X <= 5
1682    are converted to
1683         (unsigned) (X - 2) <= 3
1684
1685    For more information see comments above fold_test_range in fold-const.c,
1686    this implementation is for GIMPLE.  */
1687
1688 struct range_entry
1689 {
1690   tree exp;
1691   tree low;
1692   tree high;
1693   bool in_p;
1694   bool strict_overflow_p;
1695   unsigned int idx, next;
1696 };
1697
1698 /* This is similar to make_range in fold-const.c, but on top of
1699    GIMPLE instead of trees.  If EXP is non-NULL, it should be
1700    an SSA_NAME and STMT argument is ignored, otherwise STMT
1701    argument should be a GIMPLE_COND.  */
1702
1703 static void
1704 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1705 {
1706   int in_p;
1707   tree low, high;
1708   bool is_bool, strict_overflow_p;
1709
1710   r->exp = NULL_TREE;
1711   r->in_p = false;
1712   r->strict_overflow_p = false;
1713   r->low = NULL_TREE;
1714   r->high = NULL_TREE;
1715   if (exp != NULL_TREE
1716       && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1717     return;
1718
1719   /* Start with simply saying "EXP != 0" and then look at the code of EXP
1720      and see if we can refine the range.  Some of the cases below may not
1721      happen, but it doesn't seem worth worrying about this.  We "continue"
1722      the outer loop when we've changed something; otherwise we "break"
1723      the switch, which will "break" the while.  */
1724   low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1725   high = low;
1726   in_p = 0;
1727   strict_overflow_p = false;
1728   is_bool = false;
1729   if (exp == NULL_TREE)
1730     is_bool = true;
1731   else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1732     {
1733       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1734         is_bool = true;
1735       else
1736         return;
1737     }
1738   else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1739     is_bool = true;
1740
1741   while (1)
1742     {
1743       enum tree_code code;
1744       tree arg0, arg1, exp_type;
1745       tree nexp;
1746       location_t loc;
1747
1748       if (exp != NULL_TREE)
1749         {
1750           if (TREE_CODE (exp) != SSA_NAME)
1751             break;
1752
1753           stmt = SSA_NAME_DEF_STMT (exp);
1754           if (!is_gimple_assign (stmt))
1755             break;
1756
1757           code = gimple_assign_rhs_code (stmt);
1758           arg0 = gimple_assign_rhs1 (stmt);
1759           arg1 = gimple_assign_rhs2 (stmt);
1760           exp_type = TREE_TYPE (exp);
1761         }
1762       else
1763         {
1764           code = gimple_cond_code (stmt);
1765           arg0 = gimple_cond_lhs (stmt);
1766           arg1 = gimple_cond_rhs (stmt);
1767           exp_type = boolean_type_node;
1768         }
1769
1770       if (TREE_CODE (arg0) != SSA_NAME)
1771         break;
1772       loc = gimple_location (stmt);
1773       switch (code)
1774         {
1775         case BIT_NOT_EXPR:
1776           if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1777               /* Ensure the range is either +[-,0], +[0,0],
1778                  -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1779                  -[1,1].  If it is e.g. +[-,-] or -[-,-]
1780                  or similar expression of unconditional true or
1781                  false, it should not be negated.  */
1782               && ((high && integer_zerop (high))
1783                   || (low && integer_onep (low))))
1784             {
1785               in_p = !in_p;
1786               exp = arg0;
1787               continue;
1788             }
1789           break;
1790         case SSA_NAME:
1791           exp = arg0;
1792           continue;
1793         CASE_CONVERT:
1794           if (is_bool)
1795             goto do_default;
1796           if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1797             {
1798               if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1799                 is_bool = true;
1800               else
1801                 return;
1802             }
1803           else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1804             is_bool = true;
1805           goto do_default;
1806         case EQ_EXPR:
1807         case NE_EXPR:
1808         case LT_EXPR:
1809         case LE_EXPR:
1810         case GE_EXPR:
1811         case GT_EXPR:
1812           is_bool = true;
1813           /* FALLTHRU */
1814         default:
1815           if (!is_bool)
1816             return;
1817         do_default:
1818           nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1819                                   &low, &high, &in_p,
1820                                   &strict_overflow_p);
1821           if (nexp != NULL_TREE)
1822             {
1823               exp = nexp;
1824               gcc_assert (TREE_CODE (exp) == SSA_NAME);
1825               continue;
1826             }
1827           break;
1828         }
1829       break;
1830     }
1831   if (is_bool)
1832     {
1833       r->exp = exp;
1834       r->in_p = in_p;
1835       r->low = low;
1836       r->high = high;
1837       r->strict_overflow_p = strict_overflow_p;
1838     }
1839 }
1840
1841 /* Comparison function for qsort.  Sort entries
1842    without SSA_NAME exp first, then with SSA_NAMEs sorted
1843    by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1844    by increasing ->low and if ->low is the same, by increasing
1845    ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
1846    maximum.  */
1847
1848 static int
1849 range_entry_cmp (const void *a, const void *b)
1850 {
1851   const struct range_entry *p = (const struct range_entry *) a;
1852   const struct range_entry *q = (const struct range_entry *) b;
1853
1854   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1855     {
1856       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1857         {
1858           /* Group range_entries for the same SSA_NAME together.  */
1859           if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1860             return -1;
1861           else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1862             return 1;
1863           /* If ->low is different, NULL low goes first, then by
1864              ascending low.  */
1865           if (p->low != NULL_TREE)
1866             {
1867               if (q->low != NULL_TREE)
1868                 {
1869                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
1870                                           p->low, q->low);
1871                   if (tem && integer_onep (tem))
1872                     return -1;
1873                   tem = fold_binary (GT_EXPR, boolean_type_node,
1874                                      p->low, q->low);
1875                   if (tem && integer_onep (tem))
1876                     return 1;
1877                 }
1878               else
1879                 return 1;
1880             }
1881           else if (q->low != NULL_TREE)
1882             return -1;
1883           /* If ->high is different, NULL high goes last, before that by
1884              ascending high.  */
1885           if (p->high != NULL_TREE)
1886             {
1887               if (q->high != NULL_TREE)
1888                 {
1889                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
1890                                           p->high, q->high);
1891                   if (tem && integer_onep (tem))
1892                     return -1;
1893                   tem = fold_binary (GT_EXPR, boolean_type_node,
1894                                      p->high, q->high);
1895                   if (tem && integer_onep (tem))
1896                     return 1;
1897                 }
1898               else
1899                 return -1;
1900             }
1901           else if (q->high != NULL_TREE)
1902             return 1;
1903           /* If both ranges are the same, sort below by ascending idx.  */
1904         }
1905       else
1906         return 1;
1907     }
1908   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1909     return -1;
1910
1911   if (p->idx < q->idx)
1912     return -1;
1913   else
1914     {
1915       gcc_checking_assert (p->idx > q->idx);
1916       return 1;
1917     }
1918 }
1919
1920 /* Helper routine of optimize_range_test.
1921    [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1922    RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1923    OPCODE and OPS are arguments of optimize_range_tests.  Return
1924    true if the range merge has been successful.
1925    If OPCODE is ERROR_MARK, this is called from within
1926    maybe_optimize_range_tests and is performing inter-bb range optimization.
1927    Changes should be then performed right away, and whether an op is
1928    BIT_AND_EXPR or BIT_IOR_EXPR is found in oe->rank.  */
1929
1930 static bool
1931 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1932                    unsigned int count, enum tree_code opcode,
1933                    vec<operand_entry_t> *ops, tree exp, bool in_p,
1934                    tree low, tree high, bool strict_overflow_p)
1935 {
1936   operand_entry_t oe = (*ops)[range->idx];
1937   tree op = oe->op;
1938   gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id));
1939   location_t loc = gimple_location (stmt);
1940   tree optype = op ? TREE_TYPE (op) : boolean_type_node;
1941   tree tem = build_range_check (loc, optype, exp, in_p, low, high);
1942   enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1943   gimple_stmt_iterator gsi;
1944
1945   if (tem == NULL_TREE)
1946     return false;
1947
1948   if (strict_overflow_p && issue_strict_overflow_warning (wc))
1949     warning_at (loc, OPT_Wstrict_overflow,
1950                 "assuming signed overflow does not occur "
1951                 "when simplifying range test");
1952
1953   if (dump_file && (dump_flags & TDF_DETAILS))
1954     {
1955       struct range_entry *r;
1956       fprintf (dump_file, "Optimizing range tests ");
1957       print_generic_expr (dump_file, range->exp, 0);
1958       fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1959       print_generic_expr (dump_file, range->low, 0);
1960       fprintf (dump_file, ", ");
1961       print_generic_expr (dump_file, range->high, 0);
1962       fprintf (dump_file, "]");
1963       for (r = otherrange; r < otherrange + count; r++)
1964         {
1965           fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
1966           print_generic_expr (dump_file, r->low, 0);
1967           fprintf (dump_file, ", ");
1968           print_generic_expr (dump_file, r->high, 0);
1969           fprintf (dump_file, "]");
1970         }
1971       fprintf (dump_file, "\n into ");
1972       print_generic_expr (dump_file, tem, 0);
1973       fprintf (dump_file, "\n");
1974     }
1975
1976   if (opcode == BIT_IOR_EXPR
1977       || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
1978     tem = invert_truthvalue_loc (loc, tem);
1979
1980   tem = fold_convert_loc (loc, optype, tem);
1981   gsi = gsi_for_stmt (stmt);
1982   /* In rare cases range->exp can be equal to lhs of stmt.
1983      In that case we have to insert after the stmt rather then before
1984      it.  */
1985   if (op == range->exp)
1986     tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
1987                                     GSI_SAME_STMT);
1988   else
1989     tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
1990                                     GSI_SAME_STMT);
1991
1992   /* If doing inter-bb range test optimization, update the
1993      stmts immediately.  Start with changing the first range test
1994      immediate use to the new value (TEM), or, if the first range
1995      test is a GIMPLE_COND stmt, change that condition.  */
1996   if (opcode == ERROR_MARK)
1997     {
1998       if (op)
1999         {
2000           imm_use_iterator iter;
2001           use_operand_p use_p;
2002           gimple use_stmt;
2003
2004           FOR_EACH_IMM_USE_STMT (use_stmt, iter, op)
2005             {
2006               if (is_gimple_debug (use_stmt))
2007                 continue;
2008               FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2009                 SET_USE (use_p, tem);
2010               update_stmt (use_stmt);
2011             }
2012         }
2013       else
2014         {
2015           gimple_cond_set_code (stmt, NE_EXPR);
2016           gimple_cond_set_lhs (stmt, tem);
2017           gimple_cond_set_rhs (stmt, boolean_false_node);
2018           update_stmt (stmt);
2019         }
2020     }
2021   oe->op = tem;
2022   range->exp = exp;
2023   range->low = low;
2024   range->high = high;
2025   range->in_p = in_p;
2026   range->strict_overflow_p = false;
2027
2028   for (range = otherrange; range < otherrange + count; range++)
2029     {
2030       oe = (*ops)[range->idx];
2031       /* Now change all the other range test immediate uses, so that
2032          those tests will be optimized away.  */
2033       if (opcode == ERROR_MARK)
2034         {
2035           if (oe->op)
2036             {
2037               imm_use_iterator iter;
2038               use_operand_p use_p;
2039               gimple use_stmt;
2040
2041               FOR_EACH_IMM_USE_STMT (use_stmt, iter, oe->op)
2042                 {
2043                   if (is_gimple_debug (use_stmt))
2044                     continue;
2045                   /* If imm use of _8 is a statement like _7 = _8 | _9;,
2046                      adjust it into _7 = _9;.  */
2047                   if (is_gimple_assign (use_stmt)
2048                       && gimple_assign_rhs_code (use_stmt) == oe->rank)
2049                     {
2050                       tree expr = NULL_TREE;
2051                       if (oe->op == gimple_assign_rhs1 (use_stmt))
2052                         expr = gimple_assign_rhs2 (use_stmt);
2053                       else if (oe->op == gimple_assign_rhs2 (use_stmt))
2054                         expr = gimple_assign_rhs1 (use_stmt);
2055                       if (expr
2056                           && expr != oe->op
2057                           && TREE_CODE (expr) == SSA_NAME)
2058                         {
2059                           gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt);
2060                           gimple_assign_set_rhs_with_ops (&gsi2, SSA_NAME,
2061                                                           expr, NULL_TREE);
2062                           update_stmt (use_stmt);
2063                           continue;
2064                         }
2065                     }
2066                   /* If imm use of _8 is a statement like _7 = (int) _8;,
2067                      adjust it into _7 = 0; or _7 = 1;.  */
2068                   if (gimple_assign_cast_p (use_stmt)
2069                       && oe->op == gimple_assign_rhs1 (use_stmt))
2070                     {
2071                       tree lhs = gimple_assign_lhs (use_stmt);
2072                       if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
2073                         {
2074                           gimple_stmt_iterator gsi2
2075                             = gsi_for_stmt (use_stmt);
2076                           tree expr = build_int_cst (TREE_TYPE (lhs),
2077                                                      oe->rank == BIT_IOR_EXPR
2078                                                      ? 0 : 1);
2079                           gimple_assign_set_rhs_with_ops (&gsi2,
2080                                                           INTEGER_CST,
2081                                                           expr, NULL_TREE);
2082                           update_stmt (use_stmt);
2083                           continue;
2084                         }
2085                     }
2086                   /* Otherwise replace the use with 0 or 1.  */
2087                   FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2088                     SET_USE (use_p,
2089                              build_int_cst (TREE_TYPE (oe->op),
2090                                             oe->rank == BIT_IOR_EXPR
2091                                             ? 0 : 1));
2092                   update_stmt (use_stmt);
2093                 }
2094             }
2095           else
2096             {
2097               /* If range test was a GIMPLE_COND, simply change it
2098                  into an always false or always true condition.  */
2099               stmt = last_stmt (BASIC_BLOCK (oe->id));
2100               if (oe->rank == BIT_IOR_EXPR)
2101                 gimple_cond_make_false (stmt);
2102               else
2103                 gimple_cond_make_true (stmt);
2104               update_stmt (stmt);
2105             }
2106         }
2107       oe->op = error_mark_node;
2108       range->exp = NULL_TREE;
2109     }
2110   return true;
2111 }
2112
2113 /* Optimize range tests, similarly how fold_range_test optimizes
2114    it on trees.  The tree code for the binary
2115    operation between all the operands is OPCODE.
2116    If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2117    maybe_optimize_range_tests for inter-bb range optimization.
2118    In that case if oe->op is NULL, oe->id is bb->index whose
2119    GIMPLE_COND is && or ||ed into the test, and oe->rank says
2120    the actual opcode.  */
2121
2122 static void
2123 optimize_range_tests (enum tree_code opcode,
2124                       vec<operand_entry_t> *ops)
2125 {
2126   unsigned int length = ops->length (), i, j, first;
2127   operand_entry_t oe;
2128   struct range_entry *ranges;
2129   bool any_changes = false;
2130
2131   if (length == 1)
2132     return;
2133
2134   ranges = XNEWVEC (struct range_entry, length);
2135   for (i = 0; i < length; i++)
2136     {
2137       oe = (*ops)[i];
2138       ranges[i].idx = i;
2139       init_range_entry (ranges + i, oe->op,
2140                         oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id)));
2141       /* For | invert it now, we will invert it again before emitting
2142          the optimized expression.  */
2143       if (opcode == BIT_IOR_EXPR
2144           || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2145         ranges[i].in_p = !ranges[i].in_p;
2146     }
2147
2148   qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2149   for (i = 0; i < length; i++)
2150     if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2151       break;
2152
2153   /* Try to merge ranges.  */
2154   for (first = i; i < length; i++)
2155     {
2156       tree low = ranges[i].low;
2157       tree high = ranges[i].high;
2158       int in_p = ranges[i].in_p;
2159       bool strict_overflow_p = ranges[i].strict_overflow_p;
2160       int update_fail_count = 0;
2161
2162       for (j = i + 1; j < length; j++)
2163         {
2164           if (ranges[i].exp != ranges[j].exp)
2165             break;
2166           if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2167                              ranges[j].in_p, ranges[j].low, ranges[j].high))
2168             break;
2169           strict_overflow_p |= ranges[j].strict_overflow_p;
2170         }
2171
2172       if (j == i + 1)
2173         continue;
2174
2175       if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2176                              ops, ranges[i].exp, in_p, low, high,
2177                              strict_overflow_p))
2178         {
2179           i = j - 1;
2180           any_changes = true;
2181         }
2182       /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2183          while update_range_test would fail.  */
2184       else if (update_fail_count == 64)
2185         i = j - 1;
2186       else
2187         ++update_fail_count;
2188     }
2189
2190   /* Optimize X == CST1 || X == CST2
2191      if popcount (CST1 ^ CST2) == 1 into
2192      (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2193      Similarly for ranges.  E.g.
2194      X != 2 && X != 3 && X != 10 && X != 11
2195      will be transformed by the above loop into
2196      (X - 2U) <= 1U && (X - 10U) <= 1U
2197      and this loop can transform that into
2198      ((X & ~8) - 2U) <= 1U.  */
2199   for (i = first; i < length; i++)
2200     {
2201       tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
2202
2203       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2204         continue;
2205       type = TREE_TYPE (ranges[i].exp);
2206       if (!INTEGRAL_TYPE_P (type))
2207         continue;
2208       lowi = ranges[i].low;
2209       if (lowi == NULL_TREE)
2210         lowi = TYPE_MIN_VALUE (type);
2211       highi = ranges[i].high;
2212       if (highi == NULL_TREE)
2213         continue;
2214       for (j = i + 1; j < length && j < i + 64; j++)
2215         {
2216           if (ranges[j].exp == NULL_TREE)
2217             continue;
2218           if (ranges[i].exp != ranges[j].exp)
2219             break;
2220           if (ranges[j].in_p)
2221             continue;
2222           lowj = ranges[j].low;
2223           if (lowj == NULL_TREE)
2224             continue;
2225           highj = ranges[j].high;
2226           if (highj == NULL_TREE)
2227             highj = TYPE_MAX_VALUE (type);
2228           tem = fold_binary (GT_EXPR, boolean_type_node,
2229                              lowj, highi);
2230           if (tem == NULL_TREE || !integer_onep (tem))
2231             continue;
2232           lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2233           if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2234             continue;
2235           gcc_checking_assert (!integer_zerop (lowxor));
2236           tem = fold_binary (MINUS_EXPR, type, lowxor,
2237                              build_int_cst (type, 1));
2238           if (tem == NULL_TREE)
2239             continue;
2240           tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
2241           if (tem == NULL_TREE || !integer_zerop (tem))
2242             continue;
2243           highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2244           if (!tree_int_cst_equal (lowxor, highxor))
2245             continue;
2246           tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2247           exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
2248           lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2249           highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2250           if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
2251                                  ranges[i].in_p, lowj, highj,
2252                                  ranges[i].strict_overflow_p
2253                                  || ranges[j].strict_overflow_p))
2254             {
2255               any_changes = true;
2256               break;
2257             }
2258         }
2259     }
2260
2261   if (any_changes && opcode != ERROR_MARK)
2262     {
2263       j = 0;
2264       FOR_EACH_VEC_ELT (*ops, i, oe)
2265         {
2266           if (oe->op == error_mark_node)
2267             continue;
2268           else if (i != j)
2269             (*ops)[j] = oe;
2270           j++;
2271         }
2272       ops->truncate (j);
2273     }
2274
2275   XDELETEVEC (ranges);
2276 }
2277
2278 /* Return true if STMT is a cast like:
2279    <bb N>:
2280    ...
2281    _123 = (int) _234;
2282
2283    <bb M>:
2284    # _345 = PHI <_123(N), 1(...), 1(...)>
2285    where _234 has bool type, _123 has single use and
2286    bb N has a single successor M.  This is commonly used in
2287    the last block of a range test.  */
2288
2289 static bool
2290 final_range_test_p (gimple stmt)
2291 {
2292   basic_block bb, rhs_bb;
2293   edge e;
2294   tree lhs, rhs;
2295   use_operand_p use_p;
2296   gimple use_stmt;
2297
2298   if (!gimple_assign_cast_p (stmt))
2299     return false;
2300   bb = gimple_bb (stmt);
2301   if (!single_succ_p (bb))
2302     return false;
2303   e = single_succ_edge (bb);
2304   if (e->flags & EDGE_COMPLEX)
2305     return false;
2306
2307   lhs = gimple_assign_lhs (stmt);
2308   rhs = gimple_assign_rhs1 (stmt);
2309   if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2310       || TREE_CODE (rhs) != SSA_NAME
2311       || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2312     return false;
2313
2314   /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
2315   if (!single_imm_use (lhs, &use_p, &use_stmt))
2316     return false;
2317
2318   if (gimple_code (use_stmt) != GIMPLE_PHI
2319       || gimple_bb (use_stmt) != e->dest)
2320     return false;
2321
2322   /* And that the rhs is defined in the same loop.  */
2323   rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2324   if (rhs_bb == NULL
2325       || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2326     return false;
2327
2328   return true;
2329 }
2330
2331 /* Return true if BB is suitable basic block for inter-bb range test
2332    optimization.  If BACKWARD is true, BB should be the only predecessor
2333    of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2334    or compared with to find a common basic block to which all conditions
2335    branch to if true resp. false.  If BACKWARD is false, TEST_BB should
2336    be the only predecessor of BB.  */
2337
2338 static bool
2339 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2340                   bool backward)
2341 {
2342   edge_iterator ei, ei2;
2343   edge e, e2;
2344   gimple stmt;
2345   gimple_stmt_iterator gsi;
2346   bool other_edge_seen = false;
2347   bool is_cond;
2348
2349   if (test_bb == bb)
2350     return false;
2351   /* Check last stmt first.  */
2352   stmt = last_stmt (bb);
2353   if (stmt == NULL
2354       || (gimple_code (stmt) != GIMPLE_COND
2355           && (backward || !final_range_test_p (stmt)))
2356       || gimple_visited_p (stmt)
2357       || stmt_could_throw_p (stmt)
2358       || *other_bb == bb)
2359     return false;
2360   is_cond = gimple_code (stmt) == GIMPLE_COND;
2361   if (is_cond)
2362     {
2363       /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2364          goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2365          to *OTHER_BB (if not set yet, try to find it out).  */
2366       if (EDGE_COUNT (bb->succs) != 2)
2367         return false;
2368       FOR_EACH_EDGE (e, ei, bb->succs)
2369         {
2370           if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2371             return false;
2372           if (e->dest == test_bb)
2373             {
2374               if (backward)
2375                 continue;
2376               else
2377                 return false;
2378             }
2379           if (e->dest == bb)
2380             return false;
2381           if (*other_bb == NULL)
2382             {
2383               FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2384                 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2385                   return false;
2386                 else if (e->dest == e2->dest)
2387                   *other_bb = e->dest;
2388               if (*other_bb == NULL)
2389                 return false;
2390             }
2391           if (e->dest == *other_bb)
2392             other_edge_seen = true;
2393           else if (backward)
2394             return false;
2395         }
2396       if (*other_bb == NULL || !other_edge_seen)
2397         return false;
2398     }
2399   else if (single_succ (bb) != *other_bb)
2400     return false;
2401
2402   /* Now check all PHIs of *OTHER_BB.  */
2403   e = find_edge (bb, *other_bb);
2404   e2 = find_edge (test_bb, *other_bb);
2405   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2406     {
2407       gimple phi = gsi_stmt (gsi);
2408       /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2409          corresponding to BB and TEST_BB predecessor must be the same.  */
2410       if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2411                             gimple_phi_arg_def (phi, e2->dest_idx), 0))
2412         {
2413           /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2414              one of the PHIs should have the lhs of the last stmt in
2415              that block as PHI arg and that PHI should have 0 or 1
2416              corresponding to it in all other range test basic blocks
2417              considered.  */
2418           if (!is_cond)
2419             {
2420               if (gimple_phi_arg_def (phi, e->dest_idx)
2421                   == gimple_assign_lhs (stmt)
2422                   && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2423                       || integer_onep (gimple_phi_arg_def (phi,
2424                                                            e2->dest_idx))))
2425                 continue;
2426             }
2427           else
2428             {
2429               gimple test_last = last_stmt (test_bb);
2430               if (gimple_code (test_last) != GIMPLE_COND
2431                   && gimple_phi_arg_def (phi, e2->dest_idx)
2432                      == gimple_assign_lhs (test_last)
2433                   && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2434                       || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2435                 continue;
2436             }
2437
2438           return false;
2439         }
2440     }
2441   return true;
2442 }
2443
2444 /* Return true if BB doesn't have side-effects that would disallow
2445    range test optimization, all SSA_NAMEs set in the bb are consumed
2446    in the bb and there are no PHIs.  */
2447
2448 static bool
2449 no_side_effect_bb (basic_block bb)
2450 {
2451   gimple_stmt_iterator gsi;
2452   gimple last;
2453
2454   if (!gimple_seq_empty_p (phi_nodes (bb)))
2455     return false;
2456   last = last_stmt (bb);
2457   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2458     {
2459       gimple stmt = gsi_stmt (gsi);
2460       tree lhs;
2461       imm_use_iterator imm_iter;
2462       use_operand_p use_p;
2463
2464       if (is_gimple_debug (stmt))
2465         continue;
2466       if (gimple_has_side_effects (stmt))
2467         return false;
2468       if (stmt == last)
2469         return true;
2470       if (!is_gimple_assign (stmt))
2471         return false;
2472       lhs = gimple_assign_lhs (stmt);
2473       if (TREE_CODE (lhs) != SSA_NAME)
2474         return false;
2475       if (gimple_assign_rhs_could_trap_p (stmt))
2476         return false;
2477       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2478         {
2479           gimple use_stmt = USE_STMT (use_p);
2480           if (is_gimple_debug (use_stmt))
2481             continue;
2482           if (gimple_bb (use_stmt) != bb)
2483             return false;
2484         }
2485     }
2486   return false;
2487 }
2488
2489 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2490    return true and fill in *OPS recursively.  */
2491
2492 static bool
2493 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2494          struct loop *loop)
2495 {
2496   gimple stmt = SSA_NAME_DEF_STMT (var);
2497   tree rhs[2];
2498   int i;
2499
2500   if (!is_reassociable_op (stmt, code, loop))
2501     return false;
2502
2503   rhs[0] = gimple_assign_rhs1 (stmt);
2504   rhs[1] = gimple_assign_rhs2 (stmt);
2505   gimple_set_visited (stmt, true);
2506   for (i = 0; i < 2; i++)
2507     if (TREE_CODE (rhs[i]) == SSA_NAME
2508         && !get_ops (rhs[i], code, ops, loop)
2509         && has_single_use (rhs[i]))
2510       {
2511         operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2512
2513         oe->op = rhs[i];
2514         oe->rank = code;
2515         oe->id = 0;
2516         oe->count = 1;
2517         ops->safe_push (oe);
2518       }
2519   return true;
2520 }
2521
2522 /* Inter-bb range test optimization.  */
2523
2524 static void
2525 maybe_optimize_range_tests (gimple stmt)
2526 {
2527   basic_block first_bb = gimple_bb (stmt);
2528   basic_block last_bb = first_bb;
2529   basic_block other_bb = NULL;
2530   basic_block bb;
2531   edge_iterator ei;
2532   edge e;
2533   vec<operand_entry_t> ops = vNULL;
2534
2535   /* Consider only basic blocks that end with GIMPLE_COND or
2536      a cast statement satisfying final_range_test_p.  All
2537      but the last bb in the first_bb .. last_bb range
2538      should end with GIMPLE_COND.  */
2539   if (gimple_code (stmt) == GIMPLE_COND)
2540     {
2541       if (EDGE_COUNT (first_bb->succs) != 2)
2542         return;
2543     }
2544   else if (final_range_test_p (stmt))
2545     other_bb = single_succ (first_bb);
2546   else
2547     return;
2548
2549   if (stmt_could_throw_p (stmt))
2550     return;
2551
2552   /* As relative ordering of post-dominator sons isn't fixed,
2553      maybe_optimize_range_tests can be called first on any
2554      bb in the range we want to optimize.  So, start searching
2555      backwards, if first_bb can be set to a predecessor.  */
2556   while (single_pred_p (first_bb))
2557     {
2558       basic_block pred_bb = single_pred (first_bb);
2559       if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2560         break;
2561       if (!no_side_effect_bb (first_bb))
2562         break;
2563       first_bb = pred_bb;
2564     }
2565   /* If first_bb is last_bb, other_bb hasn't been computed yet.
2566      Before starting forward search in last_bb successors, find
2567      out the other_bb.  */
2568   if (first_bb == last_bb)
2569     {
2570       other_bb = NULL;
2571       /* As non-GIMPLE_COND last stmt always terminates the range,
2572          if forward search didn't discover anything, just give up.  */
2573       if (gimple_code (stmt) != GIMPLE_COND)
2574         return;
2575       /* Look at both successors.  Either it ends with a GIMPLE_COND
2576          and satisfies suitable_cond_bb, or ends with a cast and
2577          other_bb is that cast's successor.  */
2578       FOR_EACH_EDGE (e, ei, first_bb->succs)
2579         if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2580             || e->dest == first_bb)
2581           return;
2582         else if (single_pred_p (e->dest))
2583           {
2584             stmt = last_stmt (e->dest);
2585             if (stmt
2586                 && gimple_code (stmt) == GIMPLE_COND
2587                 && EDGE_COUNT (e->dest->succs) == 2)
2588               {
2589                 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2590                   break;
2591                 else
2592                   other_bb = NULL;
2593               }
2594             else if (stmt
2595                      && final_range_test_p (stmt)
2596                      && find_edge (first_bb, single_succ (e->dest)))
2597               {
2598                 other_bb = single_succ (e->dest);
2599                 if (other_bb == first_bb)
2600                   other_bb = NULL;
2601               }
2602           }
2603       if (other_bb == NULL)
2604         return;
2605     }
2606   /* Now do the forward search, moving last_bb to successor bbs
2607      that aren't other_bb.  */
2608   while (EDGE_COUNT (last_bb->succs) == 2)
2609     {
2610       FOR_EACH_EDGE (e, ei, last_bb->succs)
2611         if (e->dest != other_bb)
2612           break;
2613       if (e == NULL)
2614         break;
2615       if (!single_pred_p (e->dest))
2616         break;
2617       if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2618         break;
2619       if (!no_side_effect_bb (e->dest))
2620         break;
2621       last_bb = e->dest;
2622     }
2623   if (first_bb == last_bb)
2624     return;
2625   /* Here basic blocks first_bb through last_bb's predecessor
2626      end with GIMPLE_COND, all of them have one of the edges to
2627      other_bb and another to another block in the range,
2628      all blocks except first_bb don't have side-effects and
2629      last_bb ends with either GIMPLE_COND, or cast satisfying
2630      final_range_test_p.  */
2631   for (bb = last_bb; ; bb = single_pred (bb))
2632     {
2633       enum tree_code code;
2634       tree lhs, rhs;
2635
2636       e = find_edge (bb, other_bb);
2637       stmt = last_stmt (bb);
2638       gimple_set_visited (stmt, true);
2639       if (gimple_code (stmt) != GIMPLE_COND)
2640         {
2641           use_operand_p use_p;
2642           gimple phi;
2643           edge e2;
2644           unsigned int d;
2645
2646           lhs = gimple_assign_lhs (stmt);
2647           rhs = gimple_assign_rhs1 (stmt);
2648           gcc_assert (bb == last_bb);
2649
2650           /* stmt is
2651              _123 = (int) _234;
2652
2653              followed by:
2654              <bb M>:
2655              # _345 = PHI <_123(N), 1(...), 1(...)>
2656
2657              or 0 instead of 1.  If it is 0, the _234
2658              range test is anded together with all the
2659              other range tests, if it is 1, it is ored with
2660              them.  */
2661           single_imm_use (lhs, &use_p, &phi);
2662           gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2663           e2 = find_edge (first_bb, other_bb);
2664           d = e2->dest_idx;
2665           gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2666           if (integer_zerop (gimple_phi_arg_def (phi, d)))
2667             code = BIT_AND_EXPR;
2668           else
2669             {
2670               gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2671               code = BIT_IOR_EXPR;
2672             }
2673
2674           /* If _234 SSA_NAME_DEF_STMT is
2675              _234 = _567 | _789;
2676              (or &, corresponding to 1/0 in the phi arguments,
2677              push into ops the individual range test arguments
2678              of the bitwise or resp. and, recursively.  */
2679           if (!get_ops (rhs, code, &ops,
2680                         loop_containing_stmt (stmt))
2681               && has_single_use (rhs))
2682             {
2683               /* Otherwise, push the _234 range test itself.  */
2684               operand_entry_t oe
2685                 = (operand_entry_t) pool_alloc (operand_entry_pool);
2686
2687               oe->op = rhs;
2688               oe->rank = code;
2689               oe->id = 0;
2690               oe->count = 1;
2691               ops.safe_push (oe);
2692             }
2693           continue;
2694         }
2695       /* Otherwise stmt is GIMPLE_COND.  */
2696       code = gimple_cond_code (stmt);
2697       lhs = gimple_cond_lhs (stmt);
2698       rhs = gimple_cond_rhs (stmt);
2699       if (TREE_CODE (lhs) == SSA_NAME
2700           && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2701           && ((code != EQ_EXPR && code != NE_EXPR)
2702               || rhs != boolean_false_node
2703                  /* Either push into ops the individual bitwise
2704                     or resp. and operands, depending on which
2705                     edge is other_bb.  */
2706               || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2707                                  ^ (code == EQ_EXPR))
2708                                 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2709                            loop_containing_stmt (stmt))))
2710         {
2711           /* Or push the GIMPLE_COND stmt itself.  */
2712           operand_entry_t oe
2713             = (operand_entry_t) pool_alloc (operand_entry_pool);
2714
2715           oe->op = NULL;
2716           oe->rank = (e->flags & EDGE_TRUE_VALUE)
2717                      ? BIT_IOR_EXPR : BIT_AND_EXPR;
2718           /* oe->op = NULL signs that there is no SSA_NAME
2719              for the range test, and oe->id instead is the
2720              basic block number, at which's end the GIMPLE_COND
2721              is.  */
2722           oe->id = bb->index;
2723           oe->count = 1;
2724           ops.safe_push (oe);
2725         }
2726       if (bb == first_bb)
2727         break;
2728     }
2729   if (ops.length () > 1)
2730     optimize_range_tests (ERROR_MARK, &ops);
2731   ops.release ();
2732 }
2733
2734 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2735    of STMT in it's operands.  This is also known as a "destructive
2736    update" operation.  */
2737
2738 static bool
2739 is_phi_for_stmt (gimple stmt, tree operand)
2740 {
2741   gimple def_stmt;
2742   tree lhs;
2743   use_operand_p arg_p;
2744   ssa_op_iter i;
2745
2746   if (TREE_CODE (operand) != SSA_NAME)
2747     return false;
2748
2749   lhs = gimple_assign_lhs (stmt);
2750
2751   def_stmt = SSA_NAME_DEF_STMT (operand);
2752   if (gimple_code (def_stmt) != GIMPLE_PHI)
2753     return false;
2754
2755   FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2756     if (lhs == USE_FROM_PTR (arg_p))
2757       return true;
2758   return false;
2759 }
2760
2761 /* Remove def stmt of VAR if VAR has zero uses and recurse
2762    on rhs1 operand if so.  */
2763
2764 static void
2765 remove_visited_stmt_chain (tree var)
2766 {
2767   gimple stmt;
2768   gimple_stmt_iterator gsi;
2769
2770   while (1)
2771     {
2772       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2773         return;
2774       stmt = SSA_NAME_DEF_STMT (var);
2775       if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
2776         {
2777           var = gimple_assign_rhs1 (stmt);
2778           gsi = gsi_for_stmt (stmt);
2779           gsi_remove (&gsi, true);
2780           release_defs (stmt);
2781         }
2782       else
2783         return;
2784     }
2785 }
2786
2787 /* This function checks three consequtive operands in
2788    passed operands vector OPS starting from OPINDEX and
2789    swaps two operands if it is profitable for binary operation
2790    consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
2791
2792    We pair ops with the same rank if possible.
2793
2794    The alternative we try is to see if STMT is a destructive
2795    update style statement, which is like:
2796    b = phi (a, ...)
2797    a = c + b;
2798    In that case, we want to use the destructive update form to
2799    expose the possible vectorizer sum reduction opportunity.
2800    In that case, the third operand will be the phi node. This
2801    check is not performed if STMT is null.
2802
2803    We could, of course, try to be better as noted above, and do a
2804    lot of work to try to find these opportunities in >3 operand
2805    cases, but it is unlikely to be worth it.  */
2806
2807 static void
2808 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
2809                           unsigned int opindex, gimple stmt)
2810 {
2811   operand_entry_t oe1, oe2, oe3;
2812
2813   oe1 = ops[opindex];
2814   oe2 = ops[opindex + 1];
2815   oe3 = ops[opindex + 2];
2816
2817   if ((oe1->rank == oe2->rank
2818        && oe2->rank != oe3->rank)
2819       || (stmt && is_phi_for_stmt (stmt, oe3->op)
2820           && !is_phi_for_stmt (stmt, oe1->op)
2821           && !is_phi_for_stmt (stmt, oe2->op)))
2822     {
2823       struct operand_entry temp = *oe3;
2824       oe3->op = oe1->op;
2825       oe3->rank = oe1->rank;
2826       oe1->op = temp.op;
2827       oe1->rank= temp.rank;
2828     }
2829   else if ((oe1->rank == oe3->rank
2830             && oe2->rank != oe3->rank)
2831            || (stmt && is_phi_for_stmt (stmt, oe2->op)
2832                && !is_phi_for_stmt (stmt, oe1->op)
2833                && !is_phi_for_stmt (stmt, oe3->op)))
2834     {
2835       struct operand_entry temp = *oe2;
2836       oe2->op = oe1->op;
2837       oe2->rank = oe1->rank;
2838       oe1->op = temp.op;
2839       oe1->rank= temp.rank;
2840     }
2841 }
2842
2843 /* Recursively rewrite our linearized statements so that the operators
2844    match those in OPS[OPINDEX], putting the computation in rank
2845    order.  */
2846
2847 static void
2848 rewrite_expr_tree (gimple stmt, unsigned int opindex,
2849                    vec<operand_entry_t> ops, bool moved)
2850 {
2851   tree rhs1 = gimple_assign_rhs1 (stmt);
2852   tree rhs2 = gimple_assign_rhs2 (stmt);
2853   operand_entry_t oe;
2854
2855   /* If we have three operands left, then we want to make sure the ones
2856      that get the double binary op are chosen wisely.  */
2857   if (opindex + 3 == ops.length ())
2858     swap_ops_for_binary_stmt (ops, opindex, stmt);
2859
2860   /* The final recursion case for this function is that you have
2861      exactly two operations left.
2862      If we had one exactly one op in the entire list to start with, we
2863      would have never called this function, and the tail recursion
2864      rewrites them one at a time.  */
2865   if (opindex + 2 == ops.length ())
2866     {
2867       operand_entry_t oe1, oe2;
2868
2869       oe1 = ops[opindex];
2870       oe2 = ops[opindex + 1];
2871
2872       if (rhs1 != oe1->op || rhs2 != oe2->op)
2873         {
2874           if (dump_file && (dump_flags & TDF_DETAILS))
2875             {
2876               fprintf (dump_file, "Transforming ");
2877               print_gimple_stmt (dump_file, stmt, 0, 0);
2878             }
2879
2880           gimple_assign_set_rhs1 (stmt, oe1->op);
2881           gimple_assign_set_rhs2 (stmt, oe2->op);
2882           update_stmt (stmt);
2883           if (rhs1 != oe1->op && rhs1 != oe2->op)
2884             remove_visited_stmt_chain (rhs1);
2885
2886           if (dump_file && (dump_flags & TDF_DETAILS))
2887             {
2888               fprintf (dump_file, " into ");
2889               print_gimple_stmt (dump_file, stmt, 0, 0);
2890             }
2891         }
2892       return;
2893     }
2894
2895   /* If we hit here, we should have 3 or more ops left.  */
2896   gcc_assert (opindex + 2 < ops.length ());
2897
2898   /* Rewrite the next operator.  */
2899   oe = ops[opindex];
2900
2901   if (oe->op != rhs2)
2902     {
2903       if (!moved)
2904         {
2905           gimple_stmt_iterator gsinow, gsirhs1;
2906           gimple stmt1 = stmt, stmt2;
2907           unsigned int count;
2908
2909           gsinow = gsi_for_stmt (stmt);
2910           count = ops.length () - opindex - 2;
2911           while (count-- != 0)
2912             {
2913               stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
2914               gsirhs1 = gsi_for_stmt (stmt2);
2915               gsi_move_before (&gsirhs1, &gsinow);
2916               gsi_prev (&gsinow);
2917               stmt1 = stmt2;
2918             }
2919           moved = true;
2920         }
2921
2922       if (dump_file && (dump_flags & TDF_DETAILS))
2923         {
2924           fprintf (dump_file, "Transforming ");
2925           print_gimple_stmt (dump_file, stmt, 0, 0);
2926         }
2927
2928       gimple_assign_set_rhs2 (stmt, oe->op);
2929       update_stmt (stmt);
2930
2931       if (dump_file && (dump_flags & TDF_DETAILS))
2932         {
2933           fprintf (dump_file, " into ");
2934           print_gimple_stmt (dump_file, stmt, 0, 0);
2935         }
2936     }
2937   /* Recurse on the LHS of the binary operator, which is guaranteed to
2938      be the non-leaf side.  */
2939   rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
2940 }
2941
2942 /* Find out how many cycles we need to compute statements chain.
2943    OPS_NUM holds number os statements in a chain.  CPU_WIDTH is a
2944    maximum number of independent statements we may execute per cycle.  */
2945
2946 static int
2947 get_required_cycles (int ops_num, int cpu_width)
2948 {
2949   int res;
2950   int elog;
2951   unsigned int rest;
2952
2953   /* While we have more than 2 * cpu_width operands
2954      we may reduce number of operands by cpu_width
2955      per cycle.  */
2956   res = ops_num / (2 * cpu_width);
2957
2958   /* Remained operands count may be reduced twice per cycle
2959      until we have only one operand.  */
2960   rest = (unsigned)(ops_num - res * cpu_width);
2961   elog = exact_log2 (rest);
2962   if (elog >= 0)
2963     res += elog;
2964   else
2965     res += floor_log2 (rest) + 1;
2966
2967   return res;
2968 }
2969
2970 /* Returns an optimal number of registers to use for computation of
2971    given statements.  */
2972
2973 static int
2974 get_reassociation_width (int ops_num, enum tree_code opc,
2975                          enum machine_mode mode)
2976 {
2977   int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
2978   int width;
2979   int width_min;
2980   int cycles_best;
2981
2982   if (param_width > 0)
2983     width = param_width;
2984   else
2985     width = targetm.sched.reassociation_width (opc, mode);
2986
2987   if (width == 1)
2988     return width;
2989
2990   /* Get the minimal time required for sequence computation.  */
2991   cycles_best = get_required_cycles (ops_num, width);
2992
2993   /* Check if we may use less width and still compute sequence for
2994      the same time.  It will allow us to reduce registers usage.
2995      get_required_cycles is monotonically increasing with lower width
2996      so we can perform a binary search for the minimal width that still
2997      results in the optimal cycle count.  */
2998   width_min = 1;
2999   while (width > width_min)
3000     {
3001       int width_mid = (width + width_min) / 2;
3002
3003       if (get_required_cycles (ops_num, width_mid) == cycles_best)
3004         width = width_mid;
3005       else if (width_min < width_mid)
3006         width_min = width_mid;
3007       else
3008         break;
3009     }
3010
3011   return width;
3012 }
3013
3014 /* Recursively rewrite our linearized statements so that the operators
3015    match those in OPS[OPINDEX], putting the computation in rank
3016    order and trying to allow operations to be executed in
3017    parallel.  */
3018
3019 static void
3020 rewrite_expr_tree_parallel (gimple stmt, int width,
3021                             vec<operand_entry_t>  ops)
3022 {
3023   enum tree_code opcode = gimple_assign_rhs_code (stmt);
3024   int op_num = ops.length ();
3025   int stmt_num = op_num - 1;
3026   gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3027   int op_index = op_num - 1;
3028   int stmt_index = 0;
3029   int ready_stmts_end = 0;
3030   int i = 0;
3031   tree last_rhs1 = gimple_assign_rhs1 (stmt);
3032
3033   /* We start expression rewriting from the top statements.
3034      So, in this loop we create a full list of statements
3035      we will work with.  */
3036   stmts[stmt_num - 1] = stmt;
3037   for (i = stmt_num - 2; i >= 0; i--)
3038     stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3039
3040   for (i = 0; i < stmt_num; i++)
3041     {
3042       tree op1, op2;
3043
3044       /* Determine whether we should use results of
3045          already handled statements or not.  */
3046       if (ready_stmts_end == 0
3047           && (i - stmt_index >= width || op_index < 1))
3048         ready_stmts_end = i;
3049
3050       /* Now we choose operands for the next statement.  Non zero
3051          value in ready_stmts_end means here that we should use
3052          the result of already generated statements as new operand.  */
3053       if (ready_stmts_end > 0)
3054         {
3055           op1 = gimple_assign_lhs (stmts[stmt_index++]);
3056           if (ready_stmts_end > stmt_index)
3057             op2 = gimple_assign_lhs (stmts[stmt_index++]);
3058           else if (op_index >= 0)
3059             op2 = ops[op_index--]->op;
3060           else
3061             {
3062               gcc_assert (stmt_index < i);
3063               op2 = gimple_assign_lhs (stmts[stmt_index++]);
3064             }
3065
3066           if (stmt_index >= ready_stmts_end)
3067             ready_stmts_end = 0;
3068         }
3069       else
3070         {
3071           if (op_index > 1)
3072             swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3073           op2 = ops[op_index--]->op;
3074           op1 = ops[op_index--]->op;
3075         }
3076
3077       /* If we emit the last statement then we should put
3078          operands into the last statement.  It will also
3079          break the loop.  */
3080       if (op_index < 0 && stmt_index == i)
3081         i = stmt_num - 1;
3082
3083       if (dump_file && (dump_flags & TDF_DETAILS))
3084         {
3085           fprintf (dump_file, "Transforming ");
3086           print_gimple_stmt (dump_file, stmts[i], 0, 0);
3087         }
3088
3089       /* We keep original statement only for the last one.  All
3090          others are recreated.  */
3091       if (i == stmt_num - 1)
3092         {
3093           gimple_assign_set_rhs1 (stmts[i], op1);
3094           gimple_assign_set_rhs2 (stmts[i], op2);
3095           update_stmt (stmts[i]);
3096         }
3097       else
3098         stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3099
3100       if (dump_file && (dump_flags & TDF_DETAILS))
3101         {
3102           fprintf (dump_file, " into ");
3103           print_gimple_stmt (dump_file, stmts[i], 0, 0);
3104         }
3105     }
3106
3107   remove_visited_stmt_chain (last_rhs1);
3108 }
3109
3110 /* Transform STMT, which is really (A +B) + (C + D) into the left
3111    linear form, ((A+B)+C)+D.
3112    Recurse on D if necessary.  */
3113
3114 static void
3115 linearize_expr (gimple stmt)
3116 {
3117   gimple_stmt_iterator gsinow, gsirhs;
3118   gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3119   gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3120   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3121   gimple newbinrhs = NULL;
3122   struct loop *loop = loop_containing_stmt (stmt);
3123
3124   gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3125               && is_reassociable_op (binrhs, rhscode, loop));
3126
3127   gsinow = gsi_for_stmt (stmt);
3128   gsirhs = gsi_for_stmt (binrhs);
3129   gsi_move_before (&gsirhs, &gsinow);
3130
3131   gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3132   gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
3133   gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3134
3135   if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3136     newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3137
3138   if (dump_file && (dump_flags & TDF_DETAILS))
3139     {
3140       fprintf (dump_file, "Linearized: ");
3141       print_gimple_stmt (dump_file, stmt, 0, 0);
3142     }
3143
3144   reassociate_stats.linearized++;
3145   update_stmt (binrhs);
3146   update_stmt (binlhs);
3147   update_stmt (stmt);
3148
3149   gimple_set_visited (stmt, true);
3150   gimple_set_visited (binlhs, true);
3151   gimple_set_visited (binrhs, true);
3152
3153   /* Tail recurse on the new rhs if it still needs reassociation.  */
3154   if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3155     /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3156            want to change the algorithm while converting to tuples.  */
3157     linearize_expr (stmt);
3158 }
3159
3160 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3161    it.  Otherwise, return NULL.  */
3162
3163 static gimple
3164 get_single_immediate_use (tree lhs)
3165 {
3166   use_operand_p immuse;
3167   gimple immusestmt;
3168
3169   if (TREE_CODE (lhs) == SSA_NAME
3170       && single_imm_use (lhs, &immuse, &immusestmt)
3171       && is_gimple_assign (immusestmt))
3172     return immusestmt;
3173
3174   return NULL;
3175 }
3176
3177 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3178    representing the negated value.  Insertions of any necessary
3179    instructions go before GSI.
3180    This function is recursive in that, if you hand it "a_5" as the
3181    value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3182    transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
3183
3184 static tree
3185 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
3186 {
3187   gimple negatedefstmt= NULL;
3188   tree resultofnegate;
3189
3190   /* If we are trying to negate a name, defined by an add, negate the
3191      add operands instead.  */
3192   if (TREE_CODE (tonegate) == SSA_NAME)
3193     negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3194   if (TREE_CODE (tonegate) == SSA_NAME
3195       && is_gimple_assign (negatedefstmt)
3196       && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3197       && has_single_use (gimple_assign_lhs (negatedefstmt))
3198       && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3199     {
3200       gimple_stmt_iterator gsi;
3201       tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3202       tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3203
3204       gsi = gsi_for_stmt (negatedefstmt);
3205       rhs1 = negate_value (rhs1, &gsi);
3206       gimple_assign_set_rhs1 (negatedefstmt, rhs1);
3207
3208       gsi = gsi_for_stmt (negatedefstmt);
3209       rhs2 = negate_value (rhs2, &gsi);
3210       gimple_assign_set_rhs2 (negatedefstmt, rhs2);
3211
3212       update_stmt (negatedefstmt);
3213       return gimple_assign_lhs (negatedefstmt);
3214     }
3215
3216   tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3217   resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
3218                                              NULL_TREE, true, GSI_SAME_STMT);
3219   return resultofnegate;
3220 }
3221
3222 /* Return true if we should break up the subtract in STMT into an add
3223    with negate.  This is true when we the subtract operands are really
3224    adds, or the subtract itself is used in an add expression.  In
3225    either case, breaking up the subtract into an add with negate
3226    exposes the adds to reassociation.  */
3227
3228 static bool
3229 should_break_up_subtract (gimple stmt)
3230 {
3231   tree lhs = gimple_assign_lhs (stmt);
3232   tree binlhs = gimple_assign_rhs1 (stmt);
3233   tree binrhs = gimple_assign_rhs2 (stmt);
3234   gimple immusestmt;
3235   struct loop *loop = loop_containing_stmt (stmt);
3236
3237   if (TREE_CODE (binlhs) == SSA_NAME
3238       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3239     return true;
3240
3241   if (TREE_CODE (binrhs) == SSA_NAME
3242       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3243     return true;
3244
3245   if (TREE_CODE (lhs) == SSA_NAME
3246       && (immusestmt = get_single_immediate_use (lhs))
3247       && is_gimple_assign (immusestmt)
3248       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3249           ||  gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3250     return true;
3251   return false;
3252 }
3253
3254 /* Transform STMT from A - B into A + -B.  */
3255
3256 static void
3257 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3258 {
3259   tree rhs1 = gimple_assign_rhs1 (stmt);
3260   tree rhs2 = gimple_assign_rhs2 (stmt);
3261
3262   if (dump_file && (dump_flags & TDF_DETAILS))
3263     {
3264       fprintf (dump_file, "Breaking up subtract ");
3265       print_gimple_stmt (dump_file, stmt, 0, 0);
3266     }
3267
3268   rhs2 = negate_value (rhs2, gsip);
3269   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3270   update_stmt (stmt);
3271 }
3272
3273 /* Determine whether STMT is a builtin call that raises an SSA name
3274    to an integer power and has only one use.  If so, and this is early
3275    reassociation and unsafe math optimizations are permitted, place
3276    the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3277    If any of these conditions does not hold, return FALSE.  */
3278
3279 static bool
3280 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3281 {
3282   tree fndecl, arg1;
3283   REAL_VALUE_TYPE c, cint;
3284
3285   if (!first_pass_instance
3286       || !flag_unsafe_math_optimizations
3287       || !is_gimple_call (stmt)
3288       || !has_single_use (gimple_call_lhs (stmt)))
3289     return false;
3290
3291   fndecl = gimple_call_fndecl (stmt);
3292
3293   if (!fndecl
3294       || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3295     return false;
3296
3297   switch (DECL_FUNCTION_CODE (fndecl))
3298     {
3299     CASE_FLT_FN (BUILT_IN_POW):
3300       if (flag_errno_math)
3301         return false;
3302
3303       *base = gimple_call_arg (stmt, 0);
3304       arg1 = gimple_call_arg (stmt, 1);
3305
3306       if (TREE_CODE (arg1) != REAL_CST)
3307         return false;
3308
3309       c = TREE_REAL_CST (arg1);
3310
3311       if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3312         return false;
3313
3314       *exponent = real_to_integer (&c);
3315       real_from_integer (&cint, VOIDmode, *exponent,
3316                          *exponent < 0 ? -1 : 0, 0);
3317       if (!real_identical (&c, &cint))
3318         return false;
3319
3320       break;
3321
3322     CASE_FLT_FN (BUILT_IN_POWI):
3323       *base = gimple_call_arg (stmt, 0);
3324       arg1 = gimple_call_arg (stmt, 1);
3325
3326       if (!host_integerp (arg1, 0))
3327         return false;
3328
3329       *exponent = TREE_INT_CST_LOW (arg1);
3330       break;
3331
3332     default:
3333       return false;
3334     }
3335
3336   /* Expanding negative exponents is generally unproductive, so we don't
3337      complicate matters with those.  Exponents of zero and one should
3338      have been handled by expression folding.  */
3339   if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3340     return false;
3341
3342   return true;
3343 }
3344
3345 /* Recursively linearize a binary expression that is the RHS of STMT.
3346    Place the operands of the expression tree in the vector named OPS.  */
3347
3348 static void
3349 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3350                      bool is_associative, bool set_visited)
3351 {
3352   tree binlhs = gimple_assign_rhs1 (stmt);
3353   tree binrhs = gimple_assign_rhs2 (stmt);
3354   gimple binlhsdef = NULL, binrhsdef = NULL;
3355   bool binlhsisreassoc = false;
3356   bool binrhsisreassoc = false;
3357   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3358   struct loop *loop = loop_containing_stmt (stmt);
3359   tree base = NULL_TREE;
3360   HOST_WIDE_INT exponent = 0;
3361
3362   if (set_visited)
3363     gimple_set_visited (stmt, true);
3364
3365   if (TREE_CODE (binlhs) == SSA_NAME)
3366     {
3367       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3368       binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3369                          && !stmt_could_throw_p (binlhsdef));
3370     }
3371
3372   if (TREE_CODE (binrhs) == SSA_NAME)
3373     {
3374       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3375       binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3376                          && !stmt_could_throw_p (binrhsdef));
3377     }
3378
3379   /* If the LHS is not reassociable, but the RHS is, we need to swap
3380      them.  If neither is reassociable, there is nothing we can do, so
3381      just put them in the ops vector.  If the LHS is reassociable,
3382      linearize it.  If both are reassociable, then linearize the RHS
3383      and the LHS.  */
3384
3385   if (!binlhsisreassoc)
3386     {
3387       tree temp;
3388
3389       /* If this is not a associative operation like division, give up.  */
3390       if (!is_associative)
3391         {
3392           add_to_ops_vec (ops, binrhs);
3393           return;
3394         }
3395
3396       if (!binrhsisreassoc)
3397         {
3398           if (rhscode == MULT_EXPR
3399               && TREE_CODE (binrhs) == SSA_NAME
3400               && acceptable_pow_call (binrhsdef, &base, &exponent))
3401             {
3402               add_repeat_to_ops_vec (ops, base, exponent);
3403               gimple_set_visited (binrhsdef, true);
3404             }
3405           else
3406             add_to_ops_vec (ops, binrhs);
3407
3408           if (rhscode == MULT_EXPR
3409               && TREE_CODE (binlhs) == SSA_NAME
3410               && acceptable_pow_call (binlhsdef, &base, &exponent))
3411             {
3412               add_repeat_to_ops_vec (ops, base, exponent);
3413               gimple_set_visited (binlhsdef, true);
3414             }
3415           else
3416             add_to_ops_vec (ops, binlhs);
3417
3418           return;
3419         }
3420
3421       if (dump_file && (dump_flags & TDF_DETAILS))
3422         {
3423           fprintf (dump_file, "swapping operands of ");
3424           print_gimple_stmt (dump_file, stmt, 0, 0);
3425         }
3426
3427       swap_tree_operands (stmt,
3428                           gimple_assign_rhs1_ptr (stmt),
3429                           gimple_assign_rhs2_ptr (stmt));
3430       update_stmt (stmt);
3431
3432       if (dump_file && (dump_flags & TDF_DETAILS))
3433         {
3434           fprintf (dump_file, " is now ");
3435           print_gimple_stmt (dump_file, stmt, 0, 0);
3436         }
3437
3438       /* We want to make it so the lhs is always the reassociative op,
3439          so swap.  */
3440       temp = binlhs;
3441       binlhs = binrhs;
3442       binrhs = temp;
3443     }
3444   else if (binrhsisreassoc)
3445     {
3446       linearize_expr (stmt);
3447       binlhs = gimple_assign_rhs1 (stmt);
3448       binrhs = gimple_assign_rhs2 (stmt);
3449     }
3450
3451   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3452               || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3453                                       rhscode, loop));
3454   linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3455                        is_associative, set_visited);
3456
3457   if (rhscode == MULT_EXPR
3458       && TREE_CODE (binrhs) == SSA_NAME
3459       && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3460     {
3461       add_repeat_to_ops_vec (ops, base, exponent);
3462       gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3463     }
3464   else
3465     add_to_ops_vec (ops, binrhs);
3466 }
3467
3468 /* Repropagate the negates back into subtracts, since no other pass
3469    currently does it.  */
3470
3471 static void
3472 repropagate_negates (void)
3473 {
3474   unsigned int i = 0;
3475   tree negate;
3476
3477   FOR_EACH_VEC_ELT (plus_negates, i, negate)
3478     {
3479       gimple user = get_single_immediate_use (negate);
3480
3481       if (!user || !is_gimple_assign (user))
3482         continue;
3483
3484       /* The negate operand can be either operand of a PLUS_EXPR
3485          (it can be the LHS if the RHS is a constant for example).
3486
3487          Force the negate operand to the RHS of the PLUS_EXPR, then
3488          transform the PLUS_EXPR into a MINUS_EXPR.  */
3489       if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3490         {
3491           /* If the negated operand appears on the LHS of the
3492              PLUS_EXPR, exchange the operands of the PLUS_EXPR
3493              to force the negated operand to the RHS of the PLUS_EXPR.  */
3494           if (gimple_assign_rhs1 (user) == negate)
3495             {
3496               swap_tree_operands (user,
3497                                   gimple_assign_rhs1_ptr (user),
3498                                   gimple_assign_rhs2_ptr (user));
3499             }
3500
3501           /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3502              the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
3503           if (gimple_assign_rhs2 (user) == negate)
3504             {
3505               tree rhs1 = gimple_assign_rhs1 (user);
3506               tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3507               gimple_stmt_iterator gsi = gsi_for_stmt (user);
3508               gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3509               update_stmt (user);
3510             }
3511         }
3512       else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3513         {
3514           if (gimple_assign_rhs1 (user) == negate)
3515             {
3516               /* We have
3517                    x = -a
3518                    y = x - b
3519                  which we transform into
3520                    x = a + b
3521                    y = -x .
3522                  This pushes down the negate which we possibly can merge
3523                  into some other operation, hence insert it into the
3524                  plus_negates vector.  */
3525               gimple feed = SSA_NAME_DEF_STMT (negate);
3526               tree a = gimple_assign_rhs1 (feed);
3527               tree rhs2 = gimple_assign_rhs2 (user);
3528               gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
3529               gimple_replace_lhs (feed, negate);
3530               gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
3531               update_stmt (gsi_stmt (gsi));
3532               gsi2 = gsi_for_stmt (user);
3533               gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
3534               update_stmt (gsi_stmt (gsi2));
3535               gsi_move_before (&gsi, &gsi2);
3536               plus_negates.safe_push (gimple_assign_lhs (gsi_stmt (gsi2)));
3537             }
3538           else
3539             {
3540               /* Transform "x = -a; y = b - x" into "y = b + a", getting
3541                  rid of one operation.  */
3542               gimple feed = SSA_NAME_DEF_STMT (negate);
3543               tree a = gimple_assign_rhs1 (feed);
3544               tree rhs1 = gimple_assign_rhs1 (user);
3545               gimple_stmt_iterator gsi = gsi_for_stmt (user);
3546               gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3547               update_stmt (gsi_stmt (gsi));
3548             }
3549         }
3550     }
3551 }
3552
3553 /* Returns true if OP is of a type for which we can do reassociation.
3554    That is for integral or non-saturating fixed-point types, and for
3555    floating point type when associative-math is enabled.  */
3556
3557 static bool
3558 can_reassociate_p (tree op)
3559 {
3560   tree type = TREE_TYPE (op);
3561   if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3562       || NON_SAT_FIXED_POINT_TYPE_P (type)
3563       || (flag_associative_math && FLOAT_TYPE_P (type)))
3564     return true;
3565   return false;
3566 }
3567
3568 /* Break up subtract operations in block BB.
3569
3570    We do this top down because we don't know whether the subtract is
3571    part of a possible chain of reassociation except at the top.
3572
3573    IE given
3574    d = f + g
3575    c = a + e
3576    b = c - d
3577    q = b - r
3578    k = t - q
3579
3580    we want to break up k = t - q, but we won't until we've transformed q
3581    = b - r, which won't be broken up until we transform b = c - d.
3582
3583    En passant, clear the GIMPLE visited flag on every statement.  */
3584
3585 static void
3586 break_up_subtract_bb (basic_block bb)
3587 {
3588   gimple_stmt_iterator gsi;
3589   basic_block son;
3590
3591   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3592     {
3593       gimple stmt = gsi_stmt (gsi);
3594       gimple_set_visited (stmt, false);
3595
3596       if (!is_gimple_assign (stmt)
3597           || !can_reassociate_p (gimple_assign_lhs (stmt)))
3598         continue;
3599
3600       /* Look for simple gimple subtract operations.  */
3601       if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3602         {
3603           if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3604               || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3605             continue;
3606
3607           /* Check for a subtract used only in an addition.  If this
3608              is the case, transform it into add of a negate for better
3609              reassociation.  IE transform C = A-B into C = A + -B if C
3610              is only used in an addition.  */
3611           if (should_break_up_subtract (stmt))
3612             break_up_subtract (stmt, &gsi);
3613         }
3614       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3615                && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3616         plus_negates.safe_push (gimple_assign_lhs (stmt));
3617     }
3618   for (son = first_dom_son (CDI_DOMINATORS, bb);
3619        son;
3620        son = next_dom_son (CDI_DOMINATORS, son))
3621     break_up_subtract_bb (son);
3622 }
3623
3624 /* Used for repeated factor analysis.  */
3625 struct repeat_factor_d
3626 {
3627   /* An SSA name that occurs in a multiply chain.  */
3628   tree factor;
3629
3630   /* Cached rank of the factor.  */
3631   unsigned rank;
3632
3633   /* Number of occurrences of the factor in the chain.  */
3634   HOST_WIDE_INT count;
3635
3636   /* An SSA name representing the product of this factor and
3637      all factors appearing later in the repeated factor vector.  */
3638   tree repr;
3639 };
3640
3641 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3642 typedef const struct repeat_factor_d *const_repeat_factor_t;
3643
3644
3645 static vec<repeat_factor> repeat_factor_vec;
3646
3647 /* Used for sorting the repeat factor vector.  Sort primarily by
3648    ascending occurrence count, secondarily by descending rank.  */
3649
3650 static int
3651 compare_repeat_factors (const void *x1, const void *x2)
3652 {
3653   const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3654   const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3655
3656   if (rf1->count != rf2->count)
3657     return rf1->count - rf2->count;
3658
3659   return rf2->rank - rf1->rank;
3660 }
3661
3662 /* Look for repeated operands in OPS in the multiply tree rooted at
3663    STMT.  Replace them with an optimal sequence of multiplies and powi
3664    builtin calls, and remove the used operands from OPS.  Return an
3665    SSA name representing the value of the replacement sequence.  */
3666
3667 static tree
3668 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
3669 {
3670   unsigned i, j, vec_len;
3671   int ii;
3672   operand_entry_t oe;
3673   repeat_factor_t rf1, rf2;
3674   repeat_factor rfnew;
3675   tree result = NULL_TREE;
3676   tree target_ssa, iter_result;
3677   tree type = TREE_TYPE (gimple_get_lhs (stmt));
3678   tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
3679   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3680   gimple mul_stmt, pow_stmt;
3681
3682   /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3683      target.  */
3684   if (!powi_fndecl)
3685     return NULL_TREE;
3686
3687   /* Allocate the repeated factor vector.  */
3688   repeat_factor_vec.create (10);
3689
3690   /* Scan the OPS vector for all SSA names in the product and build
3691      up a vector of occurrence counts for each factor.  */
3692   FOR_EACH_VEC_ELT (*ops, i, oe)
3693     {
3694       if (TREE_CODE (oe->op) == SSA_NAME)
3695         {
3696           FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3697             {
3698               if (rf1->factor == oe->op)
3699                 {
3700                   rf1->count += oe->count;
3701                   break;
3702                 }
3703             }
3704
3705           if (j >= repeat_factor_vec.length ())
3706             {
3707               rfnew.factor = oe->op;
3708               rfnew.rank = oe->rank;
3709               rfnew.count = oe->count;
3710               rfnew.repr = NULL_TREE;
3711               repeat_factor_vec.safe_push (rfnew);
3712             }
3713         }
3714     }
3715
3716   /* Sort the repeated factor vector by (a) increasing occurrence count,
3717      and (b) decreasing rank.  */
3718   repeat_factor_vec.qsort (compare_repeat_factors);
3719
3720   /* It is generally best to combine as many base factors as possible
3721      into a product before applying __builtin_powi to the result.
3722      However, the sort order chosen for the repeated factor vector
3723      allows us to cache partial results for the product of the base
3724      factors for subsequent use.  When we already have a cached partial
3725      result from a previous iteration, it is best to make use of it
3726      before looking for another __builtin_pow opportunity.
3727
3728      As an example, consider x * x * y * y * y * z * z * z * z.
3729      We want to first compose the product x * y * z, raise it to the
3730      second power, then multiply this by y * z, and finally multiply
3731      by z.  This can be done in 5 multiplies provided we cache y * z
3732      for use in both expressions:
3733
3734         t1 = y * z
3735         t2 = t1 * x
3736         t3 = t2 * t2
3737         t4 = t1 * t3
3738         result = t4 * z
3739
3740      If we instead ignored the cached y * z and first multiplied by
3741      the __builtin_pow opportunity z * z, we would get the inferior:
3742
3743         t1 = y * z
3744         t2 = t1 * x
3745         t3 = t2 * t2
3746         t4 = z * z
3747         t5 = t3 * t4
3748         result = t5 * y  */
3749
3750   vec_len = repeat_factor_vec.length ();
3751
3752   /* Repeatedly look for opportunities to create a builtin_powi call.  */
3753   while (true)
3754     {
3755       HOST_WIDE_INT power;
3756
3757       /* First look for the largest cached product of factors from
3758          preceding iterations.  If found, create a builtin_powi for
3759          it if the minimum occurrence count for its factors is at
3760          least 2, or just use this cached product as our next
3761          multiplicand if the minimum occurrence count is 1.  */
3762       FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3763         {
3764           if (rf1->repr && rf1->count > 0)
3765             break;
3766         }
3767
3768       if (j < vec_len)
3769         {
3770           power = rf1->count;
3771
3772           if (power == 1)
3773             {
3774               iter_result = rf1->repr;
3775
3776               if (dump_file && (dump_flags & TDF_DETAILS))
3777                 {
3778                   unsigned elt;
3779                   repeat_factor_t rf;
3780                   fputs ("Multiplying by cached product ", dump_file);
3781                   for (elt = j; elt < vec_len; elt++)
3782                     {
3783                       rf = &repeat_factor_vec[elt];
3784                       print_generic_expr (dump_file, rf->factor, 0);
3785                       if (elt < vec_len - 1)
3786                         fputs (" * ", dump_file);
3787                     }
3788                   fputs ("\n", dump_file);
3789                 }
3790             }
3791           else
3792             {
3793               iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3794               pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3795                                             build_int_cst (integer_type_node,
3796                                                            power));
3797               gimple_call_set_lhs (pow_stmt, iter_result);
3798               gimple_set_location (pow_stmt, gimple_location (stmt));
3799               gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3800
3801               if (dump_file && (dump_flags & TDF_DETAILS))
3802                 {
3803                   unsigned elt;
3804                   repeat_factor_t rf;
3805                   fputs ("Building __builtin_pow call for cached product (",
3806                          dump_file);
3807                   for (elt = j; elt < vec_len; elt++)
3808                     {
3809                       rf = &repeat_factor_vec[elt];
3810                       print_generic_expr (dump_file, rf->factor, 0);
3811                       if (elt < vec_len - 1)
3812                         fputs (" * ", dump_file);
3813                     }
3814                   fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
3815                            power);
3816                 }
3817             }
3818         }
3819       else
3820         {
3821           /* Otherwise, find the first factor in the repeated factor
3822              vector whose occurrence count is at least 2.  If no such
3823              factor exists, there are no builtin_powi opportunities
3824              remaining.  */
3825           FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3826             {
3827               if (rf1->count >= 2)
3828                 break;
3829             }
3830
3831           if (j >= vec_len)
3832             break;
3833
3834           power = rf1->count;
3835
3836           if (dump_file && (dump_flags & TDF_DETAILS))
3837             {
3838               unsigned elt;
3839               repeat_factor_t rf;
3840               fputs ("Building __builtin_pow call for (", dump_file);
3841               for (elt = j; elt < vec_len; elt++)
3842                 {
3843                   rf = &repeat_factor_vec[elt];
3844                   print_generic_expr (dump_file, rf->factor, 0);
3845                   if (elt < vec_len - 1)
3846                     fputs (" * ", dump_file);
3847                 }
3848               fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
3849             }
3850
3851           reassociate_stats.pows_created++;
3852
3853           /* Visit each element of the vector in reverse order (so that
3854              high-occurrence elements are visited first, and within the
3855              same occurrence count, lower-ranked elements are visited
3856              first).  Form a linear product of all elements in this order
3857              whose occurrencce count is at least that of element J.
3858              Record the SSA name representing the product of each element
3859              with all subsequent elements in the vector.  */
3860           if (j == vec_len - 1)
3861             rf1->repr = rf1->factor;
3862           else
3863             {
3864               for (ii = vec_len - 2; ii >= (int)j; ii--)
3865                 {
3866                   tree op1, op2;
3867
3868                   rf1 = &repeat_factor_vec[ii];
3869                   rf2 = &repeat_factor_vec[ii + 1];
3870
3871                   /* Init the last factor's representative to be itself.  */
3872                   if (!rf2->repr)
3873                     rf2->repr = rf2->factor;
3874
3875                   op1 = rf1->factor;
3876                   op2 = rf2->repr;
3877
3878                   target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
3879                   mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
3880                                                            target_ssa,
3881                                                            op1, op2);
3882                   gimple_set_location (mul_stmt, gimple_location (stmt));
3883                   gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3884                   rf1->repr = target_ssa;
3885
3886                   /* Don't reprocess the multiply we just introduced.  */
3887                   gimple_set_visited (mul_stmt, true);
3888                 }
3889             }
3890
3891           /* Form a call to __builtin_powi for the maximum product
3892              just formed, raised to the power obtained earlier.  */
3893           rf1 = &repeat_factor_vec[j];
3894           iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3895           pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3896                                         build_int_cst (integer_type_node,
3897                                                        power));
3898           gimple_call_set_lhs (pow_stmt, iter_result);
3899           gimple_set_location (pow_stmt, gimple_location (stmt));
3900           gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3901         }
3902
3903       /* If we previously formed at least one other builtin_powi call,
3904          form the product of this one and those others.  */
3905       if (result)
3906         {
3907           tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
3908           mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
3909                                                    result, iter_result);
3910           gimple_set_location (mul_stmt, gimple_location (stmt));
3911           gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3912           gimple_set_visited (mul_stmt, true);
3913           result = new_result;
3914         }
3915       else
3916         result = iter_result;
3917
3918       /* Decrement the occurrence count of each element in the product
3919          by the count found above, and remove this many copies of each
3920          factor from OPS.  */
3921       for (i = j; i < vec_len; i++)
3922         {
3923           unsigned k = power;
3924           unsigned n;
3925
3926           rf1 = &repeat_factor_vec[i];
3927           rf1->count -= power;
3928
3929           FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
3930             {
3931               if (oe->op == rf1->factor)
3932                 {
3933                   if (oe->count <= k)
3934                     {
3935                       ops->ordered_remove (n);
3936                       k -= oe->count;
3937
3938                       if (k == 0)
3939                         break;
3940                     }
3941                   else
3942                     {
3943                       oe->count -= k;
3944                       break;
3945                     }
3946                 }
3947             }
3948         }
3949     }
3950
3951   /* At this point all elements in the repeated factor vector have a
3952      remaining occurrence count of 0 or 1, and those with a count of 1
3953      don't have cached representatives.  Re-sort the ops vector and
3954      clean up.  */
3955   ops->qsort (sort_by_operand_rank);
3956   repeat_factor_vec.release ();
3957
3958   /* Return the final product computed herein.  Note that there may
3959      still be some elements with single occurrence count left in OPS;
3960      those will be handled by the normal reassociation logic.  */
3961   return result;
3962 }
3963
3964 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
3965
3966 static void
3967 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
3968 {
3969   tree rhs1;
3970
3971   if (dump_file && (dump_flags & TDF_DETAILS))
3972     {
3973       fprintf (dump_file, "Transforming ");
3974       print_gimple_stmt (dump_file, stmt, 0, 0);
3975     }
3976
3977   rhs1 = gimple_assign_rhs1 (stmt);
3978   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
3979   update_stmt (stmt);
3980   remove_visited_stmt_chain (rhs1);
3981
3982   if (dump_file && (dump_flags & TDF_DETAILS))
3983     {
3984       fprintf (dump_file, " into ");
3985       print_gimple_stmt (dump_file, stmt, 0, 0);
3986     }
3987 }
3988
3989 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */
3990
3991 static void
3992 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
3993                             tree rhs1, tree rhs2)
3994 {
3995   if (dump_file && (dump_flags & TDF_DETAILS))
3996     {
3997       fprintf (dump_file, "Transforming ");
3998       print_gimple_stmt (dump_file, stmt, 0, 0);
3999     }
4000
4001   gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4002   update_stmt (gsi_stmt (*gsi));
4003   remove_visited_stmt_chain (rhs1);
4004
4005   if (dump_file && (dump_flags & TDF_DETAILS))
4006     {
4007       fprintf (dump_file, " into ");
4008       print_gimple_stmt (dump_file, stmt, 0, 0);
4009     }
4010 }
4011
4012 /* Reassociate expressions in basic block BB and its post-dominator as
4013    children.  */
4014
4015 static void
4016 reassociate_bb (basic_block bb)
4017 {
4018   gimple_stmt_iterator gsi;
4019   basic_block son;
4020   gimple stmt = last_stmt (bb);
4021
4022   if (stmt && !gimple_visited_p (stmt))
4023     maybe_optimize_range_tests (stmt);
4024
4025   for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4026     {
4027       stmt = gsi_stmt (gsi);
4028
4029       if (is_gimple_assign (stmt)
4030           && !stmt_could_throw_p (stmt))
4031         {
4032           tree lhs, rhs1, rhs2;
4033           enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4034
4035           /* If this is not a gimple binary expression, there is
4036              nothing for us to do with it.  */
4037           if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4038             continue;
4039
4040           /* If this was part of an already processed statement,
4041              we don't need to touch it again. */
4042           if (gimple_visited_p (stmt))
4043             {
4044               /* This statement might have become dead because of previous
4045                  reassociations.  */
4046               if (has_zero_uses (gimple_get_lhs (stmt)))
4047                 {
4048                   gsi_remove (&gsi, true);
4049                   release_defs (stmt);
4050                   /* We might end up removing the last stmt above which
4051                      places the iterator to the end of the sequence.
4052                      Reset it to the last stmt in this case which might
4053                      be the end of the sequence as well if we removed
4054                      the last statement of the sequence.  In which case
4055                      we need to bail out.  */
4056                   if (gsi_end_p (gsi))
4057                     {
4058                       gsi = gsi_last_bb (bb);
4059                       if (gsi_end_p (gsi))
4060                         break;
4061                     }
4062                 }
4063               continue;
4064             }
4065
4066           lhs = gimple_assign_lhs (stmt);
4067           rhs1 = gimple_assign_rhs1 (stmt);
4068           rhs2 = gimple_assign_rhs2 (stmt);
4069
4070           /* For non-bit or min/max operations we can't associate
4071              all types.  Verify that here.  */
4072           if (rhs_code != BIT_IOR_EXPR
4073               && rhs_code != BIT_AND_EXPR
4074               && rhs_code != BIT_XOR_EXPR
4075               && rhs_code != MIN_EXPR
4076               && rhs_code != MAX_EXPR
4077               && (!can_reassociate_p (lhs)
4078                   || !can_reassociate_p (rhs1)
4079                   || !can_reassociate_p (rhs2)))
4080             continue;
4081
4082           if (associative_tree_code (rhs_code))
4083             {
4084               vec<operand_entry_t> ops = vNULL;
4085               tree powi_result = NULL_TREE;
4086
4087               /* There may be no immediate uses left by the time we
4088                  get here because we may have eliminated them all.  */
4089               if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4090                 continue;
4091
4092               gimple_set_visited (stmt, true);
4093               linearize_expr_tree (&ops, stmt, true, true);
4094               ops.qsort (sort_by_operand_rank);
4095               optimize_ops_list (rhs_code, &ops);
4096               if (undistribute_ops_list (rhs_code, &ops,
4097                                          loop_containing_stmt (stmt)))
4098                 {
4099                   ops.qsort (sort_by_operand_rank);
4100                   optimize_ops_list (rhs_code, &ops);
4101                 }
4102
4103               if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4104                 optimize_range_tests (rhs_code, &ops);
4105
4106               if (first_pass_instance
4107                   && rhs_code == MULT_EXPR
4108                   && flag_unsafe_math_optimizations)
4109                 powi_result = attempt_builtin_powi (stmt, &ops);
4110
4111               /* If the operand vector is now empty, all operands were
4112                  consumed by the __builtin_powi optimization.  */
4113               if (ops.length () == 0)
4114                 transform_stmt_to_copy (&gsi, stmt, powi_result);
4115               else if (ops.length () == 1)
4116                 {
4117                   tree last_op = ops.last ()->op;
4118
4119                   if (powi_result)
4120                     transform_stmt_to_multiply (&gsi, stmt, last_op,
4121                                                 powi_result);
4122                   else
4123                     transform_stmt_to_copy (&gsi, stmt, last_op);
4124                 }
4125               else
4126                 {
4127                   enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4128                   int ops_num = ops.length ();
4129                   int width = get_reassociation_width (ops_num, rhs_code, mode);
4130
4131                   if (dump_file && (dump_flags & TDF_DETAILS))
4132                     fprintf (dump_file,
4133                              "Width = %d was chosen for reassociation\n", width);
4134
4135                   if (width > 1
4136                       && ops.length () > 3)
4137                     rewrite_expr_tree_parallel (stmt, width, ops);
4138                   else
4139                     rewrite_expr_tree (stmt, 0, ops, false);
4140
4141                   /* If we combined some repeated factors into a
4142                      __builtin_powi call, multiply that result by the
4143                      reassociated operands.  */
4144                   if (powi_result)
4145                     {
4146                       gimple mul_stmt;
4147                       tree type = TREE_TYPE (gimple_get_lhs (stmt));
4148                       tree target_ssa = make_temp_ssa_name (type, NULL,
4149                                                             "reassocpow");
4150                       gimple_set_lhs (stmt, target_ssa);
4151                       update_stmt (stmt);
4152                       mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4153                                                                powi_result,
4154                                                                target_ssa);
4155                       gimple_set_location (mul_stmt, gimple_location (stmt));
4156                       gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4157                     }
4158                 }
4159
4160               ops.release ();
4161             }
4162         }
4163     }
4164   for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4165        son;
4166        son = next_dom_son (CDI_POST_DOMINATORS, son))
4167     reassociate_bb (son);
4168 }
4169
4170 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4171 void debug_ops_vector (vec<operand_entry_t> ops);
4172
4173 /* Dump the operand entry vector OPS to FILE.  */
4174
4175 void
4176 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4177 {
4178   operand_entry_t oe;
4179   unsigned int i;
4180
4181   FOR_EACH_VEC_ELT (ops, i, oe)
4182     {
4183       fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4184       print_generic_expr (file, oe->op, 0);
4185     }
4186 }
4187
4188 /* Dump the operand entry vector OPS to STDERR.  */
4189
4190 DEBUG_FUNCTION void
4191 debug_ops_vector (vec<operand_entry_t> ops)
4192 {
4193   dump_ops_vector (stderr, ops);
4194 }
4195
4196 static void
4197 do_reassoc (void)
4198 {
4199   break_up_subtract_bb (ENTRY_BLOCK_PTR);
4200   reassociate_bb (EXIT_BLOCK_PTR);
4201 }
4202
4203 /* Initialize the reassociation pass.  */
4204
4205 static void
4206 init_reassoc (void)
4207 {
4208   int i;
4209   long rank = 2;
4210   int *bbs = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
4211
4212   /* Find the loops, so that we can prevent moving calculations in
4213      them.  */
4214   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4215
4216   memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4217
4218   operand_entry_pool = create_alloc_pool ("operand entry pool",
4219                                           sizeof (struct operand_entry), 30);
4220   next_operand_entry_id = 0;
4221
4222   /* Reverse RPO (Reverse Post Order) will give us something where
4223      deeper loops come later.  */
4224   pre_and_rev_post_order_compute (NULL, bbs, false);
4225   bb_rank = XCNEWVEC (long, last_basic_block);
4226   operand_rank = pointer_map_create ();
4227
4228   /* Give each default definition a distinct rank.  This includes
4229      parameters and the static chain.  Walk backwards over all
4230      SSA names so that we get proper rank ordering according
4231      to tree_swap_operands_p.  */
4232   for (i = num_ssa_names - 1; i > 0; --i)
4233     {
4234       tree name = ssa_name (i);
4235       if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4236         insert_operand_rank (name, ++rank);
4237     }
4238
4239   /* Set up rank for each BB  */
4240   for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
4241     bb_rank[bbs[i]] = ++rank  << 16;
4242
4243   free (bbs);
4244   calculate_dominance_info (CDI_POST_DOMINATORS);
4245   plus_negates = vNULL;
4246 }
4247
4248 /* Cleanup after the reassociation pass, and print stats if
4249    requested.  */
4250
4251 static void
4252 fini_reassoc (void)
4253 {
4254   statistics_counter_event (cfun, "Linearized",
4255                             reassociate_stats.linearized);
4256   statistics_counter_event (cfun, "Constants eliminated",
4257                             reassociate_stats.constants_eliminated);
4258   statistics_counter_event (cfun, "Ops eliminated",
4259                             reassociate_stats.ops_eliminated);
4260   statistics_counter_event (cfun, "Statements rewritten",
4261                             reassociate_stats.rewritten);
4262   statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4263                             reassociate_stats.pows_encountered);
4264   statistics_counter_event (cfun, "Built-in powi calls created",
4265                             reassociate_stats.pows_created);
4266
4267   pointer_map_destroy (operand_rank);
4268   free_alloc_pool (operand_entry_pool);
4269   free (bb_rank);
4270   plus_negates.release ();
4271   free_dominance_info (CDI_POST_DOMINATORS);
4272   loop_optimizer_finalize ();
4273 }
4274
4275 /* Gate and execute functions for Reassociation.  */
4276
4277 static unsigned int
4278 execute_reassoc (void)
4279 {
4280   init_reassoc ();
4281
4282   do_reassoc ();
4283   repropagate_negates ();
4284
4285   fini_reassoc ();
4286   return 0;
4287 }
4288
4289 static bool
4290 gate_tree_ssa_reassoc (void)
4291 {
4292   return flag_tree_reassoc != 0;
4293 }
4294
4295 struct gimple_opt_pass pass_reassoc =
4296 {
4297  {
4298   GIMPLE_PASS,
4299   "reassoc",                            /* name */
4300   OPTGROUP_NONE,                        /* optinfo_flags */
4301   gate_tree_ssa_reassoc,                /* gate */
4302   execute_reassoc,                      /* execute */
4303   NULL,                                 /* sub */
4304   NULL,                                 /* next */
4305   0,                                    /* static_pass_number */
4306   TV_TREE_REASSOC,                      /* tv_id */
4307   PROP_cfg | PROP_ssa,                  /* properties_required */
4308   0,                                    /* properties_provided */
4309   0,                                    /* properties_destroyed */
4310   0,                                    /* todo_flags_start */
4311   TODO_verify_ssa
4312     | TODO_update_ssa_only_virtuals
4313     | TODO_verify_flow
4314     | TODO_ggc_collect                  /* todo_flags_finish */
4315  }
4316 };