gcc/tree-ssa-reassoc.c

   1 /* Reassociation for trees.
   2    Copyright (C) 2005-2013 Free Software Foundation, Inc.
   3    Contributed by Daniel Berlin <dan@dberlin.org>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 3, or (at your option)
  10 any later version.
  11
  12 GCC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "hash-table.h"
  25 #include "tm.h"
  26 #include "tree.h"
  27 #include "basic-block.h"
  28 #include "gimple-pretty-print.h"
  29 #include "tree-inline.h"
  30 #include "tree-ssa.h"
  31 #include "gimple.h"
  32 #include "tree-iterator.h"
  33 #include "tree-pass.h"
  34 #include "alloc-pool.h"
  35 #include "vec.h"
  36 #include "langhooks.h"
  37 #include "pointer-set.h"
  38 #include "cfgloop.h"
  39 #include "flags.h"
  40 #include "target.h"
  41 #include "params.h"
  42 #include "diagnostic-core.h"
  43
  44 /*  This is a simple global reassociation pass.  It is, in part, based
  45     on the LLVM pass of the same name (They do some things more/less
  46     than we do, in different orders, etc).
  47
  48     It consists of five steps:
  49
  50     1. Breaking up subtract operations into addition + negate, where
  51     it would promote the reassociation of adds.
  52
  53     2. Left linearization of the expression trees, so that (A+B)+(C+D)
  54     becomes (((A+B)+C)+D), which is easier for us to rewrite later.
  55     During linearization, we place the operands of the binary
  56     expressions into a vector of operand_entry_t
  57
  58     3. Optimization of the operand lists, eliminating things like a +
  59     -a, a & a, etc.
  60
  61     3a. Combine repeated factors with the same occurrence counts
  62     into a __builtin_powi call that will later be optimized into
  63     an optimal number of multiplies.
  64
  65     4. Rewrite the expression trees we linearized and optimized so
  66     they are in proper rank order.
  67
  68     5. Repropagate negates, as nothing else will clean it up ATM.
  69
  70     A bit of theory on #4, since nobody seems to write anything down
  71     about why it makes sense to do it the way they do it:
  72
  73     We could do this much nicer theoretically, but don't (for reasons
  74     explained after how to do it theoretically nice :P).
  75
  76     In order to promote the most redundancy elimination, you want
  77     binary expressions whose operands are the same rank (or
  78     preferably, the same value) exposed to the redundancy eliminator,
  79     for possible elimination.
  80
  81     So the way to do this if we really cared, is to build the new op
  82     tree from the leaves to the roots, merging as you go, and putting the
  83     new op on the end of the worklist, until you are left with one
  84     thing on the worklist.
  85
  86     IE if you have to rewrite the following set of operands (listed with
  87     rank in parentheses), with opcode PLUS_EXPR:
  88
  89     a (1),  b (1),  c (1),  d (2), e (2)
  90
  91
  92     We start with our merge worklist empty, and the ops list with all of
  93     those on it.
  94
  95     You want to first merge all leaves of the same rank, as much as
  96     possible.
  97
  98     So first build a binary op of
  99
 100     mergetmp = a + b, and put "mergetmp" on the merge worklist.
 101
 102     Because there is no three operand form of PLUS_EXPR, c is not going to
 103     be exposed to redundancy elimination as a rank 1 operand.
 104
 105     So you might as well throw it on the merge worklist (you could also
 106     consider it to now be a rank two operand, and merge it with d and e,
 107     but in this case, you then have evicted e from a binary op. So at
 108     least in this situation, you can't win.)
 109
 110     Then build a binary op of d + e
 111     mergetmp2 = d + e
 112
 113     and put mergetmp2 on the merge worklist.
 114
 115     so merge worklist = {mergetmp, c, mergetmp2}
 116
 117     Continue building binary ops of these operations until you have only
 118     one operation left on the worklist.
 119
 120     So we have
 121
 122     build binary op
 123     mergetmp3 = mergetmp + c
 124
 125     worklist = {mergetmp2, mergetmp3}
 126
 127     mergetmp4 = mergetmp2 + mergetmp3
 128
 129     worklist = {mergetmp4}
 130
 131     because we have one operation left, we can now just set the original
 132     statement equal to the result of that operation.
 133
 134     This will at least expose a + b  and d + e to redundancy elimination
 135     as binary operations.
 136
 137     For extra points, you can reuse the old statements to build the
 138     mergetmps, since you shouldn't run out.
 139
 140     So why don't we do this?
 141
 142     Because it's expensive, and rarely will help.  Most trees we are
 143     reassociating have 3 or less ops.  If they have 2 ops, they already
 144     will be written into a nice single binary op.  If you have 3 ops, a
 145     single simple check suffices to tell you whether the first two are of the
 146     same rank.  If so, you know to order it
 147
 148     mergetmp = op1 + op2
 149     newstmt = mergetmp + op3
 150
 151     instead of
 152     mergetmp = op2 + op3
 153     newstmt = mergetmp + op1
 154
 155     If all three are of the same rank, you can't expose them all in a
 156     single binary operator anyway, so the above is *still* the best you
 157     can do.
 158
 159     Thus, this is what we do.  When we have three ops left, we check to see
 160     what order to put them in, and call it a day.  As a nod to vector sum
 161     reduction, we check if any of the ops are really a phi node that is a
 162     destructive update for the associating op, and keep the destructive
 163     update together for vector sum reduction recognition.  */
 164
 165
 166 /* Statistics */
 167 static struct
 168 {
 169   int linearized;
 170   int constants_eliminated;
 171   int ops_eliminated;
 172   int rewritten;
 173   int pows_encountered;
 174   int pows_created;
 175 } reassociate_stats;
 176
 177 /* Operator, rank pair.  */
 178 typedef struct operand_entry
 179 {
 180   unsigned int rank;
 181   int id;
 182   tree op;
 183   unsigned int count;
 184 } *operand_entry_t;
 185
 186 static alloc_pool operand_entry_pool;
 187
 188 /* This is used to assign a unique ID to each struct operand_entry
 189    so that qsort results are identical on different hosts.  */
 190 static int next_operand_entry_id;
 191
 192 /* Starting rank number for a given basic block, so that we can rank
 193    operations using unmovable instructions in that BB based on the bb
 194    depth.  */
 195 static long *bb_rank;
 196
 197 /* Operand->rank hashtable.  */
 198 static struct pointer_map_t *operand_rank;
 199
 200 /* Forward decls.  */
 201 static long get_rank (tree);
 202
 203
 204 /* Bias amount for loop-carried phis.  We want this to be larger than
 205    the depth of any reassociation tree we can see, but not larger than
 206    the rank difference between two blocks.  */
 207 #define PHI_LOOP_BIAS (1 << 15)
 208
 209 /* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
 210    an innermost loop, and the phi has only a single use which is inside
 211    the loop, then the rank is the block rank of the loop latch plus an
 212    extra bias for the loop-carried dependence.  This causes expressions
 213    calculated into an accumulator variable to be independent for each
 214    iteration of the loop.  If STMT is some other phi, the rank is the
 215    block rank of its containing block.  */
 216 static long
 217 phi_rank (gimple stmt)
 218 {
 219   basic_block bb = gimple_bb (stmt);
 220   struct loop *father = bb->loop_father;
 221   tree res;
 222   unsigned i;
 223   use_operand_p use;
 224   gimple use_stmt;
 225
 226   /* We only care about real loops (those with a latch).  */
 227   if (!father->latch)
 228     return bb_rank[bb->index];
 229
 230   /* Interesting phis must be in headers of innermost loops.  */
 231   if (bb != father->header
 232       || father->inner)
 233     return bb_rank[bb->index];
 234
 235   /* Ignore virtual SSA_NAMEs.  */
 236   res = gimple_phi_result (stmt);
 237   if (virtual_operand_p (res))
 238     return bb_rank[bb->index];
 239
 240   /* The phi definition must have a single use, and that use must be
 241      within the loop.  Otherwise this isn't an accumulator pattern.  */
 242   if (!single_imm_use (res, &use, &use_stmt)
 243       || gimple_bb (use_stmt)->loop_father != father)
 244     return bb_rank[bb->index];
 245
 246   /* Look for phi arguments from within the loop.  If found, bias this phi.  */
 247   for (i = 0; i < gimple_phi_num_args (stmt); i++)
 248     {
 249       tree arg = gimple_phi_arg_def (stmt, i);
 250       if (TREE_CODE (arg) == SSA_NAME
 251           && !SSA_NAME_IS_DEFAULT_DEF (arg))
 252         {
 253           gimple def_stmt = SSA_NAME_DEF_STMT (arg);
 254           if (gimple_bb (def_stmt)->loop_father == father)
 255             return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
 256         }
 257     }
 258
 259   /* Must be an uninteresting phi.  */
 260   return bb_rank[bb->index];
 261 }
 262
 263 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
 264    loop-carried dependence of an innermost loop, return TRUE; else
 265    return FALSE.  */
 266 static bool
 267 loop_carried_phi (tree exp)
 268 {
 269   gimple phi_stmt;
 270   long block_rank;
 271
 272   if (TREE_CODE (exp) != SSA_NAME
 273       || SSA_NAME_IS_DEFAULT_DEF (exp))
 274     return false;
 275
 276   phi_stmt = SSA_NAME_DEF_STMT (exp);
 277
 278   if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
 279     return false;
 280
 281   /* Non-loop-carried phis have block rank.  Loop-carried phis have
 282      an additional bias added in.  If this phi doesn't have block rank,
 283      it's biased and should not be propagated.  */
 284   block_rank = bb_rank[gimple_bb (phi_stmt)->index];
 285
 286   if (phi_rank (phi_stmt) != block_rank)
 287     return true;
 288
 289   return false;
 290 }
 291
 292 /* Return the maximum of RANK and the rank that should be propagated
 293    from expression OP.  For most operands, this is just the rank of OP.
 294    For loop-carried phis, the value is zero to avoid undoing the bias
 295    in favor of the phi.  */
 296 static long
 297 propagate_rank (long rank, tree op)
 298 {
 299   long op_rank;
 300
 301   if (loop_carried_phi (op))
 302     return rank;
 303
 304   op_rank = get_rank (op);
 305
 306   return MAX (rank, op_rank);
 307 }
 308
 309 /* Look up the operand rank structure for expression E.  */
 310
 311 static inline long
 312 find_operand_rank (tree e)
 313 {
 314   void **slot = pointer_map_contains (operand_rank, e);
 315   return slot ? (long) (intptr_t) *slot : -1;
 316 }
 317
 318 /* Insert {E,RANK} into the operand rank hashtable.  */
 319
 320 static inline void
 321 insert_operand_rank (tree e, long rank)
 322 {
 323   void **slot;
 324   gcc_assert (rank > 0);
 325   slot = pointer_map_insert (operand_rank, e);
 326   gcc_assert (!*slot);
 327   *slot = (void *) (intptr_t) rank;
 328 }
 329
 330 /* Given an expression E, return the rank of the expression.  */
 331
 332 static long
 333 get_rank (tree e)
 334 {
 335   /* Constants have rank 0.  */
 336   if (is_gimple_min_invariant (e))
 337     return 0;
 338
 339   /* SSA_NAME's have the rank of the expression they are the result
 340      of.
 341      For globals and uninitialized values, the rank is 0.
 342      For function arguments, use the pre-setup rank.
 343      For PHI nodes, stores, asm statements, etc, we use the rank of
 344      the BB.
 345      For simple operations, the rank is the maximum rank of any of
 346      its operands, or the bb_rank, whichever is less.
 347      I make no claims that this is optimal, however, it gives good
 348      results.  */
 349
 350   /* We make an exception to the normal ranking system to break
 351      dependences of accumulator variables in loops.  Suppose we
 352      have a simple one-block loop containing:
 353
 354        x_1 = phi(x_0, x_2)
 355        b = a + x_1
 356        c = b + d
 357        x_2 = c + e
 358
 359      As shown, each iteration of the calculation into x is fully
 360      dependent upon the iteration before it.  We would prefer to
 361      see this in the form:
 362
 363        x_1 = phi(x_0, x_2)
 364        b = a + d
 365        c = b + e
 366        x_2 = c + x_1
 367
 368      If the loop is unrolled, the calculations of b and c from
 369      different iterations can be interleaved.
 370
 371      To obtain this result during reassociation, we bias the rank
 372      of the phi definition x_1 upward, when it is recognized as an
 373      accumulator pattern.  The artificial rank causes it to be
 374      added last, providing the desired independence.  */
 375
 376   if (TREE_CODE (e) == SSA_NAME)
 377     {
 378       gimple stmt;
 379       long rank;
 380       int i, n;
 381       tree op;
 382
 383       if (SSA_NAME_IS_DEFAULT_DEF (e))
 384         return find_operand_rank (e);
 385
 386       stmt = SSA_NAME_DEF_STMT (e);
 387       if (gimple_code (stmt) == GIMPLE_PHI)
 388         return phi_rank (stmt);
 389
 390       if (!is_gimple_assign (stmt)
 391           || gimple_vdef (stmt))
 392         return bb_rank[gimple_bb (stmt)->index];
 393
 394       /* If we already have a rank for this expression, use that.  */
 395       rank = find_operand_rank (e);
 396       if (rank != -1)
 397         return rank;
 398
 399       /* Otherwise, find the maximum rank for the operands.  As an
 400          exception, remove the bias from loop-carried phis when propagating
 401          the rank so that dependent operations are not also biased.  */
 402       rank = 0;
 403       if (gimple_assign_single_p (stmt))
 404         {
 405           tree rhs = gimple_assign_rhs1 (stmt);
 406           n = TREE_OPERAND_LENGTH (rhs);
 407           if (n == 0)
 408             rank = propagate_rank (rank, rhs);
 409           else
 410             {
 411               for (i = 0; i < n; i++)
 412                 {
 413                   op = TREE_OPERAND (rhs, i);
 414
 415                   if (op != NULL_TREE)
 416                     rank = propagate_rank (rank, op);
 417                 }
 418             }
 419         }
 420       else
 421         {
 422           n = gimple_num_ops (stmt);
 423           for (i = 1; i < n; i++)
 424             {
 425               op = gimple_op (stmt, i);
 426               gcc_assert (op);
 427               rank = propagate_rank (rank, op);
 428             }
 429         }
 430
 431       if (dump_file && (dump_flags & TDF_DETAILS))
 432         {
 433           fprintf (dump_file, "Rank for ");
 434           print_generic_expr (dump_file, e, 0);
 435           fprintf (dump_file, " is %ld\n", (rank + 1));
 436         }
 437
 438       /* Note the rank in the hashtable so we don't recompute it.  */
 439       insert_operand_rank (e, (rank + 1));
 440       return (rank + 1);
 441     }
 442
 443   /* Globals, etc,  are rank 0 */
 444   return 0;
 445 }
 446
 447
 448 /* We want integer ones to end up last no matter what, since they are
 449    the ones we can do the most with.  */
 450 #define INTEGER_CONST_TYPE 1 << 3
 451 #define FLOAT_CONST_TYPE 1 << 2
 452 #define OTHER_CONST_TYPE 1 << 1
 453
 454 /* Classify an invariant tree into integer, float, or other, so that
 455    we can sort them to be near other constants of the same type.  */
 456 static inline int
 457 constant_type (tree t)
 458 {
 459   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
 460     return INTEGER_CONST_TYPE;
 461   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
 462     return FLOAT_CONST_TYPE;
 463   else
 464     return OTHER_CONST_TYPE;
 465 }
 466
 467 /* qsort comparison function to sort operand entries PA and PB by rank
 468    so that the sorted array is ordered by rank in decreasing order.  */
 469 static int
 470 sort_by_operand_rank (const void *pa, const void *pb)
 471 {
 472   const operand_entry_t oea = *(const operand_entry_t *)pa;
 473   const operand_entry_t oeb = *(const operand_entry_t *)pb;
 474
 475   /* It's nicer for optimize_expression if constants that are likely
 476      to fold when added/multiplied//whatever are put next to each
 477      other.  Since all constants have rank 0, order them by type.  */
 478   if (oeb->rank == 0 && oea->rank == 0)
 479     {
 480       if (constant_type (oeb->op) != constant_type (oea->op))
 481         return constant_type (oeb->op) - constant_type (oea->op);
 482       else
 483         /* To make sorting result stable, we use unique IDs to determine
 484            order.  */
 485         return oeb->id - oea->id;
 486     }
 487
 488   /* Lastly, make sure the versions that are the same go next to each
 489      other.  We use SSA_NAME_VERSION because it's stable.  */
 490   if ((oeb->rank - oea->rank == 0)
 491       && TREE_CODE (oea->op) == SSA_NAME
 492       && TREE_CODE (oeb->op) == SSA_NAME)
 493     {
 494       if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
 495         return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
 496       else
 497         return oeb->id - oea->id;
 498     }
 499
 500   if (oeb->rank != oea->rank)
 501     return oeb->rank - oea->rank;
 502   else
 503     return oeb->id - oea->id;
 504 }
 505
 506 /* Add an operand entry to *OPS for the tree operand OP.  */
 507
 508 static void
 509 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
 510 {
 511   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
 512
 513   oe->op = op;
 514   oe->rank = get_rank (op);
 515   oe->id = next_operand_entry_id++;
 516   oe->count = 1;
 517   ops->safe_push (oe);
 518 }
 519
 520 /* Add an operand entry to *OPS for the tree operand OP with repeat
 521    count REPEAT.  */
 522
 523 static void
 524 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
 525                        HOST_WIDE_INT repeat)
 526 {
 527   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
 528
 529   oe->op = op;
 530   oe->rank = get_rank (op);
 531   oe->id = next_operand_entry_id++;
 532   oe->count = repeat;
 533   ops->safe_push (oe);
 534
 535   reassociate_stats.pows_encountered++;
 536 }
 537
 538 /* Return true if STMT is reassociable operation containing a binary
 539    operation with tree code CODE, and is inside LOOP.  */
 540
 541 static bool
 542 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
 543 {
 544   basic_block bb = gimple_bb (stmt);
 545
 546   if (gimple_bb (stmt) == NULL)
 547     return false;
 548
 549   if (!flow_bb_inside_loop_p (loop, bb))
 550     return false;
 551
 552   if (is_gimple_assign (stmt)
 553       && gimple_assign_rhs_code (stmt) == code
 554       && has_single_use (gimple_assign_lhs (stmt)))
 555     return true;
 556
 557   return false;
 558 }
 559
 560
 561 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
 562    operand of the negate operation.  Otherwise, return NULL.  */
 563
 564 static tree
 565 get_unary_op (tree name, enum tree_code opcode)
 566 {
 567   gimple stmt = SSA_NAME_DEF_STMT (name);
 568
 569   if (!is_gimple_assign (stmt))
 570     return NULL_TREE;
 571
 572   if (gimple_assign_rhs_code (stmt) == opcode)
 573     return gimple_assign_rhs1 (stmt);
 574   return NULL_TREE;
 575 }
 576
 577 /* If CURR and LAST are a pair of ops that OPCODE allows us to
 578    eliminate through equivalences, do so, remove them from OPS, and
 579    return true.  Otherwise, return false.  */
 580
 581 static bool
 582 eliminate_duplicate_pair (enum tree_code opcode,
 583                           vec<operand_entry_t> *ops,
 584                           bool *all_done,
 585                           unsigned int i,
 586                           operand_entry_t curr,
 587                           operand_entry_t last)
 588 {
 589
 590   /* If we have two of the same op, and the opcode is & |, min, or max,
 591      we can eliminate one of them.
 592      If we have two of the same op, and the opcode is ^, we can
 593      eliminate both of them.  */
 594
 595   if (last && last->op == curr->op)
 596     {
 597       switch (opcode)
 598         {
 599         case MAX_EXPR:
 600         case MIN_EXPR:
 601         case BIT_IOR_EXPR:
 602         case BIT_AND_EXPR:
 603           if (dump_file && (dump_flags & TDF_DETAILS))
 604             {
 605               fprintf (dump_file, "Equivalence: ");
 606               print_generic_expr (dump_file, curr->op, 0);
 607               fprintf (dump_file, " [&|minmax] ");
 608               print_generic_expr (dump_file, last->op, 0);
 609               fprintf (dump_file, " -> ");
 610               print_generic_stmt (dump_file, last->op, 0);
 611             }
 612
 613           ops->ordered_remove (i);
 614           reassociate_stats.ops_eliminated ++;
 615
 616           return true;
 617
 618         case BIT_XOR_EXPR:
 619           if (dump_file && (dump_flags & TDF_DETAILS))
 620             {
 621               fprintf (dump_file, "Equivalence: ");
 622               print_generic_expr (dump_file, curr->op, 0);
 623               fprintf (dump_file, " ^ ");
 624               print_generic_expr (dump_file, last->op, 0);
 625               fprintf (dump_file, " -> nothing\n");
 626             }
 627
 628           reassociate_stats.ops_eliminated += 2;
 629
 630           if (ops->length () == 2)
 631             {
 632               ops->create (0);
 633               add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
 634               *all_done = true;
 635             }
 636           else
 637             {
 638               ops->ordered_remove (i-1);
 639               ops->ordered_remove (i-1);
 640             }
 641
 642           return true;
 643
 644         default:
 645           break;
 646         }
 647     }
 648   return false;
 649 }
 650
 651 static vec<tree> plus_negates;
 652
 653 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
 654    expression, look in OPS for a corresponding positive operation to cancel
 655    it out.  If we find one, remove the other from OPS, replace
 656    OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
 657    return false. */
 658
 659 static bool
 660 eliminate_plus_minus_pair (enum tree_code opcode,
 661                            vec<operand_entry_t> *ops,
 662                            unsigned int currindex,
 663                            operand_entry_t curr)
 664 {
 665   tree negateop;
 666   tree notop;
 667   unsigned int i;
 668   operand_entry_t oe;
 669
 670   if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
 671     return false;
 672
 673   negateop = get_unary_op (curr->op, NEGATE_EXPR);
 674   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 675   if (negateop == NULL_TREE && notop == NULL_TREE)
 676     return false;
 677
 678   /* Any non-negated version will have a rank that is one less than
 679      the current rank.  So once we hit those ranks, if we don't find
 680      one, we can stop.  */
 681
 682   for (i = currindex + 1;
 683        ops->iterate (i, &oe)
 684        && oe->rank >= curr->rank - 1 ;
 685        i++)
 686     {
 687       if (oe->op == negateop)
 688         {
 689
 690           if (dump_file && (dump_flags & TDF_DETAILS))
 691             {
 692               fprintf (dump_file, "Equivalence: ");
 693               print_generic_expr (dump_file, negateop, 0);
 694               fprintf (dump_file, " + -");
 695               print_generic_expr (dump_file, oe->op, 0);
 696               fprintf (dump_file, " -> 0\n");
 697             }
 698
 699           ops->ordered_remove (i);
 700           add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
 701           ops->ordered_remove (currindex);
 702           reassociate_stats.ops_eliminated ++;
 703
 704           return true;
 705         }
 706       else if (oe->op == notop)
 707         {
 708           tree op_type = TREE_TYPE (oe->op);
 709
 710           if (dump_file && (dump_flags & TDF_DETAILS))
 711             {
 712               fprintf (dump_file, "Equivalence: ");
 713               print_generic_expr (dump_file, notop, 0);
 714               fprintf (dump_file, " + ~");
 715               print_generic_expr (dump_file, oe->op, 0);
 716               fprintf (dump_file, " -> -1\n");
 717             }
 718
 719           ops->ordered_remove (i);
 720           add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
 721           ops->ordered_remove (currindex);
 722           reassociate_stats.ops_eliminated ++;
 723
 724           return true;
 725         }
 726     }
 727
 728   /* CURR->OP is a negate expr in a plus expr: save it for later
 729      inspection in repropagate_negates().  */
 730   if (negateop != NULL_TREE)
 731     plus_negates.safe_push (curr->op);
 732
 733   return false;
 734 }
 735
 736 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
 737    bitwise not expression, look in OPS for a corresponding operand to
 738    cancel it out.  If we find one, remove the other from OPS, replace
 739    OPS[CURRINDEX] with 0, and return true.  Otherwise, return
 740    false. */
 741
 742 static bool
 743 eliminate_not_pairs (enum tree_code opcode,
 744                      vec<operand_entry_t> *ops,
 745                      unsigned int currindex,
 746                      operand_entry_t curr)
 747 {
 748   tree notop;
 749   unsigned int i;
 750   operand_entry_t oe;
 751
 752   if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
 753       || TREE_CODE (curr->op) != SSA_NAME)
 754     return false;
 755
 756   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 757   if (notop == NULL_TREE)
 758     return false;
 759
 760   /* Any non-not version will have a rank that is one less than
 761      the current rank.  So once we hit those ranks, if we don't find
 762      one, we can stop.  */
 763
 764   for (i = currindex + 1;
 765        ops->iterate (i, &oe)
 766        && oe->rank >= curr->rank - 1;
 767        i++)
 768     {
 769       if (oe->op == notop)
 770         {
 771           if (dump_file && (dump_flags & TDF_DETAILS))
 772             {
 773               fprintf (dump_file, "Equivalence: ");
 774               print_generic_expr (dump_file, notop, 0);
 775               if (opcode == BIT_AND_EXPR)
 776                 fprintf (dump_file, " & ~");
 777               else if (opcode == BIT_IOR_EXPR)
 778                 fprintf (dump_file, " | ~");
 779               print_generic_expr (dump_file, oe->op, 0);
 780               if (opcode == BIT_AND_EXPR)
 781                 fprintf (dump_file, " -> 0\n");
 782               else if (opcode == BIT_IOR_EXPR)
 783                 fprintf (dump_file, " -> -1\n");
 784             }
 785
 786           if (opcode == BIT_AND_EXPR)
 787             oe->op = build_zero_cst (TREE_TYPE (oe->op));
 788           else if (opcode == BIT_IOR_EXPR)
 789             oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
 790                                           TYPE_PRECISION (TREE_TYPE (oe->op)));
 791
 792           reassociate_stats.ops_eliminated += ops->length () - 1;
 793           ops->truncate (0);
 794           ops->quick_push (oe);
 795           return true;
 796         }
 797     }
 798
 799   return false;
 800 }
 801
 802 /* Use constant value that may be present in OPS to try to eliminate
 803    operands.  Note that this function is only really used when we've
 804    eliminated ops for other reasons, or merged constants.  Across
 805    single statements, fold already does all of this, plus more.  There
 806    is little point in duplicating logic, so I've only included the
 807    identities that I could ever construct testcases to trigger.  */
 808
 809 static void
 810 eliminate_using_constants (enum tree_code opcode,
 811                            vec<operand_entry_t> *ops)
 812 {
 813   operand_entry_t oelast = ops->last ();
 814   tree type = TREE_TYPE (oelast->op);
 815
 816   if (oelast->rank == 0
 817       && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
 818     {
 819       switch (opcode)
 820         {
 821         case BIT_AND_EXPR:
 822           if (integer_zerop (oelast->op))
 823             {
 824               if (ops->length () != 1)
 825                 {
 826                   if (dump_file && (dump_flags & TDF_DETAILS))
 827                     fprintf (dump_file, "Found & 0, removing all other ops\n");
 828
 829                   reassociate_stats.ops_eliminated += ops->length () - 1;
 830
 831                   ops->truncate (0);
 832                   ops->quick_push (oelast);
 833                   return;
 834                 }
 835             }
 836           else if (integer_all_onesp (oelast->op))
 837             {
 838               if (ops->length () != 1)
 839                 {
 840                   if (dump_file && (dump_flags & TDF_DETAILS))
 841                     fprintf (dump_file, "Found & -1, removing\n");
 842                   ops->pop ();
 843                   reassociate_stats.ops_eliminated++;
 844                 }
 845             }
 846           break;
 847         case BIT_IOR_EXPR:
 848           if (integer_all_onesp (oelast->op))
 849             {
 850               if (ops->length () != 1)
 851                 {
 852                   if (dump_file && (dump_flags & TDF_DETAILS))
 853                     fprintf (dump_file, "Found | -1, removing all other ops\n");
 854
 855                   reassociate_stats.ops_eliminated += ops->length () - 1;
 856
 857                   ops->truncate (0);
 858                   ops->quick_push (oelast);
 859                   return;
 860                 }
 861             }
 862           else if (integer_zerop (oelast->op))
 863             {
 864               if (ops->length () != 1)
 865                 {
 866                   if (dump_file && (dump_flags & TDF_DETAILS))
 867                     fprintf (dump_file, "Found | 0, removing\n");
 868                   ops->pop ();
 869                   reassociate_stats.ops_eliminated++;
 870                 }
 871             }
 872           break;
 873         case MULT_EXPR:
 874           if (integer_zerop (oelast->op)
 875               || (FLOAT_TYPE_P (type)
 876                   && !HONOR_NANS (TYPE_MODE (type))
 877                   && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
 878                   && real_zerop (oelast->op)))
 879             {
 880               if (ops->length () != 1)
 881                 {
 882                   if (dump_file && (dump_flags & TDF_DETAILS))
 883                     fprintf (dump_file, "Found * 0, removing all other ops\n");
 884
 885                   reassociate_stats.ops_eliminated += ops->length () - 1;
 886                   ops->truncate (1);
 887                   ops->quick_push (oelast);
 888                   return;
 889                 }
 890             }
 891           else if (integer_onep (oelast->op)
 892                    || (FLOAT_TYPE_P (type)
 893                        && !HONOR_SNANS (TYPE_MODE (type))
 894                        && real_onep (oelast->op)))
 895             {
 896               if (ops->length () != 1)
 897                 {
 898                   if (dump_file && (dump_flags & TDF_DETAILS))
 899                     fprintf (dump_file, "Found * 1, removing\n");
 900                   ops->pop ();
 901                   reassociate_stats.ops_eliminated++;
 902                   return;
 903                 }
 904             }
 905           break;
 906         case BIT_XOR_EXPR:
 907         case PLUS_EXPR:
 908         case MINUS_EXPR:
 909           if (integer_zerop (oelast->op)
 910               || (FLOAT_TYPE_P (type)
 911                   && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
 912                   && fold_real_zero_addition_p (type, oelast->op,
 913                                                 opcode == MINUS_EXPR)))
 914             {
 915               if (ops->length () != 1)
 916                 {
 917                   if (dump_file && (dump_flags & TDF_DETAILS))
 918                     fprintf (dump_file, "Found [|^+] 0, removing\n");
 919                   ops->pop ();
 920                   reassociate_stats.ops_eliminated++;
 921                   return;
 922                 }
 923             }
 924           break;
 925         default:
 926           break;
 927         }
 928     }
 929 }
 930
 931
 932 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
 933                                  bool, bool);
 934
 935 /* Structure for tracking and counting operands.  */
 936 typedef struct oecount_s {
 937   int cnt;
 938   int id;
 939   enum tree_code oecode;
 940   tree op;
 941 } oecount;
 942
 943
 944 /* The heap for the oecount hashtable and the sorted list of operands.  */
 945 static vec<oecount> cvec;
 946
 947
 948 /* Oecount hashtable helpers.  */
 949
 950 struct oecount_hasher : typed_noop_remove <void>
 951 {
 952   /* Note that this hash table stores integers, not pointers.
 953      So, observe the casting in the member functions.  */
 954   typedef void value_type;
 955   typedef void compare_type;
 956   static inline hashval_t hash (const value_type *);
 957   static inline bool equal (const value_type *, const compare_type *);
 958 };
 959
 960 /* Hash function for oecount.  */
 961
 962 inline hashval_t
 963 oecount_hasher::hash (const value_type *p)
 964 {
 965   const oecount *c = &cvec[(size_t)p - 42];
 966   return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
 967 }
 968
 969 /* Comparison function for oecount.  */
 970
 971 inline bool
 972 oecount_hasher::equal (const value_type *p1, const compare_type *p2)
 973 {
 974   const oecount *c1 = &cvec[(size_t)p1 - 42];
 975   const oecount *c2 = &cvec[(size_t)p2 - 42];
 976   return (c1->oecode == c2->oecode
 977           && c1->op == c2->op);
 978 }
 979
 980 /* Comparison function for qsort sorting oecount elements by count.  */
 981
 982 static int
 983 oecount_cmp (const void *p1, const void *p2)
 984 {
 985   const oecount *c1 = (const oecount *)p1;
 986   const oecount *c2 = (const oecount *)p2;
 987   if (c1->cnt != c2->cnt)
 988     return c1->cnt - c2->cnt;
 989   else
 990     /* If counts are identical, use unique IDs to stabilize qsort.  */
 991     return c1->id - c2->id;
 992 }
 993
 994 /* Return TRUE iff STMT represents a builtin call that raises OP
 995    to some exponent.  */
 996
 997 static bool
 998 stmt_is_power_of_op (gimple stmt, tree op)
 999 {
1000   tree fndecl;
1001
1002   if (!is_gimple_call (stmt))
1003     return false;
1004
1005   fndecl = gimple_call_fndecl (stmt);
1006
1007   if (!fndecl
1008       || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1009     return false;
1010
1011   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1012     {
1013     CASE_FLT_FN (BUILT_IN_POW):
1014     CASE_FLT_FN (BUILT_IN_POWI):
1015       return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1016
1017     default:
1018       return false;
1019     }
1020 }
1021
1022 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1023    in place and return the result.  Assumes that stmt_is_power_of_op
1024    was previously called for STMT and returned TRUE.  */
1025
1026 static HOST_WIDE_INT
1027 decrement_power (gimple stmt)
1028 {
1029   REAL_VALUE_TYPE c, cint;
1030   HOST_WIDE_INT power;
1031   tree arg1;
1032
1033   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1034     {
1035     CASE_FLT_FN (BUILT_IN_POW):
1036       arg1 = gimple_call_arg (stmt, 1);
1037       c = TREE_REAL_CST (arg1);
1038       power = real_to_integer (&c) - 1;
1039       real_from_integer (&cint, VOIDmode, power, 0, 0);
1040       gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1041       return power;
1042
1043     CASE_FLT_FN (BUILT_IN_POWI):
1044       arg1 = gimple_call_arg (stmt, 1);
1045       power = TREE_INT_CST_LOW (arg1) - 1;
1046       gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1047       return power;
1048
1049     default:
1050       gcc_unreachable ();
1051     }
1052 }
1053
1054 /* Find the single immediate use of STMT's LHS, and replace it
1055    with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
1056    replace *DEF with OP as well.  */
1057
1058 static void
1059 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1060 {
1061   tree lhs;
1062   gimple use_stmt;
1063   use_operand_p use;
1064   gimple_stmt_iterator gsi;
1065
1066   if (is_gimple_call (stmt))
1067     lhs = gimple_call_lhs (stmt);
1068   else
1069     lhs = gimple_assign_lhs (stmt);
1070
1071   gcc_assert (has_single_use (lhs));
1072   single_imm_use (lhs, &use, &use_stmt);
1073   if (lhs == *def)
1074     *def = op;
1075   SET_USE (use, op);
1076   if (TREE_CODE (op) != SSA_NAME)
1077     update_stmt (use_stmt);
1078   gsi = gsi_for_stmt (stmt);
1079   unlink_stmt_vdef (stmt);
1080   gsi_remove (&gsi, true);
1081   release_defs (stmt);
1082 }
1083
1084 /* Walks the linear chain with result *DEF searching for an operation
1085    with operand OP and code OPCODE removing that from the chain.  *DEF
1086    is updated if there is only one operand but no operation left.  */
1087
1088 static void
1089 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1090 {
1091   gimple stmt = SSA_NAME_DEF_STMT (*def);
1092
1093   do
1094     {
1095       tree name;
1096
1097       if (opcode == MULT_EXPR
1098           && stmt_is_power_of_op (stmt, op))
1099         {
1100           if (decrement_power (stmt) == 1)
1101             propagate_op_to_single_use (op, stmt, def);
1102           return;
1103         }
1104
1105       name = gimple_assign_rhs1 (stmt);
1106
1107       /* If this is the operation we look for and one of the operands
1108          is ours simply propagate the other operand into the stmts
1109          single use.  */
1110       if (gimple_assign_rhs_code (stmt) == opcode
1111           && (name == op
1112               || gimple_assign_rhs2 (stmt) == op))
1113         {
1114           if (name == op)
1115             name = gimple_assign_rhs2 (stmt);
1116           propagate_op_to_single_use (name, stmt, def);
1117           return;
1118         }
1119
1120       /* We might have a multiply of two __builtin_pow* calls, and
1121          the operand might be hiding in the rightmost one.  */
1122       if (opcode == MULT_EXPR
1123           && gimple_assign_rhs_code (stmt) == opcode
1124           && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1125           && has_single_use (gimple_assign_rhs2 (stmt)))
1126         {
1127           gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1128           if (stmt_is_power_of_op (stmt2, op))
1129             {
1130               if (decrement_power (stmt2) == 1)
1131                 propagate_op_to_single_use (op, stmt2, def);
1132               return;
1133             }
1134         }
1135
1136       /* Continue walking the chain.  */
1137       gcc_assert (name != op
1138                   && TREE_CODE (name) == SSA_NAME);
1139       stmt = SSA_NAME_DEF_STMT (name);
1140     }
1141   while (1);
1142 }
1143
1144 /* Returns the UID of STMT if it is non-NULL. Otherwise return 1.  */
1145
1146 static inline unsigned
1147 get_stmt_uid_with_default (gimple stmt)
1148 {
1149   return stmt ? gimple_uid (stmt) : 1;
1150 }
1151
1152 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1153    the result.  Places the statement after the definition of either
1154    OP1 or OP2.  Returns the new statement.  */
1155
1156 static gimple
1157 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1158 {
1159   gimple op1def = NULL, op2def = NULL;
1160   gimple_stmt_iterator gsi;
1161   tree op;
1162   gimple sum;
1163
1164   /* Create the addition statement.  */
1165   op = make_ssa_name (type, NULL);
1166   sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1167
1168   /* Find an insertion place and insert.  */
1169   if (TREE_CODE (op1) == SSA_NAME)
1170     op1def = SSA_NAME_DEF_STMT (op1);
1171   if (TREE_CODE (op2) == SSA_NAME)
1172     op2def = SSA_NAME_DEF_STMT (op2);
1173   if ((!op1def || gimple_nop_p (op1def))
1174       && (!op2def || gimple_nop_p (op2def)))
1175     {
1176       gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1177       gimple_set_uid (sum, get_stmt_uid_with_default (gsi_stmt (gsi)));
1178       gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1179     }
1180   else if ((!op1def || gimple_nop_p (op1def))
1181            || (op2def && !gimple_nop_p (op2def)
1182                && stmt_dominates_stmt_p (op1def, op2def)))
1183     {
1184       if (gimple_code (op2def) == GIMPLE_PHI)
1185         {
1186           gsi = gsi_after_labels (gimple_bb (op2def));
1187           gimple_set_uid (sum, get_stmt_uid_with_default (gsi_stmt (gsi)));
1188           gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1189         }
1190       else
1191         {
1192           if (!stmt_ends_bb_p (op2def))
1193             {
1194               gsi = gsi_for_stmt (op2def);
1195               gimple_set_uid (sum, gimple_uid (op2def));
1196               gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1197             }
1198           else
1199             {
1200               edge e;
1201               edge_iterator ei;
1202
1203               FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1204                 if (e->flags & EDGE_FALLTHRU)
1205                   gsi_insert_on_edge_immediate (e, sum);
1206             }
1207         }
1208     }
1209   else
1210     {
1211       if (gimple_code (op1def) == GIMPLE_PHI)
1212         {
1213           gsi = gsi_after_labels (gimple_bb (op1def));
1214           gimple_set_uid (sum, get_stmt_uid_with_default (gsi_stmt (gsi)));
1215           gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1216         }
1217       else
1218         {
1219           if (!stmt_ends_bb_p (op1def))
1220             {
1221               gsi = gsi_for_stmt (op1def);
1222               gimple_set_uid (sum, gimple_uid (op1def));
1223               gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1224             }
1225           else
1226             {
1227               edge e;
1228               edge_iterator ei;
1229
1230               FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1231                 if (e->flags & EDGE_FALLTHRU)
1232                   gsi_insert_on_edge_immediate (e, sum);
1233             }
1234         }
1235     }
1236   update_stmt (sum);
1237
1238   return sum;
1239 }
1240
1241 /* Perform un-distribution of divisions and multiplications.
1242    A * X + B * X is transformed into (A + B) * X and A / X + B / X
1243    to (A + B) / X for real X.
1244
1245    The algorithm is organized as follows.
1246
1247     - First we walk the addition chain *OPS looking for summands that
1248       are defined by a multiplication or a real division.  This results
1249       in the candidates bitmap with relevant indices into *OPS.
1250
1251     - Second we build the chains of multiplications or divisions for
1252       these candidates, counting the number of occurrences of (operand, code)
1253       pairs in all of the candidates chains.
1254
1255     - Third we sort the (operand, code) pairs by number of occurrence and
1256       process them starting with the pair with the most uses.
1257
1258       * For each such pair we walk the candidates again to build a
1259         second candidate bitmap noting all multiplication/division chains
1260         that have at least one occurrence of (operand, code).
1261
1262       * We build an alternate addition chain only covering these
1263         candidates with one (operand, code) operation removed from their
1264         multiplication/division chain.
1265
1266       * The first candidate gets replaced by the alternate addition chain
1267         multiplied/divided by the operand.
1268
1269       * All candidate chains get disabled for further processing and
1270         processing of (operand, code) pairs continues.
1271
1272   The alternate addition chains built are re-processed by the main
1273   reassociation algorithm which allows optimizing a * x * y + b * y * x
1274   to (a + b ) * x * y in one invocation of the reassociation pass.  */
1275
1276 static bool
1277 undistribute_ops_list (enum tree_code opcode,
1278                        vec<operand_entry_t> *ops, struct loop *loop)
1279 {
1280   unsigned int length = ops->length ();
1281   operand_entry_t oe1;
1282   unsigned i, j;
1283   sbitmap candidates, candidates2;
1284   unsigned nr_candidates, nr_candidates2;
1285   sbitmap_iterator sbi0;
1286   vec<operand_entry_t> *subops;
1287   hash_table <oecount_hasher> ctable;
1288   bool changed = false;
1289   int next_oecount_id = 0;
1290
1291   if (length <= 1
1292       || opcode != PLUS_EXPR)
1293     return false;
1294
1295   /* Build a list of candidates to process.  */
1296   candidates = sbitmap_alloc (length);
1297   bitmap_clear (candidates);
1298   nr_candidates = 0;
1299   FOR_EACH_VEC_ELT (*ops, i, oe1)
1300     {
1301       enum tree_code dcode;
1302       gimple oe1def;
1303
1304       if (TREE_CODE (oe1->op) != SSA_NAME)
1305         continue;
1306       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1307       if (!is_gimple_assign (oe1def))
1308         continue;
1309       dcode = gimple_assign_rhs_code (oe1def);
1310       if ((dcode != MULT_EXPR
1311            && dcode != RDIV_EXPR)
1312           || !is_reassociable_op (oe1def, dcode, loop))
1313         continue;
1314
1315       bitmap_set_bit (candidates, i);
1316       nr_candidates++;
1317     }
1318
1319   if (nr_candidates < 2)
1320     {
1321       sbitmap_free (candidates);
1322       return false;
1323     }
1324
1325   if (dump_file && (dump_flags & TDF_DETAILS))
1326     {
1327       fprintf (dump_file, "searching for un-distribute opportunities ");
1328       print_generic_expr (dump_file,
1329         (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1330       fprintf (dump_file, " %d\n", nr_candidates);
1331     }
1332
1333   /* Build linearized sub-operand lists and the counting table.  */
1334   cvec.create (0);
1335   ctable.create (15);
1336   /* ??? Macro arguments cannot have multi-argument template types in
1337      them.  This typedef is needed to workaround that limitation.  */
1338   typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1339   subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1340   EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1341     {
1342       gimple oedef;
1343       enum tree_code oecode;
1344       unsigned j;
1345
1346       oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1347       oecode = gimple_assign_rhs_code (oedef);
1348       linearize_expr_tree (&subops[i], oedef,
1349                            associative_tree_code (oecode), false);
1350
1351       FOR_EACH_VEC_ELT (subops[i], j, oe1)
1352         {
1353           oecount c;
1354           void **slot;
1355           size_t idx;
1356           c.oecode = oecode;
1357           c.cnt = 1;
1358           c.id = next_oecount_id++;
1359           c.op = oe1->op;
1360           cvec.safe_push (c);
1361           idx = cvec.length () + 41;
1362           slot = ctable.find_slot ((void *)idx, INSERT);
1363           if (!*slot)
1364             {
1365               *slot = (void *)idx;
1366             }
1367           else
1368             {
1369               cvec.pop ();
1370               cvec[(size_t)*slot - 42].cnt++;
1371             }
1372         }
1373     }
1374   ctable.dispose ();
1375
1376   /* Sort the counting table.  */
1377   cvec.qsort (oecount_cmp);
1378
1379   if (dump_file && (dump_flags & TDF_DETAILS))
1380     {
1381       oecount *c;
1382       fprintf (dump_file, "Candidates:\n");
1383       FOR_EACH_VEC_ELT (cvec, j, c)
1384         {
1385           fprintf (dump_file, "  %u %s: ", c->cnt,
1386                    c->oecode == MULT_EXPR
1387                    ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1388           print_generic_expr (dump_file, c->op, 0);
1389           fprintf (dump_file, "\n");
1390         }
1391     }
1392
1393   /* Process the (operand, code) pairs in order of most occurrence.  */
1394   candidates2 = sbitmap_alloc (length);
1395   while (!cvec.is_empty ())
1396     {
1397       oecount *c = &cvec.last ();
1398       if (c->cnt < 2)
1399         break;
1400
1401       /* Now collect the operands in the outer chain that contain
1402          the common operand in their inner chain.  */
1403       bitmap_clear (candidates2);
1404       nr_candidates2 = 0;
1405       EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1406         {
1407           gimple oedef;
1408           enum tree_code oecode;
1409           unsigned j;
1410           tree op = (*ops)[i]->op;
1411
1412           /* If we undistributed in this chain already this may be
1413              a constant.  */
1414           if (TREE_CODE (op) != SSA_NAME)
1415             continue;
1416
1417           oedef = SSA_NAME_DEF_STMT (op);
1418           oecode = gimple_assign_rhs_code (oedef);
1419           if (oecode != c->oecode)
1420             continue;
1421
1422           FOR_EACH_VEC_ELT (subops[i], j, oe1)
1423             {
1424               if (oe1->op == c->op)
1425                 {
1426                   bitmap_set_bit (candidates2, i);
1427                   ++nr_candidates2;
1428                   break;
1429                 }
1430             }
1431         }
1432
1433       if (nr_candidates2 >= 2)
1434         {
1435           operand_entry_t oe1, oe2;
1436           gimple prod;
1437           int first = bitmap_first_set_bit (candidates2);
1438
1439           /* Build the new addition chain.  */
1440           oe1 = (*ops)[first];
1441           if (dump_file && (dump_flags & TDF_DETAILS))
1442             {
1443               fprintf (dump_file, "Building (");
1444               print_generic_expr (dump_file, oe1->op, 0);
1445             }
1446           zero_one_operation (&oe1->op, c->oecode, c->op);
1447           EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1448             {
1449               gimple sum;
1450               oe2 = (*ops)[i];
1451               if (dump_file && (dump_flags & TDF_DETAILS))
1452                 {
1453                   fprintf (dump_file, " + ");
1454                   print_generic_expr (dump_file, oe2->op, 0);
1455                 }
1456               zero_one_operation (&oe2->op, c->oecode, c->op);
1457               sum = build_and_add_sum (TREE_TYPE (oe1->op),
1458                                        oe1->op, oe2->op, opcode);
1459               oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1460               oe2->rank = 0;
1461               oe1->op = gimple_get_lhs (sum);
1462             }
1463
1464           /* Apply the multiplication/division.  */
1465           prod = build_and_add_sum (TREE_TYPE (oe1->op),
1466                                     oe1->op, c->op, c->oecode);
1467           if (dump_file && (dump_flags & TDF_DETAILS))
1468             {
1469               fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1470               print_generic_expr (dump_file, c->op, 0);
1471               fprintf (dump_file, "\n");
1472             }
1473
1474           /* Record it in the addition chain and disable further
1475              undistribution with this op.  */
1476           oe1->op = gimple_assign_lhs (prod);
1477           oe1->rank = get_rank (oe1->op);
1478           subops[first].release ();
1479
1480           changed = true;
1481         }
1482
1483       cvec.pop ();
1484     }
1485
1486   for (i = 0; i < ops->length (); ++i)
1487     subops[i].release ();
1488   free (subops);
1489   cvec.release ();
1490   sbitmap_free (candidates);
1491   sbitmap_free (candidates2);
1492
1493   return changed;
1494 }
1495
1496 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1497    expression, examine the other OPS to see if any of them are comparisons
1498    of the same values, which we may be able to combine or eliminate.
1499    For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
1500
1501 static bool
1502 eliminate_redundant_comparison (enum tree_code opcode,
1503                                 vec<operand_entry_t> *ops,
1504                                 unsigned int currindex,
1505                                 operand_entry_t curr)
1506 {
1507   tree op1, op2;
1508   enum tree_code lcode, rcode;
1509   gimple def1, def2;
1510   int i;
1511   operand_entry_t oe;
1512
1513   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1514     return false;
1515
1516   /* Check that CURR is a comparison.  */
1517   if (TREE_CODE (curr->op) != SSA_NAME)
1518     return false;
1519   def1 = SSA_NAME_DEF_STMT (curr->op);
1520   if (!is_gimple_assign (def1))
1521     return false;
1522   lcode = gimple_assign_rhs_code (def1);
1523   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1524     return false;
1525   op1 = gimple_assign_rhs1 (def1);
1526   op2 = gimple_assign_rhs2 (def1);
1527
1528   /* Now look for a similar comparison in the remaining OPS.  */
1529   for (i = currindex + 1; ops->iterate (i, &oe); i++)
1530     {
1531       tree t;
1532
1533       if (TREE_CODE (oe->op) != SSA_NAME)
1534         continue;
1535       def2 = SSA_NAME_DEF_STMT (oe->op);
1536       if (!is_gimple_assign (def2))
1537         continue;
1538       rcode = gimple_assign_rhs_code (def2);
1539       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1540         continue;
1541
1542       /* If we got here, we have a match.  See if we can combine the
1543          two comparisons.  */
1544       if (opcode == BIT_IOR_EXPR)
1545         t = maybe_fold_or_comparisons (lcode, op1, op2,
1546                                        rcode, gimple_assign_rhs1 (def2),
1547                                        gimple_assign_rhs2 (def2));
1548       else
1549         t = maybe_fold_and_comparisons (lcode, op1, op2,
1550                                         rcode, gimple_assign_rhs1 (def2),
1551                                         gimple_assign_rhs2 (def2));
1552       if (!t)
1553         continue;
1554
1555       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1556          always give us a boolean_type_node value back.  If the original
1557          BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1558          we need to convert.  */
1559       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1560         t = fold_convert (TREE_TYPE (curr->op), t);
1561
1562       if (TREE_CODE (t) != INTEGER_CST
1563           && !operand_equal_p (t, curr->op, 0))
1564         {
1565           enum tree_code subcode;
1566           tree newop1, newop2;
1567           if (!COMPARISON_CLASS_P (t))
1568             continue;
1569           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1570           STRIP_USELESS_TYPE_CONVERSION (newop1);
1571           STRIP_USELESS_TYPE_CONVERSION (newop2);
1572           if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1573             continue;
1574         }
1575
1576       if (dump_file && (dump_flags & TDF_DETAILS))
1577         {
1578           fprintf (dump_file, "Equivalence: ");
1579           print_generic_expr (dump_file, curr->op, 0);
1580           fprintf (dump_file, " %s ", op_symbol_code (opcode));
1581           print_generic_expr (dump_file, oe->op, 0);
1582           fprintf (dump_file, " -> ");
1583           print_generic_expr (dump_file, t, 0);
1584           fprintf (dump_file, "\n");
1585         }
1586
1587       /* Now we can delete oe, as it has been subsumed by the new combined
1588          expression t.  */
1589       ops->ordered_remove (i);
1590       reassociate_stats.ops_eliminated ++;
1591
1592       /* If t is the same as curr->op, we're done.  Otherwise we must
1593          replace curr->op with t.  Special case is if we got a constant
1594          back, in which case we add it to the end instead of in place of
1595          the current entry.  */
1596       if (TREE_CODE (t) == INTEGER_CST)
1597         {
1598           ops->ordered_remove (currindex);
1599           add_to_ops_vec (ops, t);
1600         }
1601       else if (!operand_equal_p (t, curr->op, 0))
1602         {
1603           gimple sum;
1604           enum tree_code subcode;
1605           tree newop1;
1606           tree newop2;
1607           gcc_assert (COMPARISON_CLASS_P (t));
1608           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1609           STRIP_USELESS_TYPE_CONVERSION (newop1);
1610           STRIP_USELESS_TYPE_CONVERSION (newop2);
1611           gcc_checking_assert (is_gimple_val (newop1)
1612                                && is_gimple_val (newop2));
1613           sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1614           curr->op = gimple_get_lhs (sum);
1615         }
1616       return true;
1617     }
1618
1619   return false;
1620 }
1621
1622 /* Perform various identities and other optimizations on the list of
1623    operand entries, stored in OPS.  The tree code for the binary
1624    operation between all the operands is OPCODE.  */
1625
1626 static void
1627 optimize_ops_list (enum tree_code opcode,
1628                    vec<operand_entry_t> *ops)
1629 {
1630   unsigned int length = ops->length ();
1631   unsigned int i;
1632   operand_entry_t oe;
1633   operand_entry_t oelast = NULL;
1634   bool iterate = false;
1635
1636   if (length == 1)
1637     return;
1638
1639   oelast = ops->last ();
1640
1641   /* If the last two are constants, pop the constants off, merge them
1642      and try the next two.  */
1643   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1644     {
1645       operand_entry_t oelm1 = (*ops)[length - 2];
1646
1647       if (oelm1->rank == 0
1648           && is_gimple_min_invariant (oelm1->op)
1649           && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1650                                        TREE_TYPE (oelast->op)))
1651         {
1652           tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1653                                      oelm1->op, oelast->op);
1654
1655           if (folded && is_gimple_min_invariant (folded))
1656             {
1657               if (dump_file && (dump_flags & TDF_DETAILS))
1658                 fprintf (dump_file, "Merging constants\n");
1659
1660               ops->pop ();
1661               ops->pop ();
1662
1663               add_to_ops_vec (ops, folded);
1664               reassociate_stats.constants_eliminated++;
1665
1666               optimize_ops_list (opcode, ops);
1667               return;
1668             }
1669         }
1670     }
1671
1672   eliminate_using_constants (opcode, ops);
1673   oelast = NULL;
1674
1675   for (i = 0; ops->iterate (i, &oe);)
1676     {
1677       bool done = false;
1678
1679       if (eliminate_not_pairs (opcode, ops, i, oe))
1680         return;
1681       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1682           || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1683           || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1684         {
1685           if (done)
1686             return;
1687           iterate = true;
1688           oelast = NULL;
1689           continue;
1690         }
1691       oelast = oe;
1692       i++;
1693     }
1694
1695   length = ops->length ();
1696   oelast = ops->last ();
1697
1698   if (iterate)
1699     optimize_ops_list (opcode, ops);
1700 }
1701
1702 /* The following functions are subroutines to optimize_range_tests and allow
1703    it to try to change a logical combination of comparisons into a range
1704    test.
1705
1706    For example, both
1707         X == 2 || X == 5 || X == 3 || X == 4
1708    and
1709         X >= 2 && X <= 5
1710    are converted to
1711         (unsigned) (X - 2) <= 3
1712
1713    For more information see comments above fold_test_range in fold-const.c,
1714    this implementation is for GIMPLE.  */
1715
1716 struct range_entry
1717 {
1718   tree exp;
1719   tree low;
1720   tree high;
1721   bool in_p;
1722   bool strict_overflow_p;
1723   unsigned int idx, next;
1724 };
1725
1726 /* This is similar to make_range in fold-const.c, but on top of
1727    GIMPLE instead of trees.  If EXP is non-NULL, it should be
1728    an SSA_NAME and STMT argument is ignored, otherwise STMT
1729    argument should be a GIMPLE_COND.  */
1730
1731 static void
1732 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1733 {
1734   int in_p;
1735   tree low, high;
1736   bool is_bool, strict_overflow_p;
1737
1738   r->exp = NULL_TREE;
1739   r->in_p = false;
1740   r->strict_overflow_p = false;
1741   r->low = NULL_TREE;
1742   r->high = NULL_TREE;
1743   if (exp != NULL_TREE
1744       && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1745     return;
1746
1747   /* Start with simply saying "EXP != 0" and then look at the code of EXP
1748      and see if we can refine the range.  Some of the cases below may not
1749      happen, but it doesn't seem worth worrying about this.  We "continue"
1750      the outer loop when we've changed something; otherwise we "break"
1751      the switch, which will "break" the while.  */
1752   low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1753   high = low;
1754   in_p = 0;
1755   strict_overflow_p = false;
1756   is_bool = false;
1757   if (exp == NULL_TREE)
1758     is_bool = true;
1759   else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1760     {
1761       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1762         is_bool = true;
1763       else
1764         return;
1765     }
1766   else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1767     is_bool = true;
1768
1769   while (1)
1770     {
1771       enum tree_code code;
1772       tree arg0, arg1, exp_type;
1773       tree nexp;
1774       location_t loc;
1775
1776       if (exp != NULL_TREE)
1777         {
1778           if (TREE_CODE (exp) != SSA_NAME)
1779             break;
1780
1781           stmt = SSA_NAME_DEF_STMT (exp);
1782           if (!is_gimple_assign (stmt))
1783             break;
1784
1785           code = gimple_assign_rhs_code (stmt);
1786           arg0 = gimple_assign_rhs1 (stmt);
1787           arg1 = gimple_assign_rhs2 (stmt);
1788           exp_type = TREE_TYPE (exp);
1789         }
1790       else
1791         {
1792           code = gimple_cond_code (stmt);
1793           arg0 = gimple_cond_lhs (stmt);
1794           arg1 = gimple_cond_rhs (stmt);
1795           exp_type = boolean_type_node;
1796         }
1797
1798       if (TREE_CODE (arg0) != SSA_NAME)
1799         break;
1800       loc = gimple_location (stmt);
1801       switch (code)
1802         {
1803         case BIT_NOT_EXPR:
1804           if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1805               /* Ensure the range is either +[-,0], +[0,0],
1806                  -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1807                  -[1,1].  If it is e.g. +[-,-] or -[-,-]
1808                  or similar expression of unconditional true or
1809                  false, it should not be negated.  */
1810               && ((high && integer_zerop (high))
1811                   || (low && integer_onep (low))))
1812             {
1813               in_p = !in_p;
1814               exp = arg0;
1815               continue;
1816             }
1817           break;
1818         case SSA_NAME:
1819           exp = arg0;
1820           continue;
1821         CASE_CONVERT:
1822           if (is_bool)
1823             goto do_default;
1824           if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1825             {
1826               if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1827                 is_bool = true;
1828               else
1829                 return;
1830             }
1831           else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1832             is_bool = true;
1833           goto do_default;
1834         case EQ_EXPR:
1835         case NE_EXPR:
1836         case LT_EXPR:
1837         case LE_EXPR:
1838         case GE_EXPR:
1839         case GT_EXPR:
1840           is_bool = true;
1841           /* FALLTHRU */
1842         default:
1843           if (!is_bool)
1844             return;
1845         do_default:
1846           nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1847                                   &low, &high, &in_p,
1848                                   &strict_overflow_p);
1849           if (nexp != NULL_TREE)
1850             {
1851               exp = nexp;
1852               gcc_assert (TREE_CODE (exp) == SSA_NAME);
1853               continue;
1854             }
1855           break;
1856         }
1857       break;
1858     }
1859   if (is_bool)
1860     {
1861       r->exp = exp;
1862       r->in_p = in_p;
1863       r->low = low;
1864       r->high = high;
1865       r->strict_overflow_p = strict_overflow_p;
1866     }
1867 }
1868
1869 /* Comparison function for qsort.  Sort entries
1870    without SSA_NAME exp first, then with SSA_NAMEs sorted
1871    by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1872    by increasing ->low and if ->low is the same, by increasing
1873    ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
1874    maximum.  */
1875
1876 static int
1877 range_entry_cmp (const void *a, const void *b)
1878 {
1879   const struct range_entry *p = (const struct range_entry *) a;
1880   const struct range_entry *q = (const struct range_entry *) b;
1881
1882   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1883     {
1884       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1885         {
1886           /* Group range_entries for the same SSA_NAME together.  */
1887           if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1888             return -1;
1889           else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1890             return 1;
1891           /* If ->low is different, NULL low goes first, then by
1892              ascending low.  */
1893           if (p->low != NULL_TREE)
1894             {
1895               if (q->low != NULL_TREE)
1896                 {
1897                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
1898                                           p->low, q->low);
1899                   if (tem && integer_onep (tem))
1900                     return -1;
1901                   tem = fold_binary (GT_EXPR, boolean_type_node,
1902                                      p->low, q->low);
1903                   if (tem && integer_onep (tem))
1904                     return 1;
1905                 }
1906               else
1907                 return 1;
1908             }
1909           else if (q->low != NULL_TREE)
1910             return -1;
1911           /* If ->high is different, NULL high goes last, before that by
1912              ascending high.  */
1913           if (p->high != NULL_TREE)
1914             {
1915               if (q->high != NULL_TREE)
1916                 {
1917                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
1918                                           p->high, q->high);
1919                   if (tem && integer_onep (tem))
1920                     return -1;
1921                   tem = fold_binary (GT_EXPR, boolean_type_node,
1922                                      p->high, q->high);
1923                   if (tem && integer_onep (tem))
1924                     return 1;
1925                 }
1926               else
1927                 return -1;
1928             }
1929           else if (p->high != NULL_TREE)
1930             return 1;
1931           /* If both ranges are the same, sort below by ascending idx.  */
1932         }
1933       else
1934         return 1;
1935     }
1936   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1937     return -1;
1938
1939   if (p->idx < q->idx)
1940     return -1;
1941   else
1942     {
1943       gcc_checking_assert (p->idx > q->idx);
1944       return 1;
1945     }
1946 }
1947
1948 /* Helper routine of optimize_range_test.
1949    [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1950    RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1951    OPCODE and OPS are arguments of optimize_range_tests.  Return
1952    true if the range merge has been successful.
1953    If OPCODE is ERROR_MARK, this is called from within
1954    maybe_optimize_range_tests and is performing inter-bb range optimization.
1955    Changes should be then performed right away, and whether an op is
1956    BIT_AND_EXPR or BIT_IOR_EXPR is found in oe->rank.  */
1957
1958 static bool
1959 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1960                    unsigned int count, enum tree_code opcode,
1961                    vec<operand_entry_t> *ops, tree exp, bool in_p,
1962                    tree low, tree high, bool strict_overflow_p)
1963 {
1964   operand_entry_t oe = (*ops)[range->idx];
1965   tree op = oe->op;
1966   gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id));
1967   location_t loc = gimple_location (stmt);
1968   tree optype = op ? TREE_TYPE (op) : boolean_type_node;
1969   tree tem = build_range_check (loc, optype, exp, in_p, low, high);
1970   enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1971   gimple_stmt_iterator gsi;
1972
1973   if (tem == NULL_TREE)
1974     return false;
1975
1976   if (strict_overflow_p && issue_strict_overflow_warning (wc))
1977     warning_at (loc, OPT_Wstrict_overflow,
1978                 "assuming signed overflow does not occur "
1979                 "when simplifying range test");
1980
1981   if (dump_file && (dump_flags & TDF_DETAILS))
1982     {
1983       struct range_entry *r;
1984       fprintf (dump_file, "Optimizing range tests ");
1985       print_generic_expr (dump_file, range->exp, 0);
1986       fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1987       print_generic_expr (dump_file, range->low, 0);
1988       fprintf (dump_file, ", ");
1989       print_generic_expr (dump_file, range->high, 0);
1990       fprintf (dump_file, "]");
1991       for (r = otherrange; r < otherrange + count; r++)
1992         {
1993           fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
1994           print_generic_expr (dump_file, r->low, 0);
1995           fprintf (dump_file, ", ");
1996           print_generic_expr (dump_file, r->high, 0);
1997           fprintf (dump_file, "]");
1998         }
1999       fprintf (dump_file, "\n into ");
2000       print_generic_expr (dump_file, tem, 0);
2001       fprintf (dump_file, "\n");
2002     }
2003
2004   if (opcode == BIT_IOR_EXPR
2005       || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2006     tem = invert_truthvalue_loc (loc, tem);
2007
2008   tem = fold_convert_loc (loc, optype, tem);
2009   gsi = gsi_for_stmt (stmt);
2010   tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2011                                   GSI_SAME_STMT);
2012
2013   /* If doing inter-bb range test optimization, update the
2014      stmts immediately.  Start with changing the first range test
2015      immediate use to the new value (TEM), or, if the first range
2016      test is a GIMPLE_COND stmt, change that condition.  */
2017   if (opcode == ERROR_MARK)
2018     {
2019       if (op)
2020         {
2021           imm_use_iterator iter;
2022           use_operand_p use_p;
2023           gimple use_stmt;
2024
2025           FOR_EACH_IMM_USE_STMT (use_stmt, iter, op)
2026             {
2027               if (is_gimple_debug (use_stmt))
2028                 continue;
2029               FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2030                 SET_USE (use_p, tem);
2031               update_stmt (use_stmt);
2032             }
2033         }
2034       else
2035         {
2036           gimple_cond_set_code (stmt, NE_EXPR);
2037           gimple_cond_set_lhs (stmt, tem);
2038           gimple_cond_set_rhs (stmt, boolean_false_node);
2039           update_stmt (stmt);
2040         }
2041     }
2042   oe->op = tem;
2043   range->exp = exp;
2044   range->low = low;
2045   range->high = high;
2046   range->in_p = in_p;
2047   range->strict_overflow_p = false;
2048
2049   for (range = otherrange; range < otherrange + count; range++)
2050     {
2051       oe = (*ops)[range->idx];
2052       /* Now change all the other range test immediate uses, so that
2053          those tests will be optimized away.  */
2054       if (opcode == ERROR_MARK)
2055         {
2056           if (oe->op)
2057             {
2058               imm_use_iterator iter;
2059               use_operand_p use_p;
2060               gimple use_stmt;
2061
2062               FOR_EACH_IMM_USE_STMT (use_stmt, iter, oe->op)
2063                 {
2064                   if (is_gimple_debug (use_stmt))
2065                     continue;
2066                   /* If imm use of _8 is a statement like _7 = _8 | _9;,
2067                      adjust it into _7 = _9;.  */
2068                   if (is_gimple_assign (use_stmt)
2069                       && gimple_assign_rhs_code (use_stmt) == oe->rank)
2070                     {
2071                       tree expr = NULL_TREE;
2072                       if (oe->op == gimple_assign_rhs1 (use_stmt))
2073                         expr = gimple_assign_rhs2 (use_stmt);
2074                       else if (oe->op == gimple_assign_rhs2 (use_stmt))
2075                         expr = gimple_assign_rhs1 (use_stmt);
2076                       if (expr
2077                           && expr != oe->op
2078                           && TREE_CODE (expr) == SSA_NAME)
2079                         {
2080                           gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt);
2081                           gimple_assign_set_rhs_with_ops (&gsi2, SSA_NAME,
2082                                                           expr, NULL_TREE);
2083                           update_stmt (use_stmt);
2084                           continue;
2085                         }
2086                     }
2087                   /* If imm use of _8 is a statement like _7 = (int) _8;,
2088                      adjust it into _7 = 0; or _7 = 1;.  */
2089                   if (gimple_assign_cast_p (use_stmt)
2090                       && oe->op == gimple_assign_rhs1 (use_stmt))
2091                     {
2092                       tree lhs = gimple_assign_lhs (use_stmt);
2093                       if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
2094                         {
2095                           gimple_stmt_iterator gsi2
2096                             = gsi_for_stmt (use_stmt);
2097                           tree expr = build_int_cst (TREE_TYPE (lhs),
2098                                                      oe->rank == BIT_IOR_EXPR
2099                                                      ? 0 : 1);
2100                           gimple_assign_set_rhs_with_ops (&gsi2,
2101                                                           INTEGER_CST,
2102                                                           expr, NULL_TREE);
2103                           update_stmt (use_stmt);
2104                           continue;
2105                         }
2106                     }
2107                   /* Otherwise replace the use with 0 or 1.  */
2108                   FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2109                     SET_USE (use_p,
2110                              build_int_cst (TREE_TYPE (oe->op),
2111                                             oe->rank == BIT_IOR_EXPR
2112                                             ? 0 : 1));
2113                   update_stmt (use_stmt);
2114                 }
2115             }
2116           else
2117             {
2118               /* If range test was a GIMPLE_COND, simply change it
2119                  into an always false or always true condition.  */
2120               stmt = last_stmt (BASIC_BLOCK (oe->id));
2121               if (oe->rank == BIT_IOR_EXPR)
2122                 gimple_cond_make_false (stmt);
2123               else
2124                 gimple_cond_make_true (stmt);
2125               update_stmt (stmt);
2126             }
2127         }
2128       oe->op = error_mark_node;
2129       range->exp = NULL_TREE;
2130     }
2131   return true;
2132 }
2133
2134 /* Optimize range tests, similarly how fold_range_test optimizes
2135    it on trees.  The tree code for the binary
2136    operation between all the operands is OPCODE.
2137    If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2138    maybe_optimize_range_tests for inter-bb range optimization.
2139    In that case if oe->op is NULL, oe->id is bb->index whose
2140    GIMPLE_COND is && or ||ed into the test, and oe->rank says
2141    the actual opcode.  */
2142
2143 static void
2144 optimize_range_tests (enum tree_code opcode,
2145                       vec<operand_entry_t> *ops)
2146 {
2147   unsigned int length = ops->length (), i, j, first;
2148   operand_entry_t oe;
2149   struct range_entry *ranges;
2150   bool any_changes = false;
2151
2152   if (length == 1)
2153     return;
2154
2155   ranges = XNEWVEC (struct range_entry, length);
2156   for (i = 0; i < length; i++)
2157     {
2158       oe = (*ops)[i];
2159       ranges[i].idx = i;
2160       init_range_entry (ranges + i, oe->op,
2161                         oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id)));
2162       /* For | invert it now, we will invert it again before emitting
2163          the optimized expression.  */
2164       if (opcode == BIT_IOR_EXPR
2165           || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2166         ranges[i].in_p = !ranges[i].in_p;
2167     }
2168
2169   qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2170   for (i = 0; i < length; i++)
2171     if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2172       break;
2173
2174   /* Try to merge ranges.  */
2175   for (first = i; i < length; i++)
2176     {
2177       tree low = ranges[i].low;
2178       tree high = ranges[i].high;
2179       int in_p = ranges[i].in_p;
2180       bool strict_overflow_p = ranges[i].strict_overflow_p;
2181       int update_fail_count = 0;
2182
2183       for (j = i + 1; j < length; j++)
2184         {
2185           if (ranges[i].exp != ranges[j].exp)
2186             break;
2187           if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2188                              ranges[j].in_p, ranges[j].low, ranges[j].high))
2189             break;
2190           strict_overflow_p |= ranges[j].strict_overflow_p;
2191         }
2192
2193       if (j == i + 1)
2194         continue;
2195
2196       if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2197                              ops, ranges[i].exp, in_p, low, high,
2198                              strict_overflow_p))
2199         {
2200           i = j - 1;
2201           any_changes = true;
2202         }
2203       /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2204          while update_range_test would fail.  */
2205       else if (update_fail_count == 64)
2206         i = j - 1;
2207       else
2208         ++update_fail_count;
2209     }
2210
2211   /* Optimize X == CST1 || X == CST2
2212      if popcount (CST1 ^ CST2) == 1 into
2213      (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2214      Similarly for ranges.  E.g.
2215      X != 2 && X != 3 && X != 10 && X != 11
2216      will be transformed by the above loop into
2217      (X - 2U) <= 1U && (X - 10U) <= 1U
2218      and this loop can transform that into
2219      ((X & ~8) - 2U) <= 1U.  */
2220   for (i = first; i < length; i++)
2221     {
2222       tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
2223
2224       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2225         continue;
2226       type = TREE_TYPE (ranges[i].exp);
2227       if (!INTEGRAL_TYPE_P (type))
2228         continue;
2229       lowi = ranges[i].low;
2230       if (lowi == NULL_TREE)
2231         lowi = TYPE_MIN_VALUE (type);
2232       highi = ranges[i].high;
2233       if (highi == NULL_TREE)
2234         continue;
2235       for (j = i + 1; j < length && j < i + 64; j++)
2236         {
2237           if (ranges[j].exp == NULL_TREE)
2238             continue;
2239           if (ranges[i].exp != ranges[j].exp)
2240             break;
2241           if (ranges[j].in_p)
2242             continue;
2243           lowj = ranges[j].low;
2244           if (lowj == NULL_TREE)
2245             continue;
2246           highj = ranges[j].high;
2247           if (highj == NULL_TREE)
2248             highj = TYPE_MAX_VALUE (type);
2249           tem = fold_binary (GT_EXPR, boolean_type_node,
2250                              lowj, highi);
2251           if (tem == NULL_TREE || !integer_onep (tem))
2252             continue;
2253           lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2254           if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2255             continue;
2256           gcc_checking_assert (!integer_zerop (lowxor));
2257           tem = fold_binary (MINUS_EXPR, type, lowxor,
2258                              build_int_cst (type, 1));
2259           if (tem == NULL_TREE)
2260             continue;
2261           tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
2262           if (tem == NULL_TREE || !integer_zerop (tem))
2263             continue;
2264           highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2265           if (!tree_int_cst_equal (lowxor, highxor))
2266             continue;
2267           tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2268           exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
2269           lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2270           highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2271           if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
2272                                  ranges[i].in_p, lowj, highj,
2273                                  ranges[i].strict_overflow_p
2274                                  || ranges[j].strict_overflow_p))
2275             {
2276               any_changes = true;
2277               break;
2278             }
2279         }
2280     }
2281
2282   if (any_changes && opcode != ERROR_MARK)
2283     {
2284       j = 0;
2285       FOR_EACH_VEC_ELT (*ops, i, oe)
2286         {
2287           if (oe->op == error_mark_node)
2288             continue;
2289           else if (i != j)
2290             (*ops)[j] = oe;
2291           j++;
2292         }
2293       ops->truncate (j);
2294     }
2295
2296   XDELETEVEC (ranges);
2297 }
2298
2299 /* Return true if STMT is a cast like:
2300    <bb N>:
2301    ...
2302    _123 = (int) _234;
2303
2304    <bb M>:
2305    # _345 = PHI <_123(N), 1(...), 1(...)>
2306    where _234 has bool type, _123 has single use and
2307    bb N has a single successor M.  This is commonly used in
2308    the last block of a range test.  */
2309
2310 static bool
2311 final_range_test_p (gimple stmt)
2312 {
2313   basic_block bb, rhs_bb;
2314   edge e;
2315   tree lhs, rhs;
2316   use_operand_p use_p;
2317   gimple use_stmt;
2318
2319   if (!gimple_assign_cast_p (stmt))
2320     return false;
2321   bb = gimple_bb (stmt);
2322   if (!single_succ_p (bb))
2323     return false;
2324   e = single_succ_edge (bb);
2325   if (e->flags & EDGE_COMPLEX)
2326     return false;
2327
2328   lhs = gimple_assign_lhs (stmt);
2329   rhs = gimple_assign_rhs1 (stmt);
2330   if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2331       || TREE_CODE (rhs) != SSA_NAME
2332       || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2333     return false;
2334
2335   /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
2336   if (!single_imm_use (lhs, &use_p, &use_stmt))
2337     return false;
2338
2339   if (gimple_code (use_stmt) != GIMPLE_PHI
2340       || gimple_bb (use_stmt) != e->dest)
2341     return false;
2342
2343   /* And that the rhs is defined in the same loop.  */
2344   rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2345   if (rhs_bb == NULL
2346       || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2347     return false;
2348
2349   return true;
2350 }
2351
2352 /* Return true if BB is suitable basic block for inter-bb range test
2353    optimization.  If BACKWARD is true, BB should be the only predecessor
2354    of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2355    or compared with to find a common basic block to which all conditions
2356    branch to if true resp. false.  If BACKWARD is false, TEST_BB should
2357    be the only predecessor of BB.  */
2358
2359 static bool
2360 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2361                   bool backward)
2362 {
2363   edge_iterator ei, ei2;
2364   edge e, e2;
2365   gimple stmt;
2366   gimple_stmt_iterator gsi;
2367   bool other_edge_seen = false;
2368   bool is_cond;
2369
2370   if (test_bb == bb)
2371     return false;
2372   /* Check last stmt first.  */
2373   stmt = last_stmt (bb);
2374   if (stmt == NULL
2375       || (gimple_code (stmt) != GIMPLE_COND
2376           && (backward || !final_range_test_p (stmt)))
2377       || gimple_visited_p (stmt)
2378       || stmt_could_throw_p (stmt)
2379       || *other_bb == bb)
2380     return false;
2381   is_cond = gimple_code (stmt) == GIMPLE_COND;
2382   if (is_cond)
2383     {
2384       /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2385          goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2386          to *OTHER_BB (if not set yet, try to find it out).  */
2387       if (EDGE_COUNT (bb->succs) != 2)
2388         return false;
2389       FOR_EACH_EDGE (e, ei, bb->succs)
2390         {
2391           if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2392             return false;
2393           if (e->dest == test_bb)
2394             {
2395               if (backward)
2396                 continue;
2397               else
2398                 return false;
2399             }
2400           if (e->dest == bb)
2401             return false;
2402           if (*other_bb == NULL)
2403             {
2404               FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2405                 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2406                   return false;
2407                 else if (e->dest == e2->dest)
2408                   *other_bb = e->dest;
2409               if (*other_bb == NULL)
2410                 return false;
2411             }
2412           if (e->dest == *other_bb)
2413             other_edge_seen = true;
2414           else if (backward)
2415             return false;
2416         }
2417       if (*other_bb == NULL || !other_edge_seen)
2418         return false;
2419     }
2420   else if (single_succ (bb) != *other_bb)
2421     return false;
2422
2423   /* Now check all PHIs of *OTHER_BB.  */
2424   e = find_edge (bb, *other_bb);
2425   e2 = find_edge (test_bb, *other_bb);
2426   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2427     {
2428       gimple phi = gsi_stmt (gsi);
2429       /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2430          corresponding to BB and TEST_BB predecessor must be the same.  */
2431       if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2432                             gimple_phi_arg_def (phi, e2->dest_idx), 0))
2433         {
2434           /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2435              one of the PHIs should have the lhs of the last stmt in
2436              that block as PHI arg and that PHI should have 0 or 1
2437              corresponding to it in all other range test basic blocks
2438              considered.  */
2439           if (!is_cond)
2440             {
2441               if (gimple_phi_arg_def (phi, e->dest_idx)
2442                   == gimple_assign_lhs (stmt)
2443                   && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2444                       || integer_onep (gimple_phi_arg_def (phi,
2445                                                            e2->dest_idx))))
2446                 continue;
2447             }
2448           else
2449             {
2450               gimple test_last = last_stmt (test_bb);
2451               if (gimple_code (test_last) != GIMPLE_COND
2452                   && gimple_phi_arg_def (phi, e2->dest_idx)
2453                      == gimple_assign_lhs (test_last)
2454                   && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2455                       || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2456                 continue;
2457             }
2458
2459           return false;
2460         }
2461     }
2462   return true;
2463 }
2464
2465 /* Return true if BB doesn't have side-effects that would disallow
2466    range test optimization, all SSA_NAMEs set in the bb are consumed
2467    in the bb and there are no PHIs.  */
2468
2469 static bool
2470 no_side_effect_bb (basic_block bb)
2471 {
2472   gimple_stmt_iterator gsi;
2473   gimple last;
2474
2475   if (!gimple_seq_empty_p (phi_nodes (bb)))
2476     return false;
2477   last = last_stmt (bb);
2478   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2479     {
2480       gimple stmt = gsi_stmt (gsi);
2481       tree lhs;
2482       imm_use_iterator imm_iter;
2483       use_operand_p use_p;
2484
2485       if (is_gimple_debug (stmt))
2486         continue;
2487       if (gimple_has_side_effects (stmt))
2488         return false;
2489       if (stmt == last)
2490         return true;
2491       if (!is_gimple_assign (stmt))
2492         return false;
2493       lhs = gimple_assign_lhs (stmt);
2494       if (TREE_CODE (lhs) != SSA_NAME)
2495         return false;
2496       if (gimple_assign_rhs_could_trap_p (stmt))
2497         return false;
2498       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2499         {
2500           gimple use_stmt = USE_STMT (use_p);
2501           if (is_gimple_debug (use_stmt))
2502             continue;
2503           if (gimple_bb (use_stmt) != bb)
2504             return false;
2505         }
2506     }
2507   return false;
2508 }
2509
2510 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2511    return true and fill in *OPS recursively.  */
2512
2513 static bool
2514 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2515          struct loop *loop)
2516 {
2517   gimple stmt = SSA_NAME_DEF_STMT (var);
2518   tree rhs[2];
2519   int i;
2520
2521   if (!is_reassociable_op (stmt, code, loop))
2522     return false;
2523
2524   rhs[0] = gimple_assign_rhs1 (stmt);
2525   rhs[1] = gimple_assign_rhs2 (stmt);
2526   gimple_set_visited (stmt, true);
2527   for (i = 0; i < 2; i++)
2528     if (TREE_CODE (rhs[i]) == SSA_NAME
2529         && !get_ops (rhs[i], code, ops, loop)
2530         && has_single_use (rhs[i]))
2531       {
2532         operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2533
2534         oe->op = rhs[i];
2535         oe->rank = code;
2536         oe->id = 0;
2537         oe->count = 1;
2538         ops->safe_push (oe);
2539       }
2540   return true;
2541 }
2542
2543 /* Inter-bb range test optimization.  */
2544
2545 static void
2546 maybe_optimize_range_tests (gimple stmt)
2547 {
2548   basic_block first_bb = gimple_bb (stmt);
2549   basic_block last_bb = first_bb;
2550   basic_block other_bb = NULL;
2551   basic_block bb;
2552   edge_iterator ei;
2553   edge e;
2554   vec<operand_entry_t> ops = vNULL;
2555
2556   /* Consider only basic blocks that end with GIMPLE_COND or
2557      a cast statement satisfying final_range_test_p.  All
2558      but the last bb in the first_bb .. last_bb range
2559      should end with GIMPLE_COND.  */
2560   if (gimple_code (stmt) == GIMPLE_COND)
2561     {
2562       if (EDGE_COUNT (first_bb->succs) != 2)
2563         return;
2564     }
2565   else if (final_range_test_p (stmt))
2566     other_bb = single_succ (first_bb);
2567   else
2568     return;
2569
2570   if (stmt_could_throw_p (stmt))
2571     return;
2572
2573   /* As relative ordering of post-dominator sons isn't fixed,
2574      maybe_optimize_range_tests can be called first on any
2575      bb in the range we want to optimize.  So, start searching
2576      backwards, if first_bb can be set to a predecessor.  */
2577   while (single_pred_p (first_bb))
2578     {
2579       basic_block pred_bb = single_pred (first_bb);
2580       if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2581         break;
2582       if (!no_side_effect_bb (first_bb))
2583         break;
2584       first_bb = pred_bb;
2585     }
2586   /* If first_bb is last_bb, other_bb hasn't been computed yet.
2587      Before starting forward search in last_bb successors, find
2588      out the other_bb.  */
2589   if (first_bb == last_bb)
2590     {
2591       other_bb = NULL;
2592       /* As non-GIMPLE_COND last stmt always terminates the range,
2593          if forward search didn't discover anything, just give up.  */
2594       if (gimple_code (stmt) != GIMPLE_COND)
2595         return;
2596       /* Look at both successors.  Either it ends with a GIMPLE_COND
2597          and satisfies suitable_cond_bb, or ends with a cast and
2598          other_bb is that cast's successor.  */
2599       FOR_EACH_EDGE (e, ei, first_bb->succs)
2600         if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2601             || e->dest == first_bb)
2602           return;
2603         else if (single_pred_p (e->dest))
2604           {
2605             stmt = last_stmt (e->dest);
2606             if (stmt
2607                 && gimple_code (stmt) == GIMPLE_COND
2608                 && EDGE_COUNT (e->dest->succs) == 2)
2609               {
2610                 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2611                   break;
2612                 else
2613                   other_bb = NULL;
2614               }
2615             else if (stmt
2616                      && final_range_test_p (stmt)
2617                      && find_edge (first_bb, single_succ (e->dest)))
2618               {
2619                 other_bb = single_succ (e->dest);
2620                 if (other_bb == first_bb)
2621                   other_bb = NULL;
2622               }
2623           }
2624       if (other_bb == NULL)
2625         return;
2626     }
2627   /* Now do the forward search, moving last_bb to successor bbs
2628      that aren't other_bb.  */
2629   while (EDGE_COUNT (last_bb->succs) == 2)
2630     {
2631       FOR_EACH_EDGE (e, ei, last_bb->succs)
2632         if (e->dest != other_bb)
2633           break;
2634       if (e == NULL)
2635         break;
2636       if (!single_pred_p (e->dest))
2637         break;
2638       if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2639         break;
2640       if (!no_side_effect_bb (e->dest))
2641         break;
2642       last_bb = e->dest;
2643     }
2644   if (first_bb == last_bb)
2645     return;
2646   /* Here basic blocks first_bb through last_bb's predecessor
2647      end with GIMPLE_COND, all of them have one of the edges to
2648      other_bb and another to another block in the range,
2649      all blocks except first_bb don't have side-effects and
2650      last_bb ends with either GIMPLE_COND, or cast satisfying
2651      final_range_test_p.  */
2652   for (bb = last_bb; ; bb = single_pred (bb))
2653     {
2654       enum tree_code code;
2655       tree lhs, rhs;
2656
2657       e = find_edge (bb, other_bb);
2658       stmt = last_stmt (bb);
2659       gimple_set_visited (stmt, true);
2660       if (gimple_code (stmt) != GIMPLE_COND)
2661         {
2662           use_operand_p use_p;
2663           gimple phi;
2664           edge e2;
2665           unsigned int d;
2666
2667           lhs = gimple_assign_lhs (stmt);
2668           rhs = gimple_assign_rhs1 (stmt);
2669           gcc_assert (bb == last_bb);
2670
2671           /* stmt is
2672              _123 = (int) _234;
2673
2674              followed by:
2675              <bb M>:
2676              # _345 = PHI <_123(N), 1(...), 1(...)>
2677
2678              or 0 instead of 1.  If it is 0, the _234
2679              range test is anded together with all the
2680              other range tests, if it is 1, it is ored with
2681              them.  */
2682           single_imm_use (lhs, &use_p, &phi);
2683           gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2684           e2 = find_edge (first_bb, other_bb);
2685           d = e2->dest_idx;
2686           gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2687           if (integer_zerop (gimple_phi_arg_def (phi, d)))
2688             code = BIT_AND_EXPR;
2689           else
2690             {
2691               gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2692               code = BIT_IOR_EXPR;
2693             }
2694
2695           /* If _234 SSA_NAME_DEF_STMT is
2696              _234 = _567 | _789;
2697              (or &, corresponding to 1/0 in the phi arguments,
2698              push into ops the individual range test arguments
2699              of the bitwise or resp. and, recursively.  */
2700           if (!get_ops (rhs, code, &ops,
2701                         loop_containing_stmt (stmt))
2702               && has_single_use (rhs))
2703             {
2704               /* Otherwise, push the _234 range test itself.  */
2705               operand_entry_t oe
2706                 = (operand_entry_t) pool_alloc (operand_entry_pool);
2707
2708               oe->op = rhs;
2709               oe->rank = code;
2710               oe->id = 0;
2711               oe->count = 1;
2712               ops.safe_push (oe);
2713             }
2714           continue;
2715         }
2716       /* Otherwise stmt is GIMPLE_COND.  */
2717       code = gimple_cond_code (stmt);
2718       lhs = gimple_cond_lhs (stmt);
2719       rhs = gimple_cond_rhs (stmt);
2720       if (TREE_CODE (lhs) == SSA_NAME
2721           && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2722           && ((code != EQ_EXPR && code != NE_EXPR)
2723               || rhs != boolean_false_node
2724                  /* Either push into ops the individual bitwise
2725                     or resp. and operands, depending on which
2726                     edge is other_bb.  */
2727               || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2728                                  ^ (code == EQ_EXPR))
2729                                 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2730                            loop_containing_stmt (stmt))))
2731         {
2732           /* Or push the GIMPLE_COND stmt itself.  */
2733           operand_entry_t oe
2734             = (operand_entry_t) pool_alloc (operand_entry_pool);
2735
2736           oe->op = NULL;
2737           oe->rank = (e->flags & EDGE_TRUE_VALUE)
2738                      ? BIT_IOR_EXPR : BIT_AND_EXPR;
2739           /* oe->op = NULL signs that there is no SSA_NAME
2740              for the range test, and oe->id instead is the
2741              basic block number, at which's end the GIMPLE_COND
2742              is.  */
2743           oe->id = bb->index;
2744           oe->count = 1;
2745           ops.safe_push (oe);
2746         }
2747       if (bb == first_bb)
2748         break;
2749     }
2750   if (ops.length () > 1)
2751     optimize_range_tests (ERROR_MARK, &ops);
2752   ops.release ();
2753 }
2754
2755 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2756    of STMT in it's operands.  This is also known as a "destructive
2757    update" operation.  */
2758
2759 static bool
2760 is_phi_for_stmt (gimple stmt, tree operand)
2761 {
2762   gimple def_stmt;
2763   tree lhs;
2764   use_operand_p arg_p;
2765   ssa_op_iter i;
2766
2767   if (TREE_CODE (operand) != SSA_NAME)
2768     return false;
2769
2770   lhs = gimple_assign_lhs (stmt);
2771
2772   def_stmt = SSA_NAME_DEF_STMT (operand);
2773   if (gimple_code (def_stmt) != GIMPLE_PHI)
2774     return false;
2775
2776   FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2777     if (lhs == USE_FROM_PTR (arg_p))
2778       return true;
2779   return false;
2780 }
2781
2782 /* Remove def stmt of VAR if VAR has zero uses and recurse
2783    on rhs1 operand if so.  */
2784
2785 static void
2786 remove_visited_stmt_chain (tree var)
2787 {
2788   gimple stmt;
2789   gimple_stmt_iterator gsi;
2790
2791   while (1)
2792     {
2793       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2794         return;
2795       stmt = SSA_NAME_DEF_STMT (var);
2796       if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
2797         {
2798           var = gimple_assign_rhs1 (stmt);
2799           gsi = gsi_for_stmt (stmt);
2800           gsi_remove (&gsi, true);
2801           release_defs (stmt);
2802         }
2803       else
2804         return;
2805     }
2806 }
2807
2808 /* This function checks three consequtive operands in
2809    passed operands vector OPS starting from OPINDEX and
2810    swaps two operands if it is profitable for binary operation
2811    consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
2812
2813    We pair ops with the same rank if possible.
2814
2815    The alternative we try is to see if STMT is a destructive
2816    update style statement, which is like:
2817    b = phi (a, ...)
2818    a = c + b;
2819    In that case, we want to use the destructive update form to
2820    expose the possible vectorizer sum reduction opportunity.
2821    In that case, the third operand will be the phi node. This
2822    check is not performed if STMT is null.
2823
2824    We could, of course, try to be better as noted above, and do a
2825    lot of work to try to find these opportunities in >3 operand
2826    cases, but it is unlikely to be worth it.  */
2827
2828 static void
2829 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
2830                           unsigned int opindex, gimple stmt)
2831 {
2832   operand_entry_t oe1, oe2, oe3;
2833
2834   oe1 = ops[opindex];
2835   oe2 = ops[opindex + 1];
2836   oe3 = ops[opindex + 2];
2837
2838   if ((oe1->rank == oe2->rank
2839        && oe2->rank != oe3->rank)
2840       || (stmt && is_phi_for_stmt (stmt, oe3->op)
2841           && !is_phi_for_stmt (stmt, oe1->op)
2842           && !is_phi_for_stmt (stmt, oe2->op)))
2843     {
2844       struct operand_entry temp = *oe3;
2845       oe3->op = oe1->op;
2846       oe3->rank = oe1->rank;
2847       oe1->op = temp.op;
2848       oe1->rank= temp.rank;
2849     }
2850   else if ((oe1->rank == oe3->rank
2851             && oe2->rank != oe3->rank)
2852            || (stmt && is_phi_for_stmt (stmt, oe2->op)
2853                && !is_phi_for_stmt (stmt, oe1->op)
2854                && !is_phi_for_stmt (stmt, oe3->op)))
2855     {
2856       struct operand_entry temp = *oe2;
2857       oe2->op = oe1->op;
2858       oe2->rank = oe1->rank;
2859       oe1->op = temp.op;
2860       oe1->rank= temp.rank;
2861     }
2862 }
2863
2864 /* Determine if stmt A is not dominated by stmt B. If A and B are in
2865    same basic block, then A's UID has to be less than B. If they are
2866    in different BB's, then A's BB must not be dominated by B's BB.  */
2867
2868 static inline bool
2869 not_dominated_by (gimple a, gimple b)
2870 {
2871   basic_block bb_a, bb_b;
2872   bb_a = gimple_bb (a);
2873   bb_b = gimple_bb (b);
2874   return ((bb_a == bb_b && gimple_uid (a) < gimple_uid (b))
2875           || (bb_a != bb_b
2876               && !dominated_by_p (CDI_DOMINATORS, bb_a, bb_b)));
2877
2878 }
2879
2880 /* Among STMT1 and STMT2, return the statement that appears later. Both
2881    statements are in same BB and have the same UID.  */
2882
2883 static gimple
2884 appears_later_in_bb (gimple stmt1, gimple stmt2)
2885 {
2886   unsigned uid = gimple_uid (stmt1);
2887   gimple_stmt_iterator gsi = gsi_for_stmt (stmt1);
2888   for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
2889     {
2890       gimple stmt = gsi_stmt (gsi);
2891
2892       /* If STMT has a different UID than STMT1 and we haven't seen
2893          STMT2 during traversal, we know STMT1 appears later.  */
2894       if (gimple_uid (stmt) != uid)
2895         return stmt1;
2896       else if (stmt == stmt2)
2897         return stmt2;
2898     }
2899   return stmt1;
2900 }
2901
2902 /* Find the statement after which STMT must be moved so that the
2903    dependency from DEP_STMT to STMT is maintained.  */
2904
2905 static gimple
2906 find_insert_point (gimple stmt, gimple dep_stmt)
2907 {
2908   gimple insert_stmt = stmt;
2909   if (dep_stmt == NULL)
2910     return stmt;
2911   if (gimple_uid (insert_stmt) == gimple_uid (dep_stmt)
2912       && gimple_bb (insert_stmt) == gimple_bb (dep_stmt)
2913       && insert_stmt != dep_stmt)
2914     insert_stmt = appears_later_in_bb (insert_stmt, dep_stmt);
2915   else if (not_dominated_by (insert_stmt, dep_stmt))
2916     insert_stmt = dep_stmt;
2917   return insert_stmt;
2918 }
2919
2920 /* Insert STMT after INSERT_POINT.  */
2921
2922 static void
2923 insert_stmt_after (gimple stmt, gimple insert_point)
2924 {
2925   imm_use_iterator iter;
2926   tree lhs;
2927   gimple use_stmt;
2928   gimple_stmt_iterator gsistmt = gsi_for_stmt (stmt), gsi_insert;
2929   basic_block insert_bb = gimple_bb (insert_point);
2930   bool insert_bb_different = (insert_bb != gimple_bb (stmt));
2931   lhs = gimple_assign_lhs (stmt);
2932   /* If there are any debug uses of LHS, reset them.  */
2933   FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2934     {
2935       if (is_gimple_debug (use_stmt)
2936           && not_dominated_by (use_stmt, insert_point))
2937         {
2938           gimple_debug_bind_reset_value (use_stmt);
2939           update_stmt (use_stmt);
2940         }
2941     }
2942   /* If INSERT_STMT is a phi node, then do not insert just after that statement.
2943      Instead, find the first non-label gimple statement in BB and insert before
2944      that.  */
2945   if (gimple_code (insert_point) == GIMPLE_PHI)
2946     {
2947       gsi_insert = gsi_after_labels (insert_bb);
2948       gsi_move_before (&gsistmt, &gsi_insert);
2949     }
2950   /* Statements marked for throw can not be in the middle of a basic block. So
2951      we can not insert a statement (not marked for throw) immediately after.  */
2952   else if (stmt_ends_bb_p (insert_point))
2953     {
2954       edge succ_edge = find_fallthru_edge (insert_bb->succs);
2955       insert_bb = succ_edge->dest;
2956       insert_bb_different = (insert_bb != gimple_bb (stmt));
2957       /* Insert STMT at the beginning of the successor basic block.  */
2958       gsi_insert = gsi_after_labels (insert_bb);
2959       gsi_move_before (&gsistmt, &gsi_insert);
2960     }
2961   else
2962     {
2963       gsi_insert = gsi_for_stmt (insert_point);
2964       gsi_move_after (&gsistmt, &gsi_insert);
2965     }
2966   /* Set the UID of STMT to that of INSERT_POINT so that subsequent comparisons
2967      of UIDs to determine dominance within a basic block works.  */
2968   gimple_set_uid (stmt, gimple_uid (insert_point));
2969   if (dump_file && (dump_flags & TDF_DETAILS))
2970     {
2971       fprintf (dump_file, "Moved stmt ");
2972       print_gimple_stmt (dump_file, stmt, 0, 0);
2973       fprintf (dump_file, " %s to satisfy dependences\n",
2974                insert_bb_different ? "to a different BB" : "within same BB");
2975     }
2976
2977 }
2978
2979 /* If OP is a SSA variable and is not the default definition, return the
2980    gimple statement that defines OP. Else return NULL.  */
2981
2982 static inline gimple
2983 get_def_stmt (tree op)
2984 {
2985   if (TREE_CODE (op) == SSA_NAME
2986       && !SSA_NAME_IS_DEFAULT_DEF (op))
2987     return SSA_NAME_DEF_STMT (op);
2988   else
2989     return NULL;
2990 }
2991
2992 /* Ensure that operands in the OPS vector are available for STMT and all
2993    gimple statements on which STMT depends.  */
2994
2995 static void
2996 ensure_ops_are_available (gimple stmt, vec<operand_entry_t> ops, int opindex)
2997 {
2998   unsigned int len = ops.length ();
2999   gimple insert_stmt = stmt;
3000   gimple dep_stmts[2];
3001   dep_stmts[0] = get_def_stmt (ops[opindex]->op);
3002   if (len - opindex == 2)
3003     {
3004       dep_stmts[1] = get_def_stmt (ops[opindex + 1]->op);
3005     }
3006   else
3007     {
3008       gimple stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3009       ensure_ops_are_available (stmt1, ops, opindex + 1);
3010       dep_stmts[1] = stmt1;
3011     }
3012   for (int i = 0; i < 2; i++)
3013     insert_stmt = find_insert_point (insert_stmt, dep_stmts[i]);
3014
3015   if (insert_stmt != stmt)
3016     insert_stmt_after (stmt, insert_stmt);
3017 }
3018
3019 /* Recursively rewrite our linearized statements so that the operators
3020    match those in OPS[OPINDEX], putting the computation in rank
3021    order.  */
3022
3023 static void
3024 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3025                    vec<operand_entry_t> ops, bool moved)
3026 {
3027   tree rhs1 = gimple_assign_rhs1 (stmt);
3028   tree rhs2 = gimple_assign_rhs2 (stmt);
3029   operand_entry_t oe;
3030
3031   /* The final recursion case for this function is that you have
3032      exactly two operations left.
3033      If we had one exactly one op in the entire list to start with, we
3034      would have never called this function, and the tail recursion
3035      rewrites them one at a time.  */
3036   if (opindex + 2 == ops.length ())
3037     {
3038       operand_entry_t oe1, oe2;
3039
3040       oe1 = ops[opindex];
3041       oe2 = ops[opindex + 1];
3042
3043       if (rhs1 != oe1->op || rhs2 != oe2->op)
3044         {
3045           if (dump_file && (dump_flags & TDF_DETAILS))
3046             {
3047               fprintf (dump_file, "Transforming ");
3048               print_gimple_stmt (dump_file, stmt, 0, 0);
3049             }
3050
3051           gimple_assign_set_rhs1 (stmt, oe1->op);
3052           gimple_assign_set_rhs2 (stmt, oe2->op);
3053           update_stmt (stmt);
3054           if (rhs1 != oe1->op && rhs1 != oe2->op)
3055             remove_visited_stmt_chain (rhs1);
3056
3057           if (dump_file && (dump_flags & TDF_DETAILS))
3058             {
3059               fprintf (dump_file, " into ");
3060               print_gimple_stmt (dump_file, stmt, 0, 0);
3061             }
3062         }
3063       return;
3064     }
3065
3066   /* If we hit here, we should have 3 or more ops left.  */
3067   gcc_assert (opindex + 2 < ops.length ());
3068
3069   /* Rewrite the next operator.  */
3070   oe = ops[opindex];
3071
3072   if (oe->op != rhs2)
3073     {
3074       if (!moved)
3075         {
3076           ensure_ops_are_available (stmt, ops, opindex);
3077           moved = true;
3078         }
3079
3080       if (dump_file && (dump_flags & TDF_DETAILS))
3081         {
3082           fprintf (dump_file, "Transforming ");
3083           print_gimple_stmt (dump_file, stmt, 0, 0);
3084         }
3085
3086       gimple_assign_set_rhs2 (stmt, oe->op);
3087       update_stmt (stmt);
3088
3089       if (dump_file && (dump_flags & TDF_DETAILS))
3090         {
3091           fprintf (dump_file, " into ");
3092           print_gimple_stmt (dump_file, stmt, 0, 0);
3093         }
3094     }
3095   /* Recurse on the LHS of the binary operator, which is guaranteed to
3096      be the non-leaf side.  */
3097   rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
3098 }
3099
3100 /* Find out how many cycles we need to compute statements chain.
3101    OPS_NUM holds number os statements in a chain.  CPU_WIDTH is a
3102    maximum number of independent statements we may execute per cycle.  */
3103
3104 static int
3105 get_required_cycles (int ops_num, int cpu_width)
3106 {
3107   int res;
3108   int elog;
3109   unsigned int rest;
3110
3111   /* While we have more than 2 * cpu_width operands
3112      we may reduce number of operands by cpu_width
3113      per cycle.  */
3114   res = ops_num / (2 * cpu_width);
3115
3116   /* Remained operands count may be reduced twice per cycle
3117      until we have only one operand.  */
3118   rest = (unsigned)(ops_num - res * cpu_width);
3119   elog = exact_log2 (rest);
3120   if (elog >= 0)
3121     res += elog;
3122   else
3123     res += floor_log2 (rest) + 1;
3124
3125   return res;
3126 }
3127
3128 /* Returns an optimal number of registers to use for computation of
3129    given statements.  */
3130
3131 static int
3132 get_reassociation_width (int ops_num, enum tree_code opc,
3133                          enum machine_mode mode)
3134 {
3135   int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3136   int width;
3137   int width_min;
3138   int cycles_best;
3139
3140   if (param_width > 0)
3141     width = param_width;
3142   else
3143     width = targetm.sched.reassociation_width (opc, mode);
3144
3145   if (width == 1)
3146     return width;
3147
3148   /* Get the minimal time required for sequence computation.  */
3149   cycles_best = get_required_cycles (ops_num, width);
3150
3151   /* Check if we may use less width and still compute sequence for
3152      the same time.  It will allow us to reduce registers usage.
3153      get_required_cycles is monotonically increasing with lower width
3154      so we can perform a binary search for the minimal width that still
3155      results in the optimal cycle count.  */
3156   width_min = 1;
3157   while (width > width_min)
3158     {
3159       int width_mid = (width + width_min) / 2;
3160
3161       if (get_required_cycles (ops_num, width_mid) == cycles_best)
3162         width = width_mid;
3163       else if (width_min < width_mid)
3164         width_min = width_mid;
3165       else
3166         break;
3167     }
3168
3169   return width;
3170 }
3171
3172 /* Recursively rewrite our linearized statements so that the operators
3173    match those in OPS[OPINDEX], putting the computation in rank
3174    order and trying to allow operations to be executed in
3175    parallel.  */
3176
3177 static void
3178 rewrite_expr_tree_parallel (gimple stmt, int width,
3179                             vec<operand_entry_t>  ops)
3180 {
3181   enum tree_code opcode = gimple_assign_rhs_code (stmt);
3182   int op_num = ops.length ();
3183   int stmt_num = op_num - 1;
3184   gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3185   int op_index = op_num - 1;
3186   int stmt_index = 0;
3187   int ready_stmts_end = 0;
3188   int i = 0;
3189   tree last_rhs1 = gimple_assign_rhs1 (stmt);
3190
3191   /* We start expression rewriting from the top statements.
3192      So, in this loop we create a full list of statements
3193      we will work with.  */
3194   stmts[stmt_num - 1] = stmt;
3195   for (i = stmt_num - 2; i >= 0; i--)
3196     stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3197
3198   for (i = 0; i < stmt_num; i++)
3199     {
3200       tree op1, op2;
3201
3202       /* Determine whether we should use results of
3203          already handled statements or not.  */
3204       if (ready_stmts_end == 0
3205           && (i - stmt_index >= width || op_index < 1))
3206         ready_stmts_end = i;
3207
3208       /* Now we choose operands for the next statement.  Non zero
3209          value in ready_stmts_end means here that we should use
3210          the result of already generated statements as new operand.  */
3211       if (ready_stmts_end > 0)
3212         {
3213           op1 = gimple_assign_lhs (stmts[stmt_index++]);
3214           if (ready_stmts_end > stmt_index)
3215             op2 = gimple_assign_lhs (stmts[stmt_index++]);
3216           else if (op_index >= 0)
3217             op2 = ops[op_index--]->op;
3218           else
3219             {
3220               gcc_assert (stmt_index < i);
3221               op2 = gimple_assign_lhs (stmts[stmt_index++]);
3222             }
3223
3224           if (stmt_index >= ready_stmts_end)
3225             ready_stmts_end = 0;
3226         }
3227       else
3228         {
3229           if (op_index > 1)
3230             swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3231           op2 = ops[op_index--]->op;
3232           op1 = ops[op_index--]->op;
3233         }
3234
3235       /* If we emit the last statement then we should put
3236          operands into the last statement.  It will also
3237          break the loop.  */
3238       if (op_index < 0 && stmt_index == i)
3239         i = stmt_num - 1;
3240
3241       if (dump_file && (dump_flags & TDF_DETAILS))
3242         {
3243           fprintf (dump_file, "Transforming ");
3244           print_gimple_stmt (dump_file, stmts[i], 0, 0);
3245         }
3246
3247       /* We keep original statement only for the last one.  All
3248          others are recreated.  */
3249       if (i == stmt_num - 1)
3250         {
3251           gimple_assign_set_rhs1 (stmts[i], op1);
3252           gimple_assign_set_rhs2 (stmts[i], op2);
3253           update_stmt (stmts[i]);
3254         }
3255       else
3256         stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3257
3258       if (dump_file && (dump_flags & TDF_DETAILS))
3259         {
3260           fprintf (dump_file, " into ");
3261           print_gimple_stmt (dump_file, stmts[i], 0, 0);
3262         }
3263     }
3264
3265   remove_visited_stmt_chain (last_rhs1);
3266 }
3267
3268 /* Transform STMT, which is really (A +B) + (C + D) into the left
3269    linear form, ((A+B)+C)+D.
3270    Recurse on D if necessary.  */
3271
3272 static void
3273 linearize_expr (gimple stmt)
3274 {
3275   gimple_stmt_iterator gsinow, gsirhs;
3276   gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3277   gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3278   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3279   gimple newbinrhs = NULL;
3280   struct loop *loop = loop_containing_stmt (stmt);
3281
3282   gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3283               && is_reassociable_op (binrhs, rhscode, loop));
3284
3285   gsinow = gsi_for_stmt (stmt);
3286   gsirhs = gsi_for_stmt (binrhs);
3287   gsi_move_before (&gsirhs, &gsinow);
3288   gimple_set_uid (binrhs, gimple_uid (stmt));
3289
3290   gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3291   gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
3292   gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3293
3294   if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3295     newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3296
3297   if (dump_file && (dump_flags & TDF_DETAILS))
3298     {
3299       fprintf (dump_file, "Linearized: ");
3300       print_gimple_stmt (dump_file, stmt, 0, 0);
3301     }
3302
3303   reassociate_stats.linearized++;
3304   update_stmt (binrhs);
3305   update_stmt (binlhs);
3306   update_stmt (stmt);
3307
3308   gimple_set_visited (stmt, true);
3309   gimple_set_visited (binlhs, true);
3310   gimple_set_visited (binrhs, true);
3311
3312   /* Tail recurse on the new rhs if it still needs reassociation.  */
3313   if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3314     /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3315            want to change the algorithm while converting to tuples.  */
3316     linearize_expr (stmt);
3317 }
3318
3319 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3320    it.  Otherwise, return NULL.  */
3321
3322 static gimple
3323 get_single_immediate_use (tree lhs)
3324 {
3325   use_operand_p immuse;
3326   gimple immusestmt;
3327
3328   if (TREE_CODE (lhs) == SSA_NAME
3329       && single_imm_use (lhs, &immuse, &immusestmt)
3330       && is_gimple_assign (immusestmt))
3331     return immusestmt;
3332
3333   return NULL;
3334 }
3335
3336 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3337    representing the negated value.  Insertions of any necessary
3338    instructions go before GSI.
3339    This function is recursive in that, if you hand it "a_5" as the
3340    value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3341    transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
3342
3343 static tree
3344 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
3345 {
3346   gimple negatedefstmt= NULL;
3347   tree resultofnegate;
3348
3349   /* If we are trying to negate a name, defined by an add, negate the
3350      add operands instead.  */
3351   if (TREE_CODE (tonegate) == SSA_NAME)
3352     negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3353   if (TREE_CODE (tonegate) == SSA_NAME
3354       && is_gimple_assign (negatedefstmt)
3355       && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3356       && has_single_use (gimple_assign_lhs (negatedefstmt))
3357       && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3358     {
3359       gimple_stmt_iterator gsi;
3360       tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3361       tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3362
3363       gsi = gsi_for_stmt (negatedefstmt);
3364       rhs1 = negate_value (rhs1, &gsi);
3365       gimple_assign_set_rhs1 (negatedefstmt, rhs1);
3366
3367       gsi = gsi_for_stmt (negatedefstmt);
3368       rhs2 = negate_value (rhs2, &gsi);
3369       gimple_assign_set_rhs2 (negatedefstmt, rhs2);
3370
3371       update_stmt (negatedefstmt);
3372       return gimple_assign_lhs (negatedefstmt);
3373     }
3374
3375   tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3376   resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
3377                                              NULL_TREE, true, GSI_SAME_STMT);
3378   return resultofnegate;
3379 }
3380
3381 /* Return true if we should break up the subtract in STMT into an add
3382    with negate.  This is true when we the subtract operands are really
3383    adds, or the subtract itself is used in an add expression.  In
3384    either case, breaking up the subtract into an add with negate
3385    exposes the adds to reassociation.  */
3386
3387 static bool
3388 should_break_up_subtract (gimple stmt)
3389 {
3390   tree lhs = gimple_assign_lhs (stmt);
3391   tree binlhs = gimple_assign_rhs1 (stmt);
3392   tree binrhs = gimple_assign_rhs2 (stmt);
3393   gimple immusestmt;
3394   struct loop *loop = loop_containing_stmt (stmt);
3395
3396   if (TREE_CODE (binlhs) == SSA_NAME
3397       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3398     return true;
3399
3400   if (TREE_CODE (binrhs) == SSA_NAME
3401       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3402     return true;
3403
3404   if (TREE_CODE (lhs) == SSA_NAME
3405       && (immusestmt = get_single_immediate_use (lhs))
3406       && is_gimple_assign (immusestmt)
3407       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3408           ||  gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3409     return true;
3410   return false;
3411 }
3412
3413 /* Transform STMT from A - B into A + -B.  */
3414
3415 static void
3416 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3417 {
3418   tree rhs1 = gimple_assign_rhs1 (stmt);
3419   tree rhs2 = gimple_assign_rhs2 (stmt);
3420
3421   if (dump_file && (dump_flags & TDF_DETAILS))
3422     {
3423       fprintf (dump_file, "Breaking up subtract ");
3424       print_gimple_stmt (dump_file, stmt, 0, 0);
3425     }
3426
3427   rhs2 = negate_value (rhs2, gsip);
3428   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3429   update_stmt (stmt);
3430 }
3431
3432 /* Determine whether STMT is a builtin call that raises an SSA name
3433    to an integer power and has only one use.  If so, and this is early
3434    reassociation and unsafe math optimizations are permitted, place
3435    the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3436    If any of these conditions does not hold, return FALSE.  */
3437
3438 static bool
3439 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3440 {
3441   tree fndecl, arg1;
3442   REAL_VALUE_TYPE c, cint;
3443
3444   if (!first_pass_instance
3445       || !flag_unsafe_math_optimizations
3446       || !is_gimple_call (stmt)
3447       || !has_single_use (gimple_call_lhs (stmt)))
3448     return false;
3449
3450   fndecl = gimple_call_fndecl (stmt);
3451
3452   if (!fndecl
3453       || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3454     return false;
3455
3456   switch (DECL_FUNCTION_CODE (fndecl))
3457     {
3458     CASE_FLT_FN (BUILT_IN_POW):
3459       *base = gimple_call_arg (stmt, 0);
3460       arg1 = gimple_call_arg (stmt, 1);
3461
3462       if (TREE_CODE (arg1) != REAL_CST)
3463         return false;
3464
3465       c = TREE_REAL_CST (arg1);
3466
3467       if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3468         return false;
3469
3470       *exponent = real_to_integer (&c);
3471       real_from_integer (&cint, VOIDmode, *exponent,
3472                          *exponent < 0 ? -1 : 0, 0);
3473       if (!real_identical (&c, &cint))
3474         return false;
3475
3476       break;
3477
3478     CASE_FLT_FN (BUILT_IN_POWI):
3479       *base = gimple_call_arg (stmt, 0);
3480       arg1 = gimple_call_arg (stmt, 1);
3481
3482       if (!host_integerp (arg1, 0))
3483         return false;
3484
3485       *exponent = TREE_INT_CST_LOW (arg1);
3486       break;
3487
3488     default:
3489       return false;
3490     }
3491
3492   /* Expanding negative exponents is generally unproductive, so we don't
3493      complicate matters with those.  Exponents of zero and one should
3494      have been handled by expression folding.  */
3495   if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3496     return false;
3497
3498   return true;
3499 }
3500
3501 /* Recursively linearize a binary expression that is the RHS of STMT.
3502    Place the operands of the expression tree in the vector named OPS.  */
3503
3504 static void
3505 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3506                      bool is_associative, bool set_visited)
3507 {
3508   tree binlhs = gimple_assign_rhs1 (stmt);
3509   tree binrhs = gimple_assign_rhs2 (stmt);
3510   gimple binlhsdef = NULL, binrhsdef = NULL;
3511   bool binlhsisreassoc = false;
3512   bool binrhsisreassoc = false;
3513   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3514   struct loop *loop = loop_containing_stmt (stmt);
3515   tree base = NULL_TREE;
3516   HOST_WIDE_INT exponent = 0;
3517
3518   if (set_visited)
3519     gimple_set_visited (stmt, true);
3520
3521   if (TREE_CODE (binlhs) == SSA_NAME)
3522     {
3523       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3524       binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3525                          && !stmt_could_throw_p (binlhsdef));
3526     }
3527
3528   if (TREE_CODE (binrhs) == SSA_NAME)
3529     {
3530       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3531       binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3532                          && !stmt_could_throw_p (binrhsdef));
3533     }
3534
3535   /* If the LHS is not reassociable, but the RHS is, we need to swap
3536      them.  If neither is reassociable, there is nothing we can do, so
3537      just put them in the ops vector.  If the LHS is reassociable,
3538      linearize it.  If both are reassociable, then linearize the RHS
3539      and the LHS.  */
3540
3541   if (!binlhsisreassoc)
3542     {
3543       tree temp;
3544
3545       /* If this is not a associative operation like division, give up.  */
3546       if (!is_associative)
3547         {
3548           add_to_ops_vec (ops, binrhs);
3549           return;
3550         }
3551
3552       if (!binrhsisreassoc)
3553         {
3554           if (rhscode == MULT_EXPR
3555               && TREE_CODE (binrhs) == SSA_NAME
3556               && acceptable_pow_call (binrhsdef, &base, &exponent))
3557             {
3558               add_repeat_to_ops_vec (ops, base, exponent);
3559               gimple_set_visited (binrhsdef, true);
3560             }
3561           else
3562             add_to_ops_vec (ops, binrhs);
3563
3564           if (rhscode == MULT_EXPR
3565               && TREE_CODE (binlhs) == SSA_NAME
3566               && acceptable_pow_call (binlhsdef, &base, &exponent))
3567             {
3568               add_repeat_to_ops_vec (ops, base, exponent);
3569               gimple_set_visited (binlhsdef, true);
3570             }
3571           else
3572             add_to_ops_vec (ops, binlhs);
3573
3574           return;
3575         }
3576
3577       if (dump_file && (dump_flags & TDF_DETAILS))
3578         {
3579           fprintf (dump_file, "swapping operands of ");
3580           print_gimple_stmt (dump_file, stmt, 0, 0);
3581         }
3582
3583       swap_tree_operands (stmt,
3584                           gimple_assign_rhs1_ptr (stmt),
3585                           gimple_assign_rhs2_ptr (stmt));
3586       update_stmt (stmt);
3587
3588       if (dump_file && (dump_flags & TDF_DETAILS))
3589         {
3590           fprintf (dump_file, " is now ");
3591           print_gimple_stmt (dump_file, stmt, 0, 0);
3592         }
3593
3594       /* We want to make it so the lhs is always the reassociative op,
3595          so swap.  */
3596       temp = binlhs;
3597       binlhs = binrhs;
3598       binrhs = temp;
3599     }
3600   else if (binrhsisreassoc)
3601     {
3602       linearize_expr (stmt);
3603       binlhs = gimple_assign_rhs1 (stmt);
3604       binrhs = gimple_assign_rhs2 (stmt);
3605     }
3606
3607   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3608               || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3609                                       rhscode, loop));
3610   linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3611                        is_associative, set_visited);
3612
3613   if (rhscode == MULT_EXPR
3614       && TREE_CODE (binrhs) == SSA_NAME
3615       && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3616     {
3617       add_repeat_to_ops_vec (ops, base, exponent);
3618       gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3619     }
3620   else
3621     add_to_ops_vec (ops, binrhs);
3622 }
3623
3624 /* Repropagate the negates back into subtracts, since no other pass
3625    currently does it.  */
3626
3627 static void
3628 repropagate_negates (void)
3629 {
3630   unsigned int i = 0;
3631   tree negate;
3632
3633   FOR_EACH_VEC_ELT (plus_negates, i, negate)
3634     {
3635       gimple user = get_single_immediate_use (negate);
3636
3637       if (!user || !is_gimple_assign (user))
3638         continue;
3639
3640       /* The negate operand can be either operand of a PLUS_EXPR
3641          (it can be the LHS if the RHS is a constant for example).
3642
3643          Force the negate operand to the RHS of the PLUS_EXPR, then
3644          transform the PLUS_EXPR into a MINUS_EXPR.  */
3645       if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3646         {
3647           /* If the negated operand appears on the LHS of the
3648              PLUS_EXPR, exchange the operands of the PLUS_EXPR
3649              to force the negated operand to the RHS of the PLUS_EXPR.  */
3650           if (gimple_assign_rhs1 (user) == negate)
3651             {
3652               swap_tree_operands (user,
3653                                   gimple_assign_rhs1_ptr (user),
3654                                   gimple_assign_rhs2_ptr (user));
3655             }
3656
3657           /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3658              the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
3659           if (gimple_assign_rhs2 (user) == negate)
3660             {
3661               tree rhs1 = gimple_assign_rhs1 (user);
3662               tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3663               gimple_stmt_iterator gsi = gsi_for_stmt (user);
3664               gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3665               update_stmt (user);
3666             }
3667         }
3668       else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3669         {
3670           if (gimple_assign_rhs1 (user) == negate)
3671             {
3672               /* We have
3673                    x = -a
3674                    y = x - b
3675                  which we transform into
3676                    x = a + b
3677                    y = -x .
3678                  This pushes down the negate which we possibly can merge
3679                  into some other operation, hence insert it into the
3680                  plus_negates vector.  */
3681               gimple feed = SSA_NAME_DEF_STMT (negate);
3682               tree a = gimple_assign_rhs1 (feed);
3683               tree rhs2 = gimple_assign_rhs2 (user);
3684               gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
3685               gimple_replace_lhs (feed, negate);
3686               gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
3687               update_stmt (gsi_stmt (gsi));
3688               gsi2 = gsi_for_stmt (user);
3689               gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
3690               update_stmt (gsi_stmt (gsi2));
3691               gsi_move_before (&gsi, &gsi2);
3692               plus_negates.safe_push (gimple_assign_lhs (gsi_stmt (gsi2)));
3693             }
3694           else
3695             {
3696               /* Transform "x = -a; y = b - x" into "y = b + a", getting
3697                  rid of one operation.  */
3698               gimple feed = SSA_NAME_DEF_STMT (negate);
3699               tree a = gimple_assign_rhs1 (feed);
3700               tree rhs1 = gimple_assign_rhs1 (user);
3701               gimple_stmt_iterator gsi = gsi_for_stmt (user);
3702               gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3703               update_stmt (gsi_stmt (gsi));
3704             }
3705         }
3706     }
3707 }
3708
3709 /* Returns true if OP is of a type for which we can do reassociation.
3710    That is for integral or non-saturating fixed-point types, and for
3711    floating point type when associative-math is enabled.  */
3712
3713 static bool
3714 can_reassociate_p (tree op)
3715 {
3716   tree type = TREE_TYPE (op);
3717   if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3718       || NON_SAT_FIXED_POINT_TYPE_P (type)
3719       || (flag_associative_math && FLOAT_TYPE_P (type)))
3720     return true;
3721   return false;
3722 }
3723
3724 /* Break up subtract operations in block BB.
3725
3726    We do this top down because we don't know whether the subtract is
3727    part of a possible chain of reassociation except at the top.
3728
3729    IE given
3730    d = f + g
3731    c = a + e
3732    b = c - d
3733    q = b - r
3734    k = t - q
3735
3736    we want to break up k = t - q, but we won't until we've transformed q
3737    = b - r, which won't be broken up until we transform b = c - d.
3738
3739    En passant, clear the GIMPLE visited flag on every statement.  */
3740
3741 static void
3742 break_up_subtract_bb (basic_block bb)
3743 {
3744   gimple_stmt_iterator gsi;
3745   basic_block son;
3746
3747   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3748     {
3749       gimple stmt = gsi_stmt (gsi);
3750       gimple_set_visited (stmt, false);
3751
3752       if (!is_gimple_assign (stmt)
3753           || !can_reassociate_p (gimple_assign_lhs (stmt)))
3754         continue;
3755
3756       /* Look for simple gimple subtract operations.  */
3757       if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3758         {
3759           if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3760               || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3761             continue;
3762
3763           /* Check for a subtract used only in an addition.  If this
3764              is the case, transform it into add of a negate for better
3765              reassociation.  IE transform C = A-B into C = A + -B if C
3766              is only used in an addition.  */
3767           if (should_break_up_subtract (stmt))
3768             break_up_subtract (stmt, &gsi);
3769         }
3770       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3771                && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3772         plus_negates.safe_push (gimple_assign_lhs (stmt));
3773     }
3774   for (son = first_dom_son (CDI_DOMINATORS, bb);
3775        son;
3776        son = next_dom_son (CDI_DOMINATORS, son))
3777     break_up_subtract_bb (son);
3778 }
3779
3780 /* Used for repeated factor analysis.  */
3781 struct repeat_factor_d
3782 {
3783   /* An SSA name that occurs in a multiply chain.  */
3784   tree factor;
3785
3786   /* Cached rank of the factor.  */
3787   unsigned rank;
3788
3789   /* Number of occurrences of the factor in the chain.  */
3790   HOST_WIDE_INT count;
3791
3792   /* An SSA name representing the product of this factor and
3793      all factors appearing later in the repeated factor vector.  */
3794   tree repr;
3795 };
3796
3797 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3798 typedef const struct repeat_factor_d *const_repeat_factor_t;
3799
3800
3801 static vec<repeat_factor> repeat_factor_vec;
3802
3803 /* Used for sorting the repeat factor vector.  Sort primarily by
3804    ascending occurrence count, secondarily by descending rank.  */
3805
3806 static int
3807 compare_repeat_factors (const void *x1, const void *x2)
3808 {
3809   const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3810   const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3811
3812   if (rf1->count != rf2->count)
3813     return rf1->count - rf2->count;
3814
3815   return rf2->rank - rf1->rank;
3816 }
3817
3818 /* Look for repeated operands in OPS in the multiply tree rooted at
3819    STMT.  Replace them with an optimal sequence of multiplies and powi
3820    builtin calls, and remove the used operands from OPS.  Return an
3821    SSA name representing the value of the replacement sequence.  */
3822
3823 static tree
3824 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
3825 {
3826   unsigned i, j, vec_len;
3827   int ii;
3828   operand_entry_t oe;
3829   repeat_factor_t rf1, rf2;
3830   repeat_factor rfnew;
3831   tree result = NULL_TREE;
3832   tree target_ssa, iter_result;
3833   tree type = TREE_TYPE (gimple_get_lhs (stmt));
3834   tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
3835   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3836   gimple mul_stmt, pow_stmt;
3837
3838   /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3839      target.  */
3840   if (!powi_fndecl)
3841     return NULL_TREE;
3842
3843   /* Allocate the repeated factor vector.  */
3844   repeat_factor_vec.create (10);
3845
3846   /* Scan the OPS vector for all SSA names in the product and build
3847      up a vector of occurrence counts for each factor.  */
3848   FOR_EACH_VEC_ELT (*ops, i, oe)
3849     {
3850       if (TREE_CODE (oe->op) == SSA_NAME)
3851         {
3852           FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3853             {
3854               if (rf1->factor == oe->op)
3855                 {
3856                   rf1->count += oe->count;
3857                   break;
3858                 }
3859             }
3860
3861           if (j >= repeat_factor_vec.length ())
3862             {
3863               rfnew.factor = oe->op;
3864               rfnew.rank = oe->rank;
3865               rfnew.count = oe->count;
3866               rfnew.repr = NULL_TREE;
3867               repeat_factor_vec.safe_push (rfnew);
3868             }
3869         }
3870     }
3871
3872   /* Sort the repeated factor vector by (a) increasing occurrence count,
3873      and (b) decreasing rank.  */
3874   repeat_factor_vec.qsort (compare_repeat_factors);
3875
3876   /* It is generally best to combine as many base factors as possible
3877      into a product before applying __builtin_powi to the result.
3878      However, the sort order chosen for the repeated factor vector
3879      allows us to cache partial results for the product of the base
3880      factors for subsequent use.  When we already have a cached partial
3881      result from a previous iteration, it is best to make use of it
3882      before looking for another __builtin_pow opportunity.
3883
3884      As an example, consider x * x * y * y * y * z * z * z * z.
3885      We want to first compose the product x * y * z, raise it to the
3886      second power, then multiply this by y * z, and finally multiply
3887      by z.  This can be done in 5 multiplies provided we cache y * z
3888      for use in both expressions:
3889
3890         t1 = y * z
3891         t2 = t1 * x
3892         t3 = t2 * t2
3893         t4 = t1 * t3
3894         result = t4 * z
3895
3896      If we instead ignored the cached y * z and first multiplied by
3897      the __builtin_pow opportunity z * z, we would get the inferior:
3898
3899         t1 = y * z
3900         t2 = t1 * x
3901         t3 = t2 * t2
3902         t4 = z * z
3903         t5 = t3 * t4
3904         result = t5 * y  */
3905
3906   vec_len = repeat_factor_vec.length ();
3907
3908   /* Repeatedly look for opportunities to create a builtin_powi call.  */
3909   while (true)
3910     {
3911       HOST_WIDE_INT power;
3912
3913       /* First look for the largest cached product of factors from
3914          preceding iterations.  If found, create a builtin_powi for
3915          it if the minimum occurrence count for its factors is at
3916          least 2, or just use this cached product as our next
3917          multiplicand if the minimum occurrence count is 1.  */
3918       FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3919         {
3920           if (rf1->repr && rf1->count > 0)
3921             break;
3922         }
3923
3924       if (j < vec_len)
3925         {
3926           power = rf1->count;
3927
3928           if (power == 1)
3929             {
3930               iter_result = rf1->repr;
3931
3932               if (dump_file && (dump_flags & TDF_DETAILS))
3933                 {
3934                   unsigned elt;
3935                   repeat_factor_t rf;
3936                   fputs ("Multiplying by cached product ", dump_file);
3937                   for (elt = j; elt < vec_len; elt++)
3938                     {
3939                       rf = &repeat_factor_vec[elt];
3940                       print_generic_expr (dump_file, rf->factor, 0);
3941                       if (elt < vec_len - 1)
3942                         fputs (" * ", dump_file);
3943                     }
3944                   fputs ("\n", dump_file);
3945                 }
3946             }
3947           else
3948             {
3949               iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3950               pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3951                                             build_int_cst (integer_type_node,
3952                                                            power));
3953               gimple_call_set_lhs (pow_stmt, iter_result);
3954               gimple_set_location (pow_stmt, gimple_location (stmt));
3955               gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3956
3957               if (dump_file && (dump_flags & TDF_DETAILS))
3958                 {
3959                   unsigned elt;
3960                   repeat_factor_t rf;
3961                   fputs ("Building __builtin_pow call for cached product (",
3962                          dump_file);
3963                   for (elt = j; elt < vec_len; elt++)
3964                     {
3965                       rf = &repeat_factor_vec[elt];
3966                       print_generic_expr (dump_file, rf->factor, 0);
3967                       if (elt < vec_len - 1)
3968                         fputs (" * ", dump_file);
3969                     }
3970                   fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
3971                            power);
3972                 }
3973             }
3974         }
3975       else
3976         {
3977           /* Otherwise, find the first factor in the repeated factor
3978              vector whose occurrence count is at least 2.  If no such
3979              factor exists, there are no builtin_powi opportunities
3980              remaining.  */
3981           FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3982             {
3983               if (rf1->count >= 2)
3984                 break;
3985             }
3986
3987           if (j >= vec_len)
3988             break;
3989
3990           power = rf1->count;
3991
3992           if (dump_file && (dump_flags & TDF_DETAILS))
3993             {
3994               unsigned elt;
3995               repeat_factor_t rf;
3996               fputs ("Building __builtin_pow call for (", dump_file);
3997               for (elt = j; elt < vec_len; elt++)
3998                 {
3999                   rf = &repeat_factor_vec[elt];
4000                   print_generic_expr (dump_file, rf->factor, 0);
4001                   if (elt < vec_len - 1)
4002                     fputs (" * ", dump_file);
4003                 }
4004               fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4005             }
4006
4007           reassociate_stats.pows_created++;
4008
4009           /* Visit each element of the vector in reverse order (so that
4010              high-occurrence elements are visited first, and within the
4011              same occurrence count, lower-ranked elements are visited
4012              first).  Form a linear product of all elements in this order
4013              whose occurrencce count is at least that of element J.
4014              Record the SSA name representing the product of each element
4015              with all subsequent elements in the vector.  */
4016           if (j == vec_len - 1)
4017             rf1->repr = rf1->factor;
4018           else
4019             {
4020               for (ii = vec_len - 2; ii >= (int)j; ii--)
4021                 {
4022                   tree op1, op2;
4023
4024                   rf1 = &repeat_factor_vec[ii];
4025                   rf2 = &repeat_factor_vec[ii + 1];
4026
4027                   /* Init the last factor's representative to be itself.  */
4028                   if (!rf2->repr)
4029                     rf2->repr = rf2->factor;
4030
4031                   op1 = rf1->factor;
4032                   op2 = rf2->repr;
4033
4034                   target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4035                   mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4036                                                            target_ssa,
4037                                                            op1, op2);
4038                   gimple_set_location (mul_stmt, gimple_location (stmt));
4039                   gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4040                   rf1->repr = target_ssa;
4041
4042                   /* Don't reprocess the multiply we just introduced.  */
4043                   gimple_set_visited (mul_stmt, true);
4044                 }
4045             }
4046
4047           /* Form a call to __builtin_powi for the maximum product
4048              just formed, raised to the power obtained earlier.  */
4049           rf1 = &repeat_factor_vec[j];
4050           iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4051           pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4052                                         build_int_cst (integer_type_node,
4053                                                        power));
4054           gimple_call_set_lhs (pow_stmt, iter_result);
4055           gimple_set_location (pow_stmt, gimple_location (stmt));
4056           gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4057         }
4058
4059       /* If we previously formed at least one other builtin_powi call,
4060          form the product of this one and those others.  */
4061       if (result)
4062         {
4063           tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4064           mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4065                                                    result, iter_result);
4066           gimple_set_location (mul_stmt, gimple_location (stmt));
4067           gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4068           gimple_set_visited (mul_stmt, true);
4069           result = new_result;
4070         }
4071       else
4072         result = iter_result;
4073
4074       /* Decrement the occurrence count of each element in the product
4075          by the count found above, and remove this many copies of each
4076          factor from OPS.  */
4077       for (i = j; i < vec_len; i++)
4078         {
4079           unsigned k = power;
4080           unsigned n;
4081
4082           rf1 = &repeat_factor_vec[i];
4083           rf1->count -= power;
4084
4085           FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4086             {
4087               if (oe->op == rf1->factor)
4088                 {
4089                   if (oe->count <= k)
4090                     {
4091                       ops->ordered_remove (n);
4092                       k -= oe->count;
4093
4094                       if (k == 0)
4095                         break;
4096                     }
4097                   else
4098                     {
4099                       oe->count -= k;
4100                       break;
4101                     }
4102                 }
4103             }
4104         }
4105     }
4106
4107   /* At this point all elements in the repeated factor vector have a
4108      remaining occurrence count of 0 or 1, and those with a count of 1
4109      don't have cached representatives.  Re-sort the ops vector and
4110      clean up.  */
4111   ops->qsort (sort_by_operand_rank);
4112   repeat_factor_vec.release ();
4113
4114   /* Return the final product computed herein.  Note that there may
4115      still be some elements with single occurrence count left in OPS;
4116      those will be handled by the normal reassociation logic.  */
4117   return result;
4118 }
4119
4120 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
4121
4122 static void
4123 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4124 {
4125   tree rhs1;
4126
4127   if (dump_file && (dump_flags & TDF_DETAILS))
4128     {
4129       fprintf (dump_file, "Transforming ");
4130       print_gimple_stmt (dump_file, stmt, 0, 0);
4131     }
4132
4133   rhs1 = gimple_assign_rhs1 (stmt);
4134   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4135   update_stmt (stmt);
4136   remove_visited_stmt_chain (rhs1);
4137
4138   if (dump_file && (dump_flags & TDF_DETAILS))
4139     {
4140       fprintf (dump_file, " into ");
4141       print_gimple_stmt (dump_file, stmt, 0, 0);
4142     }
4143 }
4144
4145 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */
4146
4147 static void
4148 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4149                             tree rhs1, tree rhs2)
4150 {
4151   if (dump_file && (dump_flags & TDF_DETAILS))
4152     {
4153       fprintf (dump_file, "Transforming ");
4154       print_gimple_stmt (dump_file, stmt, 0, 0);
4155     }
4156
4157   gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4158   update_stmt (gsi_stmt (*gsi));
4159   remove_visited_stmt_chain (rhs1);
4160
4161   if (dump_file && (dump_flags & TDF_DETAILS))
4162     {
4163       fprintf (dump_file, " into ");
4164       print_gimple_stmt (dump_file, stmt, 0, 0);
4165     }
4166 }
4167
4168 /* Reassociate expressions in basic block BB and its post-dominator as
4169    children.  */
4170
4171 static void
4172 reassociate_bb (basic_block bb)
4173 {
4174   gimple_stmt_iterator gsi;
4175   basic_block son;
4176   gimple stmt = last_stmt (bb);
4177
4178   if (stmt && !gimple_visited_p (stmt))
4179     maybe_optimize_range_tests (stmt);
4180
4181   for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4182     {
4183       stmt = gsi_stmt (gsi);
4184
4185       if (is_gimple_assign (stmt)
4186           && !stmt_could_throw_p (stmt))
4187         {
4188           tree lhs, rhs1, rhs2;
4189           enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4190
4191           /* If this is not a gimple binary expression, there is
4192              nothing for us to do with it.  */
4193           if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4194             continue;
4195
4196           /* If this was part of an already processed statement,
4197              we don't need to touch it again. */
4198           if (gimple_visited_p (stmt))
4199             {
4200               /* This statement might have become dead because of previous
4201                  reassociations.  */
4202               if (has_zero_uses (gimple_get_lhs (stmt)))
4203                 {
4204                   gsi_remove (&gsi, true);
4205                   release_defs (stmt);
4206                   /* We might end up removing the last stmt above which
4207                      places the iterator to the end of the sequence.
4208                      Reset it to the last stmt in this case which might
4209                      be the end of the sequence as well if we removed
4210                      the last statement of the sequence.  In which case
4211                      we need to bail out.  */
4212                   if (gsi_end_p (gsi))
4213                     {
4214                       gsi = gsi_last_bb (bb);
4215                       if (gsi_end_p (gsi))
4216                         break;
4217                     }
4218                 }
4219               continue;
4220             }
4221
4222           lhs = gimple_assign_lhs (stmt);
4223           rhs1 = gimple_assign_rhs1 (stmt);
4224           rhs2 = gimple_assign_rhs2 (stmt);
4225
4226           /* For non-bit or min/max operations we can't associate
4227              all types.  Verify that here.  */
4228           if (rhs_code != BIT_IOR_EXPR
4229               && rhs_code != BIT_AND_EXPR
4230               && rhs_code != BIT_XOR_EXPR
4231               && rhs_code != MIN_EXPR
4232               && rhs_code != MAX_EXPR
4233               && (!can_reassociate_p (lhs)
4234                   || !can_reassociate_p (rhs1)
4235                   || !can_reassociate_p (rhs2)))
4236             continue;
4237
4238           if (associative_tree_code (rhs_code))
4239             {
4240               vec<operand_entry_t> ops = vNULL;
4241               tree powi_result = NULL_TREE;
4242
4243               /* There may be no immediate uses left by the time we
4244                  get here because we may have eliminated them all.  */
4245               if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4246                 continue;
4247
4248               gimple_set_visited (stmt, true);
4249               linearize_expr_tree (&ops, stmt, true, true);
4250               ops.qsort (sort_by_operand_rank);
4251               optimize_ops_list (rhs_code, &ops);
4252               if (undistribute_ops_list (rhs_code, &ops,
4253                                          loop_containing_stmt (stmt)))
4254                 {
4255                   ops.qsort (sort_by_operand_rank);
4256                   optimize_ops_list (rhs_code, &ops);
4257                 }
4258
4259               if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4260                 optimize_range_tests (rhs_code, &ops);
4261
4262               if (first_pass_instance
4263                   && rhs_code == MULT_EXPR
4264                   && flag_unsafe_math_optimizations)
4265                 powi_result = attempt_builtin_powi (stmt, &ops);
4266
4267               /* If the operand vector is now empty, all operands were
4268                  consumed by the __builtin_powi optimization.  */
4269               if (ops.length () == 0)
4270                 transform_stmt_to_copy (&gsi, stmt, powi_result);
4271               else if (ops.length () == 1)
4272                 {
4273                   tree last_op = ops.last ()->op;
4274
4275                   if (powi_result)
4276                     transform_stmt_to_multiply (&gsi, stmt, last_op,
4277                                                 powi_result);
4278                   else
4279                     transform_stmt_to_copy (&gsi, stmt, last_op);
4280                 }
4281               else
4282                 {
4283                   enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4284                   int ops_num = ops.length ();
4285                   int width = get_reassociation_width (ops_num, rhs_code, mode);
4286
4287                   if (dump_file && (dump_flags & TDF_DETAILS))
4288                     fprintf (dump_file,
4289                              "Width = %d was chosen for reassociation\n", width);
4290
4291                   if (width > 1
4292                       && ops.length () > 3)
4293                     rewrite_expr_tree_parallel (stmt, width, ops);
4294                   else
4295                     {
4296                       /* When there are three operands left, we want
4297                          to make sure the ones that get the double
4298                          binary op are chosen wisely.  */
4299                       int len = ops.length ();
4300                       if (len >= 3)
4301                         swap_ops_for_binary_stmt (ops, len - 3, stmt);
4302
4303                       rewrite_expr_tree (stmt, 0, ops, false);
4304                     }
4305
4306                   /* If we combined some repeated factors into a
4307                      __builtin_powi call, multiply that result by the
4308                      reassociated operands.  */
4309                   if (powi_result)
4310                     {
4311                       gimple mul_stmt;
4312                       tree type = TREE_TYPE (gimple_get_lhs (stmt));
4313                       tree target_ssa = make_temp_ssa_name (type, NULL,
4314                                                             "reassocpow");
4315                       gimple_set_lhs (stmt, target_ssa);
4316                       update_stmt (stmt);
4317                       mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4318                                                                powi_result,
4319                                                                target_ssa);
4320                       gimple_set_location (mul_stmt, gimple_location (stmt));
4321                       gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4322                     }
4323                 }
4324
4325               ops.release ();
4326             }
4327         }
4328     }
4329   for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4330        son;
4331        son = next_dom_son (CDI_POST_DOMINATORS, son))
4332     reassociate_bb (son);
4333 }
4334
4335 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4336 void debug_ops_vector (vec<operand_entry_t> ops);
4337
4338 /* Dump the operand entry vector OPS to FILE.  */
4339
4340 void
4341 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4342 {
4343   operand_entry_t oe;
4344   unsigned int i;
4345
4346   FOR_EACH_VEC_ELT (ops, i, oe)
4347     {
4348       fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4349       print_generic_expr (file, oe->op, 0);
4350     }
4351 }
4352
4353 /* Dump the operand entry vector OPS to STDERR.  */
4354
4355 DEBUG_FUNCTION void
4356 debug_ops_vector (vec<operand_entry_t> ops)
4357 {
4358   dump_ops_vector (stderr, ops);
4359 }
4360
4361 static void
4362 do_reassoc (void)
4363 {
4364   break_up_subtract_bb (ENTRY_BLOCK_PTR);
4365   renumber_gimple_stmt_uids ();
4366   reassociate_bb (EXIT_BLOCK_PTR);
4367 }
4368
4369 /* Initialize the reassociation pass.  */
4370
4371 static void
4372 init_reassoc (void)
4373 {
4374   int i;
4375   long rank = 2;
4376   int *bbs = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
4377
4378   /* Find the loops, so that we can prevent moving calculations in
4379      them.  */
4380   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4381
4382   memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4383
4384   operand_entry_pool = create_alloc_pool ("operand entry pool",
4385                                           sizeof (struct operand_entry), 30);
4386   next_operand_entry_id = 0;
4387
4388   /* Reverse RPO (Reverse Post Order) will give us something where
4389      deeper loops come later.  */
4390   pre_and_rev_post_order_compute (NULL, bbs, false);
4391   bb_rank = XCNEWVEC (long, last_basic_block);
4392   operand_rank = pointer_map_create ();
4393
4394   /* Give each default definition a distinct rank.  This includes
4395      parameters and the static chain.  Walk backwards over all
4396      SSA names so that we get proper rank ordering according
4397      to tree_swap_operands_p.  */
4398   for (i = num_ssa_names - 1; i > 0; --i)
4399     {
4400       tree name = ssa_name (i);
4401       if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4402         insert_operand_rank (name, ++rank);
4403     }
4404
4405   /* Set up rank for each BB  */
4406   for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
4407     bb_rank[bbs[i]] = ++rank  << 16;
4408
4409   free (bbs);
4410   calculate_dominance_info (CDI_POST_DOMINATORS);
4411   plus_negates = vNULL;
4412 }
4413
4414 /* Cleanup after the reassociation pass, and print stats if
4415    requested.  */
4416
4417 static void
4418 fini_reassoc (void)
4419 {
4420   statistics_counter_event (cfun, "Linearized",
4421                             reassociate_stats.linearized);
4422   statistics_counter_event (cfun, "Constants eliminated",
4423                             reassociate_stats.constants_eliminated);
4424   statistics_counter_event (cfun, "Ops eliminated",
4425                             reassociate_stats.ops_eliminated);
4426   statistics_counter_event (cfun, "Statements rewritten",
4427                             reassociate_stats.rewritten);
4428   statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4429                             reassociate_stats.pows_encountered);
4430   statistics_counter_event (cfun, "Built-in powi calls created",
4431                             reassociate_stats.pows_created);
4432
4433   pointer_map_destroy (operand_rank);
4434   free_alloc_pool (operand_entry_pool);
4435   free (bb_rank);
4436   plus_negates.release ();
4437   free_dominance_info (CDI_POST_DOMINATORS);
4438   loop_optimizer_finalize ();
4439 }
4440
4441 /* Gate and execute functions for Reassociation.  */
4442
4443 static unsigned int
4444 execute_reassoc (void)
4445 {
4446   init_reassoc ();
4447
4448   do_reassoc ();
4449   repropagate_negates ();
4450
4451   fini_reassoc ();
4452   return 0;
4453 }
4454
4455 static bool
4456 gate_tree_ssa_reassoc (void)
4457 {
4458   return flag_tree_reassoc != 0;
4459 }
4460
4461 namespace {
4462
4463 const pass_data pass_data_reassoc =
4464 {
4465   GIMPLE_PASS, /* type */
4466   "reassoc", /* name */
4467   OPTGROUP_NONE, /* optinfo_flags */
4468   true, /* has_gate */
4469   true, /* has_execute */
4470   TV_TREE_REASSOC, /* tv_id */
4471   ( PROP_cfg | PROP_ssa ), /* properties_required */
4472   0, /* properties_provided */
4473   0, /* properties_destroyed */
4474   0, /* todo_flags_start */
4475   ( TODO_verify_ssa
4476     | TODO_update_ssa_only_virtuals
4477     | TODO_verify_flow ), /* todo_flags_finish */
4478 };
4479
4480 class pass_reassoc : public gimple_opt_pass
4481 {
4482 public:
4483   pass_reassoc(gcc::context *ctxt)
4484     : gimple_opt_pass(pass_data_reassoc, ctxt)
4485   {}
4486
4487   /* opt_pass methods: */
4488   opt_pass * clone () { return new pass_reassoc (ctxt_); }
4489   bool gate () { return gate_tree_ssa_reassoc (); }
4490   unsigned int execute () { return execute_reassoc (); }
4491
4492 }; // class pass_reassoc
4493
4494 } // anon namespace
4495
4496 gimple_opt_pass *
4497 make_pass_reassoc (gcc::context *ctxt)
4498 {
4499   return new pass_reassoc (ctxt);
4500 }