gcc/tree-ssa-reassoc.c

   1 /* Reassociation for trees.
   2    Copyright (C) 2005, 2007, 2008, 2009, 2010, 2011
   3    Free Software Foundation, Inc.
   4    Contributed by Daniel Berlin <dan@dberlin.org>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 3, or (at your option)
  11 any later version.
  12
  13 GCC is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "tree.h"
  27 #include "basic-block.h"
  28 #include "tree-pretty-print.h"
  29 #include "gimple-pretty-print.h"
  30 #include "tree-inline.h"
  31 #include "tree-flow.h"
  32 #include "gimple.h"
  33 #include "tree-dump.h"
  34 #include "timevar.h"
  35 #include "tree-iterator.h"
  36 #include "tree-pass.h"
  37 #include "alloc-pool.h"
  38 #include "vec.h"
  39 #include "langhooks.h"
  40 #include "pointer-set.h"
  41 #include "cfgloop.h"
  42 #include "flags.h"
  43 #include "target.h"
  44 #include "params.h"
  45 #include "diagnostic-core.h"
  46
  47 /*  This is a simple global reassociation pass.  It is, in part, based
  48     on the LLVM pass of the same name (They do some things more/less
  49     than we do, in different orders, etc).
  50
  51     It consists of five steps:
  52
  53     1. Breaking up subtract operations into addition + negate, where
  54     it would promote the reassociation of adds.
  55
  56     2. Left linearization of the expression trees, so that (A+B)+(C+D)
  57     becomes (((A+B)+C)+D), which is easier for us to rewrite later.
  58     During linearization, we place the operands of the binary
  59     expressions into a vector of operand_entry_t
  60
  61     3. Optimization of the operand lists, eliminating things like a +
  62     -a, a & a, etc.
  63
  64     3a. Combine repeated factors with the same occurrence counts
  65     into a __builtin_powi call that will later be optimized into
  66     an optimal number of multiplies.
  67
  68     4. Rewrite the expression trees we linearized and optimized so
  69     they are in proper rank order.
  70
  71     5. Repropagate negates, as nothing else will clean it up ATM.
  72
  73     A bit of theory on #4, since nobody seems to write anything down
  74     about why it makes sense to do it the way they do it:
  75
  76     We could do this much nicer theoretically, but don't (for reasons
  77     explained after how to do it theoretically nice :P).
  78
  79     In order to promote the most redundancy elimination, you want
  80     binary expressions whose operands are the same rank (or
  81     preferably, the same value) exposed to the redundancy eliminator,
  82     for possible elimination.
  83
  84     So the way to do this if we really cared, is to build the new op
  85     tree from the leaves to the roots, merging as you go, and putting the
  86     new op on the end of the worklist, until you are left with one
  87     thing on the worklist.
  88
  89     IE if you have to rewrite the following set of operands (listed with
  90     rank in parentheses), with opcode PLUS_EXPR:
  91
  92     a (1),  b (1),  c (1),  d (2), e (2)
  93
  94
  95     We start with our merge worklist empty, and the ops list with all of
  96     those on it.
  97
  98     You want to first merge all leaves of the same rank, as much as
  99     possible.
 100
 101     So first build a binary op of
 102
 103     mergetmp = a + b, and put "mergetmp" on the merge worklist.
 104
 105     Because there is no three operand form of PLUS_EXPR, c is not going to
 106     be exposed to redundancy elimination as a rank 1 operand.
 107
 108     So you might as well throw it on the merge worklist (you could also
 109     consider it to now be a rank two operand, and merge it with d and e,
 110     but in this case, you then have evicted e from a binary op. So at
 111     least in this situation, you can't win.)
 112
 113     Then build a binary op of d + e
 114     mergetmp2 = d + e
 115
 116     and put mergetmp2 on the merge worklist.
 117
 118     so merge worklist = {mergetmp, c, mergetmp2}
 119
 120     Continue building binary ops of these operations until you have only
 121     one operation left on the worklist.
 122
 123     So we have
 124
 125     build binary op
 126     mergetmp3 = mergetmp + c
 127
 128     worklist = {mergetmp2, mergetmp3}
 129
 130     mergetmp4 = mergetmp2 + mergetmp3
 131
 132     worklist = {mergetmp4}
 133
 134     because we have one operation left, we can now just set the original
 135     statement equal to the result of that operation.
 136
 137     This will at least expose a + b  and d + e to redundancy elimination
 138     as binary operations.
 139
 140     For extra points, you can reuse the old statements to build the
 141     mergetmps, since you shouldn't run out.
 142
 143     So why don't we do this?
 144
 145     Because it's expensive, and rarely will help.  Most trees we are
 146     reassociating have 3 or less ops.  If they have 2 ops, they already
 147     will be written into a nice single binary op.  If you have 3 ops, a
 148     single simple check suffices to tell you whether the first two are of the
 149     same rank.  If so, you know to order it
 150
 151     mergetmp = op1 + op2
 152     newstmt = mergetmp + op3
 153
 154     instead of
 155     mergetmp = op2 + op3
 156     newstmt = mergetmp + op1
 157
 158     If all three are of the same rank, you can't expose them all in a
 159     single binary operator anyway, so the above is *still* the best you
 160     can do.
 161
 162     Thus, this is what we do.  When we have three ops left, we check to see
 163     what order to put them in, and call it a day.  As a nod to vector sum
 164     reduction, we check if any of the ops are really a phi node that is a
 165     destructive update for the associating op, and keep the destructive
 166     update together for vector sum reduction recognition.  */
 167
 168
 169 /* Statistics */
 170 static struct
 171 {
 172   int linearized;
 173   int constants_eliminated;
 174   int ops_eliminated;
 175   int rewritten;
 176   int pows_encountered;
 177   int pows_created;
 178 } reassociate_stats;
 179
 180 /* Operator, rank pair.  */
 181 typedef struct operand_entry
 182 {
 183   unsigned int rank;
 184   int id;
 185   tree op;
 186   unsigned int count;
 187 } *operand_entry_t;
 188
 189 static alloc_pool operand_entry_pool;
 190
 191 /* This is used to assign a unique ID to each struct operand_entry
 192    so that qsort results are identical on different hosts.  */
 193 static int next_operand_entry_id;
 194
 195 /* Starting rank number for a given basic block, so that we can rank
 196    operations using unmovable instructions in that BB based on the bb
 197    depth.  */
 198 static long *bb_rank;
 199
 200 /* Operand->rank hashtable.  */
 201 static struct pointer_map_t *operand_rank;
 202
 203 /* Forward decls.  */
 204 static long get_rank (tree);
 205
 206
 207 /* Bias amount for loop-carried phis.  We want this to be larger than
 208    the depth of any reassociation tree we can see, but not larger than
 209    the rank difference between two blocks.  */
 210 #define PHI_LOOP_BIAS (1 << 15)
 211
 212 /* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
 213    an innermost loop, and the phi has only a single use which is inside
 214    the loop, then the rank is the block rank of the loop latch plus an
 215    extra bias for the loop-carried dependence.  This causes expressions
 216    calculated into an accumulator variable to be independent for each
 217    iteration of the loop.  If STMT is some other phi, the rank is the
 218    block rank of its containing block.  */
 219 static long
 220 phi_rank (gimple stmt)
 221 {
 222   basic_block bb = gimple_bb (stmt);
 223   struct loop *father = bb->loop_father;
 224   tree res;
 225   unsigned i;
 226   use_operand_p use;
 227   gimple use_stmt;
 228
 229   /* We only care about real loops (those with a latch).  */
 230   if (!father->latch)
 231     return bb_rank[bb->index];
 232
 233   /* Interesting phis must be in headers of innermost loops.  */
 234   if (bb != father->header
 235       || father->inner)
 236     return bb_rank[bb->index];
 237
 238   /* Ignore virtual SSA_NAMEs.  */
 239   res = gimple_phi_result (stmt);
 240   if (!is_gimple_reg (SSA_NAME_VAR (res)))
 241     return bb_rank[bb->index];
 242
 243   /* The phi definition must have a single use, and that use must be
 244      within the loop.  Otherwise this isn't an accumulator pattern.  */
 245   if (!single_imm_use (res, &use, &use_stmt)
 246       || gimple_bb (use_stmt)->loop_father != father)
 247     return bb_rank[bb->index];
 248
 249   /* Look for phi arguments from within the loop.  If found, bias this phi.  */
 250   for (i = 0; i < gimple_phi_num_args (stmt); i++)
 251     {
 252       tree arg = gimple_phi_arg_def (stmt, i);
 253       if (TREE_CODE (arg) == SSA_NAME
 254           && !SSA_NAME_IS_DEFAULT_DEF (arg))
 255         {
 256           gimple def_stmt = SSA_NAME_DEF_STMT (arg);
 257           if (gimple_bb (def_stmt)->loop_father == father)
 258             return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
 259         }
 260     }
 261
 262   /* Must be an uninteresting phi.  */
 263   return bb_rank[bb->index];
 264 }
 265
 266 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
 267    loop-carried dependence of an innermost loop, return TRUE; else
 268    return FALSE.  */
 269 static bool
 270 loop_carried_phi (tree exp)
 271 {
 272   gimple phi_stmt;
 273   long block_rank;
 274
 275   if (TREE_CODE (exp) != SSA_NAME
 276       || SSA_NAME_IS_DEFAULT_DEF (exp))
 277     return false;
 278
 279   phi_stmt = SSA_NAME_DEF_STMT (exp);
 280
 281   if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
 282     return false;
 283
 284   /* Non-loop-carried phis have block rank.  Loop-carried phis have
 285      an additional bias added in.  If this phi doesn't have block rank,
 286      it's biased and should not be propagated.  */
 287   block_rank = bb_rank[gimple_bb (phi_stmt)->index];
 288
 289   if (phi_rank (phi_stmt) != block_rank)
 290     return true;
 291
 292   return false;
 293 }
 294
 295 /* Return the maximum of RANK and the rank that should be propagated
 296    from expression OP.  For most operands, this is just the rank of OP.
 297    For loop-carried phis, the value is zero to avoid undoing the bias
 298    in favor of the phi.  */
 299 static long
 300 propagate_rank (long rank, tree op)
 301 {
 302   long op_rank;
 303
 304   if (loop_carried_phi (op))
 305     return rank;
 306
 307   op_rank = get_rank (op);
 308
 309   return MAX (rank, op_rank);
 310 }
 311
 312 /* Look up the operand rank structure for expression E.  */
 313
 314 static inline long
 315 find_operand_rank (tree e)
 316 {
 317   void **slot = pointer_map_contains (operand_rank, e);
 318   return slot ? (long) (intptr_t) *slot : -1;
 319 }
 320
 321 /* Insert {E,RANK} into the operand rank hashtable.  */
 322
 323 static inline void
 324 insert_operand_rank (tree e, long rank)
 325 {
 326   void **slot;
 327   gcc_assert (rank > 0);
 328   slot = pointer_map_insert (operand_rank, e);
 329   gcc_assert (!*slot);
 330   *slot = (void *) (intptr_t) rank;
 331 }
 332
 333 /* Given an expression E, return the rank of the expression.  */
 334
 335 static long
 336 get_rank (tree e)
 337 {
 338   /* Constants have rank 0.  */
 339   if (is_gimple_min_invariant (e))
 340     return 0;
 341
 342   /* SSA_NAME's have the rank of the expression they are the result
 343      of.
 344      For globals and uninitialized values, the rank is 0.
 345      For function arguments, use the pre-setup rank.
 346      For PHI nodes, stores, asm statements, etc, we use the rank of
 347      the BB.
 348      For simple operations, the rank is the maximum rank of any of
 349      its operands, or the bb_rank, whichever is less.
 350      I make no claims that this is optimal, however, it gives good
 351      results.  */
 352
 353   /* We make an exception to the normal ranking system to break
 354      dependences of accumulator variables in loops.  Suppose we
 355      have a simple one-block loop containing:
 356
 357        x_1 = phi(x_0, x_2)
 358        b = a + x_1
 359        c = b + d
 360        x_2 = c + e
 361
 362      As shown, each iteration of the calculation into x is fully
 363      dependent upon the iteration before it.  We would prefer to
 364      see this in the form:
 365
 366        x_1 = phi(x_0, x_2)
 367        b = a + d
 368        c = b + e
 369        x_2 = c + x_1
 370
 371      If the loop is unrolled, the calculations of b and c from
 372      different iterations can be interleaved.
 373
 374      To obtain this result during reassociation, we bias the rank
 375      of the phi definition x_1 upward, when it is recognized as an
 376      accumulator pattern.  The artificial rank causes it to be
 377      added last, providing the desired independence.  */
 378
 379   if (TREE_CODE (e) == SSA_NAME)
 380     {
 381       gimple stmt;
 382       long rank;
 383       int i, n;
 384       tree op;
 385
 386       if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL
 387           && SSA_NAME_IS_DEFAULT_DEF (e))
 388         return find_operand_rank (e);
 389
 390       stmt = SSA_NAME_DEF_STMT (e);
 391       if (gimple_bb (stmt) == NULL)
 392         return 0;
 393
 394       if (gimple_code (stmt) == GIMPLE_PHI)
 395         return phi_rank (stmt);
 396
 397       if (!is_gimple_assign (stmt)
 398           || gimple_vdef (stmt))
 399         return bb_rank[gimple_bb (stmt)->index];
 400
 401       /* If we already have a rank for this expression, use that.  */
 402       rank = find_operand_rank (e);
 403       if (rank != -1)
 404         return rank;
 405
 406       /* Otherwise, find the maximum rank for the operands.  As an
 407          exception, remove the bias from loop-carried phis when propagating
 408          the rank so that dependent operations are not also biased.  */
 409       rank = 0;
 410       if (gimple_assign_single_p (stmt))
 411         {
 412           tree rhs = gimple_assign_rhs1 (stmt);
 413           n = TREE_OPERAND_LENGTH (rhs);
 414           if (n == 0)
 415             rank = propagate_rank (rank, rhs);
 416           else
 417             {
 418               for (i = 0; i < n; i++)
 419                 {
 420                   op = TREE_OPERAND (rhs, i);
 421
 422                   if (op != NULL_TREE)
 423                     rank = propagate_rank (rank, op);
 424                 }
 425             }
 426         }
 427       else
 428         {
 429           n = gimple_num_ops (stmt);
 430           for (i = 1; i < n; i++)
 431             {
 432               op = gimple_op (stmt, i);
 433               gcc_assert (op);
 434               rank = propagate_rank (rank, op);
 435             }
 436         }
 437
 438       if (dump_file && (dump_flags & TDF_DETAILS))
 439         {
 440           fprintf (dump_file, "Rank for ");
 441           print_generic_expr (dump_file, e, 0);
 442           fprintf (dump_file, " is %ld\n", (rank + 1));
 443         }
 444
 445       /* Note the rank in the hashtable so we don't recompute it.  */
 446       insert_operand_rank (e, (rank + 1));
 447       return (rank + 1);
 448     }
 449
 450   /* Globals, etc,  are rank 0 */
 451   return 0;
 452 }
 453
 454 DEF_VEC_P(operand_entry_t);
 455 DEF_VEC_ALLOC_P(operand_entry_t, heap);
 456
 457 /* We want integer ones to end up last no matter what, since they are
 458    the ones we can do the most with.  */
 459 #define INTEGER_CONST_TYPE 1 << 3
 460 #define FLOAT_CONST_TYPE 1 << 2
 461 #define OTHER_CONST_TYPE 1 << 1
 462
 463 /* Classify an invariant tree into integer, float, or other, so that
 464    we can sort them to be near other constants of the same type.  */
 465 static inline int
 466 constant_type (tree t)
 467 {
 468   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
 469     return INTEGER_CONST_TYPE;
 470   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
 471     return FLOAT_CONST_TYPE;
 472   else
 473     return OTHER_CONST_TYPE;
 474 }
 475
 476 /* qsort comparison function to sort operand entries PA and PB by rank
 477    so that the sorted array is ordered by rank in decreasing order.  */
 478 static int
 479 sort_by_operand_rank (const void *pa, const void *pb)
 480 {
 481   const operand_entry_t oea = *(const operand_entry_t *)pa;
 482   const operand_entry_t oeb = *(const operand_entry_t *)pb;
 483
 484   /* It's nicer for optimize_expression if constants that are likely
 485      to fold when added/multiplied//whatever are put next to each
 486      other.  Since all constants have rank 0, order them by type.  */
 487   if (oeb->rank == 0 &&  oea->rank == 0)
 488     {
 489       if (constant_type (oeb->op) != constant_type (oea->op))
 490         return constant_type (oeb->op) - constant_type (oea->op);
 491       else
 492         /* To make sorting result stable, we use unique IDs to determine
 493            order.  */
 494         return oeb->id - oea->id;
 495     }
 496
 497   /* Lastly, make sure the versions that are the same go next to each
 498      other.  We use SSA_NAME_VERSION because it's stable.  */
 499   if ((oeb->rank - oea->rank == 0)
 500       && TREE_CODE (oea->op) == SSA_NAME
 501       && TREE_CODE (oeb->op) == SSA_NAME)
 502     {
 503       if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
 504         return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
 505       else
 506         return oeb->id - oea->id;
 507     }
 508
 509   if (oeb->rank != oea->rank)
 510     return oeb->rank - oea->rank;
 511   else
 512     return oeb->id - oea->id;
 513 }
 514
 515 /* Add an operand entry to *OPS for the tree operand OP.  */
 516
 517 static void
 518 add_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op)
 519 {
 520   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
 521
 522   oe->op = op;
 523   oe->rank = get_rank (op);
 524   oe->id = next_operand_entry_id++;
 525   oe->count = 1;
 526   VEC_safe_push (operand_entry_t, heap, *ops, oe);
 527 }
 528
 529 /* Add an operand entry to *OPS for the tree operand OP with repeat
 530    count REPEAT.  */
 531
 532 static void
 533 add_repeat_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op,
 534                        HOST_WIDE_INT repeat)
 535 {
 536   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
 537
 538   oe->op = op;
 539   oe->rank = get_rank (op);
 540   oe->id = next_operand_entry_id++;
 541   oe->count = repeat;
 542   VEC_safe_push (operand_entry_t, heap, *ops, oe);
 543
 544   reassociate_stats.pows_encountered++;
 545 }
 546
 547 /* Return true if STMT is reassociable operation containing a binary
 548    operation with tree code CODE, and is inside LOOP.  */
 549
 550 static bool
 551 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
 552 {
 553   basic_block bb = gimple_bb (stmt);
 554
 555   if (gimple_bb (stmt) == NULL)
 556     return false;
 557
 558   if (!flow_bb_inside_loop_p (loop, bb))
 559     return false;
 560
 561   if (is_gimple_assign (stmt)
 562       && gimple_assign_rhs_code (stmt) == code
 563       && has_single_use (gimple_assign_lhs (stmt)))
 564     return true;
 565
 566   return false;
 567 }
 568
 569
 570 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
 571    operand of the negate operation.  Otherwise, return NULL.  */
 572
 573 static tree
 574 get_unary_op (tree name, enum tree_code opcode)
 575 {
 576   gimple stmt = SSA_NAME_DEF_STMT (name);
 577
 578   if (!is_gimple_assign (stmt))
 579     return NULL_TREE;
 580
 581   if (gimple_assign_rhs_code (stmt) == opcode)
 582     return gimple_assign_rhs1 (stmt);
 583   return NULL_TREE;
 584 }
 585
 586 /* If CURR and LAST are a pair of ops that OPCODE allows us to
 587    eliminate through equivalences, do so, remove them from OPS, and
 588    return true.  Otherwise, return false.  */
 589
 590 static bool
 591 eliminate_duplicate_pair (enum tree_code opcode,
 592                           VEC (operand_entry_t, heap) **ops,
 593                           bool *all_done,
 594                           unsigned int i,
 595                           operand_entry_t curr,
 596                           operand_entry_t last)
 597 {
 598
 599   /* If we have two of the same op, and the opcode is & |, min, or max,
 600      we can eliminate one of them.
 601      If we have two of the same op, and the opcode is ^, we can
 602      eliminate both of them.  */
 603
 604   if (last && last->op == curr->op)
 605     {
 606       switch (opcode)
 607         {
 608         case MAX_EXPR:
 609         case MIN_EXPR:
 610         case BIT_IOR_EXPR:
 611         case BIT_AND_EXPR:
 612           if (dump_file && (dump_flags & TDF_DETAILS))
 613             {
 614               fprintf (dump_file, "Equivalence: ");
 615               print_generic_expr (dump_file, curr->op, 0);
 616               fprintf (dump_file, " [&|minmax] ");
 617               print_generic_expr (dump_file, last->op, 0);
 618               fprintf (dump_file, " -> ");
 619               print_generic_stmt (dump_file, last->op, 0);
 620             }
 621
 622           VEC_ordered_remove (operand_entry_t, *ops, i);
 623           reassociate_stats.ops_eliminated ++;
 624
 625           return true;
 626
 627         case BIT_XOR_EXPR:
 628           if (dump_file && (dump_flags & TDF_DETAILS))
 629             {
 630               fprintf (dump_file, "Equivalence: ");
 631               print_generic_expr (dump_file, curr->op, 0);
 632               fprintf (dump_file, " ^ ");
 633               print_generic_expr (dump_file, last->op, 0);
 634               fprintf (dump_file, " -> nothing\n");
 635             }
 636
 637           reassociate_stats.ops_eliminated += 2;
 638
 639           if (VEC_length (operand_entry_t, *ops) == 2)
 640             {
 641               VEC_free (operand_entry_t, heap, *ops);
 642               *ops = NULL;
 643               add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
 644               *all_done = true;
 645             }
 646           else
 647             {
 648               VEC_ordered_remove (operand_entry_t, *ops, i-1);
 649               VEC_ordered_remove (operand_entry_t, *ops, i-1);
 650             }
 651
 652           return true;
 653
 654         default:
 655           break;
 656         }
 657     }
 658   return false;
 659 }
 660
 661 static VEC(tree, heap) *plus_negates;
 662
 663 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
 664    expression, look in OPS for a corresponding positive operation to cancel
 665    it out.  If we find one, remove the other from OPS, replace
 666    OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
 667    return false. */
 668
 669 static bool
 670 eliminate_plus_minus_pair (enum tree_code opcode,
 671                            VEC (operand_entry_t, heap) **ops,
 672                            unsigned int currindex,
 673                            operand_entry_t curr)
 674 {
 675   tree negateop;
 676   tree notop;
 677   unsigned int i;
 678   operand_entry_t oe;
 679
 680   if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
 681     return false;
 682
 683   negateop = get_unary_op (curr->op, NEGATE_EXPR);
 684   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 685   if (negateop == NULL_TREE && notop == NULL_TREE)
 686     return false;
 687
 688   /* Any non-negated version will have a rank that is one less than
 689      the current rank.  So once we hit those ranks, if we don't find
 690      one, we can stop.  */
 691
 692   for (i = currindex + 1;
 693        VEC_iterate (operand_entry_t, *ops, i, oe)
 694        && oe->rank >= curr->rank - 1 ;
 695        i++)
 696     {
 697       if (oe->op == negateop)
 698         {
 699
 700           if (dump_file && (dump_flags & TDF_DETAILS))
 701             {
 702               fprintf (dump_file, "Equivalence: ");
 703               print_generic_expr (dump_file, negateop, 0);
 704               fprintf (dump_file, " + -");
 705               print_generic_expr (dump_file, oe->op, 0);
 706               fprintf (dump_file, " -> 0\n");
 707             }
 708
 709           VEC_ordered_remove (operand_entry_t, *ops, i);
 710           add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
 711           VEC_ordered_remove (operand_entry_t, *ops, currindex);
 712           reassociate_stats.ops_eliminated ++;
 713
 714           return true;
 715         }
 716       else if (oe->op == notop)
 717         {
 718           tree op_type = TREE_TYPE (oe->op);
 719
 720           if (dump_file && (dump_flags & TDF_DETAILS))
 721             {
 722               fprintf (dump_file, "Equivalence: ");
 723               print_generic_expr (dump_file, notop, 0);
 724               fprintf (dump_file, " + ~");
 725               print_generic_expr (dump_file, oe->op, 0);
 726               fprintf (dump_file, " -> -1\n");
 727             }
 728
 729           VEC_ordered_remove (operand_entry_t, *ops, i);
 730           add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
 731           VEC_ordered_remove (operand_entry_t, *ops, currindex);
 732           reassociate_stats.ops_eliminated ++;
 733
 734           return true;
 735         }
 736     }
 737
 738   /* CURR->OP is a negate expr in a plus expr: save it for later
 739      inspection in repropagate_negates().  */
 740   if (negateop != NULL_TREE)
 741     VEC_safe_push (tree, heap, plus_negates, curr->op);
 742
 743   return false;
 744 }
 745
 746 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
 747    bitwise not expression, look in OPS for a corresponding operand to
 748    cancel it out.  If we find one, remove the other from OPS, replace
 749    OPS[CURRINDEX] with 0, and return true.  Otherwise, return
 750    false. */
 751
 752 static bool
 753 eliminate_not_pairs (enum tree_code opcode,
 754                      VEC (operand_entry_t, heap) **ops,
 755                      unsigned int currindex,
 756                      operand_entry_t curr)
 757 {
 758   tree notop;
 759   unsigned int i;
 760   operand_entry_t oe;
 761
 762   if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
 763       || TREE_CODE (curr->op) != SSA_NAME)
 764     return false;
 765
 766   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 767   if (notop == NULL_TREE)
 768     return false;
 769
 770   /* Any non-not version will have a rank that is one less than
 771      the current rank.  So once we hit those ranks, if we don't find
 772      one, we can stop.  */
 773
 774   for (i = currindex + 1;
 775        VEC_iterate (operand_entry_t, *ops, i, oe)
 776        && oe->rank >= curr->rank - 1;
 777        i++)
 778     {
 779       if (oe->op == notop)
 780         {
 781           if (dump_file && (dump_flags & TDF_DETAILS))
 782             {
 783               fprintf (dump_file, "Equivalence: ");
 784               print_generic_expr (dump_file, notop, 0);
 785               if (opcode == BIT_AND_EXPR)
 786                 fprintf (dump_file, " & ~");
 787               else if (opcode == BIT_IOR_EXPR)
 788                 fprintf (dump_file, " | ~");
 789               print_generic_expr (dump_file, oe->op, 0);
 790               if (opcode == BIT_AND_EXPR)
 791                 fprintf (dump_file, " -> 0\n");
 792               else if (opcode == BIT_IOR_EXPR)
 793                 fprintf (dump_file, " -> -1\n");
 794             }
 795
 796           if (opcode == BIT_AND_EXPR)
 797             oe->op = build_zero_cst (TREE_TYPE (oe->op));
 798           else if (opcode == BIT_IOR_EXPR)
 799             oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
 800                                           TYPE_PRECISION (TREE_TYPE (oe->op)));
 801
 802           reassociate_stats.ops_eliminated
 803             += VEC_length (operand_entry_t, *ops) - 1;
 804           VEC_free (operand_entry_t, heap, *ops);
 805           *ops = NULL;
 806           VEC_safe_push (operand_entry_t, heap, *ops, oe);
 807           return true;
 808         }
 809     }
 810
 811   return false;
 812 }
 813
 814 /* Use constant value that may be present in OPS to try to eliminate
 815    operands.  Note that this function is only really used when we've
 816    eliminated ops for other reasons, or merged constants.  Across
 817    single statements, fold already does all of this, plus more.  There
 818    is little point in duplicating logic, so I've only included the
 819    identities that I could ever construct testcases to trigger.  */
 820
 821 static void
 822 eliminate_using_constants (enum tree_code opcode,
 823                            VEC(operand_entry_t, heap) **ops)
 824 {
 825   operand_entry_t oelast = VEC_last (operand_entry_t, *ops);
 826   tree type = TREE_TYPE (oelast->op);
 827
 828   if (oelast->rank == 0
 829       && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
 830     {
 831       switch (opcode)
 832         {
 833         case BIT_AND_EXPR:
 834           if (integer_zerop (oelast->op))
 835             {
 836               if (VEC_length (operand_entry_t, *ops) != 1)
 837                 {
 838                   if (dump_file && (dump_flags & TDF_DETAILS))
 839                     fprintf (dump_file, "Found & 0, removing all other ops\n");
 840
 841                   reassociate_stats.ops_eliminated
 842                     += VEC_length (operand_entry_t, *ops) - 1;
 843
 844                   VEC_free (operand_entry_t, heap, *ops);
 845                   *ops = NULL;
 846                   VEC_safe_push (operand_entry_t, heap, *ops, oelast);
 847                   return;
 848                 }
 849             }
 850           else if (integer_all_onesp (oelast->op))
 851             {
 852               if (VEC_length (operand_entry_t, *ops) != 1)
 853                 {
 854                   if (dump_file && (dump_flags & TDF_DETAILS))
 855                     fprintf (dump_file, "Found & -1, removing\n");
 856                   VEC_pop (operand_entry_t, *ops);
 857                   reassociate_stats.ops_eliminated++;
 858                 }
 859             }
 860           break;
 861         case BIT_IOR_EXPR:
 862           if (integer_all_onesp (oelast->op))
 863             {
 864               if (VEC_length (operand_entry_t, *ops) != 1)
 865                 {
 866                   if (dump_file && (dump_flags & TDF_DETAILS))
 867                     fprintf (dump_file, "Found | -1, removing all other ops\n");
 868
 869                   reassociate_stats.ops_eliminated
 870                     += VEC_length (operand_entry_t, *ops) - 1;
 871
 872                   VEC_free (operand_entry_t, heap, *ops);
 873                   *ops = NULL;
 874                   VEC_safe_push (operand_entry_t, heap, *ops, oelast);
 875                   return;
 876                 }
 877             }
 878           else if (integer_zerop (oelast->op))
 879             {
 880               if (VEC_length (operand_entry_t, *ops) != 1)
 881                 {
 882                   if (dump_file && (dump_flags & TDF_DETAILS))
 883                     fprintf (dump_file, "Found | 0, removing\n");
 884                   VEC_pop (operand_entry_t, *ops);
 885                   reassociate_stats.ops_eliminated++;
 886                 }
 887             }
 888           break;
 889         case MULT_EXPR:
 890           if (integer_zerop (oelast->op)
 891               || (FLOAT_TYPE_P (type)
 892                   && !HONOR_NANS (TYPE_MODE (type))
 893                   && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
 894                   && real_zerop (oelast->op)))
 895             {
 896               if (VEC_length (operand_entry_t, *ops) != 1)
 897                 {
 898                   if (dump_file && (dump_flags & TDF_DETAILS))
 899                     fprintf (dump_file, "Found * 0, removing all other ops\n");
 900
 901                   reassociate_stats.ops_eliminated
 902                     += VEC_length (operand_entry_t, *ops) - 1;
 903                   VEC_free (operand_entry_t, heap, *ops);
 904                   *ops = NULL;
 905                   VEC_safe_push (operand_entry_t, heap, *ops, oelast);
 906                   return;
 907                 }
 908             }
 909           else if (integer_onep (oelast->op)
 910                    || (FLOAT_TYPE_P (type)
 911                        && !HONOR_SNANS (TYPE_MODE (type))
 912                        && real_onep (oelast->op)))
 913             {
 914               if (VEC_length (operand_entry_t, *ops) != 1)
 915                 {
 916                   if (dump_file && (dump_flags & TDF_DETAILS))
 917                     fprintf (dump_file, "Found * 1, removing\n");
 918                   VEC_pop (operand_entry_t, *ops);
 919                   reassociate_stats.ops_eliminated++;
 920                   return;
 921                 }
 922             }
 923           break;
 924         case BIT_XOR_EXPR:
 925         case PLUS_EXPR:
 926         case MINUS_EXPR:
 927           if (integer_zerop (oelast->op)
 928               || (FLOAT_TYPE_P (type)
 929                   && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
 930                   && fold_real_zero_addition_p (type, oelast->op,
 931                                                 opcode == MINUS_EXPR)))
 932             {
 933               if (VEC_length (operand_entry_t, *ops) != 1)
 934                 {
 935                   if (dump_file && (dump_flags & TDF_DETAILS))
 936                     fprintf (dump_file, "Found [|^+] 0, removing\n");
 937                   VEC_pop (operand_entry_t, *ops);
 938                   reassociate_stats.ops_eliminated++;
 939                   return;
 940                 }
 941             }
 942           break;
 943         default:
 944           break;
 945         }
 946     }
 947 }
 948
 949
 950 static void linearize_expr_tree (VEC(operand_entry_t, heap) **, gimple,
 951                                  bool, bool);
 952
 953 /* Structure for tracking and counting operands.  */
 954 typedef struct oecount_s {
 955   int cnt;
 956   int id;
 957   enum tree_code oecode;
 958   tree op;
 959 } oecount;
 960
 961 DEF_VEC_O(oecount);
 962 DEF_VEC_ALLOC_O(oecount,heap);
 963
 964 /* The heap for the oecount hashtable and the sorted list of operands.  */
 965 static VEC (oecount, heap) *cvec;
 966
 967 /* Hash function for oecount.  */
 968
 969 static hashval_t
 970 oecount_hash (const void *p)
 971 {
 972   const oecount *c = VEC_index (oecount, cvec, (size_t)p - 42);
 973   return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
 974 }
 975
 976 /* Comparison function for oecount.  */
 977
 978 static int
 979 oecount_eq (const void *p1, const void *p2)
 980 {
 981   const oecount *c1 = VEC_index (oecount, cvec, (size_t)p1 - 42);
 982   const oecount *c2 = VEC_index (oecount, cvec, (size_t)p2 - 42);
 983   return (c1->oecode == c2->oecode
 984           && c1->op == c2->op);
 985 }
 986
 987 /* Comparison function for qsort sorting oecount elements by count.  */
 988
 989 static int
 990 oecount_cmp (const void *p1, const void *p2)
 991 {
 992   const oecount *c1 = (const oecount *)p1;
 993   const oecount *c2 = (const oecount *)p2;
 994   if (c1->cnt != c2->cnt)
 995     return c1->cnt - c2->cnt;
 996   else
 997     /* If counts are identical, use unique IDs to stabilize qsort.  */
 998     return c1->id - c2->id;
 999 }
1000
1001 /* Return TRUE iff STMT represents a builtin call that raises OP
1002    to some exponent.  */
1003
1004 static bool
1005 stmt_is_power_of_op (gimple stmt, tree op)
1006 {
1007   tree fndecl;
1008
1009   if (!is_gimple_call (stmt))
1010     return false;
1011
1012   fndecl = gimple_call_fndecl (stmt);
1013
1014   if (!fndecl
1015       || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1016     return false;
1017
1018   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1019     {
1020     CASE_FLT_FN (BUILT_IN_POW):
1021     CASE_FLT_FN (BUILT_IN_POWI):
1022       return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1023
1024     default:
1025       return false;
1026     }
1027 }
1028
1029 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1030    in place and return the result.  Assumes that stmt_is_power_of_op
1031    was previously called for STMT and returned TRUE.  */
1032
1033 static HOST_WIDE_INT
1034 decrement_power (gimple stmt)
1035 {
1036   REAL_VALUE_TYPE c, cint;
1037   HOST_WIDE_INT power;
1038   tree arg1;
1039
1040   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1041     {
1042     CASE_FLT_FN (BUILT_IN_POW):
1043       arg1 = gimple_call_arg (stmt, 1);
1044       c = TREE_REAL_CST (arg1);
1045       power = real_to_integer (&c) - 1;
1046       real_from_integer (&cint, VOIDmode, power, 0, 0);
1047       gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1048       return power;
1049
1050     CASE_FLT_FN (BUILT_IN_POWI):
1051       arg1 = gimple_call_arg (stmt, 1);
1052       power = TREE_INT_CST_LOW (arg1) - 1;
1053       gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1054       return power;
1055
1056     default:
1057       gcc_unreachable ();
1058     }
1059 }
1060
1061 /* Find the single immediate use of STMT's LHS, and replace it
1062    with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
1063    replace *DEF with OP as well.  */
1064
1065 static void
1066 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1067 {
1068   tree lhs;
1069   gimple use_stmt;
1070   use_operand_p use;
1071   gimple_stmt_iterator gsi;
1072
1073   if (is_gimple_call (stmt))
1074     lhs = gimple_call_lhs (stmt);
1075   else
1076     lhs = gimple_assign_lhs (stmt);
1077
1078   gcc_assert (has_single_use (lhs));
1079   single_imm_use (lhs, &use, &use_stmt);
1080   if (lhs == *def)
1081     *def = op;
1082   SET_USE (use, op);
1083   if (TREE_CODE (op) != SSA_NAME)
1084     update_stmt (use_stmt);
1085   gsi = gsi_for_stmt (stmt);
1086   gsi_remove (&gsi, true);
1087   release_defs (stmt);
1088
1089   if (is_gimple_call (stmt))
1090     unlink_stmt_vdef (stmt);
1091 }
1092
1093 /* Walks the linear chain with result *DEF searching for an operation
1094    with operand OP and code OPCODE removing that from the chain.  *DEF
1095    is updated if there is only one operand but no operation left.  */
1096
1097 static void
1098 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1099 {
1100   gimple stmt = SSA_NAME_DEF_STMT (*def);
1101
1102   do
1103     {
1104       tree name;
1105
1106       if (opcode == MULT_EXPR
1107           && stmt_is_power_of_op (stmt, op))
1108         {
1109           if (decrement_power (stmt) == 1)
1110             propagate_op_to_single_use (op, stmt, def);
1111           return;
1112         }
1113
1114       name = gimple_assign_rhs1 (stmt);
1115
1116       /* If this is the operation we look for and one of the operands
1117          is ours simply propagate the other operand into the stmts
1118          single use.  */
1119       if (gimple_assign_rhs_code (stmt) == opcode
1120           && (name == op
1121               || gimple_assign_rhs2 (stmt) == op))
1122         {
1123           if (name == op)
1124             name = gimple_assign_rhs2 (stmt);
1125           propagate_op_to_single_use (name, stmt, def);
1126           return;
1127         }
1128
1129       /* We might have a multiply of two __builtin_pow* calls, and
1130          the operand might be hiding in the rightmost one.  */
1131       if (opcode == MULT_EXPR
1132           && gimple_assign_rhs_code (stmt) == opcode
1133           && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
1134         {
1135           gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1136           if (stmt_is_power_of_op (stmt2, op))
1137             {
1138               if (decrement_power (stmt2) == 1)
1139                 propagate_op_to_single_use (op, stmt2, def);
1140               return;
1141             }
1142         }
1143
1144       /* Continue walking the chain.  */
1145       gcc_assert (name != op
1146                   && TREE_CODE (name) == SSA_NAME);
1147       stmt = SSA_NAME_DEF_STMT (name);
1148     }
1149   while (1);
1150 }
1151
1152 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1153    the result.  Places the statement after the definition of either
1154    OP1 or OP2.  Returns the new statement.  */
1155
1156 static gimple
1157 build_and_add_sum (tree tmpvar, tree op1, tree op2, enum tree_code opcode)
1158 {
1159   gimple op1def = NULL, op2def = NULL;
1160   gimple_stmt_iterator gsi;
1161   tree op;
1162   gimple sum;
1163
1164   /* Create the addition statement.  */
1165   sum = gimple_build_assign_with_ops (opcode, tmpvar, op1, op2);
1166   op = make_ssa_name (tmpvar, sum);
1167   gimple_assign_set_lhs (sum, op);
1168
1169   /* Find an insertion place and insert.  */
1170   if (TREE_CODE (op1) == SSA_NAME)
1171     op1def = SSA_NAME_DEF_STMT (op1);
1172   if (TREE_CODE (op2) == SSA_NAME)
1173     op2def = SSA_NAME_DEF_STMT (op2);
1174   if ((!op1def || gimple_nop_p (op1def))
1175       && (!op2def || gimple_nop_p (op2def)))
1176     {
1177       gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1178       gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1179     }
1180   else if ((!op1def || gimple_nop_p (op1def))
1181            || (op2def && !gimple_nop_p (op2def)
1182                && stmt_dominates_stmt_p (op1def, op2def)))
1183     {
1184       if (gimple_code (op2def) == GIMPLE_PHI)
1185         {
1186           gsi = gsi_after_labels (gimple_bb (op2def));
1187           gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1188         }
1189       else
1190         {
1191           if (!stmt_ends_bb_p (op2def))
1192             {
1193               gsi = gsi_for_stmt (op2def);
1194               gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1195             }
1196           else
1197             {
1198               edge e;
1199               edge_iterator ei;
1200
1201               FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1202                 if (e->flags & EDGE_FALLTHRU)
1203                   gsi_insert_on_edge_immediate (e, sum);
1204             }
1205         }
1206     }
1207   else
1208     {
1209       if (gimple_code (op1def) == GIMPLE_PHI)
1210         {
1211           gsi = gsi_after_labels (gimple_bb (op1def));
1212           gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1213         }
1214       else
1215         {
1216           if (!stmt_ends_bb_p (op1def))
1217             {
1218               gsi = gsi_for_stmt (op1def);
1219               gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1220             }
1221           else
1222             {
1223               edge e;
1224               edge_iterator ei;
1225
1226               FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1227                 if (e->flags & EDGE_FALLTHRU)
1228                   gsi_insert_on_edge_immediate (e, sum);
1229             }
1230         }
1231     }
1232   update_stmt (sum);
1233
1234   return sum;
1235 }
1236
1237 /* Perform un-distribution of divisions and multiplications.
1238    A * X + B * X is transformed into (A + B) * X and A / X + B / X
1239    to (A + B) / X for real X.
1240
1241    The algorithm is organized as follows.
1242
1243     - First we walk the addition chain *OPS looking for summands that
1244       are defined by a multiplication or a real division.  This results
1245       in the candidates bitmap with relevant indices into *OPS.
1246
1247     - Second we build the chains of multiplications or divisions for
1248       these candidates, counting the number of occurrences of (operand, code)
1249       pairs in all of the candidates chains.
1250
1251     - Third we sort the (operand, code) pairs by number of occurrence and
1252       process them starting with the pair with the most uses.
1253
1254       * For each such pair we walk the candidates again to build a
1255         second candidate bitmap noting all multiplication/division chains
1256         that have at least one occurrence of (operand, code).
1257
1258       * We build an alternate addition chain only covering these
1259         candidates with one (operand, code) operation removed from their
1260         multiplication/division chain.
1261
1262       * The first candidate gets replaced by the alternate addition chain
1263         multiplied/divided by the operand.
1264
1265       * All candidate chains get disabled for further processing and
1266         processing of (operand, code) pairs continues.
1267
1268   The alternate addition chains built are re-processed by the main
1269   reassociation algorithm which allows optimizing a * x * y + b * y * x
1270   to (a + b ) * x * y in one invocation of the reassociation pass.  */
1271
1272 static bool
1273 undistribute_ops_list (enum tree_code opcode,
1274                        VEC (operand_entry_t, heap) **ops, struct loop *loop)
1275 {
1276   unsigned int length = VEC_length (operand_entry_t, *ops);
1277   operand_entry_t oe1;
1278   unsigned i, j;
1279   sbitmap candidates, candidates2;
1280   unsigned nr_candidates, nr_candidates2;
1281   sbitmap_iterator sbi0;
1282   VEC (operand_entry_t, heap) **subops;
1283   htab_t ctable;
1284   bool changed = false;
1285   int next_oecount_id = 0;
1286
1287   if (length <= 1
1288       || opcode != PLUS_EXPR)
1289     return false;
1290
1291   /* Build a list of candidates to process.  */
1292   candidates = sbitmap_alloc (length);
1293   sbitmap_zero (candidates);
1294   nr_candidates = 0;
1295   FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe1)
1296     {
1297       enum tree_code dcode;
1298       gimple oe1def;
1299
1300       if (TREE_CODE (oe1->op) != SSA_NAME)
1301         continue;
1302       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1303       if (!is_gimple_assign (oe1def))
1304         continue;
1305       dcode = gimple_assign_rhs_code (oe1def);
1306       if ((dcode != MULT_EXPR
1307            && dcode != RDIV_EXPR)
1308           || !is_reassociable_op (oe1def, dcode, loop))
1309         continue;
1310
1311       SET_BIT (candidates, i);
1312       nr_candidates++;
1313     }
1314
1315   if (nr_candidates < 2)
1316     {
1317       sbitmap_free (candidates);
1318       return false;
1319     }
1320
1321   if (dump_file && (dump_flags & TDF_DETAILS))
1322     {
1323       fprintf (dump_file, "searching for un-distribute opportunities ");
1324       print_generic_expr (dump_file,
1325         VEC_index (operand_entry_t, *ops,
1326                    sbitmap_first_set_bit (candidates))->op, 0);
1327       fprintf (dump_file, " %d\n", nr_candidates);
1328     }
1329
1330   /* Build linearized sub-operand lists and the counting table.  */
1331   cvec = NULL;
1332   ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
1333   subops = XCNEWVEC (VEC (operand_entry_t, heap) *,
1334                      VEC_length (operand_entry_t, *ops));
1335   EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
1336     {
1337       gimple oedef;
1338       enum tree_code oecode;
1339       unsigned j;
1340
1341       oedef = SSA_NAME_DEF_STMT (VEC_index (operand_entry_t, *ops, i)->op);
1342       oecode = gimple_assign_rhs_code (oedef);
1343       linearize_expr_tree (&subops[i], oedef,
1344                            associative_tree_code (oecode), false);
1345
1346       FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1)
1347         {
1348           oecount c;
1349           void **slot;
1350           size_t idx;
1351           c.oecode = oecode;
1352           c.cnt = 1;
1353           c.id = next_oecount_id++;
1354           c.op = oe1->op;
1355           VEC_safe_push (oecount, heap, cvec, &c);
1356           idx = VEC_length (oecount, cvec) + 41;
1357           slot = htab_find_slot (ctable, (void *)idx, INSERT);
1358           if (!*slot)
1359             {
1360               *slot = (void *)idx;
1361             }
1362           else
1363             {
1364               VEC_pop (oecount, cvec);
1365               VEC_index (oecount, cvec, (size_t)*slot - 42)->cnt++;
1366             }
1367         }
1368     }
1369   htab_delete (ctable);
1370
1371   /* Sort the counting table.  */
1372   VEC_qsort (oecount, cvec, oecount_cmp);
1373
1374   if (dump_file && (dump_flags & TDF_DETAILS))
1375     {
1376       oecount *c;
1377       fprintf (dump_file, "Candidates:\n");
1378       FOR_EACH_VEC_ELT (oecount, cvec, j, c)
1379         {
1380           fprintf (dump_file, "  %u %s: ", c->cnt,
1381                    c->oecode == MULT_EXPR
1382                    ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1383           print_generic_expr (dump_file, c->op, 0);
1384           fprintf (dump_file, "\n");
1385         }
1386     }
1387
1388   /* Process the (operand, code) pairs in order of most occurence.  */
1389   candidates2 = sbitmap_alloc (length);
1390   while (!VEC_empty (oecount, cvec))
1391     {
1392       oecount *c = VEC_last (oecount, cvec);
1393       if (c->cnt < 2)
1394         break;
1395
1396       /* Now collect the operands in the outer chain that contain
1397          the common operand in their inner chain.  */
1398       sbitmap_zero (candidates2);
1399       nr_candidates2 = 0;
1400       EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
1401         {
1402           gimple oedef;
1403           enum tree_code oecode;
1404           unsigned j;
1405           tree op = VEC_index (operand_entry_t, *ops, i)->op;
1406
1407           /* If we undistributed in this chain already this may be
1408              a constant.  */
1409           if (TREE_CODE (op) != SSA_NAME)
1410             continue;
1411
1412           oedef = SSA_NAME_DEF_STMT (op);
1413           oecode = gimple_assign_rhs_code (oedef);
1414           if (oecode != c->oecode)
1415             continue;
1416
1417           FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1)
1418             {
1419               if (oe1->op == c->op)
1420                 {
1421                   SET_BIT (candidates2, i);
1422                   ++nr_candidates2;
1423                   break;
1424                 }
1425             }
1426         }
1427
1428       if (nr_candidates2 >= 2)
1429         {
1430           operand_entry_t oe1, oe2;
1431           tree tmpvar;
1432           gimple prod;
1433           int first = sbitmap_first_set_bit (candidates2);
1434
1435           /* Build the new addition chain.  */
1436           oe1 = VEC_index (operand_entry_t, *ops, first);
1437           if (dump_file && (dump_flags & TDF_DETAILS))
1438             {
1439               fprintf (dump_file, "Building (");
1440               print_generic_expr (dump_file, oe1->op, 0);
1441             }
1442           tmpvar = create_tmp_reg (TREE_TYPE (oe1->op), NULL);
1443           add_referenced_var (tmpvar);
1444           zero_one_operation (&oe1->op, c->oecode, c->op);
1445           EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0)
1446             {
1447               gimple sum;
1448               oe2 = VEC_index (operand_entry_t, *ops, i);
1449               if (dump_file && (dump_flags & TDF_DETAILS))
1450                 {
1451                   fprintf (dump_file, " + ");
1452                   print_generic_expr (dump_file, oe2->op, 0);
1453                 }
1454               zero_one_operation (&oe2->op, c->oecode, c->op);
1455               sum = build_and_add_sum (tmpvar, oe1->op, oe2->op, opcode);
1456               oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1457               oe2->rank = 0;
1458               oe1->op = gimple_get_lhs (sum);
1459             }
1460
1461           /* Apply the multiplication/division.  */
1462           prod = build_and_add_sum (tmpvar, oe1->op, c->op, c->oecode);
1463           if (dump_file && (dump_flags & TDF_DETAILS))
1464             {
1465               fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1466               print_generic_expr (dump_file, c->op, 0);
1467               fprintf (dump_file, "\n");
1468             }
1469
1470           /* Record it in the addition chain and disable further
1471              undistribution with this op.  */
1472           oe1->op = gimple_assign_lhs (prod);
1473           oe1->rank = get_rank (oe1->op);
1474           VEC_free (operand_entry_t, heap, subops[first]);
1475
1476           changed = true;
1477         }
1478
1479       VEC_pop (oecount, cvec);
1480     }
1481
1482   for (i = 0; i < VEC_length (operand_entry_t, *ops); ++i)
1483     VEC_free (operand_entry_t, heap, subops[i]);
1484   free (subops);
1485   VEC_free (oecount, heap, cvec);
1486   sbitmap_free (candidates);
1487   sbitmap_free (candidates2);
1488
1489   return changed;
1490 }
1491
1492 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1493    expression, examine the other OPS to see if any of them are comparisons
1494    of the same values, which we may be able to combine or eliminate.
1495    For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
1496
1497 static bool
1498 eliminate_redundant_comparison (enum tree_code opcode,
1499                                 VEC (operand_entry_t, heap) **ops,
1500                                 unsigned int currindex,
1501                                 operand_entry_t curr)
1502 {
1503   tree op1, op2;
1504   enum tree_code lcode, rcode;
1505   gimple def1, def2;
1506   int i;
1507   operand_entry_t oe;
1508
1509   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1510     return false;
1511
1512   /* Check that CURR is a comparison.  */
1513   if (TREE_CODE (curr->op) != SSA_NAME)
1514     return false;
1515   def1 = SSA_NAME_DEF_STMT (curr->op);
1516   if (!is_gimple_assign (def1))
1517     return false;
1518   lcode = gimple_assign_rhs_code (def1);
1519   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1520     return false;
1521   op1 = gimple_assign_rhs1 (def1);
1522   op2 = gimple_assign_rhs2 (def1);
1523
1524   /* Now look for a similar comparison in the remaining OPS.  */
1525   for (i = currindex + 1;
1526        VEC_iterate (operand_entry_t, *ops, i, oe);
1527        i++)
1528     {
1529       tree t;
1530
1531       if (TREE_CODE (oe->op) != SSA_NAME)
1532         continue;
1533       def2 = SSA_NAME_DEF_STMT (oe->op);
1534       if (!is_gimple_assign (def2))
1535         continue;
1536       rcode = gimple_assign_rhs_code (def2);
1537       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1538         continue;
1539
1540       /* If we got here, we have a match.  See if we can combine the
1541          two comparisons.  */
1542       if (opcode == BIT_IOR_EXPR)
1543         t = maybe_fold_or_comparisons (lcode, op1, op2,
1544                                        rcode, gimple_assign_rhs1 (def2),
1545                                        gimple_assign_rhs2 (def2));
1546       else
1547         t = maybe_fold_and_comparisons (lcode, op1, op2,
1548                                         rcode, gimple_assign_rhs1 (def2),
1549                                         gimple_assign_rhs2 (def2));
1550       if (!t)
1551         continue;
1552
1553       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1554          always give us a boolean_type_node value back.  If the original
1555          BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1556          we need to convert.  */
1557       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1558         t = fold_convert (TREE_TYPE (curr->op), t);
1559
1560       if (TREE_CODE (t) != INTEGER_CST
1561           && !operand_equal_p (t, curr->op, 0))
1562         {
1563           enum tree_code subcode;
1564           tree newop1, newop2;
1565           if (!COMPARISON_CLASS_P (t))
1566             continue;
1567           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1568           STRIP_USELESS_TYPE_CONVERSION (newop1);
1569           STRIP_USELESS_TYPE_CONVERSION (newop2);
1570           if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1571             continue;
1572         }
1573
1574       if (dump_file && (dump_flags & TDF_DETAILS))
1575         {
1576           fprintf (dump_file, "Equivalence: ");
1577           print_generic_expr (dump_file, curr->op, 0);
1578           fprintf (dump_file, " %s ", op_symbol_code (opcode));
1579           print_generic_expr (dump_file, oe->op, 0);
1580           fprintf (dump_file, " -> ");
1581           print_generic_expr (dump_file, t, 0);
1582           fprintf (dump_file, "\n");
1583         }
1584
1585       /* Now we can delete oe, as it has been subsumed by the new combined
1586          expression t.  */
1587       VEC_ordered_remove (operand_entry_t, *ops, i);
1588       reassociate_stats.ops_eliminated ++;
1589
1590       /* If t is the same as curr->op, we're done.  Otherwise we must
1591          replace curr->op with t.  Special case is if we got a constant
1592          back, in which case we add it to the end instead of in place of
1593          the current entry.  */
1594       if (TREE_CODE (t) == INTEGER_CST)
1595         {
1596           VEC_ordered_remove (operand_entry_t, *ops, currindex);
1597           add_to_ops_vec (ops, t);
1598         }
1599       else if (!operand_equal_p (t, curr->op, 0))
1600         {
1601           tree tmpvar;
1602           gimple sum;
1603           enum tree_code subcode;
1604           tree newop1;
1605           tree newop2;
1606           gcc_assert (COMPARISON_CLASS_P (t));
1607           tmpvar = create_tmp_var (TREE_TYPE (t), NULL);
1608           add_referenced_var (tmpvar);
1609           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1610           STRIP_USELESS_TYPE_CONVERSION (newop1);
1611           STRIP_USELESS_TYPE_CONVERSION (newop2);
1612           gcc_checking_assert (is_gimple_val (newop1)
1613                                && is_gimple_val (newop2));
1614           sum = build_and_add_sum (tmpvar, newop1, newop2, subcode);
1615           curr->op = gimple_get_lhs (sum);
1616         }
1617       return true;
1618     }
1619
1620   return false;
1621 }
1622
1623 /* Perform various identities and other optimizations on the list of
1624    operand entries, stored in OPS.  The tree code for the binary
1625    operation between all the operands is OPCODE.  */
1626
1627 static void
1628 optimize_ops_list (enum tree_code opcode,
1629                    VEC (operand_entry_t, heap) **ops)
1630 {
1631   unsigned int length = VEC_length (operand_entry_t, *ops);
1632   unsigned int i;
1633   operand_entry_t oe;
1634   operand_entry_t oelast = NULL;
1635   bool iterate = false;
1636
1637   if (length == 1)
1638     return;
1639
1640   oelast = VEC_last (operand_entry_t, *ops);
1641
1642   /* If the last two are constants, pop the constants off, merge them
1643      and try the next two.  */
1644   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1645     {
1646       operand_entry_t oelm1 = VEC_index (operand_entry_t, *ops, length - 2);
1647
1648       if (oelm1->rank == 0
1649           && is_gimple_min_invariant (oelm1->op)
1650           && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1651                                        TREE_TYPE (oelast->op)))
1652         {
1653           tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1654                                      oelm1->op, oelast->op);
1655
1656           if (folded && is_gimple_min_invariant (folded))
1657             {
1658               if (dump_file && (dump_flags & TDF_DETAILS))
1659                 fprintf (dump_file, "Merging constants\n");
1660
1661               VEC_pop (operand_entry_t, *ops);
1662               VEC_pop (operand_entry_t, *ops);
1663
1664               add_to_ops_vec (ops, folded);
1665               reassociate_stats.constants_eliminated++;
1666
1667               optimize_ops_list (opcode, ops);
1668               return;
1669             }
1670         }
1671     }
1672
1673   eliminate_using_constants (opcode, ops);
1674   oelast = NULL;
1675
1676   for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe);)
1677     {
1678       bool done = false;
1679
1680       if (eliminate_not_pairs (opcode, ops, i, oe))
1681         return;
1682       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1683           || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1684           || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1685         {
1686           if (done)
1687             return;
1688           iterate = true;
1689           oelast = NULL;
1690           continue;
1691         }
1692       oelast = oe;
1693       i++;
1694     }
1695
1696   length  = VEC_length (operand_entry_t, *ops);
1697   oelast = VEC_last (operand_entry_t, *ops);
1698
1699   if (iterate)
1700     optimize_ops_list (opcode, ops);
1701 }
1702
1703 /* The following functions are subroutines to optimize_range_tests and allow
1704    it to try to change a logical combination of comparisons into a range
1705    test.
1706
1707    For example, both
1708         X == 2 || X == 5 || X == 3 || X == 4
1709    and
1710         X >= 2 && X <= 5
1711    are converted to
1712         (unsigned) (X - 2) <= 3
1713
1714    For more information see comments above fold_test_range in fold-const.c,
1715    this implementation is for GIMPLE.  */
1716
1717 struct range_entry
1718 {
1719   tree exp;
1720   tree low;
1721   tree high;
1722   bool in_p;
1723   bool strict_overflow_p;
1724   unsigned int idx, next;
1725 };
1726
1727 /* This is similar to make_range in fold-const.c, but on top of
1728    GIMPLE instead of trees.  */
1729
1730 static void
1731 init_range_entry (struct range_entry *r, tree exp)
1732 {
1733   int in_p;
1734   tree low, high;
1735   bool is_bool, strict_overflow_p;
1736
1737   r->exp = NULL_TREE;
1738   r->in_p = false;
1739   r->strict_overflow_p = false;
1740   r->low = NULL_TREE;
1741   r->high = NULL_TREE;
1742   if (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))
1743     return;
1744
1745   /* Start with simply saying "EXP != 0" and then look at the code of EXP
1746      and see if we can refine the range.  Some of the cases below may not
1747      happen, but it doesn't seem worth worrying about this.  We "continue"
1748      the outer loop when we've changed something; otherwise we "break"
1749      the switch, which will "break" the while.  */
1750   low = build_int_cst (TREE_TYPE (exp), 0);
1751   high = low;
1752   in_p = 0;
1753   strict_overflow_p = false;
1754   is_bool = false;
1755   if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1756     {
1757       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1758         is_bool = true;
1759       else
1760         return;
1761     }
1762   else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1763     is_bool = true;
1764
1765   while (1)
1766     {
1767       gimple stmt;
1768       enum tree_code code;
1769       tree arg0, arg1, exp_type;
1770       tree nexp;
1771       location_t loc;
1772
1773       if (TREE_CODE (exp) != SSA_NAME)
1774         break;
1775
1776       stmt = SSA_NAME_DEF_STMT (exp);
1777       if (!is_gimple_assign (stmt))
1778         break;
1779
1780       code = gimple_assign_rhs_code (stmt);
1781       arg0 = gimple_assign_rhs1 (stmt);
1782       if (TREE_CODE (arg0) != SSA_NAME)
1783         break;
1784       arg1 = gimple_assign_rhs2 (stmt);
1785       exp_type = TREE_TYPE (exp);
1786       loc = gimple_location (stmt);
1787       switch (code)
1788         {
1789         case BIT_NOT_EXPR:
1790           if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1791             {
1792               in_p = !in_p;
1793               exp = arg0;
1794               continue;
1795             }
1796           break;
1797         case SSA_NAME:
1798           exp = arg0;
1799           continue;
1800         CASE_CONVERT:
1801           if (is_bool)
1802             goto do_default;
1803           if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1804             {
1805               if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1806                 is_bool = true;
1807               else
1808                 return;
1809             }
1810           else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1811             is_bool = true;
1812           goto do_default;
1813         case EQ_EXPR:
1814         case NE_EXPR:
1815         case LT_EXPR:
1816         case LE_EXPR:
1817         case GE_EXPR:
1818         case GT_EXPR:
1819           is_bool = true;
1820           /* FALLTHRU */
1821         default:
1822           if (!is_bool)
1823             return;
1824         do_default:
1825           nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1826                                   &low, &high, &in_p,
1827                                   &strict_overflow_p);
1828           if (nexp != NULL_TREE)
1829             {
1830               exp = nexp;
1831               gcc_assert (TREE_CODE (exp) == SSA_NAME);
1832               continue;
1833             }
1834           break;
1835         }
1836       break;
1837     }
1838   if (is_bool)
1839     {
1840       r->exp = exp;
1841       r->in_p = in_p;
1842       r->low = low;
1843       r->high = high;
1844       r->strict_overflow_p = strict_overflow_p;
1845     }
1846 }
1847
1848 /* Comparison function for qsort.  Sort entries
1849    without SSA_NAME exp first, then with SSA_NAMEs sorted
1850    by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1851    by increasing ->low and if ->low is the same, by increasing
1852    ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
1853    maximum.  */
1854
1855 static int
1856 range_entry_cmp (const void *a, const void *b)
1857 {
1858   const struct range_entry *p = (const struct range_entry *) a;
1859   const struct range_entry *q = (const struct range_entry *) b;
1860
1861   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1862     {
1863       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1864         {
1865           /* Group range_entries for the same SSA_NAME together.  */
1866           if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1867             return -1;
1868           else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1869             return 1;
1870           /* If ->low is different, NULL low goes first, then by
1871              ascending low.  */
1872           if (p->low != NULL_TREE)
1873             {
1874               if (q->low != NULL_TREE)
1875                 {
1876                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
1877                                           p->low, q->low);
1878                   if (tem && integer_onep (tem))
1879                     return -1;
1880                   tem = fold_binary (GT_EXPR, boolean_type_node,
1881                                      p->low, q->low);
1882                   if (tem && integer_onep (tem))
1883                     return 1;
1884                 }
1885               else
1886                 return 1;
1887             }
1888           else if (q->low != NULL_TREE)
1889             return -1;
1890           /* If ->high is different, NULL high goes last, before that by
1891              ascending high.  */
1892           if (p->high != NULL_TREE)
1893             {
1894               if (q->high != NULL_TREE)
1895                 {
1896                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
1897                                           p->high, q->high);
1898                   if (tem && integer_onep (tem))
1899                     return -1;
1900                   tem = fold_binary (GT_EXPR, boolean_type_node,
1901                                      p->high, q->high);
1902                   if (tem && integer_onep (tem))
1903                     return 1;
1904                 }
1905               else
1906                 return -1;
1907             }
1908           else if (p->high != NULL_TREE)
1909             return 1;
1910           /* If both ranges are the same, sort below by ascending idx.  */
1911         }
1912       else
1913         return 1;
1914     }
1915   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1916     return -1;
1917
1918   if (p->idx < q->idx)
1919     return -1;
1920   else
1921     {
1922       gcc_checking_assert (p->idx > q->idx);
1923       return 1;
1924     }
1925 }
1926
1927 /* Helper routine of optimize_range_test.
1928    [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1929    RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1930    OPCODE and OPS are arguments of optimize_range_tests.  Return
1931    true if the range merge has been successful.  */
1932
1933 static bool
1934 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1935                    unsigned int count, enum tree_code opcode,
1936                    VEC (operand_entry_t, heap) **ops, tree exp, bool in_p,
1937                    tree low, tree high, bool strict_overflow_p)
1938 {
1939   tree op = VEC_index (operand_entry_t, *ops, range->idx)->op;
1940   location_t loc = gimple_location (SSA_NAME_DEF_STMT (op));
1941   tree tem = build_range_check (loc, TREE_TYPE (op), exp, in_p, low, high);
1942   enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1943   gimple_stmt_iterator gsi;
1944
1945   if (tem == NULL_TREE)
1946     return false;
1947
1948   if (strict_overflow_p && issue_strict_overflow_warning (wc))
1949     warning_at (loc, OPT_Wstrict_overflow,
1950                 "assuming signed overflow does not occur "
1951                 "when simplifying range test");
1952
1953   if (dump_file && (dump_flags & TDF_DETAILS))
1954     {
1955       struct range_entry *r;
1956       fprintf (dump_file, "Optimizing range tests ");
1957       print_generic_expr (dump_file, range->exp, 0);
1958       fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1959       print_generic_expr (dump_file, range->low, 0);
1960       fprintf (dump_file, ", ");
1961       print_generic_expr (dump_file, range->high, 0);
1962       fprintf (dump_file, "]");
1963       for (r = otherrange; r < otherrange + count; r++)
1964         {
1965           fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
1966           print_generic_expr (dump_file, r->low, 0);
1967           fprintf (dump_file, ", ");
1968           print_generic_expr (dump_file, r->high, 0);
1969           fprintf (dump_file, "]");
1970         }
1971       fprintf (dump_file, "\n into ");
1972       print_generic_expr (dump_file, tem, 0);
1973       fprintf (dump_file, "\n");
1974     }
1975
1976   if (opcode == BIT_IOR_EXPR)
1977     tem = invert_truthvalue_loc (loc, tem);
1978
1979   tem = fold_convert_loc (loc, TREE_TYPE (op), tem);
1980   gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (op));
1981   tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
1982                                   GSI_SAME_STMT);
1983
1984   VEC_index (operand_entry_t, *ops, range->idx)->op = tem;
1985   range->exp = exp;
1986   range->low = low;
1987   range->high = high;
1988   range->in_p = in_p;
1989   range->strict_overflow_p = false;
1990
1991   for (range = otherrange; range < otherrange + count; range++)
1992     {
1993       VEC_index (operand_entry_t, *ops, range->idx)->op = error_mark_node;
1994       range->exp = NULL_TREE;
1995     }
1996   return true;
1997 }
1998
1999 /* Optimize range tests, similarly how fold_range_test optimizes
2000    it on trees.  The tree code for the binary
2001    operation between all the operands is OPCODE.  */
2002
2003 static void
2004 optimize_range_tests (enum tree_code opcode,
2005                       VEC (operand_entry_t, heap) **ops)
2006 {
2007   unsigned int length = VEC_length (operand_entry_t, *ops), i, j, first;
2008   operand_entry_t oe;
2009   struct range_entry *ranges;
2010   bool any_changes = false;
2011
2012   if (length == 1)
2013     return;
2014
2015   ranges = XNEWVEC (struct range_entry, length);
2016   for (i = 0; i < length; i++)
2017     {
2018       ranges[i].idx = i;
2019       init_range_entry (ranges + i, VEC_index (operand_entry_t, *ops, i)->op);
2020       /* For | invert it now, we will invert it again before emitting
2021          the optimized expression.  */
2022       if (opcode == BIT_IOR_EXPR)
2023         ranges[i].in_p = !ranges[i].in_p;
2024     }
2025
2026   qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2027   for (i = 0; i < length; i++)
2028     if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2029       break;
2030
2031   /* Try to merge ranges.  */
2032   for (first = i; i < length; i++)
2033     {
2034       tree low = ranges[i].low;
2035       tree high = ranges[i].high;
2036       int in_p = ranges[i].in_p;
2037       bool strict_overflow_p = ranges[i].strict_overflow_p;
2038       int update_fail_count = 0;
2039
2040       for (j = i + 1; j < length; j++)
2041         {
2042           if (ranges[i].exp != ranges[j].exp)
2043             break;
2044           if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2045                              ranges[j].in_p, ranges[j].low, ranges[j].high))
2046             break;
2047           strict_overflow_p |= ranges[j].strict_overflow_p;
2048         }
2049
2050       if (j == i + 1)
2051         continue;
2052
2053       if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2054                              ops, ranges[i].exp, in_p, low, high,
2055                              strict_overflow_p))
2056         {
2057           i = j - 1;
2058           any_changes = true;
2059         }
2060       /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2061          while update_range_test would fail.  */
2062       else if (update_fail_count == 64)
2063         i = j - 1;
2064       else
2065         ++update_fail_count;
2066     }
2067
2068   /* Optimize X == CST1 || X == CST2
2069      if popcount (CST1 ^ CST2) == 1 into
2070      (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2071      Similarly for ranges.  E.g.
2072      X != 2 && X != 3 && X != 10 && X != 11
2073      will be transformed by the above loop into
2074      (X - 2U) <= 1U && (X - 10U) <= 1U
2075      and this loop can transform that into
2076      ((X & ~8) - 2U) <= 1U.  */
2077   for (i = first; i < length; i++)
2078     {
2079       tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
2080
2081       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2082         continue;
2083       type = TREE_TYPE (ranges[i].exp);
2084       if (!INTEGRAL_TYPE_P (type))
2085         continue;
2086       lowi = ranges[i].low;
2087       if (lowi == NULL_TREE)
2088         lowi = TYPE_MIN_VALUE (type);
2089       highi = ranges[i].high;
2090       if (highi == NULL_TREE)
2091         continue;
2092       for (j = i + 1; j < length && j < i + 64; j++)
2093         {
2094           if (ranges[j].exp == NULL_TREE)
2095             continue;
2096           if (ranges[i].exp != ranges[j].exp)
2097             break;
2098           if (ranges[j].in_p)
2099             continue;
2100           lowj = ranges[j].low;
2101           if (lowj == NULL_TREE)
2102             continue;
2103           highj = ranges[j].high;
2104           if (highj == NULL_TREE)
2105             highj = TYPE_MAX_VALUE (type);
2106           tem = fold_binary (GT_EXPR, boolean_type_node,
2107                              lowj, highi);
2108           if (tem == NULL_TREE || !integer_onep (tem))
2109             continue;
2110           lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2111           if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2112             continue;
2113           gcc_checking_assert (!integer_zerop (lowxor));
2114           tem = fold_binary (MINUS_EXPR, type, lowxor,
2115                              build_int_cst (type, 1));
2116           if (tem == NULL_TREE)
2117             continue;
2118           tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
2119           if (tem == NULL_TREE || !integer_zerop (tem))
2120             continue;
2121           highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2122           if (!tree_int_cst_equal (lowxor, highxor))
2123             continue;
2124           tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2125           exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
2126           lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2127           highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2128           if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
2129                                  ranges[i].in_p, lowj, highj,
2130                                  ranges[i].strict_overflow_p
2131                                  || ranges[j].strict_overflow_p))
2132             {
2133               any_changes = true;
2134               break;
2135             }
2136         }
2137     }
2138
2139   if (any_changes)
2140     {
2141       j = 0;
2142       FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe)
2143         {
2144           if (oe->op == error_mark_node)
2145             continue;
2146           else if (i != j)
2147             VEC_replace (operand_entry_t, *ops, j, oe);
2148           j++;
2149         }
2150       VEC_truncate (operand_entry_t, *ops, j);
2151     }
2152
2153   XDELETEVEC (ranges);
2154 }
2155
2156 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2157    of STMT in it's operands.  This is also known as a "destructive
2158    update" operation.  */
2159
2160 static bool
2161 is_phi_for_stmt (gimple stmt, tree operand)
2162 {
2163   gimple def_stmt;
2164   tree lhs;
2165   use_operand_p arg_p;
2166   ssa_op_iter i;
2167
2168   if (TREE_CODE (operand) != SSA_NAME)
2169     return false;
2170
2171   lhs = gimple_assign_lhs (stmt);
2172
2173   def_stmt = SSA_NAME_DEF_STMT (operand);
2174   if (gimple_code (def_stmt) != GIMPLE_PHI)
2175     return false;
2176
2177   FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2178     if (lhs == USE_FROM_PTR (arg_p))
2179       return true;
2180   return false;
2181 }
2182
2183 /* Remove def stmt of VAR if VAR has zero uses and recurse
2184    on rhs1 operand if so.  */
2185
2186 static void
2187 remove_visited_stmt_chain (tree var)
2188 {
2189   gimple stmt;
2190   gimple_stmt_iterator gsi;
2191
2192   while (1)
2193     {
2194       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2195         return;
2196       stmt = SSA_NAME_DEF_STMT (var);
2197       if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
2198         {
2199           var = gimple_assign_rhs1 (stmt);
2200           gsi = gsi_for_stmt (stmt);
2201           gsi_remove (&gsi, true);
2202           release_defs (stmt);
2203         }
2204       else
2205         return;
2206     }
2207 }
2208
2209 /* This function checks three consequtive operands in
2210    passed operands vector OPS starting from OPINDEX and
2211    swaps two operands if it is profitable for binary operation
2212    consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
2213
2214    We pair ops with the same rank if possible.
2215
2216    The alternative we try is to see if STMT is a destructive
2217    update style statement, which is like:
2218    b = phi (a, ...)
2219    a = c + b;
2220    In that case, we want to use the destructive update form to
2221    expose the possible vectorizer sum reduction opportunity.
2222    In that case, the third operand will be the phi node. This
2223    check is not performed if STMT is null.
2224
2225    We could, of course, try to be better as noted above, and do a
2226    lot of work to try to find these opportunities in >3 operand
2227    cases, but it is unlikely to be worth it.  */
2228
2229 static void
2230 swap_ops_for_binary_stmt (VEC(operand_entry_t, heap) * ops,
2231                           unsigned int opindex, gimple stmt)
2232 {
2233   operand_entry_t oe1, oe2, oe3;
2234
2235   oe1 = VEC_index (operand_entry_t, ops, opindex);
2236   oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
2237   oe3 = VEC_index (operand_entry_t, ops, opindex + 2);
2238
2239   if ((oe1->rank == oe2->rank
2240        && oe2->rank != oe3->rank)
2241       || (stmt && is_phi_for_stmt (stmt, oe3->op)
2242           && !is_phi_for_stmt (stmt, oe1->op)
2243           && !is_phi_for_stmt (stmt, oe2->op)))
2244     {
2245       struct operand_entry temp = *oe3;
2246       oe3->op = oe1->op;
2247       oe3->rank = oe1->rank;
2248       oe1->op = temp.op;
2249       oe1->rank= temp.rank;
2250     }
2251   else if ((oe1->rank == oe3->rank
2252             && oe2->rank != oe3->rank)
2253            || (stmt && is_phi_for_stmt (stmt, oe2->op)
2254                && !is_phi_for_stmt (stmt, oe1->op)
2255                && !is_phi_for_stmt (stmt, oe3->op)))
2256     {
2257       struct operand_entry temp = *oe2;
2258       oe2->op = oe1->op;
2259       oe2->rank = oe1->rank;
2260       oe1->op = temp.op;
2261       oe1->rank= temp.rank;
2262     }
2263 }
2264
2265 /* Recursively rewrite our linearized statements so that the operators
2266    match those in OPS[OPINDEX], putting the computation in rank
2267    order.  */
2268
2269 static void
2270 rewrite_expr_tree (gimple stmt, unsigned int opindex,
2271                    VEC(operand_entry_t, heap) * ops, bool moved)
2272 {
2273   tree rhs1 = gimple_assign_rhs1 (stmt);
2274   tree rhs2 = gimple_assign_rhs2 (stmt);
2275   operand_entry_t oe;
2276
2277   /* If we have three operands left, then we want to make sure the ones
2278      that get the double binary op are chosen wisely.  */
2279   if (opindex + 3 == VEC_length (operand_entry_t, ops))
2280     swap_ops_for_binary_stmt (ops, opindex, stmt);
2281
2282   /* The final recursion case for this function is that you have
2283      exactly two operations left.
2284      If we had one exactly one op in the entire list to start with, we
2285      would have never called this function, and the tail recursion
2286      rewrites them one at a time.  */
2287   if (opindex + 2 == VEC_length (operand_entry_t, ops))
2288     {
2289       operand_entry_t oe1, oe2;
2290
2291       oe1 = VEC_index (operand_entry_t, ops, opindex);
2292       oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
2293
2294       if (rhs1 != oe1->op || rhs2 != oe2->op)
2295         {
2296           if (dump_file && (dump_flags & TDF_DETAILS))
2297             {
2298               fprintf (dump_file, "Transforming ");
2299               print_gimple_stmt (dump_file, stmt, 0, 0);
2300             }
2301
2302           gimple_assign_set_rhs1 (stmt, oe1->op);
2303           gimple_assign_set_rhs2 (stmt, oe2->op);
2304           update_stmt (stmt);
2305           if (rhs1 != oe1->op && rhs1 != oe2->op)
2306             remove_visited_stmt_chain (rhs1);
2307
2308           if (dump_file && (dump_flags & TDF_DETAILS))
2309             {
2310               fprintf (dump_file, " into ");
2311               print_gimple_stmt (dump_file, stmt, 0, 0);
2312             }
2313         }
2314       return;
2315     }
2316
2317   /* If we hit here, we should have 3 or more ops left.  */
2318   gcc_assert (opindex + 2 < VEC_length (operand_entry_t, ops));
2319
2320   /* Rewrite the next operator.  */
2321   oe = VEC_index (operand_entry_t, ops, opindex);
2322
2323   if (oe->op != rhs2)
2324     {
2325       if (!moved)
2326         {
2327           gimple_stmt_iterator gsinow, gsirhs1;
2328           gimple stmt1 = stmt, stmt2;
2329           unsigned int count;
2330
2331           gsinow = gsi_for_stmt (stmt);
2332           count = VEC_length (operand_entry_t, ops) - opindex - 2;
2333           while (count-- != 0)
2334             {
2335               stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
2336               gsirhs1 = gsi_for_stmt (stmt2);
2337               gsi_move_before (&gsirhs1, &gsinow);
2338               gsi_prev (&gsinow);
2339               stmt1 = stmt2;
2340             }
2341           moved = true;
2342         }
2343
2344       if (dump_file && (dump_flags & TDF_DETAILS))
2345         {
2346           fprintf (dump_file, "Transforming ");
2347           print_gimple_stmt (dump_file, stmt, 0, 0);
2348         }
2349
2350       gimple_assign_set_rhs2 (stmt, oe->op);
2351       update_stmt (stmt);
2352
2353       if (dump_file && (dump_flags & TDF_DETAILS))
2354         {
2355           fprintf (dump_file, " into ");
2356           print_gimple_stmt (dump_file, stmt, 0, 0);
2357         }
2358     }
2359   /* Recurse on the LHS of the binary operator, which is guaranteed to
2360      be the non-leaf side.  */
2361   rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
2362 }
2363
2364 /* Find out how many cycles we need to compute statements chain.
2365    OPS_NUM holds number os statements in a chain.  CPU_WIDTH is a
2366    maximum number of independent statements we may execute per cycle.  */
2367
2368 static int
2369 get_required_cycles (int ops_num, int cpu_width)
2370 {
2371   int res;
2372   int elog;
2373   unsigned int rest;
2374
2375   /* While we have more than 2 * cpu_width operands
2376      we may reduce number of operands by cpu_width
2377      per cycle.  */
2378   res = ops_num / (2 * cpu_width);
2379
2380   /* Remained operands count may be reduced twice per cycle
2381      until we have only one operand.  */
2382   rest = (unsigned)(ops_num - res * cpu_width);
2383   elog = exact_log2 (rest);
2384   if (elog >= 0)
2385     res += elog;
2386   else
2387     res += floor_log2 (rest) + 1;
2388
2389   return res;
2390 }
2391
2392 /* Returns an optimal number of registers to use for computation of
2393    given statements.  */
2394
2395 static int
2396 get_reassociation_width (int ops_num, enum tree_code opc,
2397                          enum machine_mode mode)
2398 {
2399   int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
2400   int width;
2401   int width_min;
2402   int cycles_best;
2403
2404   if (param_width > 0)
2405     width = param_width;
2406   else
2407     width = targetm.sched.reassociation_width (opc, mode);
2408
2409   if (width == 1)
2410     return width;
2411
2412   /* Get the minimal time required for sequence computation.  */
2413   cycles_best = get_required_cycles (ops_num, width);
2414
2415   /* Check if we may use less width and still compute sequence for
2416      the same time.  It will allow us to reduce registers usage.
2417      get_required_cycles is monotonically increasing with lower width
2418      so we can perform a binary search for the minimal width that still
2419      results in the optimal cycle count.  */
2420   width_min = 1;
2421   while (width > width_min)
2422     {
2423       int width_mid = (width + width_min) / 2;
2424
2425       if (get_required_cycles (ops_num, width_mid) == cycles_best)
2426         width = width_mid;
2427       else if (width_min < width_mid)
2428         width_min = width_mid;
2429       else
2430         break;
2431     }
2432
2433   return width;
2434 }
2435
2436 /* Recursively rewrite our linearized statements so that the operators
2437    match those in OPS[OPINDEX], putting the computation in rank
2438    order and trying to allow operations to be executed in
2439    parallel.  */
2440
2441 static void
2442 rewrite_expr_tree_parallel (gimple stmt, int width,
2443                             VEC(operand_entry_t, heap) * ops)
2444 {
2445   enum tree_code opcode = gimple_assign_rhs_code (stmt);
2446   int op_num = VEC_length (operand_entry_t, ops);
2447   int stmt_num = op_num - 1;
2448   gimple *stmts = XALLOCAVEC (gimple, stmt_num);
2449   int op_index = op_num - 1;
2450   int stmt_index = 0;
2451   int ready_stmts_end = 0;
2452   int i = 0;
2453   tree last_rhs1 = gimple_assign_rhs1 (stmt);
2454   tree lhs_var;
2455
2456   /* We start expression rewriting from the top statements.
2457      So, in this loop we create a full list of statements
2458      we will work with.  */
2459   stmts[stmt_num - 1] = stmt;
2460   for (i = stmt_num - 2; i >= 0; i--)
2461     stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
2462
2463   lhs_var = create_tmp_reg (TREE_TYPE (last_rhs1), NULL);
2464   add_referenced_var (lhs_var);
2465
2466   for (i = 0; i < stmt_num; i++)
2467     {
2468       tree op1, op2;
2469
2470       /* Determine whether we should use results of
2471          already handled statements or not.  */
2472       if (ready_stmts_end == 0
2473           && (i - stmt_index >= width || op_index < 1))
2474         ready_stmts_end = i;
2475
2476       /* Now we choose operands for the next statement.  Non zero
2477          value in ready_stmts_end means here that we should use
2478          the result of already generated statements as new operand.  */
2479       if (ready_stmts_end > 0)
2480         {
2481           op1 = gimple_assign_lhs (stmts[stmt_index++]);
2482           if (ready_stmts_end > stmt_index)
2483             op2 = gimple_assign_lhs (stmts[stmt_index++]);
2484           else if (op_index >= 0)
2485             op2 = VEC_index (operand_entry_t, ops, op_index--)->op;
2486           else
2487             {
2488               gcc_assert (stmt_index < i);
2489               op2 = gimple_assign_lhs (stmts[stmt_index++]);
2490             }
2491
2492           if (stmt_index >= ready_stmts_end)
2493             ready_stmts_end = 0;
2494         }
2495       else
2496         {
2497           if (op_index > 1)
2498             swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
2499           op2 = VEC_index (operand_entry_t, ops, op_index--)->op;
2500           op1 = VEC_index (operand_entry_t, ops, op_index--)->op;
2501         }
2502
2503       /* If we emit the last statement then we should put
2504          operands into the last statement.  It will also
2505          break the loop.  */
2506       if (op_index < 0 && stmt_index == i)
2507         i = stmt_num - 1;
2508
2509       if (dump_file && (dump_flags & TDF_DETAILS))
2510         {
2511           fprintf (dump_file, "Transforming ");
2512           print_gimple_stmt (dump_file, stmts[i], 0, 0);
2513         }
2514
2515       /* We keep original statement only for the last one.  All
2516          others are recreated.  */
2517       if (i == stmt_num - 1)
2518         {
2519           gimple_assign_set_rhs1 (stmts[i], op1);
2520           gimple_assign_set_rhs2 (stmts[i], op2);
2521           update_stmt (stmts[i]);
2522         }
2523       else
2524         stmts[i] = build_and_add_sum (lhs_var, op1, op2, opcode);
2525
2526       if (dump_file && (dump_flags & TDF_DETAILS))
2527         {
2528           fprintf (dump_file, " into ");
2529           print_gimple_stmt (dump_file, stmts[i], 0, 0);
2530         }
2531     }
2532
2533   remove_visited_stmt_chain (last_rhs1);
2534 }
2535
2536 /* Transform STMT, which is really (A +B) + (C + D) into the left
2537    linear form, ((A+B)+C)+D.
2538    Recurse on D if necessary.  */
2539
2540 static void
2541 linearize_expr (gimple stmt)
2542 {
2543   gimple_stmt_iterator gsinow, gsirhs;
2544   gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2545   gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
2546   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
2547   gimple newbinrhs = NULL;
2548   struct loop *loop = loop_containing_stmt (stmt);
2549
2550   gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
2551               && is_reassociable_op (binrhs, rhscode, loop));
2552
2553   gsinow = gsi_for_stmt (stmt);
2554   gsirhs = gsi_for_stmt (binrhs);
2555   gsi_move_before (&gsirhs, &gsinow);
2556
2557   gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
2558   gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
2559   gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
2560
2561   if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
2562     newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
2563
2564   if (dump_file && (dump_flags & TDF_DETAILS))
2565     {
2566       fprintf (dump_file, "Linearized: ");
2567       print_gimple_stmt (dump_file, stmt, 0, 0);
2568     }
2569
2570   reassociate_stats.linearized++;
2571   update_stmt (binrhs);
2572   update_stmt (binlhs);
2573   update_stmt (stmt);
2574
2575   gimple_set_visited (stmt, true);
2576   gimple_set_visited (binlhs, true);
2577   gimple_set_visited (binrhs, true);
2578
2579   /* Tail recurse on the new rhs if it still needs reassociation.  */
2580   if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
2581     /* ??? This should probably be linearize_expr (newbinrhs) but I don't
2582            want to change the algorithm while converting to tuples.  */
2583     linearize_expr (stmt);
2584 }
2585
2586 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
2587    it.  Otherwise, return NULL.  */
2588
2589 static gimple
2590 get_single_immediate_use (tree lhs)
2591 {
2592   use_operand_p immuse;
2593   gimple immusestmt;
2594
2595   if (TREE_CODE (lhs) == SSA_NAME
2596       && single_imm_use (lhs, &immuse, &immusestmt)
2597       && is_gimple_assign (immusestmt))
2598     return immusestmt;
2599
2600   return NULL;
2601 }
2602
2603 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
2604    representing the negated value.  Insertions of any necessary
2605    instructions go before GSI.
2606    This function is recursive in that, if you hand it "a_5" as the
2607    value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
2608    transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
2609
2610 static tree
2611 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
2612 {
2613   gimple negatedefstmt= NULL;
2614   tree resultofnegate;
2615
2616   /* If we are trying to negate a name, defined by an add, negate the
2617      add operands instead.  */
2618   if (TREE_CODE (tonegate) == SSA_NAME)
2619     negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
2620   if (TREE_CODE (tonegate) == SSA_NAME
2621       && is_gimple_assign (negatedefstmt)
2622       && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
2623       && has_single_use (gimple_assign_lhs (negatedefstmt))
2624       && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
2625     {
2626       gimple_stmt_iterator gsi;
2627       tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
2628       tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
2629
2630       gsi = gsi_for_stmt (negatedefstmt);
2631       rhs1 = negate_value (rhs1, &gsi);
2632       gimple_assign_set_rhs1 (negatedefstmt, rhs1);
2633
2634       gsi = gsi_for_stmt (negatedefstmt);
2635       rhs2 = negate_value (rhs2, &gsi);
2636       gimple_assign_set_rhs2 (negatedefstmt, rhs2);
2637
2638       update_stmt (negatedefstmt);
2639       return gimple_assign_lhs (negatedefstmt);
2640     }
2641
2642   tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
2643   resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
2644                                              NULL_TREE, true, GSI_SAME_STMT);
2645   return resultofnegate;
2646 }
2647
2648 /* Return true if we should break up the subtract in STMT into an add
2649    with negate.  This is true when we the subtract operands are really
2650    adds, or the subtract itself is used in an add expression.  In
2651    either case, breaking up the subtract into an add with negate
2652    exposes the adds to reassociation.  */
2653
2654 static bool
2655 should_break_up_subtract (gimple stmt)
2656 {
2657   tree lhs = gimple_assign_lhs (stmt);
2658   tree binlhs = gimple_assign_rhs1 (stmt);
2659   tree binrhs = gimple_assign_rhs2 (stmt);
2660   gimple immusestmt;
2661   struct loop *loop = loop_containing_stmt (stmt);
2662
2663   if (TREE_CODE (binlhs) == SSA_NAME
2664       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
2665     return true;
2666
2667   if (TREE_CODE (binrhs) == SSA_NAME
2668       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
2669     return true;
2670
2671   if (TREE_CODE (lhs) == SSA_NAME
2672       && (immusestmt = get_single_immediate_use (lhs))
2673       && is_gimple_assign (immusestmt)
2674       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
2675           ||  gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
2676     return true;
2677   return false;
2678 }
2679
2680 /* Transform STMT from A - B into A + -B.  */
2681
2682 static void
2683 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
2684 {
2685   tree rhs1 = gimple_assign_rhs1 (stmt);
2686   tree rhs2 = gimple_assign_rhs2 (stmt);
2687
2688   if (dump_file && (dump_flags & TDF_DETAILS))
2689     {
2690       fprintf (dump_file, "Breaking up subtract ");
2691       print_gimple_stmt (dump_file, stmt, 0, 0);
2692     }
2693
2694   rhs2 = negate_value (rhs2, gsip);
2695   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
2696   update_stmt (stmt);
2697 }
2698
2699 /* Determine whether STMT is a builtin call that raises an SSA name
2700    to an integer power and has only one use.  If so, and this is early
2701    reassociation and unsafe math optimizations are permitted, place
2702    the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
2703    If any of these conditions does not hold, return FALSE.  */
2704
2705 static bool
2706 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
2707 {
2708   tree fndecl, arg1;
2709   REAL_VALUE_TYPE c, cint;
2710
2711   if (!first_pass_instance
2712       || !flag_unsafe_math_optimizations
2713       || !is_gimple_call (stmt)
2714       || !has_single_use (gimple_call_lhs (stmt)))
2715     return false;
2716
2717   fndecl = gimple_call_fndecl (stmt);
2718
2719   if (!fndecl
2720       || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
2721     return false;
2722
2723   switch (DECL_FUNCTION_CODE (fndecl))
2724     {
2725     CASE_FLT_FN (BUILT_IN_POW):
2726       *base = gimple_call_arg (stmt, 0);
2727       arg1 = gimple_call_arg (stmt, 1);
2728
2729       if (TREE_CODE (arg1) != REAL_CST)
2730         return false;
2731
2732       c = TREE_REAL_CST (arg1);
2733
2734       if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
2735         return false;
2736
2737       *exponent = real_to_integer (&c);
2738       real_from_integer (&cint, VOIDmode, *exponent,
2739                          *exponent < 0 ? -1 : 0, 0);
2740       if (!real_identical (&c, &cint))
2741         return false;
2742
2743       break;
2744
2745     CASE_FLT_FN (BUILT_IN_POWI):
2746       *base = gimple_call_arg (stmt, 0);
2747       arg1 = gimple_call_arg (stmt, 1);
2748
2749       if (!host_integerp (arg1, 0))
2750         return false;
2751
2752       *exponent = TREE_INT_CST_LOW (arg1);
2753       break;
2754
2755     default:
2756       return false;
2757     }
2758
2759   /* Expanding negative exponents is generally unproductive, so we don't
2760      complicate matters with those.  Exponents of zero and one should
2761      have been handled by expression folding.  */
2762   if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
2763     return false;
2764
2765   return true;
2766 }
2767
2768 /* Recursively linearize a binary expression that is the RHS of STMT.
2769    Place the operands of the expression tree in the vector named OPS.  */
2770
2771 static void
2772 linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt,
2773                      bool is_associative, bool set_visited)
2774 {
2775   tree binlhs = gimple_assign_rhs1 (stmt);
2776   tree binrhs = gimple_assign_rhs2 (stmt);
2777   gimple binlhsdef = NULL, binrhsdef = NULL;
2778   bool binlhsisreassoc = false;
2779   bool binrhsisreassoc = false;
2780   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
2781   struct loop *loop = loop_containing_stmt (stmt);
2782   tree base = NULL_TREE;
2783   HOST_WIDE_INT exponent = 0;
2784
2785   if (set_visited)
2786     gimple_set_visited (stmt, true);
2787
2788   if (TREE_CODE (binlhs) == SSA_NAME)
2789     {
2790       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
2791       binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
2792                          && !stmt_could_throw_p (binlhsdef));
2793     }
2794
2795   if (TREE_CODE (binrhs) == SSA_NAME)
2796     {
2797       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
2798       binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
2799                          && !stmt_could_throw_p (binrhsdef));
2800     }
2801
2802   /* If the LHS is not reassociable, but the RHS is, we need to swap
2803      them.  If neither is reassociable, there is nothing we can do, so
2804      just put them in the ops vector.  If the LHS is reassociable,
2805      linearize it.  If both are reassociable, then linearize the RHS
2806      and the LHS.  */
2807
2808   if (!binlhsisreassoc)
2809     {
2810       tree temp;
2811
2812       /* If this is not a associative operation like division, give up.  */
2813       if (!is_associative)
2814         {
2815           add_to_ops_vec (ops, binrhs);
2816           return;
2817         }
2818
2819       if (!binrhsisreassoc)
2820         {
2821           if (rhscode == MULT_EXPR
2822               && TREE_CODE (binrhs) == SSA_NAME
2823               && acceptable_pow_call (binrhsdef, &base, &exponent))
2824             {
2825               add_repeat_to_ops_vec (ops, base, exponent);
2826               gimple_set_visited (binrhsdef, true);
2827             }
2828           else
2829             add_to_ops_vec (ops, binrhs);
2830
2831           if (rhscode == MULT_EXPR
2832               && TREE_CODE (binlhs) == SSA_NAME
2833               && acceptable_pow_call (binlhsdef, &base, &exponent))
2834             {
2835               add_repeat_to_ops_vec (ops, base, exponent);
2836               gimple_set_visited (binlhsdef, true);
2837             }
2838           else
2839             add_to_ops_vec (ops, binlhs);
2840
2841           return;
2842         }
2843
2844       if (dump_file && (dump_flags & TDF_DETAILS))
2845         {
2846           fprintf (dump_file, "swapping operands of ");
2847           print_gimple_stmt (dump_file, stmt, 0, 0);
2848         }
2849
2850       swap_tree_operands (stmt,
2851                           gimple_assign_rhs1_ptr (stmt),
2852                           gimple_assign_rhs2_ptr (stmt));
2853       update_stmt (stmt);
2854
2855       if (dump_file && (dump_flags & TDF_DETAILS))
2856         {
2857           fprintf (dump_file, " is now ");
2858           print_gimple_stmt (dump_file, stmt, 0, 0);
2859         }
2860
2861       /* We want to make it so the lhs is always the reassociative op,
2862          so swap.  */
2863       temp = binlhs;
2864       binlhs = binrhs;
2865       binrhs = temp;
2866     }
2867   else if (binrhsisreassoc)
2868     {
2869       linearize_expr (stmt);
2870       binlhs = gimple_assign_rhs1 (stmt);
2871       binrhs = gimple_assign_rhs2 (stmt);
2872     }
2873
2874   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
2875               || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
2876                                       rhscode, loop));
2877   linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
2878                        is_associative, set_visited);
2879
2880   if (rhscode == MULT_EXPR
2881       && TREE_CODE (binrhs) == SSA_NAME
2882       && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
2883     {
2884       add_repeat_to_ops_vec (ops, base, exponent);
2885       gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
2886     }
2887   else
2888     add_to_ops_vec (ops, binrhs);
2889 }
2890
2891 /* Repropagate the negates back into subtracts, since no other pass
2892    currently does it.  */
2893
2894 static void
2895 repropagate_negates (void)
2896 {
2897   unsigned int i = 0;
2898   tree negate;
2899
2900   FOR_EACH_VEC_ELT (tree, plus_negates, i, negate)
2901     {
2902       gimple user = get_single_immediate_use (negate);
2903
2904       if (!user || !is_gimple_assign (user))
2905         continue;
2906
2907       /* The negate operand can be either operand of a PLUS_EXPR
2908          (it can be the LHS if the RHS is a constant for example).
2909
2910          Force the negate operand to the RHS of the PLUS_EXPR, then
2911          transform the PLUS_EXPR into a MINUS_EXPR.  */
2912       if (gimple_assign_rhs_code (user) == PLUS_EXPR)
2913         {
2914           /* If the negated operand appears on the LHS of the
2915              PLUS_EXPR, exchange the operands of the PLUS_EXPR
2916              to force the negated operand to the RHS of the PLUS_EXPR.  */
2917           if (gimple_assign_rhs1 (user) == negate)
2918             {
2919               swap_tree_operands (user,
2920                                   gimple_assign_rhs1_ptr (user),
2921                                   gimple_assign_rhs2_ptr (user));
2922             }
2923
2924           /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
2925              the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
2926           if (gimple_assign_rhs2 (user) == negate)
2927             {
2928               tree rhs1 = gimple_assign_rhs1 (user);
2929               tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
2930               gimple_stmt_iterator gsi = gsi_for_stmt (user);
2931               gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
2932               update_stmt (user);
2933             }
2934         }
2935       else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
2936         {
2937           if (gimple_assign_rhs1 (user) == negate)
2938             {
2939               /* We have
2940                    x = -a
2941                    y = x - b
2942                  which we transform into
2943                    x = a + b
2944                    y = -x .
2945                  This pushes down the negate which we possibly can merge
2946                  into some other operation, hence insert it into the
2947                  plus_negates vector.  */
2948               gimple feed = SSA_NAME_DEF_STMT (negate);
2949               tree a = gimple_assign_rhs1 (feed);
2950               tree rhs2 = gimple_assign_rhs2 (user);
2951               gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
2952               gimple_replace_lhs (feed, negate);
2953               gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
2954               update_stmt (gsi_stmt (gsi));
2955               gsi2 = gsi_for_stmt (user);
2956               gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
2957               update_stmt (gsi_stmt (gsi2));
2958               gsi_move_before (&gsi, &gsi2);
2959               VEC_safe_push (tree, heap, plus_negates,
2960                              gimple_assign_lhs (gsi_stmt (gsi2)));
2961             }
2962           else
2963             {
2964               /* Transform "x = -a; y = b - x" into "y = b + a", getting
2965                  rid of one operation.  */
2966               gimple feed = SSA_NAME_DEF_STMT (negate);
2967               tree a = gimple_assign_rhs1 (feed);
2968               tree rhs1 = gimple_assign_rhs1 (user);
2969               gimple_stmt_iterator gsi = gsi_for_stmt (user);
2970               gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
2971               update_stmt (gsi_stmt (gsi));
2972             }
2973         }
2974     }
2975 }
2976
2977 /* Returns true if OP is of a type for which we can do reassociation.
2978    That is for integral or non-saturating fixed-point types, and for
2979    floating point type when associative-math is enabled.  */
2980
2981 static bool
2982 can_reassociate_p (tree op)
2983 {
2984   tree type = TREE_TYPE (op);
2985   if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
2986       || NON_SAT_FIXED_POINT_TYPE_P (type)
2987       || (flag_associative_math && FLOAT_TYPE_P (type)))
2988     return true;
2989   return false;
2990 }
2991
2992 /* Break up subtract operations in block BB.
2993
2994    We do this top down because we don't know whether the subtract is
2995    part of a possible chain of reassociation except at the top.
2996
2997    IE given
2998    d = f + g
2999    c = a + e
3000    b = c - d
3001    q = b - r
3002    k = t - q
3003
3004    we want to break up k = t - q, but we won't until we've transformed q
3005    = b - r, which won't be broken up until we transform b = c - d.
3006
3007    En passant, clear the GIMPLE visited flag on every statement.  */
3008
3009 static void
3010 break_up_subtract_bb (basic_block bb)
3011 {
3012   gimple_stmt_iterator gsi;
3013   basic_block son;
3014
3015   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3016     {
3017       gimple stmt = gsi_stmt (gsi);
3018       gimple_set_visited (stmt, false);
3019
3020       if (!is_gimple_assign (stmt)
3021           || !can_reassociate_p (gimple_assign_lhs (stmt)))
3022         continue;
3023
3024       /* Look for simple gimple subtract operations.  */
3025       if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3026         {
3027           if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3028               || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3029             continue;
3030
3031           /* Check for a subtract used only in an addition.  If this
3032              is the case, transform it into add of a negate for better
3033              reassociation.  IE transform C = A-B into C = A + -B if C
3034              is only used in an addition.  */
3035           if (should_break_up_subtract (stmt))
3036             break_up_subtract (stmt, &gsi);
3037         }
3038       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3039                && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3040         VEC_safe_push (tree, heap, plus_negates, gimple_assign_lhs (stmt));
3041     }
3042   for (son = first_dom_son (CDI_DOMINATORS, bb);
3043        son;
3044        son = next_dom_son (CDI_DOMINATORS, son))
3045     break_up_subtract_bb (son);
3046 }
3047
3048 /* Used for repeated factor analysis.  */
3049 struct repeat_factor_d
3050 {
3051   /* An SSA name that occurs in a multiply chain.  */
3052   tree factor;
3053
3054   /* Cached rank of the factor.  */
3055   unsigned rank;
3056
3057   /* Number of occurrences of the factor in the chain.  */
3058   HOST_WIDE_INT count;
3059
3060   /* An SSA name representing the product of this factor and
3061      all factors appearing later in the repeated factor vector.  */
3062   tree repr;
3063 };
3064
3065 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3066 typedef const struct repeat_factor_d *const_repeat_factor_t;
3067
3068 DEF_VEC_O (repeat_factor);
3069 DEF_VEC_ALLOC_O (repeat_factor, heap);
3070
3071 static VEC (repeat_factor, heap) *repeat_factor_vec;
3072
3073 /* Used for sorting the repeat factor vector.  Sort primarily by
3074    ascending occurrence count, secondarily by descending rank.  */
3075
3076 static int
3077 compare_repeat_factors (const void *x1, const void *x2)
3078 {
3079   const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3080   const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3081
3082   if (rf1->count != rf2->count)
3083     return rf1->count - rf2->count;
3084
3085   return rf2->rank - rf1->rank;
3086 }
3087
3088 /* Get a new SSA name for register variable *TARGET of type TYPE.
3089    If *TARGET is null or incompatible with TYPE, create the variable
3090    first.  */
3091
3092 static tree
3093 get_reassoc_pow_ssa_name (tree *target, tree type)
3094 {
3095   if (!*target || !types_compatible_p (type, TREE_TYPE (*target)))
3096     {
3097       *target = create_tmp_reg (type, "reassocpow");
3098       add_referenced_var (*target);
3099     }
3100
3101   return make_ssa_name (*target, NULL);
3102 }
3103
3104 /* Look for repeated operands in OPS in the multiply tree rooted at
3105    STMT.  Replace them with an optimal sequence of multiplies and powi
3106    builtin calls, and remove the used operands from OPS.  Return an
3107    SSA name representing the value of the replacement sequence.  */
3108
3109 static tree
3110 attempt_builtin_powi (gimple stmt, VEC(operand_entry_t, heap) **ops,
3111                       tree *target)
3112 {
3113   unsigned i, j, vec_len;
3114   int ii;
3115   operand_entry_t oe;
3116   repeat_factor_t rf1, rf2;
3117   repeat_factor rfnew;
3118   tree result = NULL_TREE;
3119   tree target_ssa, iter_result;
3120   tree type = TREE_TYPE (gimple_get_lhs (stmt));
3121   tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
3122   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3123   gimple mul_stmt, pow_stmt;
3124
3125   /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3126      target.  */
3127   if (!powi_fndecl)
3128     return NULL_TREE;
3129
3130   /* Allocate the repeated factor vector.  */
3131   repeat_factor_vec = VEC_alloc (repeat_factor, heap, 10);
3132
3133   /* Scan the OPS vector for all SSA names in the product and build
3134      up a vector of occurrence counts for each factor.  */
3135   FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe)
3136     {
3137       if (TREE_CODE (oe->op) == SSA_NAME)
3138         {
3139           FOR_EACH_VEC_ELT (repeat_factor, repeat_factor_vec, j, rf1)
3140             {
3141               if (rf1->factor == oe->op)
3142                 {
3143                   rf1->count += oe->count;
3144                   break;
3145                 }
3146             }
3147
3148           if (j >= VEC_length (repeat_factor, repeat_factor_vec))
3149             {
3150               rfnew.factor = oe->op;
3151               rfnew.rank = oe->rank;
3152               rfnew.count = oe->count;
3153               rfnew.repr = NULL_TREE;
3154               VEC_safe_push (repeat_factor, heap, repeat_factor_vec, &rfnew);
3155             }
3156         }
3157     }
3158
3159   /* Sort the repeated factor vector by (a) increasing occurrence count,
3160      and (b) decreasing rank.  */
3161   VEC_qsort (repeat_factor, repeat_factor_vec, compare_repeat_factors);
3162
3163   /* It is generally best to combine as many base factors as possible
3164      into a product before applying __builtin_powi to the result.
3165      However, the sort order chosen for the repeated factor vector
3166      allows us to cache partial results for the product of the base
3167      factors for subsequent use.  When we already have a cached partial
3168      result from a previous iteration, it is best to make use of it
3169      before looking for another __builtin_pow opportunity.
3170
3171      As an example, consider x * x * y * y * y * z * z * z * z.
3172      We want to first compose the product x * y * z, raise it to the
3173      second power, then multiply this by y * z, and finally multiply
3174      by z.  This can be done in 5 multiplies provided we cache y * z
3175      for use in both expressions:
3176
3177         t1 = y * z
3178         t2 = t1 * x
3179         t3 = t2 * t2
3180         t4 = t1 * t3
3181         result = t4 * z
3182
3183      If we instead ignored the cached y * z and first multiplied by
3184      the __builtin_pow opportunity z * z, we would get the inferior:
3185
3186         t1 = y * z
3187         t2 = t1 * x
3188         t3 = t2 * t2
3189         t4 = z * z
3190         t5 = t3 * t4
3191         result = t5 * y  */
3192
3193   vec_len = VEC_length (repeat_factor, repeat_factor_vec);
3194
3195   /* Repeatedly look for opportunities to create a builtin_powi call.  */
3196   while (true)
3197     {
3198       HOST_WIDE_INT power;
3199
3200       /* First look for the largest cached product of factors from
3201          preceding iterations.  If found, create a builtin_powi for
3202          it if the minimum occurrence count for its factors is at
3203          least 2, or just use this cached product as our next
3204          multiplicand if the minimum occurrence count is 1.  */
3205       FOR_EACH_VEC_ELT (repeat_factor, repeat_factor_vec, j, rf1)
3206         {
3207           if (rf1->repr && rf1->count > 0)
3208             break;
3209         }
3210
3211       if (j < vec_len)
3212         {
3213           power = rf1->count;
3214
3215           if (power == 1)
3216             {
3217               iter_result = rf1->repr;
3218
3219               if (dump_file && (dump_flags & TDF_DETAILS))
3220                 {
3221                   unsigned elt;
3222                   repeat_factor_t rf;
3223                   fputs ("Multiplying by cached product ", dump_file);
3224                   for (elt = j; elt < vec_len; elt++)
3225                     {
3226                       rf = VEC_index (repeat_factor, repeat_factor_vec, elt);
3227                       print_generic_expr (dump_file, rf->factor, 0);
3228                       if (elt < vec_len - 1)
3229                         fputs (" * ", dump_file);
3230                     }
3231                   fputs ("\n", dump_file);
3232                 }
3233             }
3234           else
3235             {
3236               iter_result = get_reassoc_pow_ssa_name (target, type);
3237               pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3238                                             build_int_cst (integer_type_node,
3239                                                            power));
3240               gimple_call_set_lhs (pow_stmt, iter_result);
3241               gimple_set_location (pow_stmt, gimple_location (stmt));
3242               gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3243
3244               if (dump_file && (dump_flags & TDF_DETAILS))
3245                 {
3246                   unsigned elt;
3247                   repeat_factor_t rf;
3248                   fputs ("Building __builtin_pow call for cached product (",
3249                          dump_file);
3250                   for (elt = j; elt < vec_len; elt++)
3251                     {
3252                       rf = VEC_index (repeat_factor, repeat_factor_vec, elt);
3253                       print_generic_expr (dump_file, rf->factor, 0);
3254                       if (elt < vec_len - 1)
3255                         fputs (" * ", dump_file);
3256                     }
3257                   fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
3258                            power);
3259                 }
3260             }
3261         }
3262       else
3263         {
3264           /* Otherwise, find the first factor in the repeated factor
3265              vector whose occurrence count is at least 2.  If no such
3266              factor exists, there are no builtin_powi opportunities
3267              remaining.  */
3268           FOR_EACH_VEC_ELT (repeat_factor, repeat_factor_vec, j, rf1)
3269             {
3270               if (rf1->count >= 2)
3271                 break;
3272             }
3273
3274           if (j >= vec_len)
3275             break;
3276
3277           power = rf1->count;
3278
3279           if (dump_file && (dump_flags & TDF_DETAILS))
3280             {
3281               unsigned elt;
3282               repeat_factor_t rf;
3283               fputs ("Building __builtin_pow call for (", dump_file);
3284               for (elt = j; elt < vec_len; elt++)
3285                 {
3286                   rf = VEC_index (repeat_factor, repeat_factor_vec, elt);
3287                   print_generic_expr (dump_file, rf->factor, 0);
3288                   if (elt < vec_len - 1)
3289                     fputs (" * ", dump_file);
3290                 }
3291               fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
3292             }
3293
3294           reassociate_stats.pows_created++;
3295
3296           /* Visit each element of the vector in reverse order (so that
3297              high-occurrence elements are visited first, and within the
3298              same occurrence count, lower-ranked elements are visited
3299              first).  Form a linear product of all elements in this order
3300              whose occurrencce count is at least that of element J.
3301              Record the SSA name representing the product of each element
3302              with all subsequent elements in the vector.  */
3303           if (j == vec_len - 1)
3304             rf1->repr = rf1->factor;
3305           else
3306             {
3307               for (ii = vec_len - 2; ii >= (int)j; ii--)
3308                 {
3309                   tree op1, op2;
3310
3311                   rf1 = VEC_index (repeat_factor, repeat_factor_vec, ii);
3312                   rf2 = VEC_index (repeat_factor, repeat_factor_vec, ii + 1);
3313
3314                   /* Init the last factor's representative to be itself.  */
3315                   if (!rf2->repr)
3316                     rf2->repr = rf2->factor;
3317
3318                   op1 = rf1->factor;
3319                   op2 = rf2->repr;
3320
3321                   target_ssa = get_reassoc_pow_ssa_name (target, type);
3322                   mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
3323                                                            target_ssa,
3324                                                            op1, op2);
3325                   gimple_set_location (mul_stmt, gimple_location (stmt));
3326                   gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3327                   rf1->repr = target_ssa;
3328
3329                   /* Don't reprocess the multiply we just introduced.  */
3330                   gimple_set_visited (mul_stmt, true);
3331                 }
3332             }
3333
3334           /* Form a call to __builtin_powi for the maximum product
3335              just formed, raised to the power obtained earlier.  */
3336           rf1 = VEC_index (repeat_factor, repeat_factor_vec, j);
3337           iter_result = get_reassoc_pow_ssa_name (target, type);
3338           pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3339                                         build_int_cst (integer_type_node,
3340                                                        power));
3341           gimple_call_set_lhs (pow_stmt, iter_result);
3342           gimple_set_location (pow_stmt, gimple_location (stmt));
3343           gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3344         }
3345
3346       /* If we previously formed at least one other builtin_powi call,
3347          form the product of this one and those others.  */
3348       if (result)
3349         {
3350           tree new_result = get_reassoc_pow_ssa_name (target, type);
3351           mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
3352                                                    result, iter_result);
3353           gimple_set_location (mul_stmt, gimple_location (stmt));
3354           gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3355           gimple_set_visited (mul_stmt, true);
3356           result = new_result;
3357         }
3358       else
3359         result = iter_result;
3360
3361       /* Decrement the occurrence count of each element in the product
3362          by the count found above, and remove this many copies of each
3363          factor from OPS.  */
3364       for (i = j; i < vec_len; i++)
3365         {
3366           unsigned k = power;
3367           unsigned n;
3368
3369           rf1 = VEC_index (repeat_factor, repeat_factor_vec, i);
3370           rf1->count -= power;
3371
3372           FOR_EACH_VEC_ELT_REVERSE (operand_entry_t, *ops, n, oe)
3373             {
3374               if (oe->op == rf1->factor)
3375                 {
3376                   if (oe->count <= k)
3377                     {
3378                       VEC_ordered_remove (operand_entry_t, *ops, n);
3379                       k -= oe->count;
3380
3381                       if (k == 0)
3382                         break;
3383                     }
3384                   else
3385                     {
3386                       oe->count -= k;
3387                       break;
3388                     }
3389                 }
3390             }
3391         }
3392     }
3393
3394   /* At this point all elements in the repeated factor vector have a
3395      remaining occurrence count of 0 or 1, and those with a count of 1
3396      don't have cached representatives.  Re-sort the ops vector and
3397      clean up.  */
3398   VEC_qsort (operand_entry_t, *ops, sort_by_operand_rank);
3399   VEC_free (repeat_factor, heap, repeat_factor_vec);
3400
3401   /* Return the final product computed herein.  Note that there may
3402      still be some elements with single occurrence count left in OPS;
3403      those will be handled by the normal reassociation logic.  */
3404   return result;
3405 }
3406
3407 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
3408
3409 static void
3410 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
3411 {
3412   tree rhs1;
3413
3414   if (dump_file && (dump_flags & TDF_DETAILS))
3415     {
3416       fprintf (dump_file, "Transforming ");
3417       print_gimple_stmt (dump_file, stmt, 0, 0);
3418     }
3419
3420   rhs1 = gimple_assign_rhs1 (stmt);
3421   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
3422   update_stmt (stmt);
3423   remove_visited_stmt_chain (rhs1);
3424
3425   if (dump_file && (dump_flags & TDF_DETAILS))
3426     {
3427       fprintf (dump_file, " into ");
3428       print_gimple_stmt (dump_file, stmt, 0, 0);
3429     }
3430 }
3431
3432 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */
3433
3434 static void
3435 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
3436                             tree rhs1, tree rhs2)
3437 {
3438   if (dump_file && (dump_flags & TDF_DETAILS))
3439     {
3440       fprintf (dump_file, "Transforming ");
3441       print_gimple_stmt (dump_file, stmt, 0, 0);
3442     }
3443
3444   gimple_assign_set_rhs_with_ops_1 (gsi, MULT_EXPR, rhs1, rhs2, NULL_TREE);
3445   update_stmt (gsi_stmt (*gsi));
3446   remove_visited_stmt_chain (rhs1);
3447
3448   if (dump_file && (dump_flags & TDF_DETAILS))
3449     {
3450       fprintf (dump_file, " into ");
3451       print_gimple_stmt (dump_file, stmt, 0, 0);
3452     }
3453 }
3454
3455 /* Reassociate expressions in basic block BB and its post-dominator as
3456    children.  */
3457
3458 static void
3459 reassociate_bb (basic_block bb)
3460 {
3461   gimple_stmt_iterator gsi;
3462   basic_block son;
3463   tree target = NULL_TREE;
3464
3465   for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
3466     {
3467       gimple stmt = gsi_stmt (gsi);
3468
3469       if (is_gimple_assign (stmt)
3470           && !stmt_could_throw_p (stmt))
3471         {
3472           tree lhs, rhs1, rhs2;
3473           enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
3474
3475           /* If this is not a gimple binary expression, there is
3476              nothing for us to do with it.  */
3477           if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
3478             continue;
3479
3480           /* If this was part of an already processed statement,
3481              we don't need to touch it again. */
3482           if (gimple_visited_p (stmt))
3483             {
3484               /* This statement might have become dead because of previous
3485                  reassociations.  */
3486               if (has_zero_uses (gimple_get_lhs (stmt)))
3487                 {
3488                   gsi_remove (&gsi, true);
3489                   release_defs (stmt);
3490                   /* We might end up removing the last stmt above which
3491                      places the iterator to the end of the sequence.
3492                      Reset it to the last stmt in this case which might
3493                      be the end of the sequence as well if we removed
3494                      the last statement of the sequence.  In which case
3495                      we need to bail out.  */
3496                   if (gsi_end_p (gsi))
3497                     {
3498                       gsi = gsi_last_bb (bb);
3499                       if (gsi_end_p (gsi))
3500                         break;
3501                     }
3502                 }
3503               continue;
3504             }
3505
3506           lhs = gimple_assign_lhs (stmt);
3507           rhs1 = gimple_assign_rhs1 (stmt);
3508           rhs2 = gimple_assign_rhs2 (stmt);
3509
3510           /* For non-bit or min/max operations we can't associate
3511              all types.  Verify that here.  */
3512           if (rhs_code != BIT_IOR_EXPR
3513               && rhs_code != BIT_AND_EXPR
3514               && rhs_code != BIT_XOR_EXPR
3515               && rhs_code != MIN_EXPR
3516               && rhs_code != MAX_EXPR
3517               && (!can_reassociate_p (lhs)
3518                   || !can_reassociate_p (rhs1)
3519                   || !can_reassociate_p (rhs2)))
3520             continue;
3521
3522           if (associative_tree_code (rhs_code))
3523             {
3524               VEC(operand_entry_t, heap) *ops = NULL;
3525               tree powi_result = NULL_TREE;
3526
3527               /* There may be no immediate uses left by the time we
3528                  get here because we may have eliminated them all.  */
3529               if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
3530                 continue;
3531
3532               gimple_set_visited (stmt, true);
3533               linearize_expr_tree (&ops, stmt, true, true);
3534               VEC_qsort (operand_entry_t, ops, sort_by_operand_rank);
3535               optimize_ops_list (rhs_code, &ops);
3536               if (undistribute_ops_list (rhs_code, &ops,
3537                                          loop_containing_stmt (stmt)))
3538                 {
3539                   VEC_qsort (operand_entry_t, ops, sort_by_operand_rank);
3540                   optimize_ops_list (rhs_code, &ops);
3541                 }
3542
3543               if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
3544                 optimize_range_tests (rhs_code, &ops);
3545
3546               if (first_pass_instance
3547                   && rhs_code == MULT_EXPR
3548                   && flag_unsafe_math_optimizations)
3549                 powi_result = attempt_builtin_powi (stmt, &ops, &target);
3550
3551               /* If the operand vector is now empty, all operands were
3552                  consumed by the __builtin_powi optimization.  */
3553               if (VEC_length (operand_entry_t, ops) == 0)
3554                 transform_stmt_to_copy (&gsi, stmt, powi_result);
3555               else if (VEC_length (operand_entry_t, ops) == 1)
3556                 {
3557                   tree last_op = VEC_last (operand_entry_t, ops)->op;
3558
3559                   if (powi_result)
3560                     transform_stmt_to_multiply (&gsi, stmt, last_op,
3561                                                 powi_result);
3562                   else
3563                     transform_stmt_to_copy (&gsi, stmt, last_op);
3564                 }
3565               else
3566                 {
3567                   enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
3568                   int ops_num = VEC_length (operand_entry_t, ops);
3569                   int width = get_reassociation_width (ops_num, rhs_code, mode);
3570
3571                   if (dump_file && (dump_flags & TDF_DETAILS))
3572                     fprintf (dump_file,
3573                              "Width = %d was chosen for reassociation\n", width);
3574
3575                   if (width > 1
3576                       && VEC_length (operand_entry_t, ops) > 3)
3577                     rewrite_expr_tree_parallel (stmt, width, ops);
3578                   else
3579                     rewrite_expr_tree (stmt, 0, ops, false);
3580
3581                   /* If we combined some repeated factors into a
3582                      __builtin_powi call, multiply that result by the
3583                      reassociated operands.  */
3584                   if (powi_result)
3585                     {
3586                       gimple mul_stmt;
3587                       tree type = TREE_TYPE (gimple_get_lhs (stmt));
3588                       tree target_ssa = get_reassoc_pow_ssa_name (&target,
3589                                                                   type);
3590                       gimple_set_lhs (stmt, target_ssa);
3591                       update_stmt (stmt);
3592                       mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
3593                                                                powi_result,
3594                                                                target_ssa);
3595                       gimple_set_location (mul_stmt, gimple_location (stmt));
3596                       gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
3597                     }
3598                 }
3599
3600               VEC_free (operand_entry_t, heap, ops);
3601             }
3602         }
3603     }
3604   for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
3605        son;
3606        son = next_dom_son (CDI_POST_DOMINATORS, son))
3607     reassociate_bb (son);
3608 }
3609
3610 void dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops);
3611 void debug_ops_vector (VEC (operand_entry_t, heap) *ops);
3612
3613 /* Dump the operand entry vector OPS to FILE.  */
3614
3615 void
3616 dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops)
3617 {
3618   operand_entry_t oe;
3619   unsigned int i;
3620
3621   FOR_EACH_VEC_ELT (operand_entry_t, ops, i, oe)
3622     {
3623       fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
3624       print_generic_expr (file, oe->op, 0);
3625     }
3626 }
3627
3628 /* Dump the operand entry vector OPS to STDERR.  */
3629
3630 DEBUG_FUNCTION void
3631 debug_ops_vector (VEC (operand_entry_t, heap) *ops)
3632 {
3633   dump_ops_vector (stderr, ops);
3634 }
3635
3636 static void
3637 do_reassoc (void)
3638 {
3639   break_up_subtract_bb (ENTRY_BLOCK_PTR);
3640   reassociate_bb (EXIT_BLOCK_PTR);
3641 }
3642
3643 /* Initialize the reassociation pass.  */
3644
3645 static void
3646 init_reassoc (void)
3647 {
3648   int i;
3649   long rank = 2;
3650   tree param;
3651   int *bbs = XNEWVEC (int, last_basic_block + 1);
3652
3653   /* Find the loops, so that we can prevent moving calculations in
3654      them.  */
3655   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
3656
3657   memset (&reassociate_stats, 0, sizeof (reassociate_stats));
3658
3659   operand_entry_pool = create_alloc_pool ("operand entry pool",
3660                                           sizeof (struct operand_entry), 30);
3661   next_operand_entry_id = 0;
3662
3663   /* Reverse RPO (Reverse Post Order) will give us something where
3664      deeper loops come later.  */
3665   pre_and_rev_post_order_compute (NULL, bbs, false);
3666   bb_rank = XCNEWVEC (long, last_basic_block + 1);
3667   operand_rank = pointer_map_create ();
3668
3669   /* Give each argument a distinct rank.   */
3670   for (param = DECL_ARGUMENTS (current_function_decl);
3671        param;
3672        param = DECL_CHAIN (param))
3673     {
3674       if (gimple_default_def (cfun, param) != NULL)
3675         {
3676           tree def = gimple_default_def (cfun, param);
3677           insert_operand_rank (def, ++rank);
3678         }
3679     }
3680
3681   /* Give the chain decl a distinct rank. */
3682   if (cfun->static_chain_decl != NULL)
3683     {
3684       tree def = gimple_default_def (cfun, cfun->static_chain_decl);
3685       if (def != NULL)
3686         insert_operand_rank (def, ++rank);
3687     }
3688
3689   /* Set up rank for each BB  */
3690   for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
3691     bb_rank[bbs[i]] = ++rank  << 16;
3692
3693   free (bbs);
3694   calculate_dominance_info (CDI_POST_DOMINATORS);
3695   plus_negates = NULL;
3696 }
3697
3698 /* Cleanup after the reassociation pass, and print stats if
3699    requested.  */
3700
3701 static void
3702 fini_reassoc (void)
3703 {
3704   statistics_counter_event (cfun, "Linearized",
3705                             reassociate_stats.linearized);
3706   statistics_counter_event (cfun, "Constants eliminated",
3707                             reassociate_stats.constants_eliminated);
3708   statistics_counter_event (cfun, "Ops eliminated",
3709                             reassociate_stats.ops_eliminated);
3710   statistics_counter_event (cfun, "Statements rewritten",
3711                             reassociate_stats.rewritten);
3712   statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
3713                             reassociate_stats.pows_encountered);
3714   statistics_counter_event (cfun, "Built-in powi calls created",
3715                             reassociate_stats.pows_created);
3716
3717   pointer_map_destroy (operand_rank);
3718   free_alloc_pool (operand_entry_pool);
3719   free (bb_rank);
3720   VEC_free (tree, heap, plus_negates);
3721   free_dominance_info (CDI_POST_DOMINATORS);
3722   loop_optimizer_finalize ();
3723 }
3724
3725 /* Gate and execute functions for Reassociation.  */
3726
3727 static unsigned int
3728 execute_reassoc (void)
3729 {
3730   init_reassoc ();
3731
3732   do_reassoc ();
3733   repropagate_negates ();
3734
3735   fini_reassoc ();
3736   return 0;
3737 }
3738
3739 static bool
3740 gate_tree_ssa_reassoc (void)
3741 {
3742   return flag_tree_reassoc != 0;
3743 }
3744
3745 struct gimple_opt_pass pass_reassoc =
3746 {
3747  {
3748   GIMPLE_PASS,
3749   "reassoc",                            /* name */
3750   gate_tree_ssa_reassoc,                /* gate */
3751   execute_reassoc,                      /* execute */
3752   NULL,                                 /* sub */
3753   NULL,                                 /* next */
3754   0,                                    /* static_pass_number */
3755   TV_TREE_REASSOC,                      /* tv_id */
3756   PROP_cfg | PROP_ssa,                  /* properties_required */
3757   0,                                    /* properties_provided */
3758   0,                                    /* properties_destroyed */
3759   0,                                    /* todo_flags_start */
3760   TODO_verify_ssa
3761     | TODO_verify_flow
3762     | TODO_ggc_collect                  /* todo_flags_finish */
3763  }
3764 };