1 /* Reassociation for trees.
2 Copyright (C) 2005-2014 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "tree.h"
29 #include "stor-layout.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-inline.h"
33 #include "pointer-set.h"
34 #include "tree-ssa-alias.h"
35 #include "internal-fn.h"
36 #include "gimple-fold.h"
37 #include "tree-eh.h"
38 #include "gimple-expr.h"
39 #include "is-a.h"
40 #include "gimple.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "gimple-ssa.h"
44 #include "tree-cfg.h"
45 #include "tree-phinodes.h"
46 #include "ssa-iterators.h"
47 #include "stringpool.h"
48 #include "tree-ssanames.h"
49 #include "tree-ssa-loop-niter.h"
50 #include "tree-ssa-loop.h"
51 #include "expr.h"
52 #include "tree-dfa.h"
53 #include "tree-ssa.h"
54 #include "tree-iterator.h"
55 #include "tree-pass.h"
56 #include "alloc-pool.h"
57 #include "langhooks.h"
58 #include "cfgloop.h"
59 #include "flags.h"
60 #include "target.h"
61 #include "params.h"
62 #include "diagnostic-core.h"
64 /* This is a simple global reassociation pass. It is, in part, based
65 on the LLVM pass of the same name (They do some things more/less
66 than we do, in different orders, etc).
68 It consists of five steps:
70 1. Breaking up subtract operations into addition + negate, where
71 it would promote the reassociation of adds.
73 2. Left linearization of the expression trees, so that (A+B)+(C+D)
74 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
75 During linearization, we place the operands of the binary
76 expressions into a vector of operand_entry_t
78 3. Optimization of the operand lists, eliminating things like a +
79 -a, a & a, etc.
81 3a. Combine repeated factors with the same occurrence counts
82 into a __builtin_powi call that will later be optimized into
83 an optimal number of multiplies.
85 4. Rewrite the expression trees we linearized and optimized so
86 they are in proper rank order.
88 5. Repropagate negates, as nothing else will clean it up ATM.
90 A bit of theory on #4, since nobody seems to write anything down
91 about why it makes sense to do it the way they do it:
93 We could do this much nicer theoretically, but don't (for reasons
94 explained after how to do it theoretically nice :P).
96 In order to promote the most redundancy elimination, you want
97 binary expressions whose operands are the same rank (or
98 preferably, the same value) exposed to the redundancy eliminator,
99 for possible elimination.
101 So the way to do this if we really cared, is to build the new op
102 tree from the leaves to the roots, merging as you go, and putting the
103 new op on the end of the worklist, until you are left with one
104 thing on the worklist.
106 IE if you have to rewrite the following set of operands (listed with
107 rank in parentheses), with opcode PLUS_EXPR:
109 a (1), b (1), c (1), d (2), e (2)
112 We start with our merge worklist empty, and the ops list with all of
113 those on it.
115 You want to first merge all leaves of the same rank, as much as
116 possible.
118 So first build a binary op of
120 mergetmp = a + b, and put "mergetmp" on the merge worklist.
122 Because there is no three operand form of PLUS_EXPR, c is not going to
123 be exposed to redundancy elimination as a rank 1 operand.
125 So you might as well throw it on the merge worklist (you could also
126 consider it to now be a rank two operand, and merge it with d and e,
127 but in this case, you then have evicted e from a binary op. So at
128 least in this situation, you can't win.)
130 Then build a binary op of d + e
131 mergetmp2 = d + e
133 and put mergetmp2 on the merge worklist.
135 so merge worklist = {mergetmp, c, mergetmp2}
137 Continue building binary ops of these operations until you have only
138 one operation left on the worklist.
140 So we have
142 build binary op
143 mergetmp3 = mergetmp + c
145 worklist = {mergetmp2, mergetmp3}
147 mergetmp4 = mergetmp2 + mergetmp3
149 worklist = {mergetmp4}
151 because we have one operation left, we can now just set the original
152 statement equal to the result of that operation.
154 This will at least expose a + b and d + e to redundancy elimination
155 as binary operations.
157 For extra points, you can reuse the old statements to build the
158 mergetmps, since you shouldn't run out.
160 So why don't we do this?
162 Because it's expensive, and rarely will help. Most trees we are
163    reassociating have 3 or fewer ops.  If they have 2 ops, they already
164 will be written into a nice single binary op. If you have 3 ops, a
165 single simple check suffices to tell you whether the first two are of the
166 same rank. If so, you know to order it
168 mergetmp = op1 + op2
169 newstmt = mergetmp + op3
171 instead of
172 mergetmp = op2 + op3
173 newstmt = mergetmp + op1
175 If all three are of the same rank, you can't expose them all in a
176 single binary operator anyway, so the above is *still* the best you
177 can do.
179 Thus, this is what we do. When we have three ops left, we check to see
180 what order to put them in, and call it a day. As a nod to vector sum
181 reduction, we check if any of the ops are really a phi node that is a
182 destructive update for the associating op, and keep the destructive
183 update together for vector sum reduction recognition. */
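
/* A small end-to-end illustration of the five steps above (hypothetical
   input, shown as source rather than GIMPLE).  Given

     t = (a - b) + (c + b)

   step 1 turns the subtract into an addition of a negate, step 2
   linearizes the tree into an operand list, and step 3 spots the
   b + -b pair in that list:

     ops = {a, -b, c, b}  ->  ops = {a, c, 0}  ->  ops = {a, c}

   Steps 4 and 5 then rewrite the statement as t = a + c in rank order,
   with no negate left to repropagate.  */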
186 /* Statistics */
187 static struct
189 int linearized;
190 int constants_eliminated;
191 int ops_eliminated;
192 int rewritten;
193 int pows_encountered;
194 int pows_created;
195 } reassociate_stats;
197 /* Operator, rank pair. */
198 typedef struct operand_entry
200 unsigned int rank;
201 int id;
202 tree op;
203 unsigned int count;
204 } *operand_entry_t;
206 static alloc_pool operand_entry_pool;
208 /* This is used to assign a unique ID to each struct operand_entry
209 so that qsort results are identical on different hosts. */
210 static int next_operand_entry_id;
212 /* Starting rank number for a given basic block, so that we can rank
213 operations using unmovable instructions in that BB based on the bb
214 depth. */
215 static long *bb_rank;
217 /* Operand->rank hashtable. */
218 static struct pointer_map_t *operand_rank;
220 /* Forward decls. */
221 static long get_rank (tree);
224 /* Bias amount for loop-carried phis. We want this to be larger than
225 the depth of any reassociation tree we can see, but not larger than
226 the rank difference between two blocks. */
227 #define PHI_LOOP_BIAS (1 << 15)
229 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
230 an innermost loop, and the phi has only a single use which is inside
231 the loop, then the rank is the block rank of the loop latch plus an
232 extra bias for the loop-carried dependence. This causes expressions
233 calculated into an accumulator variable to be independent for each
234 iteration of the loop. If STMT is some other phi, the rank is the
235 block rank of its containing block. */
236 static long
237 phi_rank (gimple stmt)
239 basic_block bb = gimple_bb (stmt);
240 struct loop *father = bb->loop_father;
241 tree res;
242 unsigned i;
243 use_operand_p use;
244 gimple use_stmt;
246 /* We only care about real loops (those with a latch). */
247 if (!father->latch)
248 return bb_rank[bb->index];
250 /* Interesting phis must be in headers of innermost loops. */
251 if (bb != father->header
252 || father->inner)
253 return bb_rank[bb->index];
255 /* Ignore virtual SSA_NAMEs. */
256 res = gimple_phi_result (stmt);
257 if (virtual_operand_p (res))
258 return bb_rank[bb->index];
260 /* The phi definition must have a single use, and that use must be
261 within the loop. Otherwise this isn't an accumulator pattern. */
262 if (!single_imm_use (res, &use, &use_stmt)
263 || gimple_bb (use_stmt)->loop_father != father)
264 return bb_rank[bb->index];
266 /* Look for phi arguments from within the loop. If found, bias this phi. */
267 for (i = 0; i < gimple_phi_num_args (stmt); i++)
269 tree arg = gimple_phi_arg_def (stmt, i);
270 if (TREE_CODE (arg) == SSA_NAME
271 && !SSA_NAME_IS_DEFAULT_DEF (arg))
273 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
274 if (gimple_bb (def_stmt)->loop_father == father)
275 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
279 /* Must be an uninteresting phi. */
280 return bb_rank[bb->index];
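
/* Sketch of the case the bias above targets (hypothetical GIMPLE,
   names made up).  In the header of an innermost loop:

     sum_1 = PHI <0.0(preheader), sum_2(latch)>
     ...
     sum_2 = sum_1 + a_i

   sum_1 is defined by a phi in the loop header, has a single use inside
   the loop, and receives an argument defined in the loop, so its rank is
   bb_rank[latch->index] + PHI_LOOP_BIAS rather than the header's block
   rank.  A phi that fails any of these checks just gets bb_rank.  */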
283 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
284 loop-carried dependence of an innermost loop, return TRUE; else
285 return FALSE. */
286 static bool
287 loop_carried_phi (tree exp)
289 gimple phi_stmt;
290 long block_rank;
292 if (TREE_CODE (exp) != SSA_NAME
293 || SSA_NAME_IS_DEFAULT_DEF (exp))
294 return false;
296 phi_stmt = SSA_NAME_DEF_STMT (exp);
298 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
299 return false;
301 /* Non-loop-carried phis have block rank. Loop-carried phis have
302 an additional bias added in. If this phi doesn't have block rank,
303 it's biased and should not be propagated. */
304 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
306 if (phi_rank (phi_stmt) != block_rank)
307 return true;
309 return false;
312 /* Return the maximum of RANK and the rank that should be propagated
313 from expression OP. For most operands, this is just the rank of OP.
314 For loop-carried phis, the value is zero to avoid undoing the bias
315 in favor of the phi. */
316 static long
317 propagate_rank (long rank, tree op)
319 long op_rank;
321 if (loop_carried_phi (op))
322 return rank;
324 op_rank = get_rank (op);
326 return MAX (rank, op_rank);
329 /* Look up the operand rank structure for expression E. */
331 static inline long
332 find_operand_rank (tree e)
334 void **slot = pointer_map_contains (operand_rank, e);
335 return slot ? (long) (intptr_t) *slot : -1;
338 /* Insert {E,RANK} into the operand rank hashtable. */
340 static inline void
341 insert_operand_rank (tree e, long rank)
343 void **slot;
344 gcc_assert (rank > 0);
345 slot = pointer_map_insert (operand_rank, e);
346 gcc_assert (!*slot);
347 *slot = (void *) (intptr_t) rank;
350 /* Given an expression E, return the rank of the expression. */
352 static long
353 get_rank (tree e)
355 /* Constants have rank 0. */
356 if (is_gimple_min_invariant (e))
357 return 0;
359      SSA_NAME's have the rank of the expression they are the result of.
361 For globals and uninitialized values, the rank is 0.
362 For function arguments, use the pre-setup rank.
363 For PHI nodes, stores, asm statements, etc, we use the rank of
364 the BB.
365 For simple operations, the rank is the maximum rank of any of
366 its operands, or the bb_rank, whichever is less.
367 I make no claims that this is optimal, however, it gives good
368 results. */
370 /* We make an exception to the normal ranking system to break
371 dependences of accumulator variables in loops. Suppose we
372 have a simple one-block loop containing:
374 x_1 = phi(x_0, x_2)
375 b = a + x_1
376 c = b + d
377 x_2 = c + e
379 As shown, each iteration of the calculation into x is fully
380 dependent upon the iteration before it. We would prefer to
381 see this in the form:
383 x_1 = phi(x_0, x_2)
384 b = a + d
385 c = b + e
386 x_2 = c + x_1
388 If the loop is unrolled, the calculations of b and c from
389 different iterations can be interleaved.
391 To obtain this result during reassociation, we bias the rank
392 of the phi definition x_1 upward, when it is recognized as an
393 accumulator pattern. The artificial rank causes it to be
394 added last, providing the desired independence. */
396 if (TREE_CODE (e) == SSA_NAME)
398 gimple stmt;
399 long rank;
400 int i, n;
401 tree op;
403 if (SSA_NAME_IS_DEFAULT_DEF (e))
404 return find_operand_rank (e);
406 stmt = SSA_NAME_DEF_STMT (e);
407 if (gimple_code (stmt) == GIMPLE_PHI)
408 return phi_rank (stmt);
410 if (!is_gimple_assign (stmt)
411 || gimple_vdef (stmt))
412 return bb_rank[gimple_bb (stmt)->index];
414 /* If we already have a rank for this expression, use that. */
415 rank = find_operand_rank (e);
416 if (rank != -1)
417 return rank;
419 /* Otherwise, find the maximum rank for the operands. As an
420 exception, remove the bias from loop-carried phis when propagating
421 the rank so that dependent operations are not also biased. */
422 rank = 0;
423 if (gimple_assign_single_p (stmt))
425 tree rhs = gimple_assign_rhs1 (stmt);
426 n = TREE_OPERAND_LENGTH (rhs);
427 if (n == 0)
428 rank = propagate_rank (rank, rhs);
429 else
431 for (i = 0; i < n; i++)
433 op = TREE_OPERAND (rhs, i);
435 if (op != NULL_TREE)
436 rank = propagate_rank (rank, op);
440 else
442 n = gimple_num_ops (stmt);
443 for (i = 1; i < n; i++)
445 op = gimple_op (stmt, i);
446 gcc_assert (op);
447 rank = propagate_rank (rank, op);
451 if (dump_file && (dump_flags & TDF_DETAILS))
453 fprintf (dump_file, "Rank for ");
454 print_generic_expr (dump_file, e, 0);
455 fprintf (dump_file, " is %ld\n", (rank + 1));
458 /* Note the rank in the hashtable so we don't recompute it. */
459 insert_operand_rank (e, (rank + 1));
460 return (rank + 1);
463 /* Globals, etc, are rank 0 */
464 return 0;
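
/* Rough example of the resulting ranks (hypothetical names; assume a_1
   and b_2 already carry ranks 1 and 2, e.g. as pre-ranked arguments):

     t_3 = a_1 + b_2;   rank 3  (one more than its highest-ranked operand)
     t_4 = t_3 * a_1;   rank 4
     t_5 = t_4 + 7;     rank 5  (the constant 7 itself has rank 0)

   Operands that are loop-carried phis are ignored when taking the
   maximum (see propagate_rank), so the PHI_LOOP_BIAS is not propagated
   to dependent statements.  */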
468 /* We want integer ones to end up last no matter what, since they are
469 the ones we can do the most with. */
470 #define INTEGER_CONST_TYPE 1 << 3
471 #define FLOAT_CONST_TYPE 1 << 2
472 #define OTHER_CONST_TYPE 1 << 1
474 /* Classify an invariant tree into integer, float, or other, so that
475 we can sort them to be near other constants of the same type. */
476 static inline int
477 constant_type (tree t)
479 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
480 return INTEGER_CONST_TYPE;
481 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
482 return FLOAT_CONST_TYPE;
483 else
484 return OTHER_CONST_TYPE;
487 /* qsort comparison function to sort operand entries PA and PB by rank
488 so that the sorted array is ordered by rank in decreasing order. */
489 static int
490 sort_by_operand_rank (const void *pa, const void *pb)
492 const operand_entry_t oea = *(const operand_entry_t *)pa;
493 const operand_entry_t oeb = *(const operand_entry_t *)pb;
495 /* It's nicer for optimize_expression if constants that are likely
496      to fold when added/multiplied/whatever are put next to each
497 other. Since all constants have rank 0, order them by type. */
498 if (oeb->rank == 0 && oea->rank == 0)
500 if (constant_type (oeb->op) != constant_type (oea->op))
501 return constant_type (oeb->op) - constant_type (oea->op);
502 else
503 /* To make sorting result stable, we use unique IDs to determine
504 order. */
505 return oeb->id - oea->id;
508 /* Lastly, make sure the versions that are the same go next to each
509 other. We use SSA_NAME_VERSION because it's stable. */
510 if ((oeb->rank - oea->rank == 0)
511 && TREE_CODE (oea->op) == SSA_NAME
512 && TREE_CODE (oeb->op) == SSA_NAME)
514 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
515 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
516 else
517 return oeb->id - oea->id;
520 if (oeb->rank != oea->rank)
521 return oeb->rank - oea->rank;
522 else
523 return oeb->id - oea->id;
526 /* Add an operand entry to *OPS for the tree operand OP. */
528 static void
529 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
531 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
533 oe->op = op;
534 oe->rank = get_rank (op);
535 oe->id = next_operand_entry_id++;
536 oe->count = 1;
537 ops->safe_push (oe);
540 /* Add an operand entry to *OPS for the tree operand OP with repeat
541 count REPEAT. */
543 static void
544 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
545 HOST_WIDE_INT repeat)
547 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
549 oe->op = op;
550 oe->rank = get_rank (op);
551 oe->id = next_operand_entry_id++;
552 oe->count = repeat;
553 ops->safe_push (oe);
555 reassociate_stats.pows_encountered++;
558 /* Return true if STMT is a reassociable operation containing a binary
559 operation with tree code CODE, and is inside LOOP. */
561 static bool
562 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
564 basic_block bb = gimple_bb (stmt);
566 if (gimple_bb (stmt) == NULL)
567 return false;
569 if (!flow_bb_inside_loop_p (loop, bb))
570 return false;
572 if (is_gimple_assign (stmt)
573 && gimple_assign_rhs_code (stmt) == code
574 && has_single_use (gimple_assign_lhs (stmt)))
575 return true;
577 return false;
581 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
582    operand of that operation.  Otherwise, return NULL.  */
584 static tree
585 get_unary_op (tree name, enum tree_code opcode)
587 gimple stmt = SSA_NAME_DEF_STMT (name);
589 if (!is_gimple_assign (stmt))
590 return NULL_TREE;
592 if (gimple_assign_rhs_code (stmt) == opcode)
593 return gimple_assign_rhs1 (stmt);
594 return NULL_TREE;
597 /* If CURR and LAST are a pair of ops that OPCODE allows us to
598 eliminate through equivalences, do so, remove them from OPS, and
599 return true. Otherwise, return false. */
601 static bool
602 eliminate_duplicate_pair (enum tree_code opcode,
603 vec<operand_entry_t> *ops,
604 bool *all_done,
605 unsigned int i,
606 operand_entry_t curr,
607 operand_entry_t last)
610   /* If we have two of the same op, and the opcode is &, |, min, or max,
611 we can eliminate one of them.
612 If we have two of the same op, and the opcode is ^, we can
613 eliminate both of them. */
615 if (last && last->op == curr->op)
617 switch (opcode)
619 case MAX_EXPR:
620 case MIN_EXPR:
621 case BIT_IOR_EXPR:
622 case BIT_AND_EXPR:
623 if (dump_file && (dump_flags & TDF_DETAILS))
625 fprintf (dump_file, "Equivalence: ");
626 print_generic_expr (dump_file, curr->op, 0);
627 fprintf (dump_file, " [&|minmax] ");
628 print_generic_expr (dump_file, last->op, 0);
629 fprintf (dump_file, " -> ");
630 print_generic_stmt (dump_file, last->op, 0);
633 ops->ordered_remove (i);
634 reassociate_stats.ops_eliminated ++;
636 return true;
638 case BIT_XOR_EXPR:
639 if (dump_file && (dump_flags & TDF_DETAILS))
641 fprintf (dump_file, "Equivalence: ");
642 print_generic_expr (dump_file, curr->op, 0);
643 fprintf (dump_file, " ^ ");
644 print_generic_expr (dump_file, last->op, 0);
645 fprintf (dump_file, " -> nothing\n");
648 reassociate_stats.ops_eliminated += 2;
650 if (ops->length () == 2)
652 ops->create (0);
653 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
654 *all_done = true;
656 else
658 ops->ordered_remove (i-1);
659 ops->ordered_remove (i-1);
662 return true;
664 default:
665 break;
668 return false;
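
/* Illustration of the duplicate-pair cases above (hypothetical operand
   lists with the duplicates adjacent after sorting):

     ops = {a, a, b}  with opcode BIT_AND_EXPR  ->  ops = {a, b}
     ops = {a, a, b}  with opcode BIT_XOR_EXPR  ->  ops = {b}
     ops = {a, a}     with opcode BIT_XOR_EXPR  ->  ops = {0}

   i.e. a & a keeps one copy, a ^ a drops both copies, and if nothing is
   left the whole expression collapses to zero of the operand's type.  */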
671 static vec<tree> plus_negates;
673 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
674 expression, look in OPS for a corresponding positive operation to cancel
675 it out. If we find one, remove the other from OPS, replace
676 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
677 return false. */
679 static bool
680 eliminate_plus_minus_pair (enum tree_code opcode,
681 vec<operand_entry_t> *ops,
682 unsigned int currindex,
683 operand_entry_t curr)
685 tree negateop;
686 tree notop;
687 unsigned int i;
688 operand_entry_t oe;
690 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
691 return false;
693 negateop = get_unary_op (curr->op, NEGATE_EXPR);
694 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
695 if (negateop == NULL_TREE && notop == NULL_TREE)
696 return false;
698 /* Any non-negated version will have a rank that is one less than
699 the current rank. So once we hit those ranks, if we don't find
700 one, we can stop. */
702 for (i = currindex + 1;
703 ops->iterate (i, &oe)
704 && oe->rank >= curr->rank - 1 ;
705 i++)
707 if (oe->op == negateop)
710 if (dump_file && (dump_flags & TDF_DETAILS))
712 fprintf (dump_file, "Equivalence: ");
713 print_generic_expr (dump_file, negateop, 0);
714 fprintf (dump_file, " + -");
715 print_generic_expr (dump_file, oe->op, 0);
716 fprintf (dump_file, " -> 0\n");
719 ops->ordered_remove (i);
720 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
721 ops->ordered_remove (currindex);
722 reassociate_stats.ops_eliminated ++;
724 return true;
726 else if (oe->op == notop)
728 tree op_type = TREE_TYPE (oe->op);
730 if (dump_file && (dump_flags & TDF_DETAILS))
732 fprintf (dump_file, "Equivalence: ");
733 print_generic_expr (dump_file, notop, 0);
734 fprintf (dump_file, " + ~");
735 print_generic_expr (dump_file, oe->op, 0);
736 fprintf (dump_file, " -> -1\n");
739 ops->ordered_remove (i);
740 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
741 ops->ordered_remove (currindex);
742 reassociate_stats.ops_eliminated ++;
744 return true;
748 /* CURR->OP is a negate expr in a plus expr: save it for later
749 inspection in repropagate_negates(). */
750 if (negateop != NULL_TREE)
751 plus_negates.safe_push (curr->op);
753 return false;
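
/* Illustration (hypothetical SSA names; t_2 defined as -x_1 and n_3 as
   ~x_1):

     ops = {t_2, x_1, y}  ->  ops = {y, 0}     since -x_1 + x_1 == 0
     ops = {n_3, x_1, y}  ->  ops = {y, -1}    since ~x_1 + x_1 == -1

   When no matching positive operand is found, a negate is merely
   recorded in plus_negates for repropagate_negates to inspect later.  */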
756 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
757 bitwise not expression, look in OPS for a corresponding operand to
758 cancel it out. If we find one, remove the other from OPS, replace
759 OPS[CURRINDEX] with 0, and return true. Otherwise, return
760 false. */
762 static bool
763 eliminate_not_pairs (enum tree_code opcode,
764 vec<operand_entry_t> *ops,
765 unsigned int currindex,
766 operand_entry_t curr)
768 tree notop;
769 unsigned int i;
770 operand_entry_t oe;
772 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
773 || TREE_CODE (curr->op) != SSA_NAME)
774 return false;
776 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
777 if (notop == NULL_TREE)
778 return false;
780 /* Any non-not version will have a rank that is one less than
781 the current rank. So once we hit those ranks, if we don't find
782 one, we can stop. */
784 for (i = currindex + 1;
785 ops->iterate (i, &oe)
786 && oe->rank >= curr->rank - 1;
787 i++)
789 if (oe->op == notop)
791 if (dump_file && (dump_flags & TDF_DETAILS))
793 fprintf (dump_file, "Equivalence: ");
794 print_generic_expr (dump_file, notop, 0);
795 if (opcode == BIT_AND_EXPR)
796 fprintf (dump_file, " & ~");
797 else if (opcode == BIT_IOR_EXPR)
798 fprintf (dump_file, " | ~");
799 print_generic_expr (dump_file, oe->op, 0);
800 if (opcode == BIT_AND_EXPR)
801 fprintf (dump_file, " -> 0\n");
802 else if (opcode == BIT_IOR_EXPR)
803 fprintf (dump_file, " -> -1\n");
806 if (opcode == BIT_AND_EXPR)
807 oe->op = build_zero_cst (TREE_TYPE (oe->op));
808 else if (opcode == BIT_IOR_EXPR)
809 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
810 TYPE_PRECISION (TREE_TYPE (oe->op)));
812 reassociate_stats.ops_eliminated += ops->length () - 1;
813 ops->truncate (0);
814 ops->quick_push (oe);
815 return true;
819 return false;
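
/* Illustration (hypothetical operands; n_2 defined as ~x_1).  Unlike the
   pairwise eliminations above, finding x & ~x or x | ~x collapses the
   whole operand list:

     ops = {n_2, x_1, y, z}  with BIT_AND_EXPR  ->  ops = {0}
     ops = {n_2, x_1, y, z}  with BIT_IOR_EXPR  ->  ops = {all-ones mask}

   because anything ANDed with zero is zero and anything ORed with all
   ones is all ones.  */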
822 /* Use constant value that may be present in OPS to try to eliminate
823 operands. Note that this function is only really used when we've
824 eliminated ops for other reasons, or merged constants. Across
825 single statements, fold already does all of this, plus more. There
826 is little point in duplicating logic, so I've only included the
827 identities that I could ever construct testcases to trigger. */
829 static void
830 eliminate_using_constants (enum tree_code opcode,
831 vec<operand_entry_t> *ops)
833 operand_entry_t oelast = ops->last ();
834 tree type = TREE_TYPE (oelast->op);
836 if (oelast->rank == 0
837 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
839 switch (opcode)
841 case BIT_AND_EXPR:
842 if (integer_zerop (oelast->op))
844 if (ops->length () != 1)
846 if (dump_file && (dump_flags & TDF_DETAILS))
847 fprintf (dump_file, "Found & 0, removing all other ops\n");
849 reassociate_stats.ops_eliminated += ops->length () - 1;
851 ops->truncate (0);
852 ops->quick_push (oelast);
853 return;
856 else if (integer_all_onesp (oelast->op))
858 if (ops->length () != 1)
860 if (dump_file && (dump_flags & TDF_DETAILS))
861 fprintf (dump_file, "Found & -1, removing\n");
862 ops->pop ();
863 reassociate_stats.ops_eliminated++;
866 break;
867 case BIT_IOR_EXPR:
868 if (integer_all_onesp (oelast->op))
870 if (ops->length () != 1)
872 if (dump_file && (dump_flags & TDF_DETAILS))
873 fprintf (dump_file, "Found | -1, removing all other ops\n");
875 reassociate_stats.ops_eliminated += ops->length () - 1;
877 ops->truncate (0);
878 ops->quick_push (oelast);
879 return;
882 else if (integer_zerop (oelast->op))
884 if (ops->length () != 1)
886 if (dump_file && (dump_flags & TDF_DETAILS))
887 fprintf (dump_file, "Found | 0, removing\n");
888 ops->pop ();
889 reassociate_stats.ops_eliminated++;
892 break;
893 case MULT_EXPR:
894 if (integer_zerop (oelast->op)
895 || (FLOAT_TYPE_P (type)
896 && !HONOR_NANS (TYPE_MODE (type))
897 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
898 && real_zerop (oelast->op)))
900 if (ops->length () != 1)
902 if (dump_file && (dump_flags & TDF_DETAILS))
903 fprintf (dump_file, "Found * 0, removing all other ops\n");
905 reassociate_stats.ops_eliminated += ops->length () - 1;
906 ops->truncate (1);
907 ops->quick_push (oelast);
908 return;
911 else if (integer_onep (oelast->op)
912 || (FLOAT_TYPE_P (type)
913 && !HONOR_SNANS (TYPE_MODE (type))
914 && real_onep (oelast->op)))
916 if (ops->length () != 1)
918 if (dump_file && (dump_flags & TDF_DETAILS))
919 fprintf (dump_file, "Found * 1, removing\n");
920 ops->pop ();
921 reassociate_stats.ops_eliminated++;
922 return;
925 break;
926 case BIT_XOR_EXPR:
927 case PLUS_EXPR:
928 case MINUS_EXPR:
929 if (integer_zerop (oelast->op)
930 || (FLOAT_TYPE_P (type)
931 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
932 && fold_real_zero_addition_p (type, oelast->op,
933 opcode == MINUS_EXPR)))
935 if (ops->length () != 1)
937 if (dump_file && (dump_flags & TDF_DETAILS))
938 fprintf (dump_file, "Found [|^+] 0, removing\n");
939 ops->pop ();
940 reassociate_stats.ops_eliminated++;
941 return;
944 break;
945 default:
946 break;
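
/* Illustration of the constant identities handled above (hypothetical
   expressions, with the trailing constant being oelast):

     a & b & 0   ->  0          a & b & -1  ->  a & b
     a | b | -1  ->  -1         a | b | 0   ->  a | b
     a * b * 0   ->  0          a * b * 1   ->  a * b
     a + b + 0   ->  a + b      a ^ b ^ 0   ->  a ^ b

   The floating-point forms of these identities are only used when the
   HONOR_* checks above say they are safe.  */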
952 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
953 bool, bool);
955 /* Structure for tracking and counting operands. */
956 typedef struct oecount_s {
957 int cnt;
958 int id;
959 enum tree_code oecode;
960 tree op;
961 } oecount;
964 /* The heap for the oecount hashtable and the sorted list of operands. */
965 static vec<oecount> cvec;
968 /* Oecount hashtable helpers. */
970 struct oecount_hasher : typed_noop_remove <void>
972 /* Note that this hash table stores integers, not pointers.
973 So, observe the casting in the member functions. */
974 typedef void value_type;
975 typedef void compare_type;
976 static inline hashval_t hash (const value_type *);
977 static inline bool equal (const value_type *, const compare_type *);
980 /* Hash function for oecount. */
982 inline hashval_t
983 oecount_hasher::hash (const value_type *p)
985 const oecount *c = &cvec[(size_t)p - 42];
986 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
989 /* Comparison function for oecount. */
991 inline bool
992 oecount_hasher::equal (const value_type *p1, const compare_type *p2)
994 const oecount *c1 = &cvec[(size_t)p1 - 42];
995 const oecount *c2 = &cvec[(size_t)p2 - 42];
996 return (c1->oecode == c2->oecode
997 && c1->op == c2->op);
1000 /* Comparison function for qsort sorting oecount elements by count. */
1002 static int
1003 oecount_cmp (const void *p1, const void *p2)
1005 const oecount *c1 = (const oecount *)p1;
1006 const oecount *c2 = (const oecount *)p2;
1007 if (c1->cnt != c2->cnt)
1008 return c1->cnt - c2->cnt;
1009 else
1010 /* If counts are identical, use unique IDs to stabilize qsort. */
1011 return c1->id - c2->id;
1014 /* Return TRUE iff STMT represents a builtin call that raises OP
1015 to some exponent. */
1017 static bool
1018 stmt_is_power_of_op (gimple stmt, tree op)
1020 tree fndecl;
1022 if (!is_gimple_call (stmt))
1023 return false;
1025 fndecl = gimple_call_fndecl (stmt);
1027 if (!fndecl
1028 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1029 return false;
1031 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1033 CASE_FLT_FN (BUILT_IN_POW):
1034 CASE_FLT_FN (BUILT_IN_POWI):
1035 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1037 default:
1038 return false;
1042 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1043 in place and return the result. Assumes that stmt_is_power_of_op
1044 was previously called for STMT and returned TRUE. */
1046 static HOST_WIDE_INT
1047 decrement_power (gimple stmt)
1049 REAL_VALUE_TYPE c, cint;
1050 HOST_WIDE_INT power;
1051 tree arg1;
1053 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1055 CASE_FLT_FN (BUILT_IN_POW):
1056 arg1 = gimple_call_arg (stmt, 1);
1057 c = TREE_REAL_CST (arg1);
1058 power = real_to_integer (&c) - 1;
1059 real_from_integer (&cint, VOIDmode, power, 0, 0);
1060 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1061 return power;
1063 CASE_FLT_FN (BUILT_IN_POWI):
1064 arg1 = gimple_call_arg (stmt, 1);
1065 power = TREE_INT_CST_LOW (arg1) - 1;
1066 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1067 return power;
1069 default:
1070 gcc_unreachable ();
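
/* Illustration (hypothetical call): for a statement

     t_1 = __builtin_powi (x_2, 4);

   decrement_power rewrites the call to __builtin_powi (x_2, 3) in place
   and returns 3; zero_one_operation below uses that to peel one factor
   of x_2 out of a multiplication chain.  */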
1074 /* Find the single immediate use of STMT's LHS, and replace it
1075 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1076 replace *DEF with OP as well. */
1078 static void
1079 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1081 tree lhs;
1082 gimple use_stmt;
1083 use_operand_p use;
1084 gimple_stmt_iterator gsi;
1086 if (is_gimple_call (stmt))
1087 lhs = gimple_call_lhs (stmt);
1088 else
1089 lhs = gimple_assign_lhs (stmt);
1091 gcc_assert (has_single_use (lhs));
1092 single_imm_use (lhs, &use, &use_stmt);
1093 if (lhs == *def)
1094 *def = op;
1095 SET_USE (use, op);
1096 if (TREE_CODE (op) != SSA_NAME)
1097 update_stmt (use_stmt);
1098 gsi = gsi_for_stmt (stmt);
1099 unlink_stmt_vdef (stmt);
1100 gsi_remove (&gsi, true);
1101 release_defs (stmt);
1104 /* Walks the linear chain with result *DEF searching for an operation
1105 with operand OP and code OPCODE removing that from the chain. *DEF
1106 is updated if there is only one operand but no operation left. */
1108 static void
1109 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1111 gimple stmt = SSA_NAME_DEF_STMT (*def);
1115 tree name;
1117 if (opcode == MULT_EXPR
1118 && stmt_is_power_of_op (stmt, op))
1120 if (decrement_power (stmt) == 1)
1121 propagate_op_to_single_use (op, stmt, def);
1122 return;
1125 name = gimple_assign_rhs1 (stmt);
1127 /* If this is the operation we look for and one of the operands
1128 is ours simply propagate the other operand into the stmts
1129 single use. */
1130 if (gimple_assign_rhs_code (stmt) == opcode
1131 && (name == op
1132 || gimple_assign_rhs2 (stmt) == op))
1134 if (name == op)
1135 name = gimple_assign_rhs2 (stmt);
1136 propagate_op_to_single_use (name, stmt, def);
1137 return;
1140 /* We might have a multiply of two __builtin_pow* calls, and
1141 the operand might be hiding in the rightmost one. */
1142 if (opcode == MULT_EXPR
1143 && gimple_assign_rhs_code (stmt) == opcode
1144 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1145 && has_single_use (gimple_assign_rhs2 (stmt)))
1147 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1148 if (stmt_is_power_of_op (stmt2, op))
1150 if (decrement_power (stmt2) == 1)
1151 propagate_op_to_single_use (op, stmt2, def);
1152 return;
1156 /* Continue walking the chain. */
1157 gcc_assert (name != op
1158 && TREE_CODE (name) == SSA_NAME);
1159 stmt = SSA_NAME_DEF_STMT (name);
1161 while (1);
1164 /* Returns true if statement S1 dominates statement S2. Like
1165 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1167 static bool
1168 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1170 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1172 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1173 SSA_NAME. Assume it lives at the beginning of function and
1174 thus dominates everything. */
1175 if (!bb1 || s1 == s2)
1176 return true;
1178 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1179 if (!bb2)
1180 return false;
1182 if (bb1 == bb2)
1184 /* PHIs in the same basic block are assumed to be
1185 executed all in parallel, if only one stmt is a PHI,
1186 it dominates the other stmt in the same basic block. */
1187 if (gimple_code (s1) == GIMPLE_PHI)
1188 return true;
1190 if (gimple_code (s2) == GIMPLE_PHI)
1191 return false;
1193 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1195 if (gimple_uid (s1) < gimple_uid (s2))
1196 return true;
1198 if (gimple_uid (s1) > gimple_uid (s2))
1199 return false;
1201 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1202 unsigned int uid = gimple_uid (s1);
1203 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1205 gimple s = gsi_stmt (gsi);
1206 if (gimple_uid (s) != uid)
1207 break;
1208 if (s == s2)
1209 return true;
1212 return false;
1215 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1218 /* Insert STMT after INSERT_POINT. */
1220 static void
1221 insert_stmt_after (gimple stmt, gimple insert_point)
1223 gimple_stmt_iterator gsi;
1224 basic_block bb;
1226 if (gimple_code (insert_point) == GIMPLE_PHI)
1227 bb = gimple_bb (insert_point);
1228 else if (!stmt_ends_bb_p (insert_point))
1230 gsi = gsi_for_stmt (insert_point);
1231 gimple_set_uid (stmt, gimple_uid (insert_point));
1232 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1233 return;
1235 else
1236 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1237 thus if it must end a basic block, it should be a call that can
1238 throw, or some assignment that can throw. If it throws, the LHS
1239 of it will not be initialized though, so only valid places using
1240 the SSA_NAME should be dominated by the fallthru edge. */
1241 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1242 gsi = gsi_after_labels (bb);
1243 if (gsi_end_p (gsi))
1245 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1246 gimple_set_uid (stmt,
1247 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1249 else
1250 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1251 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1254 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1255 the result. Places the statement after the definition of either
1256 OP1 or OP2. Returns the new statement. */
1258 static gimple
1259 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1261 gimple op1def = NULL, op2def = NULL;
1262 gimple_stmt_iterator gsi;
1263 tree op;
1264 gimple sum;
1266 /* Create the addition statement. */
1267 op = make_ssa_name (type, NULL);
1268 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1270 /* Find an insertion place and insert. */
1271 if (TREE_CODE (op1) == SSA_NAME)
1272 op1def = SSA_NAME_DEF_STMT (op1);
1273 if (TREE_CODE (op2) == SSA_NAME)
1274 op2def = SSA_NAME_DEF_STMT (op2);
1275 if ((!op1def || gimple_nop_p (op1def))
1276 && (!op2def || gimple_nop_p (op2def)))
1278 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1279 if (gsi_end_p (gsi))
1281 gimple_stmt_iterator gsi2
1282 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1283 gimple_set_uid (sum,
1284 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1286 else
1287 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1288 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1290 else
1292 gimple insert_point;
1293 if ((!op1def || gimple_nop_p (op1def))
1294 || (op2def && !gimple_nop_p (op2def)
1295 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1296 insert_point = op2def;
1297 else
1298 insert_point = op1def;
1299 insert_stmt_after (sum, insert_point);
1301 update_stmt (sum);
1303 return sum;
1306 /* Perform un-distribution of divisions and multiplications.
1307 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1308 to (A + B) / X for real X.
1310 The algorithm is organized as follows.
1312 - First we walk the addition chain *OPS looking for summands that
1313 are defined by a multiplication or a real division. This results
1314 in the candidates bitmap with relevant indices into *OPS.
1316 - Second we build the chains of multiplications or divisions for
1317 these candidates, counting the number of occurrences of (operand, code)
1318 pairs in all of the candidates chains.
1320 - Third we sort the (operand, code) pairs by number of occurrence and
1321 process them starting with the pair with the most uses.
1323 * For each such pair we walk the candidates again to build a
1324 second candidate bitmap noting all multiplication/division chains
1325 that have at least one occurrence of (operand, code).
1327 * We build an alternate addition chain only covering these
1328 candidates with one (operand, code) operation removed from their
1329 multiplication/division chain.
1331 * The first candidate gets replaced by the alternate addition chain
1332 multiplied/divided by the operand.
1334 * All candidate chains get disabled for further processing and
1335 processing of (operand, code) pairs continues.
1337 The alternate addition chains built are re-processed by the main
1338 reassociation algorithm which allows optimizing a * x * y + b * y * x
1339 to (a + b ) * x * y in one invocation of the reassociation pass. */
1341 static bool
1342 undistribute_ops_list (enum tree_code opcode,
1343 vec<operand_entry_t> *ops, struct loop *loop)
1345 unsigned int length = ops->length ();
1346 operand_entry_t oe1;
1347 unsigned i, j;
1348 sbitmap candidates, candidates2;
1349 unsigned nr_candidates, nr_candidates2;
1350 sbitmap_iterator sbi0;
1351 vec<operand_entry_t> *subops;
1352 hash_table <oecount_hasher> ctable;
1353 bool changed = false;
1354 int next_oecount_id = 0;
1356 if (length <= 1
1357 || opcode != PLUS_EXPR)
1358 return false;
1360 /* Build a list of candidates to process. */
1361 candidates = sbitmap_alloc (length);
1362 bitmap_clear (candidates);
1363 nr_candidates = 0;
1364 FOR_EACH_VEC_ELT (*ops, i, oe1)
1366 enum tree_code dcode;
1367 gimple oe1def;
1369 if (TREE_CODE (oe1->op) != SSA_NAME)
1370 continue;
1371 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1372 if (!is_gimple_assign (oe1def))
1373 continue;
1374 dcode = gimple_assign_rhs_code (oe1def);
1375 if ((dcode != MULT_EXPR
1376 && dcode != RDIV_EXPR)
1377 || !is_reassociable_op (oe1def, dcode, loop))
1378 continue;
1380 bitmap_set_bit (candidates, i);
1381 nr_candidates++;
1384 if (nr_candidates < 2)
1386 sbitmap_free (candidates);
1387 return false;
1390 if (dump_file && (dump_flags & TDF_DETAILS))
1392 fprintf (dump_file, "searching for un-distribute opportunities ");
1393 print_generic_expr (dump_file,
1394 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1395 fprintf (dump_file, " %d\n", nr_candidates);
1398 /* Build linearized sub-operand lists and the counting table. */
1399 cvec.create (0);
1400 ctable.create (15);
1401 /* ??? Macro arguments cannot have multi-argument template types in
1402 them. This typedef is needed to workaround that limitation. */
1403 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1404 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1405 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1407 gimple oedef;
1408 enum tree_code oecode;
1409 unsigned j;
1411 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1412 oecode = gimple_assign_rhs_code (oedef);
1413 linearize_expr_tree (&subops[i], oedef,
1414 associative_tree_code (oecode), false);
1416 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1418 oecount c;
1419 void **slot;
1420 size_t idx;
1421 c.oecode = oecode;
1422 c.cnt = 1;
1423 c.id = next_oecount_id++;
1424 c.op = oe1->op;
1425 cvec.safe_push (c);
1426 idx = cvec.length () + 41;
1427 slot = ctable.find_slot ((void *)idx, INSERT);
1428 if (!*slot)
1430 *slot = (void *)idx;
1432 else
1434 cvec.pop ();
1435 cvec[(size_t)*slot - 42].cnt++;
1439 ctable.dispose ();
1441 /* Sort the counting table. */
1442 cvec.qsort (oecount_cmp);
1444 if (dump_file && (dump_flags & TDF_DETAILS))
1446 oecount *c;
1447 fprintf (dump_file, "Candidates:\n");
1448 FOR_EACH_VEC_ELT (cvec, j, c)
1450 fprintf (dump_file, " %u %s: ", c->cnt,
1451 c->oecode == MULT_EXPR
1452 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1453 print_generic_expr (dump_file, c->op, 0);
1454 fprintf (dump_file, "\n");
1458 /* Process the (operand, code) pairs in order of most occurrence. */
1459 candidates2 = sbitmap_alloc (length);
1460 while (!cvec.is_empty ())
1462 oecount *c = &cvec.last ();
1463 if (c->cnt < 2)
1464 break;
1466 /* Now collect the operands in the outer chain that contain
1467 the common operand in their inner chain. */
1468 bitmap_clear (candidates2);
1469 nr_candidates2 = 0;
1470 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1472 gimple oedef;
1473 enum tree_code oecode;
1474 unsigned j;
1475 tree op = (*ops)[i]->op;
1477 /* If we undistributed in this chain already this may be
1478 a constant. */
1479 if (TREE_CODE (op) != SSA_NAME)
1480 continue;
1482 oedef = SSA_NAME_DEF_STMT (op);
1483 oecode = gimple_assign_rhs_code (oedef);
1484 if (oecode != c->oecode)
1485 continue;
1487 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1489 if (oe1->op == c->op)
1491 bitmap_set_bit (candidates2, i);
1492 ++nr_candidates2;
1493 break;
1498 if (nr_candidates2 >= 2)
1500 operand_entry_t oe1, oe2;
1501 gimple prod;
1502 int first = bitmap_first_set_bit (candidates2);
1504 /* Build the new addition chain. */
1505 oe1 = (*ops)[first];
1506 if (dump_file && (dump_flags & TDF_DETAILS))
1508 fprintf (dump_file, "Building (");
1509 print_generic_expr (dump_file, oe1->op, 0);
1511 zero_one_operation (&oe1->op, c->oecode, c->op);
1512 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1514 gimple sum;
1515 oe2 = (*ops)[i];
1516 if (dump_file && (dump_flags & TDF_DETAILS))
1518 fprintf (dump_file, " + ");
1519 print_generic_expr (dump_file, oe2->op, 0);
1521 zero_one_operation (&oe2->op, c->oecode, c->op);
1522 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1523 oe1->op, oe2->op, opcode);
1524 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1525 oe2->rank = 0;
1526 oe1->op = gimple_get_lhs (sum);
1529 /* Apply the multiplication/division. */
1530 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1531 oe1->op, c->op, c->oecode);
1532 if (dump_file && (dump_flags & TDF_DETAILS))
1534 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1535 print_generic_expr (dump_file, c->op, 0);
1536 fprintf (dump_file, "\n");
1539 /* Record it in the addition chain and disable further
1540 undistribution with this op. */
1541 oe1->op = gimple_assign_lhs (prod);
1542 oe1->rank = get_rank (oe1->op);
1543 subops[first].release ();
1545 changed = true;
1548 cvec.pop ();
1551 for (i = 0; i < ops->length (); ++i)
1552 subops[i].release ();
1553 free (subops);
1554 cvec.release ();
1555 sbitmap_free (candidates);
1556 sbitmap_free (candidates2);
1558 return changed;
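
/* Worked sketch of the above (hypothetical operands and temporaries).
   For the addition chain *OPS = {a*x, b*x, c}, the pair (x, MULT_EXPR)
   occurs in two candidate chains, so the pass builds

     t  = a + b;
     t2 = t * x;

   and rewrites the chain to *OPS = {t2, 0, c}, which the caller then
   re-optimizes; the net effect is a*x + b*x + c -> (a + b)*x + c.  */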
1561 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1562 expression, examine the other OPS to see if any of them are comparisons
1563 of the same values, which we may be able to combine or eliminate.
1564 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1566 static bool
1567 eliminate_redundant_comparison (enum tree_code opcode,
1568 vec<operand_entry_t> *ops,
1569 unsigned int currindex,
1570 operand_entry_t curr)
1572 tree op1, op2;
1573 enum tree_code lcode, rcode;
1574 gimple def1, def2;
1575 int i;
1576 operand_entry_t oe;
1578 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1579 return false;
1581 /* Check that CURR is a comparison. */
1582 if (TREE_CODE (curr->op) != SSA_NAME)
1583 return false;
1584 def1 = SSA_NAME_DEF_STMT (curr->op);
1585 if (!is_gimple_assign (def1))
1586 return false;
1587 lcode = gimple_assign_rhs_code (def1);
1588 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1589 return false;
1590 op1 = gimple_assign_rhs1 (def1);
1591 op2 = gimple_assign_rhs2 (def1);
1593 /* Now look for a similar comparison in the remaining OPS. */
1594 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1596 tree t;
1598 if (TREE_CODE (oe->op) != SSA_NAME)
1599 continue;
1600 def2 = SSA_NAME_DEF_STMT (oe->op);
1601 if (!is_gimple_assign (def2))
1602 continue;
1603 rcode = gimple_assign_rhs_code (def2);
1604 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1605 continue;
1607 /* If we got here, we have a match. See if we can combine the
1608 two comparisons. */
1609 if (opcode == BIT_IOR_EXPR)
1610 t = maybe_fold_or_comparisons (lcode, op1, op2,
1611 rcode, gimple_assign_rhs1 (def2),
1612 gimple_assign_rhs2 (def2));
1613 else
1614 t = maybe_fold_and_comparisons (lcode, op1, op2,
1615 rcode, gimple_assign_rhs1 (def2),
1616 gimple_assign_rhs2 (def2));
1617 if (!t)
1618 continue;
1620 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1621 always give us a boolean_type_node value back. If the original
1622 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1623 we need to convert. */
1624 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1625 t = fold_convert (TREE_TYPE (curr->op), t);
1627 if (TREE_CODE (t) != INTEGER_CST
1628 && !operand_equal_p (t, curr->op, 0))
1630 enum tree_code subcode;
1631 tree newop1, newop2;
1632 if (!COMPARISON_CLASS_P (t))
1633 continue;
1634 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1635 STRIP_USELESS_TYPE_CONVERSION (newop1);
1636 STRIP_USELESS_TYPE_CONVERSION (newop2);
1637 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1638 continue;
1641 if (dump_file && (dump_flags & TDF_DETAILS))
1643 fprintf (dump_file, "Equivalence: ");
1644 print_generic_expr (dump_file, curr->op, 0);
1645 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1646 print_generic_expr (dump_file, oe->op, 0);
1647 fprintf (dump_file, " -> ");
1648 print_generic_expr (dump_file, t, 0);
1649 fprintf (dump_file, "\n");
1652 /* Now we can delete oe, as it has been subsumed by the new combined
1653 expression t. */
1654 ops->ordered_remove (i);
1655 reassociate_stats.ops_eliminated ++;
1657 /* If t is the same as curr->op, we're done. Otherwise we must
1658 replace curr->op with t. Special case is if we got a constant
1659 back, in which case we add it to the end instead of in place of
1660 the current entry. */
1661 if (TREE_CODE (t) == INTEGER_CST)
1663 ops->ordered_remove (currindex);
1664 add_to_ops_vec (ops, t);
1666 else if (!operand_equal_p (t, curr->op, 0))
1668 gimple sum;
1669 enum tree_code subcode;
1670 tree newop1;
1671 tree newop2;
1672 gcc_assert (COMPARISON_CLASS_P (t));
1673 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1674 STRIP_USELESS_TYPE_CONVERSION (newop1);
1675 STRIP_USELESS_TYPE_CONVERSION (newop2);
1676 gcc_checking_assert (is_gimple_val (newop1)
1677 && is_gimple_val (newop2));
1678 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1679 curr->op = gimple_get_lhs (sum);
1681 return true;
1684 return false;
1687 /* Perform various identities and other optimizations on the list of
1688 operand entries, stored in OPS. The tree code for the binary
1689 operation between all the operands is OPCODE. */
1691 static void
1692 optimize_ops_list (enum tree_code opcode,
1693 vec<operand_entry_t> *ops)
1695 unsigned int length = ops->length ();
1696 unsigned int i;
1697 operand_entry_t oe;
1698 operand_entry_t oelast = NULL;
1699 bool iterate = false;
1701 if (length == 1)
1702 return;
1704 oelast = ops->last ();
1706 /* If the last two are constants, pop the constants off, merge them
1707 and try the next two. */
1708 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1710 operand_entry_t oelm1 = (*ops)[length - 2];
1712 if (oelm1->rank == 0
1713 && is_gimple_min_invariant (oelm1->op)
1714 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1715 TREE_TYPE (oelast->op)))
1717 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1718 oelm1->op, oelast->op);
1720 if (folded && is_gimple_min_invariant (folded))
1722 if (dump_file && (dump_flags & TDF_DETAILS))
1723 fprintf (dump_file, "Merging constants\n");
1725 ops->pop ();
1726 ops->pop ();
1728 add_to_ops_vec (ops, folded);
1729 reassociate_stats.constants_eliminated++;
1731 optimize_ops_list (opcode, ops);
1732 return;
1737 eliminate_using_constants (opcode, ops);
1738 oelast = NULL;
1740 for (i = 0; ops->iterate (i, &oe);)
1742 bool done = false;
1744 if (eliminate_not_pairs (opcode, ops, i, oe))
1745 return;
1746 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1747 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1748 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1750 if (done)
1751 return;
1752 iterate = true;
1753 oelast = NULL;
1754 continue;
1756 oelast = oe;
1757 i++;
1760 length = ops->length ();
1761 oelast = ops->last ();
1763 if (iterate)
1764 optimize_ops_list (opcode, ops);
1767 /* The following functions are subroutines to optimize_range_tests and allow
1768 it to try to change a logical combination of comparisons into a range
1769 test.
1771 For example, both
1772      X == 2 || X == 5 || X == 3 || X == 4
1773    and
1774      X >= 2 && X <= 5
1775 are converted to
1776 (unsigned) (X - 2) <= 3
1778 For more information see comments above fold_test_range in fold-const.c,
1779 this implementation is for GIMPLE. */
1781 struct range_entry
1783 tree exp;
1784 tree low;
1785 tree high;
1786 bool in_p;
1787 bool strict_overflow_p;
1788 unsigned int idx, next;
1791 /* This is similar to make_range in fold-const.c, but on top of
1792 GIMPLE instead of trees. If EXP is non-NULL, it should be
1793 an SSA_NAME and STMT argument is ignored, otherwise STMT
1794 argument should be a GIMPLE_COND. */
1796 static void
1797 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1799 int in_p;
1800 tree low, high;
1801 bool is_bool, strict_overflow_p;
1803 r->exp = NULL_TREE;
1804 r->in_p = false;
1805 r->strict_overflow_p = false;
1806 r->low = NULL_TREE;
1807 r->high = NULL_TREE;
1808 if (exp != NULL_TREE
1809 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1810 return;
1812 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1813 and see if we can refine the range. Some of the cases below may not
1814 happen, but it doesn't seem worth worrying about this. We "continue"
1815 the outer loop when we've changed something; otherwise we "break"
1816 the switch, which will "break" the while. */
1817 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1818 high = low;
1819 in_p = 0;
1820 strict_overflow_p = false;
1821 is_bool = false;
1822 if (exp == NULL_TREE)
1823 is_bool = true;
1824 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1826 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1827 is_bool = true;
1828 else
1829 return;
1831 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1832 is_bool = true;
1834 while (1)
1836 enum tree_code code;
1837 tree arg0, arg1, exp_type;
1838 tree nexp;
1839 location_t loc;
1841 if (exp != NULL_TREE)
1843 if (TREE_CODE (exp) != SSA_NAME
1844 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1845 break;
1847 stmt = SSA_NAME_DEF_STMT (exp);
1848 if (!is_gimple_assign (stmt))
1849 break;
1851 code = gimple_assign_rhs_code (stmt);
1852 arg0 = gimple_assign_rhs1 (stmt);
1853 arg1 = gimple_assign_rhs2 (stmt);
1854 exp_type = TREE_TYPE (exp);
1856 else
1858 code = gimple_cond_code (stmt);
1859 arg0 = gimple_cond_lhs (stmt);
1860 arg1 = gimple_cond_rhs (stmt);
1861 exp_type = boolean_type_node;
1864 if (TREE_CODE (arg0) != SSA_NAME)
1865 break;
1866 loc = gimple_location (stmt);
1867 switch (code)
1869 case BIT_NOT_EXPR:
1870 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1871 /* Ensure the range is either +[-,0], +[0,0],
1872 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1873 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1874 or similar expression of unconditional true or
1875 false, it should not be negated. */
1876 && ((high && integer_zerop (high))
1877 || (low && integer_onep (low))))
1879 in_p = !in_p;
1880 exp = arg0;
1881 continue;
1883 break;
1884 case SSA_NAME:
1885 exp = arg0;
1886 continue;
1887 CASE_CONVERT:
1888 if (is_bool)
1889 goto do_default;
1890 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1892 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1893 is_bool = true;
1894 else
1895 return;
1897 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1898 is_bool = true;
1899 goto do_default;
1900 case EQ_EXPR:
1901 case NE_EXPR:
1902 case LT_EXPR:
1903 case LE_EXPR:
1904 case GE_EXPR:
1905 case GT_EXPR:
1906 is_bool = true;
1907 /* FALLTHRU */
1908 default:
1909 if (!is_bool)
1910 return;
1911 do_default:
1912 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1913 &low, &high, &in_p,
1914 &strict_overflow_p);
1915 if (nexp != NULL_TREE)
1917 exp = nexp;
1918 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1919 continue;
1921 break;
1923 break;
1925 if (is_bool)
1927 r->exp = exp;
1928 r->in_p = in_p;
1929 r->low = low;
1930 r->high = high;
1931 r->strict_overflow_p = strict_overflow_p;
1935 /* Comparison function for qsort. Sort entries
1936 without SSA_NAME exp first, then with SSA_NAMEs sorted
1937 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1938 by increasing ->low and if ->low is the same, by increasing
1939 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1940 maximum. */
1942 static int
1943 range_entry_cmp (const void *a, const void *b)
1945 const struct range_entry *p = (const struct range_entry *) a;
1946 const struct range_entry *q = (const struct range_entry *) b;
1948 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1950 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1952 /* Group range_entries for the same SSA_NAME together. */
1953 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1954 return -1;
1955 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1956 return 1;
1957 /* If ->low is different, NULL low goes first, then by
1958 ascending low. */
1959 if (p->low != NULL_TREE)
1961 if (q->low != NULL_TREE)
1963 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1964 p->low, q->low);
1965 if (tem && integer_onep (tem))
1966 return -1;
1967 tem = fold_binary (GT_EXPR, boolean_type_node,
1968 p->low, q->low);
1969 if (tem && integer_onep (tem))
1970 return 1;
1972 else
1973 return 1;
1975 else if (q->low != NULL_TREE)
1976 return -1;
1977 /* If ->high is different, NULL high goes last, before that by
1978 ascending high. */
1979 if (p->high != NULL_TREE)
1981 if (q->high != NULL_TREE)
1983 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1984 p->high, q->high);
1985 if (tem && integer_onep (tem))
1986 return -1;
1987 tem = fold_binary (GT_EXPR, boolean_type_node,
1988 p->high, q->high);
1989 if (tem && integer_onep (tem))
1990 return 1;
1992 else
1993 return -1;
1995 else if (p->high != NULL_TREE)
1996 return 1;
1997 /* If both ranges are the same, sort below by ascending idx. */
1999 else
2000 return 1;
2002 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2003 return -1;
2005 if (p->idx < q->idx)
2006 return -1;
2007 else
2009 gcc_checking_assert (p->idx > q->idx);
2010 return 1;
2014 /* Helper routine of optimize_range_test.
2015 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2016 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2017 OPCODE and OPS are arguments of optimize_range_tests. Return
2018 true if the range merge has been successful.
2019 If OPCODE is ERROR_MARK, this is called from within
2020 maybe_optimize_range_tests and is performing inter-bb range optimization.
2021 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2022 oe->rank. */
2024 static bool
2025 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2026 unsigned int count, enum tree_code opcode,
2027 vec<operand_entry_t> *ops, tree exp, bool in_p,
2028 tree low, tree high, bool strict_overflow_p)
2030 operand_entry_t oe = (*ops)[range->idx];
2031 tree op = oe->op;
2032 gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2033 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2034 location_t loc = gimple_location (stmt);
2035 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2036 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
2037 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2038 gimple_stmt_iterator gsi;
2040 if (tem == NULL_TREE)
2041 return false;
2043 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2044 warning_at (loc, OPT_Wstrict_overflow,
2045 "assuming signed overflow does not occur "
2046 "when simplifying range test");
2048 if (dump_file && (dump_flags & TDF_DETAILS))
2050 struct range_entry *r;
2051 fprintf (dump_file, "Optimizing range tests ");
2052 print_generic_expr (dump_file, range->exp, 0);
2053 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2054 print_generic_expr (dump_file, range->low, 0);
2055 fprintf (dump_file, ", ");
2056 print_generic_expr (dump_file, range->high, 0);
2057 fprintf (dump_file, "]");
2058 for (r = otherrange; r < otherrange + count; r++)
2060 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2061 print_generic_expr (dump_file, r->low, 0);
2062 fprintf (dump_file, ", ");
2063 print_generic_expr (dump_file, r->high, 0);
2064 fprintf (dump_file, "]");
2066 fprintf (dump_file, "\n into ");
2067 print_generic_expr (dump_file, tem, 0);
2068 fprintf (dump_file, "\n");
2071 if (opcode == BIT_IOR_EXPR
2072 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2073 tem = invert_truthvalue_loc (loc, tem);
2075 tem = fold_convert_loc (loc, optype, tem);
2076 gsi = gsi_for_stmt (stmt);
2077 /* In rare cases range->exp can be equal to lhs of stmt.
2078 In that case we have to insert after the stmt rather than before
2079 it. */
2080 if (op == range->exp)
2081 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2082 GSI_CONTINUE_LINKING);
2083 else
2085 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2086 GSI_SAME_STMT);
2087 gsi_prev (&gsi);
2089 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2090 if (gimple_uid (gsi_stmt (gsi)))
2091 break;
2092 else
2093 gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2095 oe->op = tem;
2096 range->exp = exp;
2097 range->low = low;
2098 range->high = high;
2099 range->in_p = in_p;
2100 range->strict_overflow_p = false;
2102 for (range = otherrange; range < otherrange + count; range++)
2104 oe = (*ops)[range->idx];
2105 /* Now change all the other range test immediate uses, so that
2106 those tests will be optimized away. */
2107 if (opcode == ERROR_MARK)
2109 if (oe->op)
2110 oe->op = build_int_cst (TREE_TYPE (oe->op),
2111 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2112 else
2113 oe->op = (oe->rank == BIT_IOR_EXPR
2114 ? boolean_false_node : boolean_true_node);
2116 else
2117 oe->op = error_mark_node;
2118 range->exp = NULL_TREE;
2120 return true;
2123 /* Optimize X == CST1 || X == CST2
2124 if popcount (CST1 ^ CST2) == 1 into
2125 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2126 Similarly for ranges. E.g.
2127 X != 2 && X != 3 && X != 10 && X != 11
2128 will be transformed by the previous optimization into
2129 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2130 and this loop can transform that into
2131 !(((X & ~8) - 2U) <= 1U). */
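/* For the example above the two ranges are [2, 3] and [10, 11]:
   lowi ^ lowj == 2 ^ 10 == 8 == 3 ^ 11 == highi ^ highj and 8 has a
   single bit set, so EXP becomes X & ~8 and the merged range is
   [2 & ~8, 3 & ~8] == [2, 3], i.e. ((X & ~8) - 2U) <= 1U.  */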
2133 static bool
2134 optimize_range_tests_xor (enum tree_code opcode, tree type,
2135 tree lowi, tree lowj, tree highi, tree highj,
2136 vec<operand_entry_t> *ops,
2137 struct range_entry *rangei,
2138 struct range_entry *rangej)
2140 tree lowxor, highxor, tem, exp;
2141 /* Check highi ^ lowi == highj ^ lowj and
2142 popcount (highi ^ lowi) == 1. */
2143 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2144 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2145 return false;
2146 if (tree_log2 (lowxor) < 0)
2147 return false;
2148 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2149 if (!tree_int_cst_equal (lowxor, highxor))
2150 return false;
2152 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2153 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2154 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2155 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2156 if (update_range_test (rangei, rangej, 1, opcode, ops, exp,
2157 rangei->in_p, lowj, highj,
2158 rangei->strict_overflow_p
2159 || rangej->strict_overflow_p))
2160 return true;
2161 return false;
2164 /* Optimize X == CST1 || X == CST2
2165 if popcount (CST2 - CST1) == 1 into
2166 ((X - CST1) & ~(CST2 - CST1)) == 0.
2167 Similarly for ranges. E.g.
2168 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2169 || X == 75 || X == 45
2170 will be transformed by the previous optimization into
2171 (X - 43U) <= 3U || (X - 75U) <= 3U
2172 and this loop can transform that into
2173 ((X - 43U) & ~(75U - 43U)) <= 3U. */
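/* For the example above the two ranges are [43, 46] and [75, 78]:
   highi - lowi == highj - lowj == 3 and lowj - lowi == 75 - 43 == 32
   has a single bit set, so the merged test checks
   (X - 43U) & ~32U against [0, 3], which is the form shown above.  */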
2174 static bool
2175 optimize_range_tests_diff (enum tree_code opcode, tree type,
2176 tree lowi, tree lowj, tree highi, tree highj,
2177 vec<operand_entry_t> *ops,
2178 struct range_entry *rangei,
2179 struct range_entry *rangej)
2181 tree tem1, tem2, mask;
2182 /* Check highi - lowi == highj - lowj. */
2183 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2184 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2185 return false;
2186 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2187 if (!tree_int_cst_equal (tem1, tem2))
2188 return false;
2189 /* Check popcount (lowj - lowi) == 1. */
2190 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2191 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2192 return false;
2193 if (tree_log2 (tem1) < 0)
2194 return false;
2196 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2197 tem1 = fold_binary (MINUS_EXPR, type, rangei->exp, lowi);
2198 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2199 lowj = build_int_cst (type, 0);
2200 if (update_range_test (rangei, rangej, 1, opcode, ops, tem1,
2201 rangei->in_p, lowj, tem2,
2202 rangei->strict_overflow_p
2203 || rangej->strict_overflow_p))
2204 return true;
2205 return false;
2208 /* Helper function that performs the common checks for optimize_range_tests_xor
2209 and optimize_range_tests_diff.
2210 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2211 Else it calls optimize_range_tests_diff. */
2213 static bool
2214 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2215 bool optimize_xor, vec<operand_entry_t> *ops,
2216 struct range_entry *ranges)
2218 int i, j;
2219 bool any_changes = false;
2220 for (i = first; i < length; i++)
2222 tree lowi, highi, lowj, highj, type, tem;
2224 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2225 continue;
2226 type = TREE_TYPE (ranges[i].exp);
2227 if (!INTEGRAL_TYPE_P (type))
2228 continue;
2229 lowi = ranges[i].low;
2230 if (lowi == NULL_TREE)
2231 lowi = TYPE_MIN_VALUE (type);
2232 highi = ranges[i].high;
2233 if (highi == NULL_TREE)
2234 continue;
2235 for (j = i + 1; j < length && j < i + 64; j++)
2237 bool changes;
2238 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2239 continue;
2240 lowj = ranges[j].low;
2241 if (lowj == NULL_TREE)
2242 continue;
2243 highj = ranges[j].high;
2244 if (highj == NULL_TREE)
2245 highj = TYPE_MAX_VALUE (type);
2246 /* Check lowj > highi. */
2247 tem = fold_binary (GT_EXPR, boolean_type_node,
2248 lowj, highi);
2249 if (tem == NULL_TREE || !integer_onep (tem))
2250 continue;
2251 if (optimize_xor)
2252 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2253 highi, highj, ops,
2254 ranges + i, ranges + j);
2255 else
2256 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2257 highi, highj, ops,
2258 ranges + i, ranges + j);
2259 if (changes)
2261 any_changes = true;
2262 break;
2266 return any_changes;
2269 /* Optimize range tests, similarly to how fold_range_test optimizes
2270 them on trees. The tree code for the binary
2271 operation between all the operands is OPCODE.
2272 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2273 maybe_optimize_range_tests for inter-bb range optimization.
2274 In that case if oe->op is NULL, oe->id is bb->index whose
2275 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2276 the actual opcode. */
2278 static bool
2279 optimize_range_tests (enum tree_code opcode,
2280 vec<operand_entry_t> *ops)
2282 unsigned int length = ops->length (), i, j, first;
2283 operand_entry_t oe;
2284 struct range_entry *ranges;
2285 bool any_changes = false;
2287 if (length == 1)
2288 return false;
2290 ranges = XNEWVEC (struct range_entry, length);
2291 for (i = 0; i < length; i++)
2293 oe = (*ops)[i];
2294 ranges[i].idx = i;
2295 init_range_entry (ranges + i, oe->op,
2296 oe->op ? NULL :
2297 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2298 /* For | invert it now, we will invert it again before emitting
2299 the optimized expression. */
2300 if (opcode == BIT_IOR_EXPR
2301 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2302 ranges[i].in_p = !ranges[i].in_p;
2305 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2306 for (i = 0; i < length; i++)
2307 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2308 break;
2310 /* Try to merge ranges. */
2311 for (first = i; i < length; i++)
2313 tree low = ranges[i].low;
2314 tree high = ranges[i].high;
2315 int in_p = ranges[i].in_p;
2316 bool strict_overflow_p = ranges[i].strict_overflow_p;
2317 int update_fail_count = 0;
2319 for (j = i + 1; j < length; j++)
2321 if (ranges[i].exp != ranges[j].exp)
2322 break;
2323 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2324 ranges[j].in_p, ranges[j].low, ranges[j].high))
2325 break;
2326 strict_overflow_p |= ranges[j].strict_overflow_p;
2329 if (j == i + 1)
2330 continue;
2332 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2333 ops, ranges[i].exp, in_p, low, high,
2334 strict_overflow_p))
2336 i = j - 1;
2337 any_changes = true;
2339 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2340 while update_range_test would fail. */
2341 else if (update_fail_count == 64)
2342 i = j - 1;
2343 else
2344 ++update_fail_count;
2347 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2348 ops, ranges);
2350 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2351 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2352 ops, ranges);
2354 if (any_changes && opcode != ERROR_MARK)
2356 j = 0;
2357 FOR_EACH_VEC_ELT (*ops, i, oe)
2359 if (oe->op == error_mark_node)
2360 continue;
2361 else if (i != j)
2362 (*ops)[j] = oe;
2363 j++;
2365 ops->truncate (j);
2368 XDELETEVEC (ranges);
2369 return any_changes;
2372 /* Return true if STMT is a cast like:
2373 <bb N>:
2375 _123 = (int) _234;
2377 <bb M>:
2378 # _345 = PHI <_123(N), 1(...), 1(...)>
2379 where _234 has bool type, _123 has single use and
2380 bb N has a single successor M. This is commonly used in
2381 the last block of a range test. */
2383 static bool
2384 final_range_test_p (gimple stmt)
2386 basic_block bb, rhs_bb;
2387 edge e;
2388 tree lhs, rhs;
2389 use_operand_p use_p;
2390 gimple use_stmt;
2392 if (!gimple_assign_cast_p (stmt))
2393 return false;
2394 bb = gimple_bb (stmt);
2395 if (!single_succ_p (bb))
2396 return false;
2397 e = single_succ_edge (bb);
2398 if (e->flags & EDGE_COMPLEX)
2399 return false;
2401 lhs = gimple_assign_lhs (stmt);
2402 rhs = gimple_assign_rhs1 (stmt);
2403 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2404 || TREE_CODE (rhs) != SSA_NAME
2405 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2406 return false;
2408 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2409 if (!single_imm_use (lhs, &use_p, &use_stmt))
2410 return false;
2412 if (gimple_code (use_stmt) != GIMPLE_PHI
2413 || gimple_bb (use_stmt) != e->dest)
2414 return false;
2416 /* And that the rhs is defined in the same loop. */
2417 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2418 if (rhs_bb == NULL
2419 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2420 return false;
2422 return true;
2425 /* Return true if BB is a suitable basic block for inter-bb range test
2426 optimization. If BACKWARD is true, BB should be the only predecessor
2427 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2428 or compared against to find the common basic block to which all conditions
2429 branch if true resp. false. If BACKWARD is false, TEST_BB should
2430 be the only predecessor of BB. */
2432 static bool
2433 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2434 bool backward)
2436 edge_iterator ei, ei2;
2437 edge e, e2;
2438 gimple stmt;
2439 gimple_stmt_iterator gsi;
2440 bool other_edge_seen = false;
2441 bool is_cond;
2443 if (test_bb == bb)
2444 return false;
2445 /* Check last stmt first. */
2446 stmt = last_stmt (bb);
2447 if (stmt == NULL
2448 || (gimple_code (stmt) != GIMPLE_COND
2449 && (backward || !final_range_test_p (stmt)))
2450 || gimple_visited_p (stmt)
2451 || stmt_could_throw_p (stmt)
2452 || *other_bb == bb)
2453 return false;
2454 is_cond = gimple_code (stmt) == GIMPLE_COND;
2455 if (is_cond)
2457 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2458 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2459 to *OTHER_BB (if not set yet, try to find it out). */
2460 if (EDGE_COUNT (bb->succs) != 2)
2461 return false;
2462 FOR_EACH_EDGE (e, ei, bb->succs)
2464 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2465 return false;
2466 if (e->dest == test_bb)
2468 if (backward)
2469 continue;
2470 else
2471 return false;
2473 if (e->dest == bb)
2474 return false;
2475 if (*other_bb == NULL)
2477 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2478 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2479 return false;
2480 else if (e->dest == e2->dest)
2481 *other_bb = e->dest;
2482 if (*other_bb == NULL)
2483 return false;
2485 if (e->dest == *other_bb)
2486 other_edge_seen = true;
2487 else if (backward)
2488 return false;
2490 if (*other_bb == NULL || !other_edge_seen)
2491 return false;
2493 else if (single_succ (bb) != *other_bb)
2494 return false;
2496 /* Now check all PHIs of *OTHER_BB. */
2497 e = find_edge (bb, *other_bb);
2498 e2 = find_edge (test_bb, *other_bb);
2499 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2501 gimple phi = gsi_stmt (gsi);
2502 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2503 corresponding to BB and TEST_BB predecessor must be the same. */
2504 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2505 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2507 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2508 one of the PHIs should have the lhs of the last stmt in
2509 that block as PHI arg and that PHI should have 0 or 1
2510 corresponding to it in all other range test basic blocks
2511 considered. */
2512 if (!is_cond)
2514 if (gimple_phi_arg_def (phi, e->dest_idx)
2515 == gimple_assign_lhs (stmt)
2516 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2517 || integer_onep (gimple_phi_arg_def (phi,
2518 e2->dest_idx))))
2519 continue;
2521 else
2523 gimple test_last = last_stmt (test_bb);
2524 if (gimple_code (test_last) != GIMPLE_COND
2525 && gimple_phi_arg_def (phi, e2->dest_idx)
2526 == gimple_assign_lhs (test_last)
2527 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2528 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2529 continue;
2532 return false;
2535 return true;
2538 /* Return true if BB doesn't have side-effects that would disallow
2539 range test optimization, all SSA_NAMEs set in the bb are consumed
2540 in the bb and there are no PHIs. */
2542 static bool
2543 no_side_effect_bb (basic_block bb)
2545 gimple_stmt_iterator gsi;
2546 gimple last;
2548 if (!gimple_seq_empty_p (phi_nodes (bb)))
2549 return false;
2550 last = last_stmt (bb);
2551 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2553 gimple stmt = gsi_stmt (gsi);
2554 tree lhs;
2555 imm_use_iterator imm_iter;
2556 use_operand_p use_p;
2558 if (is_gimple_debug (stmt))
2559 continue;
2560 if (gimple_has_side_effects (stmt))
2561 return false;
2562 if (stmt == last)
2563 return true;
2564 if (!is_gimple_assign (stmt))
2565 return false;
2566 lhs = gimple_assign_lhs (stmt);
2567 if (TREE_CODE (lhs) != SSA_NAME)
2568 return false;
2569 if (gimple_assign_rhs_could_trap_p (stmt))
2570 return false;
2571 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2573 gimple use_stmt = USE_STMT (use_p);
2574 if (is_gimple_debug (use_stmt))
2575 continue;
2576 if (gimple_bb (use_stmt) != bb)
2577 return false;
2580 return false;
2583 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2584 return true and fill in *OPS recursively. */
2586 static bool
2587 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2588 struct loop *loop)
2590 gimple stmt = SSA_NAME_DEF_STMT (var);
2591 tree rhs[2];
2592 int i;
2594 if (!is_reassociable_op (stmt, code, loop))
2595 return false;
2597 rhs[0] = gimple_assign_rhs1 (stmt);
2598 rhs[1] = gimple_assign_rhs2 (stmt);
2599 gimple_set_visited (stmt, true);
2600 for (i = 0; i < 2; i++)
2601 if (TREE_CODE (rhs[i]) == SSA_NAME
2602 && !get_ops (rhs[i], code, ops, loop)
2603 && has_single_use (rhs[i]))
2605 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2607 oe->op = rhs[i];
2608 oe->rank = code;
2609 oe->id = 0;
2610 oe->count = 1;
2611 ops->safe_push (oe);
2613 return true;
2616 /* Find the ops that were added by get_ops starting from VAR, see if
2617 they were changed during update_range_test and if yes, create new
2618 stmts. */
2620 static tree
2621 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2622 unsigned int *pidx, struct loop *loop)
2624 gimple stmt = SSA_NAME_DEF_STMT (var);
2625 tree rhs[4];
2626 int i;
2628 if (!is_reassociable_op (stmt, code, loop))
2629 return NULL;
2631 rhs[0] = gimple_assign_rhs1 (stmt);
2632 rhs[1] = gimple_assign_rhs2 (stmt);
2633 rhs[2] = rhs[0];
2634 rhs[3] = rhs[1];
2635 for (i = 0; i < 2; i++)
2636 if (TREE_CODE (rhs[i]) == SSA_NAME)
2638 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2639 if (rhs[2 + i] == NULL_TREE)
2641 if (has_single_use (rhs[i]))
2642 rhs[2 + i] = ops[(*pidx)++]->op;
2643 else
2644 rhs[2 + i] = rhs[i];
2647 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2648 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2650 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2651 var = make_ssa_name (TREE_TYPE (var), NULL);
2652 gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
2653 var, rhs[2], rhs[3]);
2654 gimple_set_uid (g, gimple_uid (stmt));
2655 gimple_set_visited (g, true);
2656 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2658 return var;
2661 /* Structure to track the initial value passed to get_ops and
2662 the range in the ops vector for each basic block. */
2664 struct inter_bb_range_test_entry
2666 tree op;
2667 unsigned int first_idx, last_idx;
2670 /* Inter-bb range test optimization. */
2672 static void
2673 maybe_optimize_range_tests (gimple stmt)
2675 basic_block first_bb = gimple_bb (stmt);
2676 basic_block last_bb = first_bb;
2677 basic_block other_bb = NULL;
2678 basic_block bb;
2679 edge_iterator ei;
2680 edge e;
2681 auto_vec<operand_entry_t> ops;
2682 auto_vec<inter_bb_range_test_entry> bbinfo;
2683 bool any_changes = false;
2685 /* Consider only basic blocks that end with GIMPLE_COND or
2686 a cast statement satisfying final_range_test_p. All
2687 but the last bb in the first_bb .. last_bb range
2688 should end with GIMPLE_COND. */
2689 if (gimple_code (stmt) == GIMPLE_COND)
2691 if (EDGE_COUNT (first_bb->succs) != 2)
2692 return;
2694 else if (final_range_test_p (stmt))
2695 other_bb = single_succ (first_bb);
2696 else
2697 return;
2699 if (stmt_could_throw_p (stmt))
2700 return;
2702 /* As relative ordering of post-dominator sons isn't fixed,
2703 maybe_optimize_range_tests can be called first on any
2704 bb in the range we want to optimize. So, start searching
2705 backwards, if first_bb can be set to a predecessor. */
2706 while (single_pred_p (first_bb))
2708 basic_block pred_bb = single_pred (first_bb);
2709 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2710 break;
2711 if (!no_side_effect_bb (first_bb))
2712 break;
2713 first_bb = pred_bb;
2715 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2716 Before starting forward search in last_bb successors, find
2717 out the other_bb. */
2718 if (first_bb == last_bb)
2720 other_bb = NULL;
2721 /* As non-GIMPLE_COND last stmt always terminates the range,
2722 if the backward search didn't discover anything, just give up.  */
2723 if (gimple_code (stmt) != GIMPLE_COND)
2724 return;
2725 /* Look at both successors. Either it ends with a GIMPLE_COND
2726 and satisfies suitable_cond_bb, or ends with a cast and
2727 other_bb is that cast's successor. */
2728 FOR_EACH_EDGE (e, ei, first_bb->succs)
2729 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2730 || e->dest == first_bb)
2731 return;
2732 else if (single_pred_p (e->dest))
2734 stmt = last_stmt (e->dest);
2735 if (stmt
2736 && gimple_code (stmt) == GIMPLE_COND
2737 && EDGE_COUNT (e->dest->succs) == 2)
2739 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2740 break;
2741 else
2742 other_bb = NULL;
2744 else if (stmt
2745 && final_range_test_p (stmt)
2746 && find_edge (first_bb, single_succ (e->dest)))
2748 other_bb = single_succ (e->dest);
2749 if (other_bb == first_bb)
2750 other_bb = NULL;
2753 if (other_bb == NULL)
2754 return;
2756 /* Now do the forward search, moving last_bb to successor bbs
2757 that aren't other_bb. */
2758 while (EDGE_COUNT (last_bb->succs) == 2)
2760 FOR_EACH_EDGE (e, ei, last_bb->succs)
2761 if (e->dest != other_bb)
2762 break;
2763 if (e == NULL)
2764 break;
2765 if (!single_pred_p (e->dest))
2766 break;
2767 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2768 break;
2769 if (!no_side_effect_bb (e->dest))
2770 break;
2771 last_bb = e->dest;
2773 if (first_bb == last_bb)
2774 return;
2775 /* Here basic blocks first_bb through last_bb's predecessor
2776 end with GIMPLE_COND, all of them have one of the edges to
2777 other_bb and another to another block in the range,
2778 all blocks except first_bb don't have side-effects and
2779 last_bb ends with either GIMPLE_COND, or cast satisfying
2780 final_range_test_p. */
2781 for (bb = last_bb; ; bb = single_pred (bb))
2783 enum tree_code code;
2784 tree lhs, rhs;
2785 inter_bb_range_test_entry bb_ent;
2787 bb_ent.op = NULL_TREE;
2788 bb_ent.first_idx = ops.length ();
2789 bb_ent.last_idx = bb_ent.first_idx;
2790 e = find_edge (bb, other_bb);
2791 stmt = last_stmt (bb);
2792 gimple_set_visited (stmt, true);
2793 if (gimple_code (stmt) != GIMPLE_COND)
2795 use_operand_p use_p;
2796 gimple phi;
2797 edge e2;
2798 unsigned int d;
2800 lhs = gimple_assign_lhs (stmt);
2801 rhs = gimple_assign_rhs1 (stmt);
2802 gcc_assert (bb == last_bb);
2804 /* stmt is
2805 _123 = (int) _234;
2807 followed by:
2808 <bb M>:
2809 # _345 = PHI <_123(N), 1(...), 1(...)>
2811 or 0 instead of 1. If it is 0, the _234
2812 range test is anded together with all the
2813 other range tests, if it is 1, it is ored with
2814 them. */
2815 single_imm_use (lhs, &use_p, &phi);
2816 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2817 e2 = find_edge (first_bb, other_bb);
2818 d = e2->dest_idx;
2819 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2820 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2821 code = BIT_AND_EXPR;
2822 else
2824 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2825 code = BIT_IOR_EXPR;
2828 /* If _234 SSA_NAME_DEF_STMT is
2829 _234 = _567 | _789;
2830 (or &, corresponding to 1/0 in the phi arguments,
2831 push into ops the individual range test arguments
2832 of the bitwise or resp. and, recursively. */
2833 if (!get_ops (rhs, code, &ops,
2834 loop_containing_stmt (stmt))
2835 && has_single_use (rhs))
2837 /* Otherwise, push the _234 range test itself. */
2838 operand_entry_t oe
2839 = (operand_entry_t) pool_alloc (operand_entry_pool);
2841 oe->op = rhs;
2842 oe->rank = code;
2843 oe->id = 0;
2844 oe->count = 1;
2845 ops.safe_push (oe);
2846 bb_ent.last_idx++;
2848 else
2849 bb_ent.last_idx = ops.length ();
2850 bb_ent.op = rhs;
2851 bbinfo.safe_push (bb_ent);
2852 continue;
2854 /* Otherwise stmt is GIMPLE_COND. */
2855 code = gimple_cond_code (stmt);
2856 lhs = gimple_cond_lhs (stmt);
2857 rhs = gimple_cond_rhs (stmt);
2858 if (TREE_CODE (lhs) == SSA_NAME
2859 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2860 && ((code != EQ_EXPR && code != NE_EXPR)
2861 || rhs != boolean_false_node
2862 /* Either push into ops the individual bitwise
2863 or resp. and operands, depending on which
2864 edge is other_bb. */
2865 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2866 ^ (code == EQ_EXPR))
2867 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2868 loop_containing_stmt (stmt))))
2870 /* Or push the GIMPLE_COND stmt itself. */
2871 operand_entry_t oe
2872 = (operand_entry_t) pool_alloc (operand_entry_pool);
2874 oe->op = NULL;
2875 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2876 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2877 /* oe->op = NULL signals that there is no SSA_NAME
2878 for the range test, and oe->id instead is the
2879 number of the basic block at whose end the GIMPLE_COND
2880 is. */
2881 oe->id = bb->index;
2882 oe->count = 1;
2883 ops.safe_push (oe);
2884 bb_ent.op = NULL;
2885 bb_ent.last_idx++;
2887 else if (ops.length () > bb_ent.first_idx)
2889 bb_ent.op = lhs;
2890 bb_ent.last_idx = ops.length ();
2892 bbinfo.safe_push (bb_ent);
2893 if (bb == first_bb)
2894 break;
2896 if (ops.length () > 1)
2897 any_changes = optimize_range_tests (ERROR_MARK, &ops);
2898 if (any_changes)
2900 unsigned int idx;
2901 /* update_ops relies on has_single_use predicates returning the
2902 same values as it did during get_ops earlier. Additionally it
2903 never removes statements, only adds new ones and it should walk
2904 from the single imm use and check the predicate already before
2905 making those changes.
2906 On the other hand, handling GIMPLE_COND directly can turn
2907 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
2908 it needs to be done in a separate loop afterwards. */
2909 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2911 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2912 && bbinfo[idx].op != NULL_TREE)
2914 tree new_op;
2916 stmt = last_stmt (bb);
2917 new_op = update_ops (bbinfo[idx].op,
2918 (enum tree_code)
2919 ops[bbinfo[idx].first_idx]->rank,
2920 ops, &bbinfo[idx].first_idx,
2921 loop_containing_stmt (stmt));
2922 if (new_op == NULL_TREE)
2924 gcc_assert (bb == last_bb);
2925 new_op = ops[bbinfo[idx].first_idx++]->op;
2927 if (bbinfo[idx].op != new_op)
2929 imm_use_iterator iter;
2930 use_operand_p use_p;
2931 gimple use_stmt, cast_stmt = NULL;
2933 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
2934 if (is_gimple_debug (use_stmt))
2935 continue;
2936 else if (gimple_code (use_stmt) == GIMPLE_COND
2937 || gimple_code (use_stmt) == GIMPLE_PHI)
2938 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2939 SET_USE (use_p, new_op);
2940 else if (gimple_assign_cast_p (use_stmt))
2941 cast_stmt = use_stmt;
2942 else
2943 gcc_unreachable ();
2944 if (cast_stmt)
2946 gcc_assert (bb == last_bb);
2947 tree lhs = gimple_assign_lhs (cast_stmt);
2948 tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
2949 enum tree_code rhs_code
2950 = gimple_assign_rhs_code (cast_stmt);
2951 gimple g;
2952 if (is_gimple_min_invariant (new_op))
2954 new_op = fold_convert (TREE_TYPE (lhs), new_op);
2955 g = gimple_build_assign (new_lhs, new_op);
2957 else
2958 g = gimple_build_assign_with_ops (rhs_code, new_lhs,
2959 new_op, NULL_TREE);
2960 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
2961 gimple_set_uid (g, gimple_uid (cast_stmt));
2962 gimple_set_visited (g, true);
2963 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2964 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2965 if (is_gimple_debug (use_stmt))
2966 continue;
2967 else if (gimple_code (use_stmt) == GIMPLE_COND
2968 || gimple_code (use_stmt) == GIMPLE_PHI)
2969 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2970 SET_USE (use_p, new_lhs);
2971 else
2972 gcc_unreachable ();
2976 if (bb == first_bb)
2977 break;
2979 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2981 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2982 && bbinfo[idx].op == NULL_TREE
2983 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
2985 stmt = last_stmt (bb);
2986 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
2987 gimple_cond_make_false (stmt);
2988 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
2989 gimple_cond_make_true (stmt);
2990 else
2992 gimple_cond_set_code (stmt, NE_EXPR);
2993 gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
2994 gimple_cond_set_rhs (stmt, boolean_false_node);
2996 update_stmt (stmt);
2998 if (bb == first_bb)
2999 break;
3004 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3005 of STMT in its operands. This is also known as a "destructive
3006 update" operation. */
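/* E.g. given
     b_1 = PHI <a_2 (latch), ...>
     a_2 = c_3 + b_1;
   calling this for the a_2 assignment with OPERAND b_1 returns true,
   because the PHI defining b_1 uses the assignment's lhs a_2.  */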
3008 static bool
3009 is_phi_for_stmt (gimple stmt, tree operand)
3011 gimple def_stmt;
3012 tree lhs;
3013 use_operand_p arg_p;
3014 ssa_op_iter i;
3016 if (TREE_CODE (operand) != SSA_NAME)
3017 return false;
3019 lhs = gimple_assign_lhs (stmt);
3021 def_stmt = SSA_NAME_DEF_STMT (operand);
3022 if (gimple_code (def_stmt) != GIMPLE_PHI)
3023 return false;
3025 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
3026 if (lhs == USE_FROM_PTR (arg_p))
3027 return true;
3028 return false;
3031 /* Remove def stmt of VAR if VAR has zero uses and recurse
3032 on rhs1 operand if so. */
3034 static void
3035 remove_visited_stmt_chain (tree var)
3037 gimple stmt;
3038 gimple_stmt_iterator gsi;
3040 while (1)
3042 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3043 return;
3044 stmt = SSA_NAME_DEF_STMT (var);
3045 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3047 var = gimple_assign_rhs1 (stmt);
3048 gsi = gsi_for_stmt (stmt);
3049 gsi_remove (&gsi, true);
3050 release_defs (stmt);
3052 else
3053 return;
3057 /* This function checks three consecutive operands in
3058 the passed operand vector OPS starting from OPINDEX and
3059 swaps two operands if it is profitable for the binary operation
3060 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3062 We pair ops with the same rank if possible.
3064 The alternative we try is to see if STMT is a destructive
3065 update style statement, which is like:
3066 b = phi (a, ...)
3067 a = c + b;
3068 In that case, we want to use the destructive update form to
3069 expose the possible vectorizer sum reduction opportunity.
3070 In that case, the third operand will be the phi node. This
3071 check is not performed if STMT is null.
3073 We could, of course, try to be better as noted above, and do a
3074 lot of work to try to find these opportunities in >3 operand
3075 cases, but it is unlikely to be worth it. */
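/* E.g. with ranks (5, 5, 3) the first branch below swaps OE1 and OE3,
   giving (3, 5, 5), so the two rank-5 operands sit at OPINDEX + 1 and
   OPINDEX + 2 and are paired in the same binary operation; ranks
   (5, 3, 5) are handled likewise by the second branch.  */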
3077 static void
3078 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3079 unsigned int opindex, gimple stmt)
3081 operand_entry_t oe1, oe2, oe3;
3083 oe1 = ops[opindex];
3084 oe2 = ops[opindex + 1];
3085 oe3 = ops[opindex + 2];
3087 if ((oe1->rank == oe2->rank
3088 && oe2->rank != oe3->rank)
3089 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3090 && !is_phi_for_stmt (stmt, oe1->op)
3091 && !is_phi_for_stmt (stmt, oe2->op)))
3093 struct operand_entry temp = *oe3;
3094 oe3->op = oe1->op;
3095 oe3->rank = oe1->rank;
3096 oe1->op = temp.op;
3097 oe1->rank= temp.rank;
3099 else if ((oe1->rank == oe3->rank
3100 && oe2->rank != oe3->rank)
3101 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3102 && !is_phi_for_stmt (stmt, oe1->op)
3103 && !is_phi_for_stmt (stmt, oe3->op)))
3105 struct operand_entry temp = *oe2;
3106 oe2->op = oe1->op;
3107 oe2->rank = oe1->rank;
3108 oe1->op = temp.op;
3109 oe1->rank = temp.rank;
3113 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3114 two definitions, otherwise return STMT. */
3116 static inline gimple
3117 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3119 if (TREE_CODE (rhs1) == SSA_NAME
3120 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3121 stmt = SSA_NAME_DEF_STMT (rhs1);
3122 if (TREE_CODE (rhs2) == SSA_NAME
3123 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3124 stmt = SSA_NAME_DEF_STMT (rhs2);
3125 return stmt;
3128 /* Recursively rewrite our linearized statements so that the operators
3129 match those in OPS[OPINDEX], putting the computation in rank
3130 order. Return new lhs. */
3132 static tree
3133 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3134 vec<operand_entry_t> ops, bool changed)
3136 tree rhs1 = gimple_assign_rhs1 (stmt);
3137 tree rhs2 = gimple_assign_rhs2 (stmt);
3138 tree lhs = gimple_assign_lhs (stmt);
3139 operand_entry_t oe;
3141 /* The final recursion case for this function is that you have
3142 exactly two operations left.
3143 If we had exactly one op in the entire list to start with, we
3144 would have never called this function, and the tail recursion
3145 rewrites them one at a time. */
3146 if (opindex + 2 == ops.length ())
3148 operand_entry_t oe1, oe2;
3150 oe1 = ops[opindex];
3151 oe2 = ops[opindex + 1];
3153 if (rhs1 != oe1->op || rhs2 != oe2->op)
3155 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3156 unsigned int uid = gimple_uid (stmt);
3158 if (dump_file && (dump_flags & TDF_DETAILS))
3160 fprintf (dump_file, "Transforming ");
3161 print_gimple_stmt (dump_file, stmt, 0, 0);
3164 if (changed)
3166 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3167 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3168 stmt
3169 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3170 lhs, oe1->op, oe2->op);
3171 gimple_set_uid (stmt, uid);
3172 gimple_set_visited (stmt, true);
3173 if (insert_point == gsi_stmt (gsi))
3174 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3175 else
3176 insert_stmt_after (stmt, insert_point);
3178 else
3180 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3181 == stmt);
3182 gimple_assign_set_rhs1 (stmt, oe1->op);
3183 gimple_assign_set_rhs2 (stmt, oe2->op);
3184 update_stmt (stmt);
3187 if (rhs1 != oe1->op && rhs1 != oe2->op)
3188 remove_visited_stmt_chain (rhs1);
3190 if (dump_file && (dump_flags & TDF_DETAILS))
3192 fprintf (dump_file, " into ");
3193 print_gimple_stmt (dump_file, stmt, 0, 0);
3196 return lhs;
3199 /* If we hit here, we should have 3 or more ops left. */
3200 gcc_assert (opindex + 2 < ops.length ());
3202 /* Rewrite the next operator. */
3203 oe = ops[opindex];
3205 /* Recurse on the LHS of the binary operator, which is guaranteed to
3206 be the non-leaf side. */
3207 tree new_rhs1
3208 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3209 changed || oe->op != rhs2);
3211 if (oe->op != rhs2 || new_rhs1 != rhs1)
3213 if (dump_file && (dump_flags & TDF_DETAILS))
3215 fprintf (dump_file, "Transforming ");
3216 print_gimple_stmt (dump_file, stmt, 0, 0);
3219 /* If changed is false, this is either opindex == 0
3220 or all outer rhs2's were equal to corresponding oe->op,
3221 and powi_result is NULL.
3222 That means lhs is equivalent before and after reassociation.
3223 Otherwise ensure the old lhs SSA_NAME is not reused and
3224 create a new stmt as well, so that any debug stmts will be
3225 properly adjusted. */
3226 if (changed)
3228 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3229 unsigned int uid = gimple_uid (stmt);
3230 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3232 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3233 stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3234 lhs, new_rhs1, oe->op);
3235 gimple_set_uid (stmt, uid);
3236 gimple_set_visited (stmt, true);
3237 if (insert_point == gsi_stmt (gsi))
3238 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3239 else
3240 insert_stmt_after (stmt, insert_point);
3242 else
3244 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3245 == stmt);
3246 gimple_assign_set_rhs1 (stmt, new_rhs1);
3247 gimple_assign_set_rhs2 (stmt, oe->op);
3248 update_stmt (stmt);
3251 if (dump_file && (dump_flags & TDF_DETAILS))
3253 fprintf (dump_file, " into ");
3254 print_gimple_stmt (dump_file, stmt, 0, 0);
3257 return lhs;
3260 /* Find out how many cycles we need to compute a statement chain.
3261 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
3262 maximum number of independent statements we may execute per cycle. */
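/* For instance, with OPS_NUM == 10 and CPU_WIDTH == 2 we get
   10 / 4 == 2 cycles of width-limited reduction, leaving
   10 - 2 * 2 == 6 operands; 6 is not a power of two, so another
   floor_log2 (6) + 1 == 3 cycles are needed, 5 cycles in total.  */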
3264 static int
3265 get_required_cycles (int ops_num, int cpu_width)
3267 int res;
3268 int elog;
3269 unsigned int rest;
3271 /* While we have more than 2 * cpu_width operands
3272 we may reduce number of operands by cpu_width
3273 per cycle. */
3274 res = ops_num / (2 * cpu_width);
3276 /* The remaining operand count may be halved each cycle
3277 until we have only one operand. */
3278 rest = (unsigned)(ops_num - res * cpu_width);
3279 elog = exact_log2 (rest);
3280 if (elog >= 0)
3281 res += elog;
3282 else
3283 res += floor_log2 (rest) + 1;
3285 return res;
3288 /* Returns an optimal number of registers to use for computation of
3289 given statements. */
3291 static int
3292 get_reassociation_width (int ops_num, enum tree_code opc,
3293 enum machine_mode mode)
3295 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3296 int width;
3297 int width_min;
3298 int cycles_best;
3300 if (param_width > 0)
3301 width = param_width;
3302 else
3303 width = targetm.sched.reassociation_width (opc, mode);
3305 if (width == 1)
3306 return width;
3308 /* Get the minimal time required for sequence computation. */
3309 cycles_best = get_required_cycles (ops_num, width);
3311 /* Check if we may use less width and still compute sequence for
3312 the same time. It will allow us to reduce registers usage.
3313 get_required_cycles is monotonically increasing with lower width
3314 so we can perform a binary search for the minimal width that still
3315 results in the optimal cycle count. */
3316 width_min = 1;
3317 while (width > width_min)
3319 int width_mid = (width + width_min) / 2;
3321 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3322 width = width_mid;
3323 else if (width_min < width_mid)
3324 width_min = width_mid;
3325 else
3326 break;
3329 return width;
3332 /* Recursively rewrite our linearized statements so that the operators
3333 match those in OPS[OPINDEX], putting the computation in rank
3334 order and trying to allow operations to be executed in
3335 parallel. */
3337 static void
3338 rewrite_expr_tree_parallel (gimple stmt, int width,
3339 vec<operand_entry_t> ops)
3341 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3342 int op_num = ops.length ();
3343 int stmt_num = op_num - 1;
3344 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3345 int op_index = op_num - 1;
3346 int stmt_index = 0;
3347 int ready_stmts_end = 0;
3348 int i = 0;
3349 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3351 /* We start expression rewriting from the top statements.
3352 So, in this loop we create a full list of statements
3353 we will work with. */
3354 stmts[stmt_num - 1] = stmt;
3355 for (i = stmt_num - 2; i >= 0; i--)
3356 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3358 for (i = 0; i < stmt_num; i++)
3360 tree op1, op2;
3362 /* Determine whether we should use results of
3363 already handled statements or not. */
3364 if (ready_stmts_end == 0
3365 && (i - stmt_index >= width || op_index < 1))
3366 ready_stmts_end = i;
3368 /* Now we choose operands for the next statement. A nonzero
3369 value in ready_stmts_end means here that we should use
3370 the result of already generated statements as new operand. */
3371 if (ready_stmts_end > 0)
3373 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3374 if (ready_stmts_end > stmt_index)
3375 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3376 else if (op_index >= 0)
3377 op2 = ops[op_index--]->op;
3378 else
3380 gcc_assert (stmt_index < i);
3381 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3384 if (stmt_index >= ready_stmts_end)
3385 ready_stmts_end = 0;
3387 else
3389 if (op_index > 1)
3390 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3391 op2 = ops[op_index--]->op;
3392 op1 = ops[op_index--]->op;
3395 /* If we emit the last statement then we should put
3396 operands into the last statement. It will also
3397 break the loop. */
3398 if (op_index < 0 && stmt_index == i)
3399 i = stmt_num - 1;
3401 if (dump_file && (dump_flags & TDF_DETAILS))
3403 fprintf (dump_file, "Transforming ");
3404 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3407 /* We keep the original statement only for the last one. All
3408 others are recreated. */
3409 if (i == stmt_num - 1)
3411 gimple_assign_set_rhs1 (stmts[i], op1);
3412 gimple_assign_set_rhs2 (stmts[i], op2);
3413 update_stmt (stmts[i]);
3415 else
3416 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3418 if (dump_file && (dump_flags & TDF_DETAILS))
3420 fprintf (dump_file, " into ");
3421 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3425 remove_visited_stmt_chain (last_rhs1);
3428 /* Transform STMT, which is really (A + B) + (C + D), into the left
3429 linear form, ((A+B)+C)+D.
3430 Recurse on D if necessary. */
3432 static void
3433 linearize_expr (gimple stmt)
3435 gimple_stmt_iterator gsi;
3436 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3437 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3438 gimple oldbinrhs = binrhs;
3439 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3440 gimple newbinrhs = NULL;
3441 struct loop *loop = loop_containing_stmt (stmt);
3442 tree lhs = gimple_assign_lhs (stmt);
3444 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3445 && is_reassociable_op (binrhs, rhscode, loop));
3447 gsi = gsi_for_stmt (stmt);
3449 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3450 binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
3451 make_ssa_name (TREE_TYPE (lhs), NULL),
3452 gimple_assign_lhs (binlhs),
3453 gimple_assign_rhs2 (binrhs));
3454 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3455 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3456 gimple_set_uid (binrhs, gimple_uid (stmt));
3458 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3459 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3461 if (dump_file && (dump_flags & TDF_DETAILS))
3463 fprintf (dump_file, "Linearized: ");
3464 print_gimple_stmt (dump_file, stmt, 0, 0);
3467 reassociate_stats.linearized++;
3468 update_stmt (stmt);
3470 gsi = gsi_for_stmt (oldbinrhs);
3471 gsi_remove (&gsi, true);
3472 release_defs (oldbinrhs);
3474 gimple_set_visited (stmt, true);
3475 gimple_set_visited (binlhs, true);
3476 gimple_set_visited (binrhs, true);
3478 /* Tail recurse on the new rhs if it still needs reassociation. */
3479 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3480 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3481 want to change the algorithm while converting to tuples. */
3482 linearize_expr (stmt);
3485 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3486 it. Otherwise, return NULL. */
3488 static gimple
3489 get_single_immediate_use (tree lhs)
3491 use_operand_p immuse;
3492 gimple immusestmt;
3494 if (TREE_CODE (lhs) == SSA_NAME
3495 && single_imm_use (lhs, &immuse, &immusestmt)
3496 && is_gimple_assign (immusestmt))
3497 return immusestmt;
3499 return NULL;
3502 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3503 representing the negated value. Insertions of any necessary
3504 instructions go before GSI.
3505 This function is recursive in that, if you hand it "a_5" as the
3506 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3507 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3509 static tree
3510 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3512 gimple negatedefstmt = NULL;
3513 tree resultofnegate;
3514 gimple_stmt_iterator gsi;
3515 unsigned int uid;
3517 /* If we are trying to negate a name, defined by an add, negate the
3518 add operands instead. */
3519 if (TREE_CODE (tonegate) == SSA_NAME)
3520 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3521 if (TREE_CODE (tonegate) == SSA_NAME
3522 && is_gimple_assign (negatedefstmt)
3523 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3524 && has_single_use (gimple_assign_lhs (negatedefstmt))
3525 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3527 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3528 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3529 tree lhs = gimple_assign_lhs (negatedefstmt);
3530 gimple g;
3532 gsi = gsi_for_stmt (negatedefstmt);
3533 rhs1 = negate_value (rhs1, &gsi);
3535 gsi = gsi_for_stmt (negatedefstmt);
3536 rhs2 = negate_value (rhs2, &gsi);
3538 gsi = gsi_for_stmt (negatedefstmt);
3539 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3540 gimple_set_visited (negatedefstmt, true);
3541 g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
3542 gimple_set_uid (g, gimple_uid (negatedefstmt));
3543 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3544 return lhs;
3547 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3548 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3549 NULL_TREE, true, GSI_SAME_STMT);
3550 gsi = *gsip;
3551 uid = gimple_uid (gsi_stmt (gsi));
3552 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3554 gimple stmt = gsi_stmt (gsi);
3555 if (gimple_uid (stmt) != 0)
3556 break;
3557 gimple_set_uid (stmt, uid);
3559 return resultofnegate;
3562 /* Return true if we should break up the subtract in STMT into an add
3563 with negate. This is true when the subtract operands are really
3564 adds, or the subtract itself is used in an add expression. In
3565 either case, breaking up the subtract into an add with negate
3566 exposes the adds to reassociation. */
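/* E.g.
     t_1 = a_2 - b_3;
     u_4 = t_1 + c_5;
   qualifies because the subtract result only feeds an addition;
   breaking it up into t_1 = a_2 + -b_3 exposes a_2, -b_3 and c_5
   to reassociation as a single chain.  */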
3568 static bool
3569 should_break_up_subtract (gimple stmt)
3571 tree lhs = gimple_assign_lhs (stmt);
3572 tree binlhs = gimple_assign_rhs1 (stmt);
3573 tree binrhs = gimple_assign_rhs2 (stmt);
3574 gimple immusestmt;
3575 struct loop *loop = loop_containing_stmt (stmt);
3577 if (TREE_CODE (binlhs) == SSA_NAME
3578 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3579 return true;
3581 if (TREE_CODE (binrhs) == SSA_NAME
3582 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3583 return true;
3585 if (TREE_CODE (lhs) == SSA_NAME
3586 && (immusestmt = get_single_immediate_use (lhs))
3587 && is_gimple_assign (immusestmt)
3588 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3589 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3590 return true;
3591 return false;
3594 /* Transform STMT from A - B into A + -B. */
3596 static void
3597 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3599 tree rhs1 = gimple_assign_rhs1 (stmt);
3600 tree rhs2 = gimple_assign_rhs2 (stmt);
3602 if (dump_file && (dump_flags & TDF_DETAILS))
3604 fprintf (dump_file, "Breaking up subtract ");
3605 print_gimple_stmt (dump_file, stmt, 0, 0);
3608 rhs2 = negate_value (rhs2, gsip);
3609 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3610 update_stmt (stmt);
3613 /* Determine whether STMT is a builtin call that raises an SSA name
3614 to an integer power and has only one use. If so, and this is early
3615 reassociation and unsafe math optimizations are permitted, place
3616 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3617 If any of these conditions does not hold, return FALSE. */
3619 static bool
3620 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3622 tree fndecl, arg1;
3623 REAL_VALUE_TYPE c, cint;
3625 if (!first_pass_instance
3626 || !flag_unsafe_math_optimizations
3627 || !is_gimple_call (stmt)
3628 || !has_single_use (gimple_call_lhs (stmt)))
3629 return false;
3631 fndecl = gimple_call_fndecl (stmt);
3633 if (!fndecl
3634 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3635 return false;
3637 switch (DECL_FUNCTION_CODE (fndecl))
3639 CASE_FLT_FN (BUILT_IN_POW):
3640 *base = gimple_call_arg (stmt, 0);
3641 arg1 = gimple_call_arg (stmt, 1);
3643 if (TREE_CODE (arg1) != REAL_CST)
3644 return false;
3646 c = TREE_REAL_CST (arg1);
3648 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3649 return false;
3651 *exponent = real_to_integer (&c);
3652 real_from_integer (&cint, VOIDmode, *exponent,
3653 *exponent < 0 ? -1 : 0, 0);
3654 if (!real_identical (&c, &cint))
3655 return false;
3657 break;
3659 CASE_FLT_FN (BUILT_IN_POWI):
3660 *base = gimple_call_arg (stmt, 0);
3661 arg1 = gimple_call_arg (stmt, 1);
3663 if (!tree_fits_shwi_p (arg1))
3664 return false;
3666 *exponent = tree_to_shwi (arg1);
3667 break;
3669 default:
3670 return false;
3673 /* Expanding negative exponents is generally unproductive, so we don't
3674 complicate matters with those. Exponents of zero and one should
3675 have been handled by expression folding. */
3676 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3677 return false;
3679 return true;
3682 /* Recursively linearize a binary expression that is the RHS of STMT.
3683 Place the operands of the expression tree in the vector named OPS. */
3685 static void
3686 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3687 bool is_associative, bool set_visited)
3689 tree binlhs = gimple_assign_rhs1 (stmt);
3690 tree binrhs = gimple_assign_rhs2 (stmt);
3691 gimple binlhsdef = NULL, binrhsdef = NULL;
3692 bool binlhsisreassoc = false;
3693 bool binrhsisreassoc = false;
3694 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3695 struct loop *loop = loop_containing_stmt (stmt);
3696 tree base = NULL_TREE;
3697 HOST_WIDE_INT exponent = 0;
3699 if (set_visited)
3700 gimple_set_visited (stmt, true);
3702 if (TREE_CODE (binlhs) == SSA_NAME)
3704 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3705 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3706 && !stmt_could_throw_p (binlhsdef));
3709 if (TREE_CODE (binrhs) == SSA_NAME)
3711 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3712 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3713 && !stmt_could_throw_p (binrhsdef));
3716 /* If the LHS is not reassociable, but the RHS is, we need to swap
3717 them. If neither is reassociable, there is nothing we can do, so
3718 just put them in the ops vector. If the LHS is reassociable,
3719 linearize it. If both are reassociable, then linearize the RHS
3720 and the LHS. */
3722 if (!binlhsisreassoc)
3724 tree temp;
3726 /* If this is not an associative operation, like division, give up. */
3727 if (!is_associative)
3729 add_to_ops_vec (ops, binrhs);
3730 return;
3733 if (!binrhsisreassoc)
3735 if (rhscode == MULT_EXPR
3736 && TREE_CODE (binrhs) == SSA_NAME
3737 && acceptable_pow_call (binrhsdef, &base, &exponent))
3739 add_repeat_to_ops_vec (ops, base, exponent);
3740 gimple_set_visited (binrhsdef, true);
3742 else
3743 add_to_ops_vec (ops, binrhs);
3745 if (rhscode == MULT_EXPR
3746 && TREE_CODE (binlhs) == SSA_NAME
3747 && acceptable_pow_call (binlhsdef, &base, &exponent))
3749 add_repeat_to_ops_vec (ops, base, exponent);
3750 gimple_set_visited (binlhsdef, true);
3752 else
3753 add_to_ops_vec (ops, binlhs);
3755 return;
3758 if (dump_file && (dump_flags & TDF_DETAILS))
3760 fprintf (dump_file, "swapping operands of ");
3761 print_gimple_stmt (dump_file, stmt, 0, 0);
3764 swap_ssa_operands (stmt,
3765 gimple_assign_rhs1_ptr (stmt),
3766 gimple_assign_rhs2_ptr (stmt));
3767 update_stmt (stmt);
3769 if (dump_file && (dump_flags & TDF_DETAILS))
3771 fprintf (dump_file, " is now ");
3772 print_gimple_stmt (dump_file, stmt, 0, 0);
3775 /* We want to make it so the lhs is always the reassociative op,
3776 so swap. */
3777 temp = binlhs;
3778 binlhs = binrhs;
3779 binrhs = temp;
3781 else if (binrhsisreassoc)
3783 linearize_expr (stmt);
3784 binlhs = gimple_assign_rhs1 (stmt);
3785 binrhs = gimple_assign_rhs2 (stmt);
3788 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3789 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3790 rhscode, loop));
3791 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3792 is_associative, set_visited);
3794 if (rhscode == MULT_EXPR
3795 && TREE_CODE (binrhs) == SSA_NAME
3796 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3798 add_repeat_to_ops_vec (ops, base, exponent);
3799 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3801 else
3802 add_to_ops_vec (ops, binrhs);
3805 /* Repropagate the negates back into subtracts, since no other pass
3806 currently does it. */
3808 static void
3809 repropagate_negates (void)
3811 unsigned int i = 0;
3812 tree negate;
3814 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3816 gimple user = get_single_immediate_use (negate);
3818 if (!user || !is_gimple_assign (user))
3819 continue;
3821 /* The negate operand can be either operand of a PLUS_EXPR
3822 (it can be the LHS if the RHS is a constant for example).
3824 Force the negate operand to the RHS of the PLUS_EXPR, then
3825 transform the PLUS_EXPR into a MINUS_EXPR. */
3826 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3828 /* If the negated operand appears on the LHS of the
3829 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3830 to force the negated operand to the RHS of the PLUS_EXPR. */
3831 if (gimple_assign_rhs1 (user) == negate)
3833 swap_ssa_operands (user,
3834 gimple_assign_rhs1_ptr (user),
3835 gimple_assign_rhs2_ptr (user));
3838 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3839 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3840 if (gimple_assign_rhs2 (user) == negate)
3842 tree rhs1 = gimple_assign_rhs1 (user);
3843 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3844 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3845 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3846 update_stmt (user);
3849 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3851 if (gimple_assign_rhs1 (user) == negate)
3853 /* We have
3854 x = -a
3855 y = x - b
3856 which we transform into
3857 x = a + b
3858 y = -x .
3859 This pushes down the negate which we possibly can merge
3860 into some other operation, hence insert it into the
3861 plus_negates vector. */
3862 gimple feed = SSA_NAME_DEF_STMT (negate);
3863 tree a = gimple_assign_rhs1 (feed);
3864 tree b = gimple_assign_rhs2 (user);
3865 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
3866 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
3867 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
3868 gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
3869 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3870 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
3871 user = gsi_stmt (gsi2);
3872 update_stmt (user);
3873 gsi_remove (&gsi, true);
3874 release_defs (feed);
3875 plus_negates.safe_push (gimple_assign_lhs (user));
3877 else
3879 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3880 rid of one operation. */
3881 gimple feed = SSA_NAME_DEF_STMT (negate);
3882 tree a = gimple_assign_rhs1 (feed);
3883 tree rhs1 = gimple_assign_rhs1 (user);
3884 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3885 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3886 update_stmt (gsi_stmt (gsi));
3892 /* Returns true if OP is of a type for which we can do reassociation.
3893 That is for integral or non-saturating fixed-point types, and for
3894 floating point types when associative-math is enabled.  */
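/* E.g. an unsigned int, a signed int with -fwrapv, or a double when
   -fassociative-math (e.g. via -ffast-math) is in effect qualify;
   a plain signed int does not, since its overflow is undefined and
   reassociation could introduce new overflows.  */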
3896 static bool
3897 can_reassociate_p (tree op)
3899 tree type = TREE_TYPE (op);
3900 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3901 || NON_SAT_FIXED_POINT_TYPE_P (type)
3902 || (flag_associative_math && FLOAT_TYPE_P (type)))
3903 return true;
3904 return false;
3907 /* Break up subtract operations in block BB.
3909 We do this top down because we don't know whether the subtract is
3910 part of a possible chain of reassociation except at the top.
3912 IE given
3913 d = f + g
3914 c = a + e
3915 b = c - d
3916 q = b - r
3917 k = t - q
3919 we want to break up k = t - q, but we won't until we've transformed q
3920 = b - r, which won't be broken up until we transform b = c - d.
3922 En passant, clear the GIMPLE visited flag on every statement
3923 and set UIDs within each basic block. */
3925 static void
3926 break_up_subtract_bb (basic_block bb)
3928 gimple_stmt_iterator gsi;
3929 basic_block son;
3930 unsigned int uid = 1;
3932 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3934 gimple stmt = gsi_stmt (gsi);
3935 gimple_set_visited (stmt, false);
3936 gimple_set_uid (stmt, uid++);
3938 if (!is_gimple_assign (stmt)
3939 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3940 continue;
3942 /* Look for simple gimple subtract operations. */
3943 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3945 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3946 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3947 continue;
3949 /* Check for a subtract used only in an addition. If this
3950 is the case, transform it into add of a negate for better
3951 reassociation. IE transform C = A-B into C = A + -B if C
3952 is only used in an addition. */
3953 if (should_break_up_subtract (stmt))
3954 break_up_subtract (stmt, &gsi);
3956 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3957 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3958 plus_negates.safe_push (gimple_assign_lhs (stmt));
3960 for (son = first_dom_son (CDI_DOMINATORS, bb);
3961 son;
3962 son = next_dom_son (CDI_DOMINATORS, son))
3963 break_up_subtract_bb (son);
3966 /* Used for repeated factor analysis. */
3967 struct repeat_factor_d
3969 /* An SSA name that occurs in a multiply chain. */
3970 tree factor;
3972 /* Cached rank of the factor. */
3973 unsigned rank;
3975 /* Number of occurrences of the factor in the chain. */
3976 HOST_WIDE_INT count;
3978 /* An SSA name representing the product of this factor and
3979 all factors appearing later in the repeated factor vector. */
3980 tree repr;
3983 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3984 typedef const struct repeat_factor_d *const_repeat_factor_t;
3987 static vec<repeat_factor> repeat_factor_vec;
3989 /* Used for sorting the repeat factor vector. Sort primarily by
3990 ascending occurrence count, secondarily by descending rank. */
3992 static int
3993 compare_repeat_factors (const void *x1, const void *x2)
3995 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3996 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3998 if (rf1->count != rf2->count)
3999 return rf1->count < rf2->count ? -1 : 1;
4001 return rf1->rank == rf2->rank ? 0 : (rf1->rank > rf2->rank ? -1 : 1);
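/* For instance, given factors { y: count 2, rank 7 }, { z: count 2, rank 5 }
   and { x: count 4, rank 9 }, the sorted order is y, z, x: counts ascend,
   and among equal counts the higher-ranked factor comes first.  */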
4004 /* Look for repeated operands in OPS in the multiply tree rooted at
4005 STMT. Replace them with an optimal sequence of multiplies and powi
4006 builtin calls, and remove the used operands from OPS. Return an
4007 SSA name representing the value of the replacement sequence. */
4009 static tree
4010 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4012 unsigned i, j, vec_len;
4013 int ii;
4014 operand_entry_t oe;
4015 repeat_factor_t rf1, rf2;
4016 repeat_factor rfnew;
4017 tree result = NULL_TREE;
4018 tree target_ssa, iter_result;
4019 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4020 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4021 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4022 gimple mul_stmt, pow_stmt;
4024 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4025 target. */
4026 if (!powi_fndecl)
4027 return NULL_TREE;
4029 /* Allocate the repeated factor vector. */
4030 repeat_factor_vec.create (10);
4032 /* Scan the OPS vector for all SSA names in the product and build
4033 up a vector of occurrence counts for each factor. */
4034 FOR_EACH_VEC_ELT (*ops, i, oe)
4036 if (TREE_CODE (oe->op) == SSA_NAME)
4038 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4040 if (rf1->factor == oe->op)
4042 rf1->count += oe->count;
4043 break;
4047 if (j >= repeat_factor_vec.length ())
4049 rfnew.factor = oe->op;
4050 rfnew.rank = oe->rank;
4051 rfnew.count = oe->count;
4052 rfnew.repr = NULL_TREE;
4053 repeat_factor_vec.safe_push (rfnew);
4058 /* Sort the repeated factor vector by (a) increasing occurrence count,
4059 and (b) decreasing rank. */
4060 repeat_factor_vec.qsort (compare_repeat_factors);
4062 /* It is generally best to combine as many base factors as possible
4063 into a product before applying __builtin_powi to the result.
4064 However, the sort order chosen for the repeated factor vector
4065 allows us to cache partial results for the product of the base
4066 factors for subsequent use. When we already have a cached partial
4067 result from a previous iteration, it is best to make use of it
4068 before looking for another __builtin_powi opportunity.
4070 As an example, consider x * x * y * y * y * z * z * z * z.
4071 We want to first compose the product x * y * z, raise it to the
4072 second power, then multiply this by y * z, and finally multiply
4073 by z. This can be done in 5 multiplies provided we cache y * z
4074 for use in both expressions:
4076 t1 = y * z
4077 t2 = t1 * x
4078 t3 = t2 * t2
4079 t4 = t1 * t3
4080 result = t4 * z
4082 If we instead ignored the cached y * z and first multiplied by
4083 the __builtin_powi opportunity z * z, we would get the inferior:
4085 t1 = y * z
4086 t2 = t1 * x
4087 t3 = t2 * t2
4088 t4 = z * z
4089 t5 = t3 * t4
4090 result = t5 * y */
4092 vec_len = repeat_factor_vec.length ();
4094 /* Repeatedly look for opportunities to create a builtin_powi call. */
4095 while (true)
4097 HOST_WIDE_INT power;
4099 /* First look for the largest cached product of factors from
4100 preceding iterations. If found, create a builtin_powi for
4101 it if the minimum occurrence count for its factors is at
4102 least 2, or just use this cached product as our next
4103 multiplicand if the minimum occurrence count is 1. */
4104 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4106 if (rf1->repr && rf1->count > 0)
4107 break;
4110 if (j < vec_len)
4112 power = rf1->count;
4114 if (power == 1)
4116 iter_result = rf1->repr;
4118 if (dump_file && (dump_flags & TDF_DETAILS))
4120 unsigned elt;
4121 repeat_factor_t rf;
4122 fputs ("Multiplying by cached product ", dump_file);
4123 for (elt = j; elt < vec_len; elt++)
4125 rf = &repeat_factor_vec[elt];
4126 print_generic_expr (dump_file, rf->factor, 0);
4127 if (elt < vec_len - 1)
4128 fputs (" * ", dump_file);
4130 fputs ("\n", dump_file);
4133 else
4135 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4136 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4137 build_int_cst (integer_type_node,
4138 power));
4139 gimple_call_set_lhs (pow_stmt, iter_result);
4140 gimple_set_location (pow_stmt, gimple_location (stmt));
4141 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4143 if (dump_file && (dump_flags & TDF_DETAILS))
4145 unsigned elt;
4146 repeat_factor_t rf;
4147 fputs ("Building __builtin_pow call for cached product (",
4148 dump_file);
4149 for (elt = j; elt < vec_len; elt++)
4151 rf = &repeat_factor_vec[elt];
4152 print_generic_expr (dump_file, rf->factor, 0);
4153 if (elt < vec_len - 1)
4154 fputs (" * ", dump_file);
4156 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4157 power);
4161 else
4163 /* Otherwise, find the first factor in the repeated factor
4164 vector whose occurrence count is at least 2. If no such
4165 factor exists, there are no builtin_powi opportunities
4166 remaining. */
4167 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4169 if (rf1->count >= 2)
4170 break;
4173 if (j >= vec_len)
4174 break;
4176 power = rf1->count;
4178 if (dump_file && (dump_flags & TDF_DETAILS))
4180 unsigned elt;
4181 repeat_factor_t rf;
4182 fputs ("Building __builtin_pow call for (", dump_file);
4183 for (elt = j; elt < vec_len; elt++)
4185 rf = &repeat_factor_vec[elt];
4186 print_generic_expr (dump_file, rf->factor, 0);
4187 if (elt < vec_len - 1)
4188 fputs (" * ", dump_file);
4190 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4193 reassociate_stats.pows_created++;
4195 /* Visit each element of the vector in reverse order (so that
4196 high-occurrence elements are visited first, and within the
4197 same occurrence count, lower-ranked elements are visited
4198 first). Form a linear product of all elements in this order
4199 whose occurrence count is at least that of element J.
4200 Record the SSA name representing the product of each element
4201 with all subsequent elements in the vector. */
4202 if (j == vec_len - 1)
4203 rf1->repr = rf1->factor;
4204 else
4206 for (ii = vec_len - 2; ii >= (int)j; ii--)
4208 tree op1, op2;
4210 rf1 = &repeat_factor_vec[ii];
4211 rf2 = &repeat_factor_vec[ii + 1];
4213 /* Init the last factor's representative to be itself. */
4214 if (!rf2->repr)
4215 rf2->repr = rf2->factor;
4217 op1 = rf1->factor;
4218 op2 = rf2->repr;
4220 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4221 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4222 target_ssa,
4223 op1, op2);
4224 gimple_set_location (mul_stmt, gimple_location (stmt));
4225 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4226 rf1->repr = target_ssa;
4228 /* Don't reprocess the multiply we just introduced. */
4229 gimple_set_visited (mul_stmt, true);
4233 /* Form a call to __builtin_powi for the maximum product
4234 just formed, raised to the power obtained earlier. */
4235 rf1 = &repeat_factor_vec[j];
4236 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4237 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4238 build_int_cst (integer_type_node,
4239 power));
4240 gimple_call_set_lhs (pow_stmt, iter_result);
4241 gimple_set_location (pow_stmt, gimple_location (stmt));
4242 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4245 /* If we previously formed at least one other builtin_powi call,
4246 form the product of this one and those others. */
4247 if (result)
4249 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4250 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4251 result, iter_result);
4252 gimple_set_location (mul_stmt, gimple_location (stmt));
4253 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4254 gimple_set_visited (mul_stmt, true);
4255 result = new_result;
4257 else
4258 result = iter_result;
4260 /* Decrement the occurrence count of each element in the product
4261 by the count found above, and remove this many copies of each
4262 factor from OPS. */
4263 for (i = j; i < vec_len; i++)
4265 unsigned k = power;
4266 unsigned n;
4268 rf1 = &repeat_factor_vec[i];
4269 rf1->count -= power;
4271 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4273 if (oe->op == rf1->factor)
4275 if (oe->count <= k)
4277 ops->ordered_remove (n);
4278 k -= oe->count;
4280 if (k == 0)
4281 break;
4283 else
4285 oe->count -= k;
4286 break;
4293 /* At this point all elements in the repeated factor vector have a
4294 remaining occurrence count of 0 or 1, and those with a count of 1
4295 don't have cached representatives. Re-sort the ops vector and
4296 clean up. */
4297 ops->qsort (sort_by_operand_rank);
4298 repeat_factor_vec.release ();
4300 /* Return the final product computed herein. Note that there may
4301 still be some elements with single occurrence count left in OPS;
4302 those will be handled by the normal reassociation logic. */
4303 return result;
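/* To make this concrete: for a chain x * x * x * y, factor x has an
   occurrence count of 3 and y a count of 1.  The loop above emits a single
   call t = __builtin_powi (x, 3) into a "reassocpow" temporary t, removes
   the three copies of x from OPS, and returns t; the leftover factor y is
   multiplied back in by the caller, reassociate_bb.  */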
4306 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4308 static void
4309 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4311 tree rhs1;
4313 if (dump_file && (dump_flags & TDF_DETAILS))
4315 fprintf (dump_file, "Transforming ");
4316 print_gimple_stmt (dump_file, stmt, 0, 0);
4319 rhs1 = gimple_assign_rhs1 (stmt);
4320 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4321 update_stmt (stmt);
4322 remove_visited_stmt_chain (rhs1);
4324 if (dump_file && (dump_flags & TDF_DETAILS))
4326 fprintf (dump_file, " into ");
4327 print_gimple_stmt (dump_file, stmt, 0, 0);
4331 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4333 static void
4334 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4335 tree rhs1, tree rhs2)
4337 if (dump_file && (dump_flags & TDF_DETAILS))
4339 fprintf (dump_file, "Transforming ");
4340 print_gimple_stmt (dump_file, stmt, 0, 0);
4343 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4344 update_stmt (gsi_stmt (*gsi));
4345 remove_visited_stmt_chain (rhs1);
4347 if (dump_file && (dump_flags & TDF_DETAILS))
4349 fprintf (dump_file, " into ");
4350 print_gimple_stmt (dump_file, stmt, 0, 0);
4354 /* Reassociate expressions in basic block BB and in the blocks that are
4355 its children in the post-dominator tree. */
4357 static void
4358 reassociate_bb (basic_block bb)
4360 gimple_stmt_iterator gsi;
4361 basic_block son;
4362 gimple stmt = last_stmt (bb);
4364 if (stmt && !gimple_visited_p (stmt))
4365 maybe_optimize_range_tests (stmt);
4367 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4369 stmt = gsi_stmt (gsi);
4371 if (is_gimple_assign (stmt)
4372 && !stmt_could_throw_p (stmt))
4374 tree lhs, rhs1, rhs2;
4375 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4377 /* If this is not a gimple binary expression, there is
4378 nothing for us to do with it. */
4379 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4380 continue;
4382 /* If this was part of an already processed statement,
4383 we don't need to touch it again. */
4384 if (gimple_visited_p (stmt))
4386 /* This statement might have become dead because of previous
4387 reassociations. */
4388 if (has_zero_uses (gimple_get_lhs (stmt)))
4390 gsi_remove (&gsi, true);
4391 release_defs (stmt);
4392 /* Removing that statement may have left the iterator at
4393 the end of the sequence. In that case, reset it to the
4394 last statement of the block. If the block has become
4395 empty, the reset iterator is also at the end of the
4396 sequence, in which case we need to bail out of the walk
4397 over this block. */
4398 if (gsi_end_p (gsi))
4400 gsi = gsi_last_bb (bb);
4401 if (gsi_end_p (gsi))
4402 break;
4405 continue;
4408 lhs = gimple_assign_lhs (stmt);
4409 rhs1 = gimple_assign_rhs1 (stmt);
4410 rhs2 = gimple_assign_rhs2 (stmt);
4412 /* For operations other than the bitwise and min/max ones
4413 we cannot associate all types. Verify that here. */
4414 if (rhs_code != BIT_IOR_EXPR
4415 && rhs_code != BIT_AND_EXPR
4416 && rhs_code != BIT_XOR_EXPR
4417 && rhs_code != MIN_EXPR
4418 && rhs_code != MAX_EXPR
4419 && (!can_reassociate_p (lhs)
4420 || !can_reassociate_p (rhs1)
4421 || !can_reassociate_p (rhs2)))
4422 continue;
4424 if (associative_tree_code (rhs_code))
4426 auto_vec<operand_entry_t> ops;
4427 tree powi_result = NULL_TREE;
4429 /* There may be no immediate uses left by the time we
4430 get here because we may have eliminated them all. */
4431 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4432 continue;
4434 gimple_set_visited (stmt, true);
4435 linearize_expr_tree (&ops, stmt, true, true);
4436 ops.qsort (sort_by_operand_rank);
4437 optimize_ops_list (rhs_code, &ops);
4438 if (undistribute_ops_list (rhs_code, &ops,
4439 loop_containing_stmt (stmt)))
4441 ops.qsort (sort_by_operand_rank);
4442 optimize_ops_list (rhs_code, &ops);
4445 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4446 optimize_range_tests (rhs_code, &ops);
4448 if (first_pass_instance
4449 && rhs_code == MULT_EXPR
4450 && flag_unsafe_math_optimizations)
4451 powi_result = attempt_builtin_powi (stmt, &ops);
4453 /* If the operand vector is now empty, all operands were
4454 consumed by the __builtin_powi optimization. */
4455 if (ops.length () == 0)
4456 transform_stmt_to_copy (&gsi, stmt, powi_result);
4457 else if (ops.length () == 1)
4459 tree last_op = ops.last ()->op;
4461 if (powi_result)
4462 transform_stmt_to_multiply (&gsi, stmt, last_op,
4463 powi_result);
4464 else
4465 transform_stmt_to_copy (&gsi, stmt, last_op);
4467 else
4469 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4470 int ops_num = ops.length ();
4471 int width = get_reassociation_width (ops_num, rhs_code, mode);
4472 tree new_lhs = lhs;
4474 if (dump_file && (dump_flags & TDF_DETAILS))
4475 fprintf (dump_file,
4476 "Width = %d was chosen for reassociation\n", width);
4478 if (width > 1
4479 && ops.length () > 3)
4480 rewrite_expr_tree_parallel (stmt, width, ops);
4481 else
4483 /* When there are three operands left, we want
4484 to make sure the ones that get the double
4485 binary op are chosen wisely. */
4486 int len = ops.length ();
4487 if (len >= 3)
4488 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4490 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4491 powi_result != NULL);
4494 /* If we combined some repeated factors into a
4495 __builtin_powi call, multiply that result by the
4496 reassociated operands. */
4497 if (powi_result)
4499 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4500 tree type = TREE_TYPE (lhs);
4501 tree target_ssa = make_temp_ssa_name (type, NULL,
4502 "reassocpow");
4503 gimple_set_lhs (lhs_stmt, target_ssa);
4504 update_stmt (lhs_stmt);
4505 if (lhs != new_lhs)
4506 target_ssa = new_lhs;
4507 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4508 powi_result,
4509 target_ssa);
4510 gimple_set_location (mul_stmt, gimple_location (stmt));
4511 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4517 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4518 son;
4519 son = next_dom_son (CDI_POST_DOMINATORS, son))
4520 reassociate_bb (son);
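/* In summary, for each associative statement the walk above performs:
   linearize_expr_tree, a sort by operand rank, optimize_ops_list,
   undistribute_ops_list (re-sorting and re-optimizing if it changed
   anything), optimize_range_tests for BIT_IOR_EXPR/BIT_AND_EXPR,
   attempt_builtin_powi for multiplies under -funsafe-math-optimizations
   on the first pass instance, and finally a rewrite of the statement via
   transform_stmt_to_copy, transform_stmt_to_multiply, rewrite_expr_tree
   or rewrite_expr_tree_parallel.  */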
4523 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4524 void debug_ops_vector (vec<operand_entry_t> ops);
4526 /* Dump the operand entry vector OPS to FILE. */
4528 void
4529 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4531 operand_entry_t oe;
4532 unsigned int i;
4534 FOR_EACH_VEC_ELT (ops, i, oe)
4536 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4537 print_generic_expr (file, oe->op, 0);
4541 /* Dump the operand entry vector OPS to STDERR. */
4543 DEBUG_FUNCTION void
4544 debug_ops_vector (vec<operand_entry_t> ops)
4546 dump_ops_vector (stderr, ops);
4549 static void
4550 do_reassoc (void)
4552 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4553 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
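/* Note that break_up_subtract_bb walks the dominator tree downward from
   the entry block, while reassociate_bb walks the post-dominator tree
   from the exit block, so statements are rewritten roughly from the
   bottom of the function upward.  */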
4556 /* Initialize the reassociation pass. */
4558 static void
4559 init_reassoc (void)
4561 int i;
4562 long rank = 2;
4563 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4565 /* Find the loops, so that we can prevent moving calculations in
4566 them. */
4567 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4569 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4571 operand_entry_pool = create_alloc_pool ("operand entry pool",
4572 sizeof (struct operand_entry), 30);
4573 next_operand_entry_id = 0;
4575 /* Reverse post order (RPO) will give us an ordering where
4576 deeper loops come later. */
4577 pre_and_rev_post_order_compute (NULL, bbs, false);
4578 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
4579 operand_rank = pointer_map_create ();
4581 /* Give each default definition a distinct rank. This includes
4582 parameters and the static chain. Walk backwards over all
4583 SSA names so that we get proper rank ordering according
4584 to tree_swap_operands_p. */
4585 for (i = num_ssa_names - 1; i > 0; --i)
4587 tree name = ssa_name (i);
4588 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4589 insert_operand_rank (name, ++rank);
4592 /* Set up rank for each BB */
4593 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
4594 bb_rank[bbs[i]] = ++rank << 16;
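/* The shift by 16 presumably leaves headroom below the next block's base
   rank for the per-statement ranks that get_rank derives from these
   values, so expression ranks from earlier blocks stay below the base
   rank of later blocks.  */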
4596 free (bbs);
4597 calculate_dominance_info (CDI_POST_DOMINATORS);
4598 plus_negates = vNULL;
4601 /* Cleanup after the reassociation pass, and print stats if
4602 requested. */
4604 static void
4605 fini_reassoc (void)
4607 statistics_counter_event (cfun, "Linearized",
4608 reassociate_stats.linearized);
4609 statistics_counter_event (cfun, "Constants eliminated",
4610 reassociate_stats.constants_eliminated);
4611 statistics_counter_event (cfun, "Ops eliminated",
4612 reassociate_stats.ops_eliminated);
4613 statistics_counter_event (cfun, "Statements rewritten",
4614 reassociate_stats.rewritten);
4615 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4616 reassociate_stats.pows_encountered);
4617 statistics_counter_event (cfun, "Built-in powi calls created",
4618 reassociate_stats.pows_created);
4620 pointer_map_destroy (operand_rank);
4621 free_alloc_pool (operand_entry_pool);
4622 free (bb_rank);
4623 plus_negates.release ();
4624 free_dominance_info (CDI_POST_DOMINATORS);
4625 loop_optimizer_finalize ();
4628 /* Gate and execute functions for Reassociation. */
4630 static unsigned int
4631 execute_reassoc (void)
4633 init_reassoc ();
4635 do_reassoc ();
4636 repropagate_negates ();
4638 fini_reassoc ();
4639 return 0;
4642 static bool
4643 gate_tree_ssa_reassoc (void)
4645 return flag_tree_reassoc != 0;
4648 namespace {
4650 const pass_data pass_data_reassoc =
4652 GIMPLE_PASS, /* type */
4653 "reassoc", /* name */
4654 OPTGROUP_NONE, /* optinfo_flags */
4655 true, /* has_gate */
4656 true, /* has_execute */
4657 TV_TREE_REASSOC, /* tv_id */
4658 ( PROP_cfg | PROP_ssa ), /* properties_required */
4659 0, /* properties_provided */
4660 0, /* properties_destroyed */
4661 0, /* todo_flags_start */
4662 ( TODO_verify_ssa
4663 | TODO_update_ssa_only_virtuals
4664 | TODO_verify_flow ), /* todo_flags_finish */
4667 class pass_reassoc : public gimple_opt_pass
4669 public:
4670 pass_reassoc (gcc::context *ctxt)
4671 : gimple_opt_pass (pass_data_reassoc, ctxt)
4674 /* opt_pass methods: */
4675 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
4676 bool gate () { return gate_tree_ssa_reassoc (); }
4677 unsigned int execute () { return execute_reassoc (); }
4679 }; // class pass_reassoc
4681 } // anon namespace
4683 gimple_opt_pass *
4684 make_pass_reassoc (gcc::context *ctxt)
4686 return new pass_reassoc (ctxt);