/* gcc/tree-ssa-reassoc.c  */
1 /* Reassociation for trees.
2 Copyright (C) 2005-2014 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "tree.h"
29 #include "stor-layout.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-inline.h"
33 #include "pointer-set.h"
34 #include "tree-ssa-alias.h"
35 #include "internal-fn.h"
36 #include "gimple-fold.h"
37 #include "tree-eh.h"
38 #include "gimple-expr.h"
39 #include "is-a.h"
40 #include "gimple.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "gimple-ssa.h"
44 #include "tree-cfg.h"
45 #include "tree-phinodes.h"
46 #include "ssa-iterators.h"
47 #include "stringpool.h"
48 #include "tree-ssanames.h"
49 #include "tree-ssa-loop-niter.h"
50 #include "tree-ssa-loop.h"
51 #include "expr.h"
52 #include "tree-dfa.h"
53 #include "tree-ssa.h"
54 #include "tree-iterator.h"
55 #include "tree-pass.h"
56 #include "alloc-pool.h"
57 #include "langhooks.h"
58 #include "cfgloop.h"
59 #include "flags.h"
60 #include "target.h"
61 #include "params.h"
62 #include "diagnostic-core.h"
64 /* This is a simple global reassociation pass. It is, in part, based
65 on the LLVM pass of the same name (They do some things more/less
66 than we do, in different orders, etc).
68 It consists of five steps:
70 1. Breaking up subtract operations into addition + negate, where
71 it would promote the reassociation of adds.
73 2. Left linearization of the expression trees, so that (A+B)+(C+D)
74 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
75 During linearization, we place the operands of the binary
76 expressions into a vector of operand_entry_t
78 3. Optimization of the operand lists, eliminating things like a +
79 -a, a & a, etc.
81 3a. Combine repeated factors with the same occurrence counts
82 into a __builtin_powi call that will later be optimized into
83 an optimal number of multiplies.
85 4. Rewrite the expression trees we linearized and optimized so
86 they are in proper rank order.
88 5. Repropagate negates, as nothing else will clean it up ATM.
90 A bit of theory on #4, since nobody seems to write anything down
91 about why it makes sense to do it the way they do it:
93 We could do this much nicer theoretically, but don't (for reasons
94 explained after how to do it theoretically nice :P).
96 In order to promote the most redundancy elimination, you want
97 binary expressions whose operands are the same rank (or
98 preferably, the same value) exposed to the redundancy eliminator,
99 for possible elimination.
101 So the way to do this if we really cared, is to build the new op
102 tree from the leaves to the roots, merging as you go, and putting the
103 new op on the end of the worklist, until you are left with one
104 thing on the worklist.
106 IE if you have to rewrite the following set of operands (listed with
107 rank in parentheses), with opcode PLUS_EXPR:
109 a (1), b (1), c (1), d (2), e (2)
112 We start with our merge worklist empty, and the ops list with all of
113 those on it.
115 You want to first merge all leaves of the same rank, as much as
116 possible.
118 So first build a binary op of
120 mergetmp = a + b, and put "mergetmp" on the merge worklist.
122 Because there is no three operand form of PLUS_EXPR, c is not going to
123 be exposed to redundancy elimination as a rank 1 operand.
125 So you might as well throw it on the merge worklist (you could also
126 consider it to now be a rank two operand, and merge it with d and e,
127 but in this case, you then have evicted e from a binary op. So at
128 least in this situation, you can't win.)
130 Then build a binary op of d + e
131 mergetmp2 = d + e
133 and put mergetmp2 on the merge worklist.
135 so merge worklist = {mergetmp, c, mergetmp2}
137 Continue building binary ops of these operations until you have only
138 one operation left on the worklist.
140 So we have
142 build binary op
143 mergetmp3 = mergetmp + c
145 worklist = {mergetmp2, mergetmp3}
147 mergetmp4 = mergetmp2 + mergetmp3
149 worklist = {mergetmp4}
151 because we have one operation left, we can now just set the original
152 statement equal to the result of that operation.
154 This will at least expose a + b and d + e to redundancy elimination
155 as binary operations.
157 For extra points, you can reuse the old statements to build the
158 mergetmps, since you shouldn't run out.
160 So why don't we do this?
162 Because it's expensive, and rarely will help. Most trees we are
163 reassociating have 3 or less ops. If they have 2 ops, they already
164 will be written into a nice single binary op. If you have 3 ops, a
165 single simple check suffices to tell you whether the first two are of the
166 same rank. If so, you know to order it
168 mergetmp = op1 + op2
169 newstmt = mergetmp + op3
171 instead of
172 mergetmp = op2 + op3
173 newstmt = mergetmp + op1
175 If all three are of the same rank, you can't expose them all in a
176 single binary operator anyway, so the above is *still* the best you
177 can do.
179 Thus, this is what we do. When we have three ops left, we check to see
180 what order to put them in, and call it a day. As a nod to vector sum
181 reduction, we check if any of the ops are really a phi node that is a
182 destructive update for the associating op, and keep the destructive
183 update together for vector sum reduction recognition. */
186 /* Statistics */
187 static struct
189 int linearized;
190 int constants_eliminated;
191 int ops_eliminated;
192 int rewritten;
193 int pows_encountered;
194 int pows_created;
195 } reassociate_stats;
197 /* Operator, rank pair. */
198 typedef struct operand_entry
200 unsigned int rank;
201 int id;
202 tree op;
203 unsigned int count;
204 } *operand_entry_t;
206 static alloc_pool operand_entry_pool;
208 /* This is used to assign a unique ID to each struct operand_entry
209 so that qsort results are identical on different hosts. */
210 static int next_operand_entry_id;
212 /* Starting rank number for a given basic block, so that we can rank
213 operations using unmovable instructions in that BB based on the bb
214 depth. */
215 static long *bb_rank;
217 /* Operand->rank hashtable. */
218 static struct pointer_map_t *operand_rank;
220 /* Forward decls. */
221 static long get_rank (tree);
224 /* Bias amount for loop-carried phis. We want this to be larger than
225 the depth of any reassociation tree we can see, but not larger than
226 the rank difference between two blocks. */
227 #define PHI_LOOP_BIAS (1 << 15)
229 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
230 an innermost loop, and the phi has only a single use which is inside
231 the loop, then the rank is the block rank of the loop latch plus an
232 extra bias for the loop-carried dependence. This causes expressions
233 calculated into an accumulator variable to be independent for each
234 iteration of the loop. If STMT is some other phi, the rank is the
235 block rank of its containing block. */
236 static long
237 phi_rank (gimple stmt)
239 basic_block bb = gimple_bb (stmt);
240 struct loop *father = bb->loop_father;
241 tree res;
242 unsigned i;
243 use_operand_p use;
244 gimple use_stmt;
246 /* We only care about real loops (those with a latch). */
247 if (!father->latch)
248 return bb_rank[bb->index];
250 /* Interesting phis must be in headers of innermost loops. */
251 if (bb != father->header
252 || father->inner)
253 return bb_rank[bb->index];
255 /* Ignore virtual SSA_NAMEs. */
256 res = gimple_phi_result (stmt);
257 if (virtual_operand_p (res))
258 return bb_rank[bb->index];
260 /* The phi definition must have a single use, and that use must be
261 within the loop. Otherwise this isn't an accumulator pattern. */
262 if (!single_imm_use (res, &use, &use_stmt)
263 || gimple_bb (use_stmt)->loop_father != father)
264 return bb_rank[bb->index];
266 /* Look for phi arguments from within the loop. If found, bias this phi. */
267 for (i = 0; i < gimple_phi_num_args (stmt); i++)
269 tree arg = gimple_phi_arg_def (stmt, i);
270 if (TREE_CODE (arg) == SSA_NAME
271 && !SSA_NAME_IS_DEFAULT_DEF (arg))
273 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
274 if (gimple_bb (def_stmt)->loop_father == father)
275 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
279 /* Must be an uninteresting phi. */
280 return bb_rank[bb->index];
283 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
284 loop-carried dependence of an innermost loop, return TRUE; else
285 return FALSE. */
286 static bool
287 loop_carried_phi (tree exp)
289 gimple phi_stmt;
290 long block_rank;
292 if (TREE_CODE (exp) != SSA_NAME
293 || SSA_NAME_IS_DEFAULT_DEF (exp))
294 return false;
296 phi_stmt = SSA_NAME_DEF_STMT (exp);
298 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
299 return false;
301 /* Non-loop-carried phis have block rank. Loop-carried phis have
302 an additional bias added in. If this phi doesn't have block rank,
303 it's biased and should not be propagated. */
304 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
306 if (phi_rank (phi_stmt) != block_rank)
307 return true;
309 return false;
312 /* Return the maximum of RANK and the rank that should be propagated
313 from expression OP. For most operands, this is just the rank of OP.
314 For loop-carried phis, the value is zero to avoid undoing the bias
315 in favor of the phi. */
316 static long
317 propagate_rank (long rank, tree op)
319 long op_rank;
321 if (loop_carried_phi (op))
322 return rank;
324 op_rank = get_rank (op);
326 return MAX (rank, op_rank);
329 /* Look up the operand rank structure for expression E. */
331 static inline long
332 find_operand_rank (tree e)
334 void **slot = pointer_map_contains (operand_rank, e);
335 return slot ? (long) (intptr_t) *slot : -1;
338 /* Insert {E,RANK} into the operand rank hashtable. */
340 static inline void
341 insert_operand_rank (tree e, long rank)
343 void **slot;
344 gcc_assert (rank > 0);
345 slot = pointer_map_insert (operand_rank, e);
346 gcc_assert (!*slot);
347 *slot = (void *) (intptr_t) rank;
350 /* Given an expression E, return the rank of the expression. */
352 static long
353 get_rank (tree e)
355 /* Constants have rank 0. */
356 if (is_gimple_min_invariant (e))
357 return 0;
359 /* SSA_NAME's have the rank of the expression they are the result
361 For globals and uninitialized values, the rank is 0.
362 For function arguments, use the pre-setup rank.
363 For PHI nodes, stores, asm statements, etc, we use the rank of
364 the BB.
365 For simple operations, the rank is the maximum rank of any of
366 its operands, or the bb_rank, whichever is less.
367 I make no claims that this is optimal, however, it gives good
368 results. */
370 /* We make an exception to the normal ranking system to break
371 dependences of accumulator variables in loops. Suppose we
372 have a simple one-block loop containing:
374 x_1 = phi(x_0, x_2)
375 b = a + x_1
376 c = b + d
377 x_2 = c + e
379 As shown, each iteration of the calculation into x is fully
380 dependent upon the iteration before it. We would prefer to
381 see this in the form:
383 x_1 = phi(x_0, x_2)
384 b = a + d
385 c = b + e
386 x_2 = c + x_1
388 If the loop is unrolled, the calculations of b and c from
389 different iterations can be interleaved.
391 To obtain this result during reassociation, we bias the rank
392 of the phi definition x_1 upward, when it is recognized as an
393 accumulator pattern. The artificial rank causes it to be
394 added last, providing the desired independence. */
396 if (TREE_CODE (e) == SSA_NAME)
398 gimple stmt;
399 long rank;
400 int i, n;
401 tree op;
403 if (SSA_NAME_IS_DEFAULT_DEF (e))
404 return find_operand_rank (e);
406 stmt = SSA_NAME_DEF_STMT (e);
407 if (gimple_code (stmt) == GIMPLE_PHI)
408 return phi_rank (stmt);
410 if (!is_gimple_assign (stmt)
411 || gimple_vdef (stmt))
412 return bb_rank[gimple_bb (stmt)->index];
414 /* If we already have a rank for this expression, use that. */
415 rank = find_operand_rank (e);
416 if (rank != -1)
417 return rank;
419 /* Otherwise, find the maximum rank for the operands. As an
420 exception, remove the bias from loop-carried phis when propagating
421 the rank so that dependent operations are not also biased. */
422 rank = 0;
423 if (gimple_assign_single_p (stmt))
425 tree rhs = gimple_assign_rhs1 (stmt);
426 n = TREE_OPERAND_LENGTH (rhs);
427 if (n == 0)
428 rank = propagate_rank (rank, rhs);
429 else
431 for (i = 0; i < n; i++)
433 op = TREE_OPERAND (rhs, i);
435 if (op != NULL_TREE)
436 rank = propagate_rank (rank, op);
440 else
442 n = gimple_num_ops (stmt);
443 for (i = 1; i < n; i++)
445 op = gimple_op (stmt, i);
446 gcc_assert (op);
447 rank = propagate_rank (rank, op);
451 if (dump_file && (dump_flags & TDF_DETAILS))
453 fprintf (dump_file, "Rank for ");
454 print_generic_expr (dump_file, e, 0);
455 fprintf (dump_file, " is %ld\n", (rank + 1));
458 /* Note the rank in the hashtable so we don't recompute it. */
459 insert_operand_rank (e, (rank + 1));
460 return (rank + 1);
463 /* Globals, etc, are rank 0 */
464 return 0;
468 /* We want integer ones to end up last no matter what, since they are
469 the ones we can do the most with. */
470 #define INTEGER_CONST_TYPE 1 << 3
471 #define FLOAT_CONST_TYPE 1 << 2
472 #define OTHER_CONST_TYPE 1 << 1
474 /* Classify an invariant tree into integer, float, or other, so that
475 we can sort them to be near other constants of the same type. */
476 static inline int
477 constant_type (tree t)
479 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
480 return INTEGER_CONST_TYPE;
481 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
482 return FLOAT_CONST_TYPE;
483 else
484 return OTHER_CONST_TYPE;
487 /* qsort comparison function to sort operand entries PA and PB by rank
488 so that the sorted array is ordered by rank in decreasing order. */
489 static int
490 sort_by_operand_rank (const void *pa, const void *pb)
492 const operand_entry_t oea = *(const operand_entry_t *)pa;
493 const operand_entry_t oeb = *(const operand_entry_t *)pb;
495 /* It's nicer for optimize_expression if constants that are likely
496 to fold when added/multiplied//whatever are put next to each
497 other. Since all constants have rank 0, order them by type. */
498 if (oeb->rank == 0 && oea->rank == 0)
500 if (constant_type (oeb->op) != constant_type (oea->op))
501 return constant_type (oeb->op) - constant_type (oea->op);
502 else
503 /* To make sorting result stable, we use unique IDs to determine
504 order. */
505 return oeb->id - oea->id;
508 /* Lastly, make sure the versions that are the same go next to each
509 other. We use SSA_NAME_VERSION because it's stable. */
510 if ((oeb->rank - oea->rank == 0)
511 && TREE_CODE (oea->op) == SSA_NAME
512 && TREE_CODE (oeb->op) == SSA_NAME)
514 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
515 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
516 else
517 return oeb->id - oea->id;
520 if (oeb->rank != oea->rank)
521 return oeb->rank - oea->rank;
522 else
523 return oeb->id - oea->id;
526 /* Add an operand entry to *OPS for the tree operand OP. */
528 static void
529 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
531 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
533 oe->op = op;
534 oe->rank = get_rank (op);
535 oe->id = next_operand_entry_id++;
536 oe->count = 1;
537 ops->safe_push (oe);
540 /* Add an operand entry to *OPS for the tree operand OP with repeat
541 count REPEAT. */
543 static void
544 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
545 HOST_WIDE_INT repeat)
547 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
549 oe->op = op;
550 oe->rank = get_rank (op);
551 oe->id = next_operand_entry_id++;
552 oe->count = repeat;
553 ops->safe_push (oe);
555 reassociate_stats.pows_encountered++;
558 /* Return true if STMT is reassociable operation containing a binary
559 operation with tree code CODE, and is inside LOOP. */
561 static bool
562 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
564 basic_block bb = gimple_bb (stmt);
566 if (gimple_bb (stmt) == NULL)
567 return false;
569 if (!flow_bb_inside_loop_p (loop, bb))
570 return false;
572 if (is_gimple_assign (stmt)
573 && gimple_assign_rhs_code (stmt) == code
574 && has_single_use (gimple_assign_lhs (stmt)))
575 return true;
577 return false;
581 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
582 operand of the negate operation. Otherwise, return NULL. */
584 static tree
585 get_unary_op (tree name, enum tree_code opcode)
587 gimple stmt = SSA_NAME_DEF_STMT (name);
589 if (!is_gimple_assign (stmt))
590 return NULL_TREE;
592 if (gimple_assign_rhs_code (stmt) == opcode)
593 return gimple_assign_rhs1 (stmt);
594 return NULL_TREE;
597 /* If CURR and LAST are a pair of ops that OPCODE allows us to
598 eliminate through equivalences, do so, remove them from OPS, and
599 return true. Otherwise, return false. */
601 static bool
602 eliminate_duplicate_pair (enum tree_code opcode,
603 vec<operand_entry_t> *ops,
604 bool *all_done,
605 unsigned int i,
606 operand_entry_t curr,
607 operand_entry_t last)
610 /* If we have two of the same op, and the opcode is & |, min, or max,
611 we can eliminate one of them.
612 If we have two of the same op, and the opcode is ^, we can
613 eliminate both of them. */
615 if (last && last->op == curr->op)
617 switch (opcode)
619 case MAX_EXPR:
620 case MIN_EXPR:
621 case BIT_IOR_EXPR:
622 case BIT_AND_EXPR:
623 if (dump_file && (dump_flags & TDF_DETAILS))
625 fprintf (dump_file, "Equivalence: ");
626 print_generic_expr (dump_file, curr->op, 0);
627 fprintf (dump_file, " [&|minmax] ");
628 print_generic_expr (dump_file, last->op, 0);
629 fprintf (dump_file, " -> ");
630 print_generic_stmt (dump_file, last->op, 0);
633 ops->ordered_remove (i);
634 reassociate_stats.ops_eliminated ++;
636 return true;
638 case BIT_XOR_EXPR:
639 if (dump_file && (dump_flags & TDF_DETAILS))
641 fprintf (dump_file, "Equivalence: ");
642 print_generic_expr (dump_file, curr->op, 0);
643 fprintf (dump_file, " ^ ");
644 print_generic_expr (dump_file, last->op, 0);
645 fprintf (dump_file, " -> nothing\n");
648 reassociate_stats.ops_eliminated += 2;
650 if (ops->length () == 2)
652 ops->create (0);
653 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
654 *all_done = true;
656 else
658 ops->ordered_remove (i-1);
659 ops->ordered_remove (i-1);
662 return true;
664 default:
665 break;
668 return false;
671 static vec<tree> plus_negates;
673 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
674 expression, look in OPS for a corresponding positive operation to cancel
675 it out. If we find one, remove the other from OPS, replace
676 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
677 return false. */
679 static bool
680 eliminate_plus_minus_pair (enum tree_code opcode,
681 vec<operand_entry_t> *ops,
682 unsigned int currindex,
683 operand_entry_t curr)
685 tree negateop;
686 tree notop;
687 unsigned int i;
688 operand_entry_t oe;
690 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
691 return false;
693 negateop = get_unary_op (curr->op, NEGATE_EXPR);
694 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
695 if (negateop == NULL_TREE && notop == NULL_TREE)
696 return false;
698 /* Any non-negated version will have a rank that is one less than
699 the current rank. So once we hit those ranks, if we don't find
700 one, we can stop. */
702 for (i = currindex + 1;
703 ops->iterate (i, &oe)
704 && oe->rank >= curr->rank - 1 ;
705 i++)
707 if (oe->op == negateop)
710 if (dump_file && (dump_flags & TDF_DETAILS))
712 fprintf (dump_file, "Equivalence: ");
713 print_generic_expr (dump_file, negateop, 0);
714 fprintf (dump_file, " + -");
715 print_generic_expr (dump_file, oe->op, 0);
716 fprintf (dump_file, " -> 0\n");
719 ops->ordered_remove (i);
720 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
721 ops->ordered_remove (currindex);
722 reassociate_stats.ops_eliminated ++;
724 return true;
726 else if (oe->op == notop)
728 tree op_type = TREE_TYPE (oe->op);
730 if (dump_file && (dump_flags & TDF_DETAILS))
732 fprintf (dump_file, "Equivalence: ");
733 print_generic_expr (dump_file, notop, 0);
734 fprintf (dump_file, " + ~");
735 print_generic_expr (dump_file, oe->op, 0);
736 fprintf (dump_file, " -> -1\n");
739 ops->ordered_remove (i);
740 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
741 ops->ordered_remove (currindex);
742 reassociate_stats.ops_eliminated ++;
744 return true;
748 /* CURR->OP is a negate expr in a plus expr: save it for later
749 inspection in repropagate_negates(). */
750 if (negateop != NULL_TREE)
751 plus_negates.safe_push (curr->op);
753 return false;
756 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
757 bitwise not expression, look in OPS for a corresponding operand to
758 cancel it out. If we find one, remove the other from OPS, replace
759 OPS[CURRINDEX] with 0, and return true. Otherwise, return
760 false. */
762 static bool
763 eliminate_not_pairs (enum tree_code opcode,
764 vec<operand_entry_t> *ops,
765 unsigned int currindex,
766 operand_entry_t curr)
768 tree notop;
769 unsigned int i;
770 operand_entry_t oe;
772 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
773 || TREE_CODE (curr->op) != SSA_NAME)
774 return false;
776 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
777 if (notop == NULL_TREE)
778 return false;
780 /* Any non-not version will have a rank that is one less than
781 the current rank. So once we hit those ranks, if we don't find
782 one, we can stop. */
784 for (i = currindex + 1;
785 ops->iterate (i, &oe)
786 && oe->rank >= curr->rank - 1;
787 i++)
789 if (oe->op == notop)
791 if (dump_file && (dump_flags & TDF_DETAILS))
793 fprintf (dump_file, "Equivalence: ");
794 print_generic_expr (dump_file, notop, 0);
795 if (opcode == BIT_AND_EXPR)
796 fprintf (dump_file, " & ~");
797 else if (opcode == BIT_IOR_EXPR)
798 fprintf (dump_file, " | ~");
799 print_generic_expr (dump_file, oe->op, 0);
800 if (opcode == BIT_AND_EXPR)
801 fprintf (dump_file, " -> 0\n");
802 else if (opcode == BIT_IOR_EXPR)
803 fprintf (dump_file, " -> -1\n");
806 if (opcode == BIT_AND_EXPR)
807 oe->op = build_zero_cst (TREE_TYPE (oe->op));
808 else if (opcode == BIT_IOR_EXPR)
809 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
810 TYPE_PRECISION (TREE_TYPE (oe->op)));
812 reassociate_stats.ops_eliminated += ops->length () - 1;
813 ops->truncate (0);
814 ops->quick_push (oe);
815 return true;
819 return false;
822 /* Use constant value that may be present in OPS to try to eliminate
823 operands. Note that this function is only really used when we've
824 eliminated ops for other reasons, or merged constants. Across
825 single statements, fold already does all of this, plus more. There
826 is little point in duplicating logic, so I've only included the
827 identities that I could ever construct testcases to trigger. */
829 static void
830 eliminate_using_constants (enum tree_code opcode,
831 vec<operand_entry_t> *ops)
833 operand_entry_t oelast = ops->last ();
834 tree type = TREE_TYPE (oelast->op);
836 if (oelast->rank == 0
837 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
839 switch (opcode)
841 case BIT_AND_EXPR:
842 if (integer_zerop (oelast->op))
844 if (ops->length () != 1)
846 if (dump_file && (dump_flags & TDF_DETAILS))
847 fprintf (dump_file, "Found & 0, removing all other ops\n");
849 reassociate_stats.ops_eliminated += ops->length () - 1;
851 ops->truncate (0);
852 ops->quick_push (oelast);
853 return;
856 else if (integer_all_onesp (oelast->op))
858 if (ops->length () != 1)
860 if (dump_file && (dump_flags & TDF_DETAILS))
861 fprintf (dump_file, "Found & -1, removing\n");
862 ops->pop ();
863 reassociate_stats.ops_eliminated++;
866 break;
867 case BIT_IOR_EXPR:
868 if (integer_all_onesp (oelast->op))
870 if (ops->length () != 1)
872 if (dump_file && (dump_flags & TDF_DETAILS))
873 fprintf (dump_file, "Found | -1, removing all other ops\n");
875 reassociate_stats.ops_eliminated += ops->length () - 1;
877 ops->truncate (0);
878 ops->quick_push (oelast);
879 return;
882 else if (integer_zerop (oelast->op))
884 if (ops->length () != 1)
886 if (dump_file && (dump_flags & TDF_DETAILS))
887 fprintf (dump_file, "Found | 0, removing\n");
888 ops->pop ();
889 reassociate_stats.ops_eliminated++;
892 break;
893 case MULT_EXPR:
894 if (integer_zerop (oelast->op)
895 || (FLOAT_TYPE_P (type)
896 && !HONOR_NANS (TYPE_MODE (type))
897 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
898 && real_zerop (oelast->op)))
900 if (ops->length () != 1)
902 if (dump_file && (dump_flags & TDF_DETAILS))
903 fprintf (dump_file, "Found * 0, removing all other ops\n");
905 reassociate_stats.ops_eliminated += ops->length () - 1;
906 ops->truncate (1);
907 ops->quick_push (oelast);
908 return;
911 else if (integer_onep (oelast->op)
912 || (FLOAT_TYPE_P (type)
913 && !HONOR_SNANS (TYPE_MODE (type))
914 && real_onep (oelast->op)))
916 if (ops->length () != 1)
918 if (dump_file && (dump_flags & TDF_DETAILS))
919 fprintf (dump_file, "Found * 1, removing\n");
920 ops->pop ();
921 reassociate_stats.ops_eliminated++;
922 return;
925 break;
926 case BIT_XOR_EXPR:
927 case PLUS_EXPR:
928 case MINUS_EXPR:
929 if (integer_zerop (oelast->op)
930 || (FLOAT_TYPE_P (type)
931 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
932 && fold_real_zero_addition_p (type, oelast->op,
933 opcode == MINUS_EXPR)))
935 if (ops->length () != 1)
937 if (dump_file && (dump_flags & TDF_DETAILS))
938 fprintf (dump_file, "Found [|^+] 0, removing\n");
939 ops->pop ();
940 reassociate_stats.ops_eliminated++;
941 return;
944 break;
945 default:
946 break;
952 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
953 bool, bool);
955 /* Structure for tracking and counting operands. */
956 typedef struct oecount_s {
957 int cnt;
958 int id;
959 enum tree_code oecode;
960 tree op;
961 } oecount;
964 /* The heap for the oecount hashtable and the sorted list of operands. */
965 static vec<oecount> cvec;
968 /* Oecount hashtable helpers. */
970 struct oecount_hasher : typed_noop_remove <void>
972 /* Note that this hash table stores integers, not pointers.
973 So, observe the casting in the member functions. */
974 typedef void value_type;
975 typedef void compare_type;
976 static inline hashval_t hash (const value_type *);
977 static inline bool equal (const value_type *, const compare_type *);
980 /* Hash function for oecount. */
982 inline hashval_t
983 oecount_hasher::hash (const value_type *p)
985 const oecount *c = &cvec[(size_t)p - 42];
986 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
989 /* Comparison function for oecount. */
991 inline bool
992 oecount_hasher::equal (const value_type *p1, const compare_type *p2)
994 const oecount *c1 = &cvec[(size_t)p1 - 42];
995 const oecount *c2 = &cvec[(size_t)p2 - 42];
996 return (c1->oecode == c2->oecode
997 && c1->op == c2->op);
1000 /* Comparison function for qsort sorting oecount elements by count. */
1002 static int
1003 oecount_cmp (const void *p1, const void *p2)
1005 const oecount *c1 = (const oecount *)p1;
1006 const oecount *c2 = (const oecount *)p2;
1007 if (c1->cnt != c2->cnt)
1008 return c1->cnt - c2->cnt;
1009 else
1010 /* If counts are identical, use unique IDs to stabilize qsort. */
1011 return c1->id - c2->id;
1014 /* Return TRUE iff STMT represents a builtin call that raises OP
1015 to some exponent. */
1017 static bool
1018 stmt_is_power_of_op (gimple stmt, tree op)
1020 tree fndecl;
1022 if (!is_gimple_call (stmt))
1023 return false;
1025 fndecl = gimple_call_fndecl (stmt);
1027 if (!fndecl
1028 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1029 return false;
1031 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1033 CASE_FLT_FN (BUILT_IN_POW):
1034 CASE_FLT_FN (BUILT_IN_POWI):
1035 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1037 default:
1038 return false;
1042 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1043 in place and return the result. Assumes that stmt_is_power_of_op
1044 was previously called for STMT and returned TRUE. */
1046 static HOST_WIDE_INT
1047 decrement_power (gimple stmt)
1049 REAL_VALUE_TYPE c, cint;
1050 HOST_WIDE_INT power;
1051 tree arg1;
1053 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1055 CASE_FLT_FN (BUILT_IN_POW):
1056 arg1 = gimple_call_arg (stmt, 1);
1057 c = TREE_REAL_CST (arg1);
1058 power = real_to_integer (&c) - 1;
1059 real_from_integer (&cint, VOIDmode, power, 0, 0);
1060 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1061 return power;
1063 CASE_FLT_FN (BUILT_IN_POWI):
1064 arg1 = gimple_call_arg (stmt, 1);
1065 power = TREE_INT_CST_LOW (arg1) - 1;
1066 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1067 return power;
1069 default:
1070 gcc_unreachable ();
1074 /* Find the single immediate use of STMT's LHS, and replace it
1075 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1076 replace *DEF with OP as well. */
1078 static void
1079 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1081 tree lhs;
1082 gimple use_stmt;
1083 use_operand_p use;
1084 gimple_stmt_iterator gsi;
1086 if (is_gimple_call (stmt))
1087 lhs = gimple_call_lhs (stmt);
1088 else
1089 lhs = gimple_assign_lhs (stmt);
1091 gcc_assert (has_single_use (lhs));
1092 single_imm_use (lhs, &use, &use_stmt);
1093 if (lhs == *def)
1094 *def = op;
1095 SET_USE (use, op);
1096 if (TREE_CODE (op) != SSA_NAME)
1097 update_stmt (use_stmt);
1098 gsi = gsi_for_stmt (stmt);
1099 unlink_stmt_vdef (stmt);
1100 gsi_remove (&gsi, true);
1101 release_defs (stmt);
1104 /* Walks the linear chain with result *DEF searching for an operation
1105 with operand OP and code OPCODE removing that from the chain. *DEF
1106 is updated if there is only one operand but no operation left. */
1108 static void
1109 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1111 gimple stmt = SSA_NAME_DEF_STMT (*def);
1115 tree name;
1117 if (opcode == MULT_EXPR
1118 && stmt_is_power_of_op (stmt, op))
1120 if (decrement_power (stmt) == 1)
1121 propagate_op_to_single_use (op, stmt, def);
1122 return;
1125 name = gimple_assign_rhs1 (stmt);
1127 /* If this is the operation we look for and one of the operands
1128 is ours simply propagate the other operand into the stmts
1129 single use. */
1130 if (gimple_assign_rhs_code (stmt) == opcode
1131 && (name == op
1132 || gimple_assign_rhs2 (stmt) == op))
1134 if (name == op)
1135 name = gimple_assign_rhs2 (stmt);
1136 propagate_op_to_single_use (name, stmt, def);
1137 return;
1140 /* We might have a multiply of two __builtin_pow* calls, and
1141 the operand might be hiding in the rightmost one. */
1142 if (opcode == MULT_EXPR
1143 && gimple_assign_rhs_code (stmt) == opcode
1144 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1145 && has_single_use (gimple_assign_rhs2 (stmt)))
1147 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1148 if (stmt_is_power_of_op (stmt2, op))
1150 if (decrement_power (stmt2) == 1)
1151 propagate_op_to_single_use (op, stmt2, def);
1152 return;
1156 /* Continue walking the chain. */
1157 gcc_assert (name != op
1158 && TREE_CODE (name) == SSA_NAME);
1159 stmt = SSA_NAME_DEF_STMT (name);
1161 while (1);
1164 /* Returns true if statement S1 dominates statement S2. Like
1165 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1167 static bool
1168 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1170 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1172 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1173 SSA_NAME. Assume it lives at the beginning of function and
1174 thus dominates everything. */
1175 if (!bb1 || s1 == s2)
1176 return true;
1178 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1179 if (!bb2)
1180 return false;
1182 if (bb1 == bb2)
1184 /* PHIs in the same basic block are assumed to be
1185 executed all in parallel, if only one stmt is a PHI,
1186 it dominates the other stmt in the same basic block. */
1187 if (gimple_code (s1) == GIMPLE_PHI)
1188 return true;
1190 if (gimple_code (s2) == GIMPLE_PHI)
1191 return false;
1193 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1195 if (gimple_uid (s1) < gimple_uid (s2))
1196 return true;
1198 if (gimple_uid (s1) > gimple_uid (s2))
1199 return false;
1201 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1202 unsigned int uid = gimple_uid (s1);
1203 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1205 gimple s = gsi_stmt (gsi);
1206 if (gimple_uid (s) != uid)
1207 break;
1208 if (s == s2)
1209 return true;
1212 return false;
1215 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1218 /* Insert STMT after INSERT_POINT. */
1220 static void
1221 insert_stmt_after (gimple stmt, gimple insert_point)
1223 gimple_stmt_iterator gsi;
1224 basic_block bb;
1226 if (gimple_code (insert_point) == GIMPLE_PHI)
1227 bb = gimple_bb (insert_point);
1228 else if (!stmt_ends_bb_p (insert_point))
1230 gsi = gsi_for_stmt (insert_point);
1231 gimple_set_uid (stmt, gimple_uid (insert_point));
1232 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1233 return;
1235 else
1236 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1237 thus if it must end a basic block, it should be a call that can
1238 throw, or some assignment that can throw. If it throws, the LHS
1239 of it will not be initialized though, so only valid places using
1240 the SSA_NAME should be dominated by the fallthru edge. */
1241 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1242 gsi = gsi_after_labels (bb);
1243 if (gsi_end_p (gsi))
1245 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1246 gimple_set_uid (stmt,
1247 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1249 else
1250 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1251 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1254 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1255 the result. Places the statement after the definition of either
1256 OP1 or OP2. Returns the new statement. */
1258 static gimple
1259 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1261 gimple op1def = NULL, op2def = NULL;
1262 gimple_stmt_iterator gsi;
1263 tree op;
1264 gimple sum;
1266 /* Create the addition statement. */
1267 op = make_ssa_name (type, NULL);
1268 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1270 /* Find an insertion place and insert. */
1271 if (TREE_CODE (op1) == SSA_NAME)
1272 op1def = SSA_NAME_DEF_STMT (op1);
1273 if (TREE_CODE (op2) == SSA_NAME)
1274 op2def = SSA_NAME_DEF_STMT (op2);
1275 if ((!op1def || gimple_nop_p (op1def))
1276 && (!op2def || gimple_nop_p (op2def)))
1278 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1279 if (gsi_end_p (gsi))
1281 gimple_stmt_iterator gsi2
1282 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1283 gimple_set_uid (sum,
1284 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1286 else
1287 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1288 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1290 else
1292 gimple insert_point;
1293 if ((!op1def || gimple_nop_p (op1def))
1294 || (op2def && !gimple_nop_p (op2def)
1295 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1296 insert_point = op2def;
1297 else
1298 insert_point = op1def;
1299 insert_stmt_after (sum, insert_point);
1301 update_stmt (sum);
1303 return sum;
1306 /* Perform un-distribution of divisions and multiplications.
1307 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1308 to (A + B) / X for real X.
1310 The algorithm is organized as follows.
1312 - First we walk the addition chain *OPS looking for summands that
1313 are defined by a multiplication or a real division. This results
1314 in the candidates bitmap with relevant indices into *OPS.
1316 - Second we build the chains of multiplications or divisions for
1317 these candidates, counting the number of occurrences of (operand, code)
1318 pairs in all of the candidates chains.
1320 - Third we sort the (operand, code) pairs by number of occurrence and
1321 process them starting with the pair with the most uses.
1323 * For each such pair we walk the candidates again to build a
1324 second candidate bitmap noting all multiplication/division chains
1325 that have at least one occurrence of (operand, code).
1327 * We build an alternate addition chain only covering these
1328 candidates with one (operand, code) operation removed from their
1329 multiplication/division chain.
1331 * The first candidate gets replaced by the alternate addition chain
1332 multiplied/divided by the operand.
1334 * All candidate chains get disabled for further processing and
1335 processing of (operand, code) pairs continues.
1337 The alternate addition chains built are re-processed by the main
1338 reassociation algorithm which allows optimizing a * x * y + b * y * x
1339 to (a + b ) * x * y in one invocation of the reassociation pass. */
1341 static bool
1342 undistribute_ops_list (enum tree_code opcode,
1343 vec<operand_entry_t> *ops, struct loop *loop)
1345 unsigned int length = ops->length ();
1346 operand_entry_t oe1;
1347 unsigned i, j;
1348 sbitmap candidates, candidates2;
1349 unsigned nr_candidates, nr_candidates2;
1350 sbitmap_iterator sbi0;
1351 vec<operand_entry_t> *subops;
1352 hash_table <oecount_hasher> ctable;
1353 bool changed = false;
1354 int next_oecount_id = 0;
1356 if (length <= 1
1357 || opcode != PLUS_EXPR)
1358 return false;
1360 /* Build a list of candidates to process. */
1361 candidates = sbitmap_alloc (length);
1362 bitmap_clear (candidates);
1363 nr_candidates = 0;
1364 FOR_EACH_VEC_ELT (*ops, i, oe1)
1366 enum tree_code dcode;
1367 gimple oe1def;
1369 if (TREE_CODE (oe1->op) != SSA_NAME)
1370 continue;
1371 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1372 if (!is_gimple_assign (oe1def))
1373 continue;
1374 dcode = gimple_assign_rhs_code (oe1def);
1375 if ((dcode != MULT_EXPR
1376 && dcode != RDIV_EXPR)
1377 || !is_reassociable_op (oe1def, dcode, loop))
1378 continue;
1380 bitmap_set_bit (candidates, i);
1381 nr_candidates++;
1384 if (nr_candidates < 2)
1386 sbitmap_free (candidates);
1387 return false;
1390 if (dump_file && (dump_flags & TDF_DETAILS))
1392 fprintf (dump_file, "searching for un-distribute opportunities ");
1393 print_generic_expr (dump_file,
1394 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1395 fprintf (dump_file, " %d\n", nr_candidates);
1398 /* Build linearized sub-operand lists and the counting table. */
1399 cvec.create (0);
1400 ctable.create (15);
1401 /* ??? Macro arguments cannot have multi-argument template types in
1402 them. This typedef is needed to workaround that limitation. */
1403 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1404 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1405 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1407 gimple oedef;
1408 enum tree_code oecode;
1409 unsigned j;
1411 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1412 oecode = gimple_assign_rhs_code (oedef);
1413 linearize_expr_tree (&subops[i], oedef,
1414 associative_tree_code (oecode), false);
1416 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1418 oecount c;
1419 void **slot;
1420 size_t idx;
1421 c.oecode = oecode;
1422 c.cnt = 1;
1423 c.id = next_oecount_id++;
1424 c.op = oe1->op;
1425 cvec.safe_push (c);
1426 idx = cvec.length () + 41;
1427 slot = ctable.find_slot ((void *)idx, INSERT);
1428 if (!*slot)
1430 *slot = (void *)idx;
1432 else
1434 cvec.pop ();
1435 cvec[(size_t)*slot - 42].cnt++;
1439 ctable.dispose ();
1441 /* Sort the counting table. */
1442 cvec.qsort (oecount_cmp);
1444 if (dump_file && (dump_flags & TDF_DETAILS))
1446 oecount *c;
1447 fprintf (dump_file, "Candidates:\n");
1448 FOR_EACH_VEC_ELT (cvec, j, c)
1450 fprintf (dump_file, " %u %s: ", c->cnt,
1451 c->oecode == MULT_EXPR
1452 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1453 print_generic_expr (dump_file, c->op, 0);
1454 fprintf (dump_file, "\n");
1458 /* Process the (operand, code) pairs in order of most occurrence. */
1459 candidates2 = sbitmap_alloc (length);
1460 while (!cvec.is_empty ())
1462 oecount *c = &cvec.last ();
1463 if (c->cnt < 2)
1464 break;
1466 /* Now collect the operands in the outer chain that contain
1467 the common operand in their inner chain. */
1468 bitmap_clear (candidates2);
1469 nr_candidates2 = 0;
1470 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1472 gimple oedef;
1473 enum tree_code oecode;
1474 unsigned j;
1475 tree op = (*ops)[i]->op;
1477 /* If we undistributed in this chain already this may be
1478 a constant. */
1479 if (TREE_CODE (op) != SSA_NAME)
1480 continue;
1482 oedef = SSA_NAME_DEF_STMT (op);
1483 oecode = gimple_assign_rhs_code (oedef);
1484 if (oecode != c->oecode)
1485 continue;
1487 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1489 if (oe1->op == c->op)
1491 bitmap_set_bit (candidates2, i);
1492 ++nr_candidates2;
1493 break;
1498 if (nr_candidates2 >= 2)
1500 operand_entry_t oe1, oe2;
1501 gimple prod;
1502 int first = bitmap_first_set_bit (candidates2);
1504 /* Build the new addition chain. */
1505 oe1 = (*ops)[first];
1506 if (dump_file && (dump_flags & TDF_DETAILS))
1508 fprintf (dump_file, "Building (");
1509 print_generic_expr (dump_file, oe1->op, 0);
1511 zero_one_operation (&oe1->op, c->oecode, c->op);
1512 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1514 gimple sum;
1515 oe2 = (*ops)[i];
1516 if (dump_file && (dump_flags & TDF_DETAILS))
1518 fprintf (dump_file, " + ");
1519 print_generic_expr (dump_file, oe2->op, 0);
1521 zero_one_operation (&oe2->op, c->oecode, c->op);
1522 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1523 oe1->op, oe2->op, opcode);
1524 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1525 oe2->rank = 0;
1526 oe1->op = gimple_get_lhs (sum);
1529 /* Apply the multiplication/division. */
1530 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1531 oe1->op, c->op, c->oecode);
1532 if (dump_file && (dump_flags & TDF_DETAILS))
1534 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1535 print_generic_expr (dump_file, c->op, 0);
1536 fprintf (dump_file, "\n");
1539 /* Record it in the addition chain and disable further
1540 undistribution with this op. */
1541 oe1->op = gimple_assign_lhs (prod);
1542 oe1->rank = get_rank (oe1->op);
1543 subops[first].release ();
1545 changed = true;
1548 cvec.pop ();
1551 for (i = 0; i < ops->length (); ++i)
1552 subops[i].release ();
1553 free (subops);
1554 cvec.release ();
1555 sbitmap_free (candidates);
1556 sbitmap_free (candidates2);
1558 return changed;
1561 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1562 expression, examine the other OPS to see if any of them are comparisons
1563 of the same values, which we may be able to combine or eliminate.
1564 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1566 static bool
1567 eliminate_redundant_comparison (enum tree_code opcode,
1568 vec<operand_entry_t> *ops,
1569 unsigned int currindex,
1570 operand_entry_t curr)
1572 tree op1, op2;
1573 enum tree_code lcode, rcode;
1574 gimple def1, def2;
1575 int i;
1576 operand_entry_t oe;
1578 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1579 return false;
1581 /* Check that CURR is a comparison. */
1582 if (TREE_CODE (curr->op) != SSA_NAME)
1583 return false;
1584 def1 = SSA_NAME_DEF_STMT (curr->op);
1585 if (!is_gimple_assign (def1))
1586 return false;
1587 lcode = gimple_assign_rhs_code (def1);
1588 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1589 return false;
1590 op1 = gimple_assign_rhs1 (def1);
1591 op2 = gimple_assign_rhs2 (def1);
1593 /* Now look for a similar comparison in the remaining OPS. */
1594 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1596 tree t;
1598 if (TREE_CODE (oe->op) != SSA_NAME)
1599 continue;
1600 def2 = SSA_NAME_DEF_STMT (oe->op);
1601 if (!is_gimple_assign (def2))
1602 continue;
1603 rcode = gimple_assign_rhs_code (def2);
1604 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1605 continue;
1607 /* If we got here, we have a match. See if we can combine the
1608 two comparisons. */
1609 if (opcode == BIT_IOR_EXPR)
1610 t = maybe_fold_or_comparisons (lcode, op1, op2,
1611 rcode, gimple_assign_rhs1 (def2),
1612 gimple_assign_rhs2 (def2));
1613 else
1614 t = maybe_fold_and_comparisons (lcode, op1, op2,
1615 rcode, gimple_assign_rhs1 (def2),
1616 gimple_assign_rhs2 (def2));
1617 if (!t)
1618 continue;
1620 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1621 always give us a boolean_type_node value back. If the original
1622 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1623 we need to convert. */
1624 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1625 t = fold_convert (TREE_TYPE (curr->op), t);
1627 if (TREE_CODE (t) != INTEGER_CST
1628 && !operand_equal_p (t, curr->op, 0))
1630 enum tree_code subcode;
1631 tree newop1, newop2;
1632 if (!COMPARISON_CLASS_P (t))
1633 continue;
1634 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1635 STRIP_USELESS_TYPE_CONVERSION (newop1);
1636 STRIP_USELESS_TYPE_CONVERSION (newop2);
1637 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1638 continue;
1641 if (dump_file && (dump_flags & TDF_DETAILS))
1643 fprintf (dump_file, "Equivalence: ");
1644 print_generic_expr (dump_file, curr->op, 0);
1645 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1646 print_generic_expr (dump_file, oe->op, 0);
1647 fprintf (dump_file, " -> ");
1648 print_generic_expr (dump_file, t, 0);
1649 fprintf (dump_file, "\n");
1652 /* Now we can delete oe, as it has been subsumed by the new combined
1653 expression t. */
1654 ops->ordered_remove (i);
1655 reassociate_stats.ops_eliminated ++;
1657 /* If t is the same as curr->op, we're done. Otherwise we must
1658 replace curr->op with t. Special case is if we got a constant
1659 back, in which case we add it to the end instead of in place of
1660 the current entry. */
1661 if (TREE_CODE (t) == INTEGER_CST)
1663 ops->ordered_remove (currindex);
1664 add_to_ops_vec (ops, t);
1666 else if (!operand_equal_p (t, curr->op, 0))
1668 gimple sum;
1669 enum tree_code subcode;
1670 tree newop1;
1671 tree newop2;
1672 gcc_assert (COMPARISON_CLASS_P (t));
1673 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1674 STRIP_USELESS_TYPE_CONVERSION (newop1);
1675 STRIP_USELESS_TYPE_CONVERSION (newop2);
1676 gcc_checking_assert (is_gimple_val (newop1)
1677 && is_gimple_val (newop2));
1678 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1679 curr->op = gimple_get_lhs (sum);
1681 return true;
1684 return false;
1687 /* Perform various identities and other optimizations on the list of
1688 operand entries, stored in OPS. The tree code for the binary
1689 operation between all the operands is OPCODE. */
1691 static void
1692 optimize_ops_list (enum tree_code opcode,
1693 vec<operand_entry_t> *ops)
1695 unsigned int length = ops->length ();
1696 unsigned int i;
1697 operand_entry_t oe;
1698 operand_entry_t oelast = NULL;
1699 bool iterate = false;
1701 if (length == 1)
1702 return;
1704 oelast = ops->last ();
1706 /* If the last two are constants, pop the constants off, merge them
1707 and try the next two. */
1708 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1710 operand_entry_t oelm1 = (*ops)[length - 2];
1712 if (oelm1->rank == 0
1713 && is_gimple_min_invariant (oelm1->op)
1714 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1715 TREE_TYPE (oelast->op)))
1717 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1718 oelm1->op, oelast->op);
1720 if (folded && is_gimple_min_invariant (folded))
1722 if (dump_file && (dump_flags & TDF_DETAILS))
1723 fprintf (dump_file, "Merging constants\n");
1725 ops->pop ();
1726 ops->pop ();
1728 add_to_ops_vec (ops, folded);
1729 reassociate_stats.constants_eliminated++;
1731 optimize_ops_list (opcode, ops);
1732 return;
1737 eliminate_using_constants (opcode, ops);
1738 oelast = NULL;
1740 for (i = 0; ops->iterate (i, &oe);)
1742 bool done = false;
1744 if (eliminate_not_pairs (opcode, ops, i, oe))
1745 return;
1746 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1747 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1748 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1750 if (done)
1751 return;
1752 iterate = true;
1753 oelast = NULL;
1754 continue;
1756 oelast = oe;
1757 i++;
1760 length = ops->length ();
1761 oelast = ops->last ();
1763 if (iterate)
1764 optimize_ops_list (opcode, ops);
1767 /* The following functions are subroutines to optimize_range_tests and allow
1768 it to try to change a logical combination of comparisons into a range
1769 test.
1771 For example, both
1772 X == 2 || X == 5 || X == 3 || X == 4
1774 X >= 2 && X <= 5
1775 are converted to
1776 (unsigned) (X - 2) <= 3
1778 For more information see comments above fold_test_range in fold-const.c,
1779 this implementation is for GIMPLE. */
1781 struct range_entry
1783 tree exp;
1784 tree low;
1785 tree high;
1786 bool in_p;
1787 bool strict_overflow_p;
1788 unsigned int idx, next;
1791 /* This is similar to make_range in fold-const.c, but on top of
1792 GIMPLE instead of trees. If EXP is non-NULL, it should be
1793 an SSA_NAME and STMT argument is ignored, otherwise STMT
1794 argument should be a GIMPLE_COND. */
1796 static void
1797 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1799 int in_p;
1800 tree low, high;
1801 bool is_bool, strict_overflow_p;
1803 r->exp = NULL_TREE;
1804 r->in_p = false;
1805 r->strict_overflow_p = false;
1806 r->low = NULL_TREE;
1807 r->high = NULL_TREE;
1808 if (exp != NULL_TREE
1809 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1810 return;
1812 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1813 and see if we can refine the range. Some of the cases below may not
1814 happen, but it doesn't seem worth worrying about this. We "continue"
1815 the outer loop when we've changed something; otherwise we "break"
1816 the switch, which will "break" the while. */
1817 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1818 high = low;
1819 in_p = 0;
1820 strict_overflow_p = false;
1821 is_bool = false;
1822 if (exp == NULL_TREE)
1823 is_bool = true;
1824 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1826 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1827 is_bool = true;
1828 else
1829 return;
1831 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1832 is_bool = true;
1834 while (1)
1836 enum tree_code code;
1837 tree arg0, arg1, exp_type;
1838 tree nexp;
1839 location_t loc;
1841 if (exp != NULL_TREE)
1843 if (TREE_CODE (exp) != SSA_NAME)
1844 break;
1846 stmt = SSA_NAME_DEF_STMT (exp);
1847 if (!is_gimple_assign (stmt))
1848 break;
1850 code = gimple_assign_rhs_code (stmt);
1851 arg0 = gimple_assign_rhs1 (stmt);
1852 arg1 = gimple_assign_rhs2 (stmt);
1853 exp_type = TREE_TYPE (exp);
1855 else
1857 code = gimple_cond_code (stmt);
1858 arg0 = gimple_cond_lhs (stmt);
1859 arg1 = gimple_cond_rhs (stmt);
1860 exp_type = boolean_type_node;
1863 if (TREE_CODE (arg0) != SSA_NAME)
1864 break;
1865 loc = gimple_location (stmt);
1866 switch (code)
1868 case BIT_NOT_EXPR:
1869 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1870 /* Ensure the range is either +[-,0], +[0,0],
1871 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1872 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1873 or similar expression of unconditional true or
1874 false, it should not be negated. */
1875 && ((high && integer_zerop (high))
1876 || (low && integer_onep (low))))
1878 in_p = !in_p;
1879 exp = arg0;
1880 continue;
1882 break;
1883 case SSA_NAME:
1884 exp = arg0;
1885 continue;
1886 CASE_CONVERT:
1887 if (is_bool)
1888 goto do_default;
1889 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1891 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1892 is_bool = true;
1893 else
1894 return;
1896 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1897 is_bool = true;
1898 goto do_default;
1899 case EQ_EXPR:
1900 case NE_EXPR:
1901 case LT_EXPR:
1902 case LE_EXPR:
1903 case GE_EXPR:
1904 case GT_EXPR:
1905 is_bool = true;
1906 /* FALLTHRU */
1907 default:
1908 if (!is_bool)
1909 return;
1910 do_default:
1911 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1912 &low, &high, &in_p,
1913 &strict_overflow_p);
1914 if (nexp != NULL_TREE)
1916 exp = nexp;
1917 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1918 continue;
1920 break;
1922 break;
1924 if (is_bool)
1926 r->exp = exp;
1927 r->in_p = in_p;
1928 r->low = low;
1929 r->high = high;
1930 r->strict_overflow_p = strict_overflow_p;
1934 /* Comparison function for qsort. Sort entries
1935 without SSA_NAME exp first, then with SSA_NAMEs sorted
1936 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1937 by increasing ->low and if ->low is the same, by increasing
1938 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1939 maximum. */
1941 static int
1942 range_entry_cmp (const void *a, const void *b)
1944 const struct range_entry *p = (const struct range_entry *) a;
1945 const struct range_entry *q = (const struct range_entry *) b;
1947 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1949 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1951 /* Group range_entries for the same SSA_NAME together. */
1952 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1953 return -1;
1954 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1955 return 1;
1956 /* If ->low is different, NULL low goes first, then by
1957 ascending low. */
1958 if (p->low != NULL_TREE)
1960 if (q->low != NULL_TREE)
1962 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1963 p->low, q->low);
1964 if (tem && integer_onep (tem))
1965 return -1;
1966 tem = fold_binary (GT_EXPR, boolean_type_node,
1967 p->low, q->low);
1968 if (tem && integer_onep (tem))
1969 return 1;
1971 else
1972 return 1;
1974 else if (q->low != NULL_TREE)
1975 return -1;
1976 /* If ->high is different, NULL high goes last, before that by
1977 ascending high. */
1978 if (p->high != NULL_TREE)
1980 if (q->high != NULL_TREE)
1982 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1983 p->high, q->high);
1984 if (tem && integer_onep (tem))
1985 return -1;
1986 tem = fold_binary (GT_EXPR, boolean_type_node,
1987 p->high, q->high);
1988 if (tem && integer_onep (tem))
1989 return 1;
1991 else
1992 return -1;
1994 else if (p->high != NULL_TREE)
1995 return 1;
1996 /* If both ranges are the same, sort below by ascending idx. */
1998 else
1999 return 1;
2001 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2002 return -1;
2004 if (p->idx < q->idx)
2005 return -1;
2006 else
2008 gcc_checking_assert (p->idx > q->idx);
2009 return 1;
2013 /* Helper routine of optimize_range_test.
2014 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2015 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2016 OPCODE and OPS are arguments of optimize_range_tests. Return
2017 true if the range merge has been successful.
2018 If OPCODE is ERROR_MARK, this is called from within
2019 maybe_optimize_range_tests and is performing inter-bb range optimization.
2020 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2021 oe->rank. */
2023 static bool
2024 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2025 unsigned int count, enum tree_code opcode,
2026 vec<operand_entry_t> *ops, tree exp, bool in_p,
2027 tree low, tree high, bool strict_overflow_p)
2029 operand_entry_t oe = (*ops)[range->idx];
2030 tree op = oe->op;
2031 gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2032 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2033 location_t loc = gimple_location (stmt);
2034 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2035 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
2036 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2037 gimple_stmt_iterator gsi;
2039 if (tem == NULL_TREE)
2040 return false;
2042 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2043 warning_at (loc, OPT_Wstrict_overflow,
2044 "assuming signed overflow does not occur "
2045 "when simplifying range test");
2047 if (dump_file && (dump_flags & TDF_DETAILS))
2049 struct range_entry *r;
2050 fprintf (dump_file, "Optimizing range tests ");
2051 print_generic_expr (dump_file, range->exp, 0);
2052 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2053 print_generic_expr (dump_file, range->low, 0);
2054 fprintf (dump_file, ", ");
2055 print_generic_expr (dump_file, range->high, 0);
2056 fprintf (dump_file, "]");
2057 for (r = otherrange; r < otherrange + count; r++)
2059 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2060 print_generic_expr (dump_file, r->low, 0);
2061 fprintf (dump_file, ", ");
2062 print_generic_expr (dump_file, r->high, 0);
2063 fprintf (dump_file, "]");
2065 fprintf (dump_file, "\n into ");
2066 print_generic_expr (dump_file, tem, 0);
2067 fprintf (dump_file, "\n");
2070 if (opcode == BIT_IOR_EXPR
2071 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2072 tem = invert_truthvalue_loc (loc, tem);
2074 tem = fold_convert_loc (loc, optype, tem);
2075 gsi = gsi_for_stmt (stmt);
2076 /* In rare cases range->exp can be equal to lhs of stmt.
2077 In that case we have to insert after the stmt rather then before
2078 it. */
2079 if (op == range->exp)
2080 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2081 GSI_CONTINUE_LINKING);
2082 else
2084 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2085 GSI_SAME_STMT);
2086 gsi_prev (&gsi);
2088 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2089 if (gimple_uid (gsi_stmt (gsi)))
2090 break;
2091 else
2092 gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2094 oe->op = tem;
2095 range->exp = exp;
2096 range->low = low;
2097 range->high = high;
2098 range->in_p = in_p;
2099 range->strict_overflow_p = false;
2101 for (range = otherrange; range < otherrange + count; range++)
2103 oe = (*ops)[range->idx];
2104 /* Now change all the other range test immediate uses, so that
2105 those tests will be optimized away. */
2106 if (opcode == ERROR_MARK)
2108 if (oe->op)
2109 oe->op = build_int_cst (TREE_TYPE (oe->op),
2110 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2111 else
2112 oe->op = (oe->rank == BIT_IOR_EXPR
2113 ? boolean_false_node : boolean_true_node);
2115 else
2116 oe->op = error_mark_node;
2117 range->exp = NULL_TREE;
2119 return true;
2122 /* Optimize X == CST1 || X == CST2
2123 if popcount (CST1 ^ CST2) == 1 into
2124 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2125 Similarly for ranges. E.g.
2126 X != 2 && X != 3 && X != 10 && X != 11
2127 will be transformed by the previous optimization into
2128 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2129 and this loop can transform that into
2130 !(((X & ~8) - 2U) <= 1U). */
2132 static bool
2133 optimize_range_tests_xor (enum tree_code opcode, tree type,
2134 tree lowi, tree lowj, tree highi, tree highj,
2135 vec<operand_entry_t> *ops,
2136 struct range_entry *rangei,
2137 struct range_entry *rangej)
2139 tree lowxor, highxor, tem, exp;
2140 /* Check highi ^ lowi == highj ^ lowj and
2141 popcount (highi ^ lowi) == 1. */
2142 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2143 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2144 return false;
2145 if (tree_log2 (lowxor) < 0)
2146 return false;
2147 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2148 if (!tree_int_cst_equal (lowxor, highxor))
2149 return false;
2151 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2152 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2153 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2154 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2155 if (update_range_test (rangei, rangej, 1, opcode, ops, exp,
2156 rangei->in_p, lowj, highj,
2157 rangei->strict_overflow_p
2158 || rangej->strict_overflow_p))
2159 return true;
2160 return false;
2163 /* Optimize X == CST1 || X == CST2
2164 if popcount (CST2 - CST1) == 1 into
2165 ((X - CST1) & ~(CST2 - CST1)) == 0.
2166 Similarly for ranges. E.g.
2167 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2168 || X == 75 || X == 45
2169 will be transformed by the previous optimization into
2170 (X - 43U) <= 3U || (X - 75U) <= 3U
2171 and this loop can transform that into
2172 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2173 static bool
2174 optimize_range_tests_diff (enum tree_code opcode, tree type,
2175 tree lowi, tree lowj, tree highi, tree highj,
2176 vec<operand_entry_t> *ops,
2177 struct range_entry *rangei,
2178 struct range_entry *rangej)
2180 tree tem1, tem2, mask;
2181 /* Check highi - lowi == highj - lowj. */
2182 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2183 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2184 return false;
2185 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2186 if (!tree_int_cst_equal (tem1, tem2))
2187 return false;
2188 /* Check popcount (lowj - lowi) == 1. */
2189 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2190 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2191 return false;
2192 if (tree_log2 (tem1) < 0)
2193 return false;
2195 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2196 tem1 = fold_binary (MINUS_EXPR, type, rangei->exp, lowi);
2197 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2198 lowj = build_int_cst (type, 0);
2199 if (update_range_test (rangei, rangej, 1, opcode, ops, tem1,
2200 rangei->in_p, lowj, tem2,
2201 rangei->strict_overflow_p
2202 || rangej->strict_overflow_p))
2203 return true;
2204 return false;
2207 /* It does some common checks for function optimize_range_tests_xor and
2208 optimize_range_tests_diff.
2209 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2210 Else it calls optimize_range_tests_diff.
   Walks RANGES[FIRST .. LENGTH-1], pairing up two range tests performed
   on the same SSA_NAME, and returns true if any pair was merged.  */
2212 static bool
2213 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2214 bool optimize_xor, vec<operand_entry_t> *ops,
2215 struct range_entry *ranges)
2217 int i, j;
2218 bool any_changes = false;
2219 for (i = first; i < length; i++)
2221 tree lowi, highi, lowj, highj, type, tem;
   /* Only consider "not in range" tests (in_p clear) on integral types.  */
2223 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2224 continue;
2225 type = TREE_TYPE (ranges[i].exp);
2226 if (!INTEGRAL_TYPE_P (type))
2227 continue;
2228 lowi = ranges[i].low;
   /* A NULL low bound means the type's minimum.  */
2229 if (lowi == NULL_TREE)
2230 lowi = TYPE_MIN_VALUE (type);
2231 highi = ranges[i].high;
2232 if (highi == NULL_TREE)
2233 continue;
   /* Look only at the next 64 entries to bound the quadratic scan.  */
2234 for (j = i + 1; j < length && j < i + 64; j++)
2236 bool changes;
   /* Candidate partner must test the same SSA_NAME the same way.  */
2237 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2238 continue;
2239 lowj = ranges[j].low;
2240 if (lowj == NULL_TREE)
2241 continue;
2242 highj = ranges[j].high;
   /* A NULL high bound means the type's maximum.  */
2243 if (highj == NULL_TREE)
2244 highj = TYPE_MAX_VALUE (type);
2245 /* Check lowj > highi. */
2246 tem = fold_binary (GT_EXPR, boolean_type_node,
2247 lowj, highi);
2248 if (tem == NULL_TREE || !integer_onep (tem))
2249 continue;
2250 if (optimize_xor)
2251 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2252 highi, highj, ops,
2253 ranges + i, ranges + j)
2254 else
2255 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2256 highi, highj, ops,
2257 ranges + i, ranges + j);
2258 if (changes)
   /* At most one partner per range I; move on to the next I.  */
2260 any_changes = true;
2261 break;
2265 return any_changes;
2268 /* Optimize range tests, similarly how fold_range_test optimizes
2269 it on trees. The tree code for the binary
2270 operation between all the operands is OPCODE.
2271 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2272 maybe_optimize_range_tests for inter-bb range optimization.
2273 In that case if oe->op is NULL, oe->id is bb->index whose
2274 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2275 the actual opcode.
   Returns true if any test was merged and *OPS was modified.  */
2277 static bool
2278 optimize_range_tests (enum tree_code opcode,
2279 vec<operand_entry_t> *ops)
2281 unsigned int length = ops->length (), i, j, first;
2282 operand_entry_t oe;
2283 struct range_entry *ranges;
2284 bool any_changes = false;
2286 if (length == 1)
2287 return false;
   /* Build one range_entry per operand.  For oe->op == NULL (inter-bb
      case) the range is derived from the bb's final GIMPLE_COND.  */
2289 ranges = XNEWVEC (struct range_entry, length);
2290 for (i = 0; i < length; i++)
2292 oe = (*ops)[i];
2293 ranges[i].idx = i;
2294 init_range_entry (ranges + i, oe->op,
2295 oe->op ? NULL :
2296 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2297 /* For | invert it now, we will invert it again before emitting
2298 the optimized expression. */
2299 if (opcode == BIT_IOR_EXPR
2300 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2301 ranges[i].in_p = !ranges[i].in_p;
2304 qsort (ranges, length, sizeof (*ranges), range_entry_cmp)
   /* Skip leading entries whose test expression is not an SSA_NAME.  */
2305 for (i = 0; i < length; i++)
2306 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2307 break;
2309 /* Try to merge ranges. */
2310 for (first = i; i < length; i++)
2312 tree low = ranges[i].low;
2313 tree high = ranges[i].high;
2314 int in_p = ranges[i].in_p;
2315 bool strict_overflow_p = ranges[i].strict_overflow_p;
2316 int update_fail_count = 0;
   /* Greedily fold consecutive (sorted) ranges on the same SSA_NAME
      into [low, high].  */
2318 for (j = i + 1; j < length; j++)
2320 if (ranges[i].exp != ranges[j].exp)
2321 break;
2322 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2323 ranges[j].in_p, ranges[j].low, ranges[j].high))
2324 break;
2325 strict_overflow_p |= ranges[j].strict_overflow_p;
2328 if (j == i + 1)
2329 continue;
2331 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2332 ops, ranges[i].exp, in_p, low, high,
2333 strict_overflow_p))
2335 i = j - 1;
2336 any_changes = true;
2338 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2339 while update_range_test would fail. */
2340 else if (update_fail_count == 64)
2341 i = j - 1;
2342 else
2343 ++update_fail_count;
2346 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2347 ops, ranges);
   /* The diff variant trades a comparison for a subtraction; only
      profitable when branches are expensive enough.  */
2349 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2350 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2351 ops, ranges);
   /* Compact *OPS, dropping entries replaced with error_mark_node during
      merging.  In the ERROR_MARK (inter-bb) case the caller does this.  */
2353 if (any_changes && opcode != ERROR_MARK)
2355 j = 0;
2356 FOR_EACH_VEC_ELT (*ops, i, oe)
2358 if (oe->op == error_mark_node)
2359 continue;
2360 else if (i != j)
2361 (*ops)[j] = oe;
2362 j++;
2364 ops->truncate (j);
2367 XDELETEVEC (ranges);
2368 return any_changes;
2371 /* Return true if STMT is a cast like:
2372 <bb N>:
2374 _123 = (int) _234;
2376 <bb M>:
2377 # _345 = PHI <_123(N), 1(...), 1(...)>
2378 where _234 has bool type, _123 has single use and
2379 bb N has a single successor M. This is commonly used in
2380 the last block of a range test. */
2382 static bool
2383 final_range_test_p (gimple stmt)
2385 basic_block bb, rhs_bb;
2386 edge e;
2387 tree lhs, rhs;
2388 use_operand_p use_p;
2389 gimple use_stmt;
2391 if (!gimple_assign_cast_p (stmt))
2392 return false;
2393 bb = gimple_bb (stmt);
   /* The cast's bb must fall through on a single, non-complex edge.  */
2394 if (!single_succ_p (bb))
2395 return false;
2396 e = single_succ_edge (bb);
2397 if (e->flags & EDGE_COMPLEX)
2398 return false;
   /* Must be a bool -> integral cast of an SSA_NAME.  */
2400 lhs = gimple_assign_lhs (stmt);
2401 rhs = gimple_assign_rhs1 (stmt);
2402 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2403 || TREE_CODE (rhs) != SSA_NAME
2404 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2405 return false;
2407 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2408 if (!single_imm_use (lhs, &use_p, &use_stmt))
2409 return false;
2411 if (gimple_code (use_stmt) != GIMPLE_PHI
2412 || gimple_bb (use_stmt) != e->dest)
2413 return false;
2415 /* And that the rhs is defined in the same loop. */
2416 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2417 if (rhs_bb == NULL
2418 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2419 return false;
2421 return true;
2424 /* Return true if BB is suitable basic block for inter-bb range test
2425 optimization. If BACKWARD is true, BB should be the only predecessor
2426 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2427 or compared with to find a common basic block to which all conditions
2428 branch to if true resp. false. If BACKWARD is false, TEST_BB should
2429 be the only predecessor of BB. */
2431 static bool
2432 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2433 bool backward)
2435 edge_iterator ei, ei2;
2436 edge e, e2;
2437 gimple stmt;
2438 gimple_stmt_iterator gsi;
2439 bool other_edge_seen = false;
2440 bool is_cond;
2442 if (test_bb == bb)
2443 return false;
2444 /* Check last stmt first.
   A non-GIMPLE_COND ending (the final_range_test_p cast) is only
   acceptable in the forward (!BACKWARD) direction.  */
2445 stmt = last_stmt (bb);
2446 if (stmt == NULL
2447 || (gimple_code (stmt) != GIMPLE_COND
2448 && (backward || !final_range_test_p (stmt)))
2449 || gimple_visited_p (stmt)
2450 || stmt_could_throw_p (stmt)
2451 || *other_bb == bb)
2452 return false;
2453 is_cond = gimple_code (stmt) == GIMPLE_COND;
2454 if (is_cond)
2456 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2457 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2458 to *OTHER_BB (if not set yet, try to find it out). */
2459 if (EDGE_COUNT (bb->succs) != 2)
2460 return false;
2461 FOR_EACH_EDGE (e, ei, bb->succs)
2463 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2464 return false;
2465 if (e->dest == test_bb)
2467 if (backward)
2468 continue;
2469 else
2470 return false;
   /* Self-loops disqualify BB.  */
2472 if (e->dest == bb)
2473 return false;
   /* First time through: *OTHER_BB is the successor BB shares with
      TEST_BB.  */
2474 if (*other_bb == NULL)
2476 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2477 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2478 return false;
2479 else if (e->dest == e2->dest)
2480 *other_bb = e->dest;
2481 if (*other_bb == NULL)
2482 return false;
2484 if (e->dest == *other_bb)
2485 other_edge_seen = true;
2486 else if (backward)
2487 return false;
2489 if (*other_bb == NULL || !other_edge_seen)
2490 return false;
2492 else if (single_succ (bb) != *other_bb)
2493 return false;
2495 /* Now check all PHIs of *OTHER_BB. */
2496 e = find_edge (bb, *other_bb);
2497 e2 = find_edge (test_bb, *other_bb);
2498 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2500 gimple phi = gsi_stmt (gsi);
2501 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2502 corresponding to BB and TEST_BB predecessor must be the same. */
2503 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2504 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2506 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2507 one of the PHIs should have the lhs of the last stmt in
2508 that block as PHI arg and that PHI should have 0 or 1
2509 corresponding to it in all other range test basic blocks
2510 considered. */
2511 if (!is_cond)
2513 if (gimple_phi_arg_def (phi, e->dest_idx)
2514 == gimple_assign_lhs (stmt)
2515 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2516 || integer_onep (gimple_phi_arg_def (phi,
2517 e2->dest_idx))))
2518 continue;
2520 else
2522 gimple test_last = last_stmt (test_bb);
2523 if (gimple_code (test_last) != GIMPLE_COND
2524 && gimple_phi_arg_def (phi, e2->dest_idx)
2525 == gimple_assign_lhs (test_last)
2526 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2527 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2528 continue;
2531 return false;
2534 return true;
2537 /* Return true if BB doesn't have side-effects that would disallow
2538 range test optimization, all SSA_NAMEs set in the bb are consumed
2539 in the bb and there are no PHIs. */
2541 static bool
2542 no_side_effect_bb (basic_block bb)
2544 gimple_stmt_iterator gsi;
2545 gimple last;
2547 if (!gimple_seq_empty_p (phi_nodes (bb)))
2548 return false;
2549 last = last_stmt (bb);
2550 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2552 gimple stmt = gsi_stmt (gsi);
2553 tree lhs;
2554 imm_use_iterator imm_iter;
2555 use_operand_p use_p;
2557 if (is_gimple_debug (stmt))
2558 continue;
2559 if (gimple_has_side_effects (stmt))
2560 return false;
   /* The control statement terminating BB is exempt from the
      assignment-only checks below.  */
2561 if (stmt == last)
2562 return true;
2563 if (!is_gimple_assign (stmt))
2564 return false;
2565 lhs = gimple_assign_lhs (stmt);
2566 if (TREE_CODE (lhs) != SSA_NAME)
2567 return false;
2568 if (gimple_assign_rhs_could_trap_p (stmt))
2569 return false;
   /* Every non-debug use of LHS must stay inside BB.  */
2570 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2572 gimple use_stmt = USE_STMT (use_p);
2573 if (is_gimple_debug (use_stmt))
2574 continue;
2575 if (gimple_bb (use_stmt) != bb)
2576 return false;
   /* LAST was never reached in the walk; be conservative.  */
2579 return false;
2582 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2583 return true and fill in *OPS recursively.
   Marks each visited defining statement so later passes know it was
   consumed by this walk.  */
2585 static bool
2586 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2587 struct loop *loop)
2589 gimple stmt = SSA_NAME_DEF_STMT (var);
2590 tree rhs[2];
2591 int i;
2593 if (!is_reassociable_op (stmt, code, loop))
2594 return false;
2596 rhs[0] = gimple_assign_rhs1 (stmt);
2597 rhs[1] = gimple_assign_rhs2 (stmt);
2598 gimple_set_visited (stmt, true);
   /* Recurse into each operand; a single-use SSA_NAME that isn't itself
      a CODE operation becomes a leaf entry in *OPS.  */
2599 for (i = 0; i < 2; i++)
2600 if (TREE_CODE (rhs[i]) == SSA_NAME
2601 && !get_ops (rhs[i], code, ops, loop)
2602 && has_single_use (rhs[i]))
2604 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2606 oe->op = rhs[i];
2607 oe->rank = code;
2608 oe->id = 0;
2609 oe->count = 1;
2610 ops->safe_push (oe);
2612 return true;
2615 /* Find the ops that were added by get_ops starting from VAR, see if
2616 they were changed during update_range_test and if yes, create new
2617 stmts.
   *PIDX walks the OPS vector in the same order get_ops pushed leaves.
   Returns the (possibly new) SSA_NAME replacing VAR, or NULL if VAR's
   def stmt isn't reassociable.  */
2619 static tree
2620 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2621 unsigned int *pidx, struct loop *loop)
2623 gimple stmt = SSA_NAME_DEF_STMT (var);
2624 tree rhs[4];
2625 int i;
2627 if (!is_reassociable_op (stmt, code, loop))
2628 return NULL;
   /* rhs[0..1] are the original operands, rhs[2..3] their (possibly
      replaced) counterparts.  */
2630 rhs[0] = gimple_assign_rhs1 (stmt);
2631 rhs[1] = gimple_assign_rhs2 (stmt);
2632 rhs[2] = rhs[0];
2633 rhs[3] = rhs[1];
2634 for (i = 0; i < 2; i++)
2635 if (TREE_CODE (rhs[i]) == SSA_NAME)
2637 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2638 if (rhs[2 + i] == NULL_TREE)
   /* Leaf: mirror get_ops' single-use test to consume the matching
      OPS entry in lock-step.  */
2640 if (has_single_use (rhs[i]))
2641 rhs[2 + i] = ops[(*pidx)++]->op;
2642 else
2643 rhs[2 + i] = rhs[i];
   /* If either operand changed (modulo commutation), build a fresh stmt
      rather than editing STMT in place.  */
2646 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2647 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2649 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2650 var = make_ssa_name (TREE_TYPE (var), NULL);
2651 gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
2652 var, rhs[2], rhs[3]);
2653 gimple_set_uid (g, gimple_uid (stmt));
2654 gimple_set_visited (g, true);
2655 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2657 return var;
2660 /* Structure to track the initial value passed to get_ops and
2661 the range in the ops vector for each basic block. */
2663 struct inter_bb_range_test_entry
   /* Initial SSA_NAME handed to get_ops for this bb, or NULL.  */
2665 tree op;
   /* Half-open slice [first_idx, last_idx) of the ops vector that this
      bb contributed.  */
2666 unsigned int first_idx, last_idx;
2669 /* Inter-bb range test optimization.
   Starting from STMT (a GIMPLE_COND or a final_range_test_p cast),
   grows a chain of basic blocks first_bb .. last_bb that all branch to
   a common other_bb, collects their range tests into OPS, lets
   optimize_range_tests merge them, and rewrites the affected stmts.  */
2671 static void
2672 maybe_optimize_range_tests (gimple stmt)
2674 basic_block first_bb = gimple_bb (stmt);
2675 basic_block last_bb = first_bb;
2676 basic_block other_bb = NULL;
2677 basic_block bb;
2678 edge_iterator ei;
2679 edge e;
2680 auto_vec<operand_entry_t> ops;
2681 auto_vec<inter_bb_range_test_entry> bbinfo;
2682 bool any_changes = false;
2684 /* Consider only basic blocks that end with GIMPLE_COND or
2685 a cast statement satisfying final_range_test_p. All
2686 but the last bb in the first_bb .. last_bb range
2687 should end with GIMPLE_COND. */
2688 if (gimple_code (stmt) == GIMPLE_COND)
2690 if (EDGE_COUNT (first_bb->succs) != 2)
2691 return;
2693 else if (final_range_test_p (stmt))
2694 other_bb = single_succ (first_bb);
2695 else
2696 return;
2698 if (stmt_could_throw_p (stmt))
2699 return;
2701 /* As relative ordering of post-dominator sons isn't fixed,
2702 maybe_optimize_range_tests can be called first on any
2703 bb in the range we want to optimize. So, start searching
2704 backwards, if first_bb can be set to a predecessor. */
2705 while (single_pred_p (first_bb))
2707 basic_block pred_bb = single_pred (first_bb);
2708 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2709 break;
2710 if (!no_side_effect_bb (first_bb))
2711 break;
2712 first_bb = pred_bb;
2714 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2715 Before starting forward search in last_bb successors, find
2716 out the other_bb. */
2717 if (first_bb == last_bb)
2719 other_bb = NULL;
2720 /* As non-GIMPLE_COND last stmt always terminates the range,
2721 if forward search didn't discover anything, just give up. */
2722 if (gimple_code (stmt) != GIMPLE_COND)
2723 return;
2724 /* Look at both successors. Either it ends with a GIMPLE_COND
2725 and satisfies suitable_cond_bb, or ends with a cast and
2726 other_bb is that cast's successor. */
2727 FOR_EACH_EDGE (e, ei, first_bb->succs)
2728 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2729 || e->dest == first_bb)
2730 return;
2731 else if (single_pred_p (e->dest))
2733 stmt = last_stmt (e->dest);
2734 if (stmt
2735 && gimple_code (stmt) == GIMPLE_COND
2736 && EDGE_COUNT (e->dest->succs) == 2)
2738 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2739 break;
2740 else
2741 other_bb = NULL;
2743 else if (stmt
2744 && final_range_test_p (stmt)
2745 && find_edge (first_bb, single_succ (e->dest)))
2747 other_bb = single_succ (e->dest);
2748 if (other_bb == first_bb)
2749 other_bb = NULL;
2752 if (other_bb == NULL)
2753 return;
2755 /* Now do the forward search, moving last_bb to successor bbs
2756 that aren't other_bb. */
2757 while (EDGE_COUNT (last_bb->succs) == 2)
2759 FOR_EACH_EDGE (e, ei, last_bb->succs)
2760 if (e->dest != other_bb)
2761 break;
2762 if (e == NULL)
2763 break;
2764 if (!single_pred_p (e->dest))
2765 break;
2766 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2767 break;
2768 if (!no_side_effect_bb (e->dest))
2769 break;
2770 last_bb = e->dest;
2772 if (first_bb == last_bb)
2773 return;
2774 /* Here basic blocks first_bb through last_bb's predecessor
2775 end with GIMPLE_COND, all of them have one of the edges to
2776 other_bb and another to another block in the range,
2777 all blocks except first_bb don't have side-effects and
2778 last_bb ends with either GIMPLE_COND, or cast satisfying
2779 final_range_test_p. */
2780 for (bb = last_bb; ; bb = single_pred (bb))
2782 enum tree_code code;
2783 tree lhs, rhs;
2784 inter_bb_range_test_entry bb_ent;
2786 bb_ent.op = NULL_TREE;
2787 bb_ent.first_idx = ops.length ();
2788 bb_ent.last_idx = bb_ent.first_idx;
2789 e = find_edge (bb, other_bb);
2790 stmt = last_stmt (bb);
2791 gimple_set_visited (stmt, true);
2792 if (gimple_code (stmt) != GIMPLE_COND)
2794 use_operand_p use_p;
2795 gimple phi;
2796 edge e2;
2797 unsigned int d;
2799 lhs = gimple_assign_lhs (stmt);
2800 rhs = gimple_assign_rhs1 (stmt);
2801 gcc_assert (bb == last_bb);
2803 /* stmt is
2804 _123 = (int) _234;
2806 followed by:
2807 <bb M>:
2808 # _345 = PHI <_123(N), 1(...), 1(...)>
2810 or 0 instead of 1. If it is 0, the _234
2811 range test is anded together with all the
2812 other range tests, if it is 1, it is ored with
2813 them. */
2814 single_imm_use (lhs, &use_p, &phi);
2815 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2816 e2 = find_edge (first_bb, other_bb);
2817 d = e2->dest_idx;
2818 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2819 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2820 code = BIT_AND_EXPR;
2821 else
2823 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2824 code = BIT_IOR_EXPR;
2827 /* If _234 SSA_NAME_DEF_STMT is
2828 _234 = _567 | _789;
2829 (or &, corresponding to 1/0 in the phi arguments,
2830 push into ops the individual range test arguments
2831 of the bitwise or resp. and, recursively. */
2832 if (!get_ops (rhs, code, &ops,
2833 loop_containing_stmt (stmt))
2834 && has_single_use (rhs))
2836 /* Otherwise, push the _234 range test itself. */
2837 operand_entry_t oe
2838 = (operand_entry_t) pool_alloc (operand_entry_pool);
2840 oe->op = rhs;
2841 oe->rank = code;
2842 oe->id = 0;
2843 oe->count = 1;
2844 ops.safe_push (oe);
2845 bb_ent.last_idx++;
2847 else
2848 bb_ent.last_idx = ops.length ();
2849 bb_ent.op = rhs;
2850 bbinfo.safe_push (bb_ent);
2851 continue;
2853 /* Otherwise stmt is GIMPLE_COND. */
2854 code = gimple_cond_code (stmt);
2855 lhs = gimple_cond_lhs (stmt);
2856 rhs = gimple_cond_rhs (stmt);
2857 if (TREE_CODE (lhs) == SSA_NAME
2858 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2859 && ((code != EQ_EXPR && code != NE_EXPR)
2860 || rhs != boolean_false_node
2861 /* Either push into ops the individual bitwise
2862 or resp. and operands, depending on which
2863 edge is other_bb. */
2864 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2865 ^ (code == EQ_EXPR))
2866 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2867 loop_containing_stmt (stmt))))
2869 /* Or push the GIMPLE_COND stmt itself. */
2870 operand_entry_t oe
2871 = (operand_entry_t) pool_alloc (operand_entry_pool);
2873 oe->op = NULL;
2874 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2875 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2876 /* oe->op = NULL signs that there is no SSA_NAME
2877 for the range test, and oe->id instead is the
2878 basic block number, at which's end the GIMPLE_COND
2879 is. */
2880 oe->id = bb->index;
2881 oe->count = 1;
2882 ops.safe_push (oe);
2883 bb_ent.op = NULL;
2884 bb_ent.last_idx++;
2886 else if (ops.length () > bb_ent.first_idx)
2888 bb_ent.op = lhs;
2889 bb_ent.last_idx = ops.length ();
2891 bbinfo.safe_push (bb_ent);
2892 if (bb == first_bb)
2893 break;
   /* Only worth optimizing when more than one range test was found.  */
2895 if (ops.length () > 1)
2896 any_changes = optimize_range_tests (ERROR_MARK, &ops);
2897 if (any_changes)
2899 unsigned int idx;
2900 /* update_ops relies on has_single_use predicates returning the
2901 same values as it did during get_ops earlier. Additionally it
2902 never removes statements, only adds new ones and it should walk
2903 from the single imm use and check the predicate already before
2904 making those changes.
2905 On the other side, the handling of GIMPLE_COND directly can turn
2906 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
2907 it needs to be done in a separate loop afterwards. */
2908 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2910 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2911 && bbinfo[idx].op != NULL_TREE)
2913 tree new_op;
2915 stmt = last_stmt (bb);
2916 new_op = update_ops (bbinfo[idx].op,
2917 (enum tree_code)
2918 ops[bbinfo[idx].first_idx]->rank,
2919 ops, &bbinfo[idx].first_idx,
2920 loop_containing_stmt (stmt));
2921 if (new_op == NULL_TREE)
2923 gcc_assert (bb == last_bb);
2924 new_op = ops[bbinfo[idx].first_idx++]->op;
2926 if (bbinfo[idx].op != new_op)
2928 imm_use_iterator iter;
2929 use_operand_p use_p;
2930 gimple use_stmt, cast_stmt = NULL;
   /* Redirect every non-debug use of the old op; cond/phi uses
      are rewritten directly, a consuming cast is handled below.  */
2932 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
2933 if (is_gimple_debug (use_stmt))
2934 continue;
2935 else if (gimple_code (use_stmt) == GIMPLE_COND
2936 || gimple_code (use_stmt) == GIMPLE_PHI)
2937 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2938 SET_USE (use_p, new_op);
2939 else if (gimple_assign_cast_p (use_stmt))
2940 cast_stmt = use_stmt;
2941 else
2942 gcc_unreachable ();
2943 if (cast_stmt)
2945 gcc_assert (bb == last_bb);
2946 tree lhs = gimple_assign_lhs (cast_stmt);
2947 tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
2948 enum tree_code rhs_code
2949 = gimple_assign_rhs_code (cast_stmt);
2950 gimple g;
2951 if (is_gimple_min_invariant (new_op))
2953 new_op = fold_convert (TREE_TYPE (lhs), new_op);
2954 g = gimple_build_assign (new_lhs, new_op);
2956 else
2957 g = gimple_build_assign_with_ops (rhs_code, new_lhs,
2958 new_op, NULL_TREE);
2959 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
2960 gimple_set_uid (g, gimple_uid (cast_stmt));
2961 gimple_set_visited (g, true);
2962 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2963 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2964 if (is_gimple_debug (use_stmt))
2965 continue;
2966 else if (gimple_code (use_stmt) == GIMPLE_COND
2967 || gimple_code (use_stmt) == GIMPLE_PHI)
2968 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2969 SET_USE (use_p, new_lhs);
2970 else
2971 gcc_unreachable ();
2975 if (bb == first_bb)
2976 break;
   /* Second pass: rewrite the GIMPLE_CONDs whose tests were merged
      (bbinfo op == NULL, ops entry now holds the replacement).  */
2978 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2980 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2981 && bbinfo[idx].op == NULL_TREE
2982 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
2984 stmt = last_stmt (bb);
2985 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
2986 gimple_cond_make_false (stmt);
2987 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
2988 gimple_cond_make_true (stmt);
2989 else
2991 gimple_cond_set_code (stmt, NE_EXPR);
2992 gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
2993 gimple_cond_set_rhs (stmt, boolean_false_node);
2995 update_stmt (stmt);
2997 if (bb == first_bb)
2998 break;
3003 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3004 of STMT in it's operands. This is also known as a "destructive
3005 update" operation. */
3007 static bool
3008 is_phi_for_stmt (gimple stmt, tree operand)
3010 gimple def_stmt;
3011 tree lhs;
3012 use_operand_p arg_p;
3013 ssa_op_iter i;
3015 if (TREE_CODE (operand) != SSA_NAME)
3016 return false;
3018 lhs = gimple_assign_lhs (stmt);
3020 def_stmt = SSA_NAME_DEF_STMT (operand);
3021 if (gimple_code (def_stmt) != GIMPLE_PHI)
3022 return false;
3024 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
3025 if (lhs == USE_FROM_PTR (arg_p))
3026 return true;
3027 return false;
3030 /* Remove def stmt of VAR if VAR has zero uses and recurse
3031 on rhs1 operand if so. */
3033 static void
3034 remove_visited_stmt_chain (tree var)
3036 gimple stmt;
3037 gimple_stmt_iterator gsi;
3039 while (1)
3041 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3042 return;
3043 stmt = SSA_NAME_DEF_STMT (var);
3044 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3046 var = gimple_assign_rhs1 (stmt);
3047 gsi = gsi_for_stmt (stmt);
3048 gsi_remove (&gsi, true);
3049 release_defs (stmt);
3051 else
3052 return;
3056 /* This function checks three consequtive operands in
3057 passed operands vector OPS starting from OPINDEX and
3058 swaps two operands if it is profitable for binary operation
3059 consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
3061 We pair ops with the same rank if possible.
3063 The alternative we try is to see if STMT is a destructive
3064 update style statement, which is like:
3065 b = phi (a, ...)
3066 a = c + b;
3067 In that case, we want to use the destructive update form to
3068 expose the possible vectorizer sum reduction opportunity.
3069 In that case, the third operand will be the phi node. This
3070 check is not performed if STMT is null.
3072 We could, of course, try to be better as noted above, and do a
3073 lot of work to try to find these opportunities in >3 operand
3074 cases, but it is unlikely to be worth it. */
3076 static void
3077 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3078 unsigned int opindex, gimple stmt)
3080 operand_entry_t oe1, oe2, oe3;
3082 oe1 = ops[opindex];
3083 oe2 = ops[opindex + 1];
3084 oe3 = ops[opindex + 2];
3086 if ((oe1->rank == oe2->rank
3087 && oe2->rank != oe3->rank)
3088 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3089 && !is_phi_for_stmt (stmt, oe1->op)
3090 && !is_phi_for_stmt (stmt, oe2->op)))
3092 struct operand_entry temp = *oe3;
3093 oe3->op = oe1->op;
3094 oe3->rank = oe1->rank;
3095 oe1->op = temp.op;
3096 oe1->rank= temp.rank;
3098 else if ((oe1->rank == oe3->rank
3099 && oe2->rank != oe3->rank)
3100 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3101 && !is_phi_for_stmt (stmt, oe1->op)
3102 && !is_phi_for_stmt (stmt, oe3->op)))
3104 struct operand_entry temp = *oe2;
3105 oe2->op = oe1->op;
3106 oe2->rank = oe1->rank;
3107 oe1->op = temp.op;
3108 oe1->rank = temp.rank;
3112 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3113 two definitions, otherwise return STMT. */
3115 static inline gimple
3116 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3118 if (TREE_CODE (rhs1) == SSA_NAME
3119 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3120 stmt = SSA_NAME_DEF_STMT (rhs1);
3121 if (TREE_CODE (rhs2) == SSA_NAME
3122 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3123 stmt = SSA_NAME_DEF_STMT (rhs2);
3124 return stmt;
3127 /* Recursively rewrite our linearized statements so that the operators
3128 match those in OPS[OPINDEX], putting the computation in rank
3129 order. Return new lhs.
   CHANGED indicates an earlier level already rebuilt a stmt, in which
   case a fresh SSA_NAME/stmt is created here too so debug stmts can be
   adjusted.  */
3131 static tree
3132 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3133 vec<operand_entry_t> ops, bool changed)
3135 tree rhs1 = gimple_assign_rhs1 (stmt);
3136 tree rhs2 = gimple_assign_rhs2 (stmt);
3137 tree lhs = gimple_assign_lhs (stmt);
3138 operand_entry_t oe;
3140 /* The final recursion case for this function is that you have
3141 exactly two operations left.
3142 If we had one exactly one op in the entire list to start with, we
3143 would have never called this function, and the tail recursion
3144 rewrites them one at a time. */
3145 if (opindex + 2 == ops.length ())
3147 operand_entry_t oe1, oe2;
3149 oe1 = ops[opindex];
3150 oe2 = ops[opindex + 1];
3152 if (rhs1 != oe1->op || rhs2 != oe2->op)
3154 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3155 unsigned int uid = gimple_uid (stmt);
3157 if (dump_file && (dump_flags & TDF_DETAILS))
3159 fprintf (dump_file, "Transforming ");
3160 print_gimple_stmt (dump_file, stmt, 0, 0);
   /* Build a new stmt at the dominance-correct insert point rather
      than editing STMT, so the old lhs isn't reused.  */
3163 if (changed)
3165 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3166 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3167 stmt
3168 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3169 lhs, oe1->op, oe2->op);
3170 gimple_set_uid (stmt, uid);
3171 gimple_set_visited (stmt, true);
3172 if (insert_point == gsi_stmt (gsi))
3173 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3174 else
3175 insert_stmt_after (stmt, insert_point);
3177 else
3179 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3180 == stmt);
3181 gimple_assign_set_rhs1 (stmt, oe1->op);
3182 gimple_assign_set_rhs2 (stmt, oe2->op);
3183 update_stmt (stmt);
   /* The old rhs1 chain may now be dead; clean it up.  */
3186 if (rhs1 != oe1->op && rhs1 != oe2->op)
3187 remove_visited_stmt_chain (rhs1);
3189 if (dump_file && (dump_flags & TDF_DETAILS))
3191 fprintf (dump_file, " into ");
3192 print_gimple_stmt (dump_file, stmt, 0, 0);
3195 return lhs;
3198 /* If we hit here, we should have 3 or more ops left. */
3199 gcc_assert (opindex + 2 < ops.length ());
3201 /* Rewrite the next operator. */
3202 oe = ops[opindex];
3204 /* Recurse on the LHS of the binary operator, which is guaranteed to
3205 be the non-leaf side. */
3206 tree new_rhs1
3207 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3208 changed || oe->op != rhs2);
3210 if (oe->op != rhs2 || new_rhs1 != rhs1)
3212 if (dump_file && (dump_flags & TDF_DETAILS))
3214 fprintf (dump_file, "Transforming ");
3215 print_gimple_stmt (dump_file, stmt, 0, 0);
3218 /* If changed is false, this is either opindex == 0
3219 or all outer rhs2's were equal to corresponding oe->op,
3220 and powi_result is NULL.
3221 That means lhs is equivalent before and after reassociation.
3222 Otherwise ensure the old lhs SSA_NAME is not reused and
3223 create a new stmt as well, so that any debug stmts will be
3224 properly adjusted. */
3225 if (changed)
3227 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3228 unsigned int uid = gimple_uid (stmt);
3229 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3231 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3232 stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3233 lhs, new_rhs1, oe->op);
3234 gimple_set_uid (stmt, uid);
3235 gimple_set_visited (stmt, true);
3236 if (insert_point == gsi_stmt (gsi))
3237 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3238 else
3239 insert_stmt_after (stmt, insert_point);
3241 else
3243 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3244 == stmt);
3245 gimple_assign_set_rhs1 (stmt, new_rhs1);
3246 gimple_assign_set_rhs2 (stmt, oe->op);
3247 update_stmt (stmt);
3250 if (dump_file && (dump_flags & TDF_DETAILS))
3252 fprintf (dump_file, " into ");
3253 print_gimple_stmt (dump_file, stmt, 0, 0);
3256 return lhs;
3259 /* Find out how many cycles we need to compute statements chain.
3260 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a
3261 maximum number of independent statements we may execute per cycle. */
3263 static int
3264 get_required_cycles (int ops_num, int cpu_width)
3266 int res;
3267 int elog;
3268 unsigned int rest;
3270 /* While we have more than 2 * cpu_width operands
3271 we may reduce number of operands by cpu_width
3272 per cycle. */
3273 res = ops_num / (2 * cpu_width);
3275 /* Remained operands count may be reduced twice per cycle
3276 until we have only one operand. */
3277 rest = (unsigned)(ops_num - res * cpu_width);
3278 elog = exact_log2 (rest);
3279 if (elog >= 0)
3280 res += elog;
3281 else
3282 res += floor_log2 (rest) + 1;
3284 return res;
3287 /* Returns an optimal number of registers to use for computation of
3288 given statements.
   OPS_NUM is the operand count; OPC/MODE identify the operation for
   the target's reassociation-width hook.  */
3290 static int
3291 get_reassociation_width (int ops_num, enum tree_code opc,
3292 enum machine_mode mode)
3294 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3295 int width;
3296 int width_min;
3297 int cycles_best;
   /* An explicit --param overrides the target hook.  */
3299 if (param_width > 0)
3300 width = param_width;
3301 else
3302 width = targetm.sched.reassociation_width (opc, mode);
3304 if (width == 1)
3305 return width;
3307 /* Get the minimal time required for sequence computation. */
3308 cycles_best = get_required_cycles (ops_num, width);
3310 /* Check if we may use less width and still compute sequence for
3311 the same time. It will allow us to reduce registers usage.
3312 get_required_cycles is monotonically increasing with lower width
3313 so we can perform a binary search for the minimal width that still
3314 results in the optimal cycle count. */
3315 width_min = 1;
3316 while (width > width_min)
3318 int width_mid = (width + width_min) / 2;
3320 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3321 width = width_mid;
3322 else if (width_min < width_mid)
3323 width_min = width_mid;
3324 else
3325 break;
3328 return width;
3331 /* Recursively rewrite our linearized statements so that the operators
3332 match those in OPS[OPINDEX], putting the computation in rank
3333 order and trying to allow operations to be executed in
3334 parallel.
   WIDTH is the number of chains to interleave (see
   get_reassociation_width); STMT is the root of the linearized chain
   being rewritten.  */
3336 static void
3337 rewrite_expr_tree_parallel (gimple stmt, int width,
3338 vec<operand_entry_t> ops)
3340 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3341 int op_num = ops.length ();
3342 int stmt_num = op_num - 1;
3343 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3344 int op_index = op_num - 1;
3345 int stmt_index = 0;
3346 int ready_stmts_end = 0;
3347 int i = 0;
3348 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3350 /* We start expression rewriting from the top statements.
3351 So, in this loop we create a full list of statements
3352 we will work with. */
3353 stmts[stmt_num - 1] = stmt;
3354 for (i = stmt_num - 2; i >= 0; i--)
3355 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3357 for (i = 0; i < stmt_num; i++)
3359 tree op1, op2;
3361 /* Determine whether we should use results of
3362 already handled statements or not. */
3363 if (ready_stmts_end == 0
3364 && (i - stmt_index >= width || op_index < 1))
3365 ready_stmts_end = i;
3367 /* Now we choose operands for the next statement. Non zero
3368 value in ready_stmts_end means here that we should use
3369 the result of already generated statements as new operand. */
3370 if (ready_stmts_end > 0)
3372 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3373 if (ready_stmts_end > stmt_index)
3374 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3375 else if (op_index >= 0)
3376 op2 = ops[op_index--]->op;
3377 else
3379 gcc_assert (stmt_index < i);
3380 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3383 if (stmt_index >= ready_stmts_end)
3384 ready_stmts_end = 0;
3386 else
   /* Both operands still come from the original ops vector; give
      swap_ops_for_binary_stmt a chance to pair ranks first.  */
3388 if (op_index > 1)
3389 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3390 op2 = ops[op_index--]->op;
3391 op1 = ops[op_index--]->op;
3394 /* If we emit the last statement then we should put
3395 operands into the last statement. It will also
3396 break the loop. */
3397 if (op_index < 0 && stmt_index == i)
3398 i = stmt_num - 1;
3400 if (dump_file && (dump_flags & TDF_DETAILS))
3402 fprintf (dump_file, "Transforming ");
3403 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3406 /* We keep original statement only for the last one. All
3407 others are recreated. */
3408 if (i == stmt_num - 1)
3410 gimple_assign_set_rhs1 (stmts[i], op1);
3411 gimple_assign_set_rhs2 (stmts[i], op2);
3412 update_stmt (stmts[i]);
3414 else
3415 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3417 if (dump_file && (dump_flags & TDF_DETAILS))
3419 fprintf (dump_file, " into ");
3420 print_gimple_stmt (dump_file, stmts[i], 0, 0);
   /* The original rhs1 chain may be dead now; clean it up.  */
3424 remove_visited_stmt_chain (last_rhs1);
/* Transform STMT, which is really (A +B) + (C + D) into the left
   linear form, ((A+B)+C)+D.
   Recurse on D if necessary.

   Both rhs operands of STMT must already be reassociable ops of the
   same code (asserted below).  A new statement computing (A+B)+C is
   inserted before STMT, the old rhs definition is removed, and the
   involved statements are marked visited so the main walk does not
   reprocess them.  */

static void
linearize_expr (gimple stmt)
{
  gimple_stmt_iterator gsi;
  gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
  gimple oldbinrhs = binrhs;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  gimple newbinrhs = NULL;
  struct loop *loop = loop_containing_stmt (stmt);
  tree lhs = gimple_assign_lhs (stmt);

  gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
	      && is_reassociable_op (binrhs, rhscode, loop));

  gsi = gsi_for_stmt (stmt);

  /* STMT becomes (newbinrhs) + D where newbinrhs = (A+B) + C.  */
  gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
  binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
					 make_ssa_name (TREE_TYPE (lhs), NULL),
					 gimple_assign_lhs (binlhs),
					 gimple_assign_rhs2 (binrhs));
  gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
  gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
  gimple_set_uid (binrhs, gimple_uid (stmt));

  if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
    newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Linearized: ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  reassociate_stats.linearized++;
  update_stmt (stmt);

  /* The old rhs definition has no remaining uses; remove it.  */
  gsi = gsi_for_stmt (oldbinrhs);
  gsi_remove (&gsi, true);
  release_defs (oldbinrhs);

  gimple_set_visited (stmt, true);
  gimple_set_visited (binlhs, true);
  gimple_set_visited (binrhs, true);

  /* Tail recurse on the new rhs if it still needs reassociation.  */
  if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
    /* ??? This should probably be linearize_expr (newbinrhs) but I don't
       want to change the algorithm while converting to tuples.  */
    linearize_expr (stmt);
}
3484 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3485 it. Otherwise, return NULL. */
3487 static gimple
3488 get_single_immediate_use (tree lhs)
3490 use_operand_p immuse;
3491 gimple immusestmt;
3493 if (TREE_CODE (lhs) == SSA_NAME
3494 && single_imm_use (lhs, &immuse, &immusestmt)
3495 && is_gimple_assign (immusestmt))
3496 return immusestmt;
3498 return NULL;
/* Recursively negate the value of TONEGATE, and return the SSA_NAME
   representing the negated value.  Insertions of any necessary
   instructions go before GSI.
   This function is recursive in that, if you hand it "a_5" as the
   value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
   transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */

static tree
negate_value (tree tonegate, gimple_stmt_iterator *gsip)
{
  gimple negatedefstmt = NULL;
  tree resultofnegate;
  gimple_stmt_iterator gsi;
  unsigned int uid;

  /* If we are trying to negate a name, defined by an add, negate the
     add operands instead.  Only done for a single-use add so no other
     user sees the changed value.  */
  if (TREE_CODE (tonegate) == SSA_NAME)
    negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
  if (TREE_CODE (tonegate) == SSA_NAME
      && is_gimple_assign (negatedefstmt)
      && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
      && has_single_use (gimple_assign_lhs (negatedefstmt))
      && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
    {
      tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
      tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
      tree lhs = gimple_assign_lhs (negatedefstmt);
      gimple g;

      /* The iterator is re-fetched after each recursive call because
	 the recursion may insert statements and invalidate it.  */
      gsi = gsi_for_stmt (negatedefstmt);
      rhs1 = negate_value (rhs1, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      rhs2 = negate_value (rhs2, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
      gimple_set_visited (negatedefstmt, true);
      g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
      gimple_set_uid (g, gimple_uid (negatedefstmt));
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      return lhs;
    }

  /* Fall back to emitting an explicit negate before GSIP.  */
  tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
  resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
					     NULL_TREE, true, GSI_SAME_STMT);
  gsi = *gsip;
  uid = gimple_uid (gsi_stmt (gsi));
  /* Give any statements just inserted by force_gimple_operand_gsi
     (which have uid 0) the uid of the insertion point.  */
  for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (gimple_uid (stmt) != 0)
	break;
      gimple_set_uid (stmt, uid);
    }
  return resultofnegate;
}
3561 /* Return true if we should break up the subtract in STMT into an add
3562 with negate. This is true when we the subtract operands are really
3563 adds, or the subtract itself is used in an add expression. In
3564 either case, breaking up the subtract into an add with negate
3565 exposes the adds to reassociation. */
3567 static bool
3568 should_break_up_subtract (gimple stmt)
3570 tree lhs = gimple_assign_lhs (stmt);
3571 tree binlhs = gimple_assign_rhs1 (stmt);
3572 tree binrhs = gimple_assign_rhs2 (stmt);
3573 gimple immusestmt;
3574 struct loop *loop = loop_containing_stmt (stmt);
3576 if (TREE_CODE (binlhs) == SSA_NAME
3577 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3578 return true;
3580 if (TREE_CODE (binrhs) == SSA_NAME
3581 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3582 return true;
3584 if (TREE_CODE (lhs) == SSA_NAME
3585 && (immusestmt = get_single_immediate_use (lhs))
3586 && is_gimple_assign (immusestmt)
3587 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3588 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3589 return true;
3590 return false;
3593 /* Transform STMT from A - B into A + -B. */
3595 static void
3596 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3598 tree rhs1 = gimple_assign_rhs1 (stmt);
3599 tree rhs2 = gimple_assign_rhs2 (stmt);
3601 if (dump_file && (dump_flags & TDF_DETAILS))
3603 fprintf (dump_file, "Breaking up subtract ");
3604 print_gimple_stmt (dump_file, stmt, 0, 0);
3607 rhs2 = negate_value (rhs2, gsip);
3608 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3609 update_stmt (stmt);
/* Determine whether STMT is a builtin call that raises an SSA name
   to an integer power and has only one use.  If so, and this is early
   reassociation and unsafe math optimizations are permitted, place
   the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
   If any of these conditions does not hold, return FALSE.

   Handles pow (x, c) with an exactly-integral constant C, and
   powi (x, c) with a host-representable integer C.  Exponents below 2
   are rejected (folding should have handled 0 and 1).  */

static bool
acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
{
  tree fndecl, arg1;
  REAL_VALUE_TYPE c, cint;

  if (!first_pass_instance
      || !flag_unsafe_math_optimizations
      || !is_gimple_call (stmt)
      || !has_single_use (gimple_call_lhs (stmt)))
    return false;

  fndecl = gimple_call_fndecl (stmt);

  if (!fndecl
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return false;

  switch (DECL_FUNCTION_CODE (fndecl))
    {
    CASE_FLT_FN (BUILT_IN_POW):
      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      if (TREE_CODE (arg1) != REAL_CST)
	return false;

      c = TREE_REAL_CST (arg1);

      /* Exponent magnitude must fit a HOST_WIDE_INT.  */
      if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
	return false;

      *exponent = real_to_integer (&c);
      real_from_integer (&cint, VOIDmode, *exponent,
			 *exponent < 0 ? -1 : 0, 0);
      /* Round-trip check: the real constant must be exactly integral.  */
      if (!real_identical (&c, &cint))
	return false;

      break;

    CASE_FLT_FN (BUILT_IN_POWI):
      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      if (!tree_fits_shwi_p (arg1))
	return false;

      *exponent = tree_to_shwi (arg1);
      break;

    default:
      return false;
    }

  /* Expanding negative exponents is generally unproductive, so we don't
     complicate matters with those.  Exponents of zero and one should
     have been handled by expression folding.  */
  if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
    return false;

  return true;
}
/* Recursively linearize a binary expression that is the RHS of STMT.
   Place the operands of the expression tree in the vector named OPS.

   IS_ASSOCIATIVE is false for operations (like division) where only
   the first operand chain may be reassociated.  SET_VISITED controls
   whether visited flags are set on the statements walked.  Repeated
   pow/powi calls feeding a multiply chain are recorded via
   add_repeat_to_ops_vec so attempt_builtin_powi can exploit them.  */

static void
linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
		     bool is_associative, bool set_visited)
{
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple binlhsdef = NULL, binrhsdef = NULL;
  bool binlhsisreassoc = false;
  bool binrhsisreassoc = false;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  struct loop *loop = loop_containing_stmt (stmt);
  tree base = NULL_TREE;
  HOST_WIDE_INT exponent = 0;

  if (set_visited)
    gimple_set_visited (stmt, true);

  if (TREE_CODE (binlhs) == SSA_NAME)
    {
      binlhsdef = SSA_NAME_DEF_STMT (binlhs);
      binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
			 && !stmt_could_throw_p (binlhsdef));
    }

  if (TREE_CODE (binrhs) == SSA_NAME)
    {
      binrhsdef = SSA_NAME_DEF_STMT (binrhs);
      binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
			 && !stmt_could_throw_p (binrhsdef));
    }

  /* If the LHS is not reassociable, but the RHS is, we need to swap
     them.  If neither is reassociable, there is nothing we can do, so
     just put them in the ops vector.  If the LHS is reassociable,
     linearize it.  If both are reassociable, then linearize the RHS
     and the LHS.  */

  if (!binlhsisreassoc)
    {
      tree temp;

      /* If this is not a associative operation like division, give up.  */
      if (!is_associative)
	{
	  add_to_ops_vec (ops, binrhs);
	  return;
	}

      if (!binrhsisreassoc)
	{
	  /* Neither side continues the chain: record both operands,
	     folding pow/powi calls into repeat entries where possible.  */
	  if (rhscode == MULT_EXPR
	      && TREE_CODE (binrhs) == SSA_NAME
	      && acceptable_pow_call (binrhsdef, &base, &exponent))
	    {
	      add_repeat_to_ops_vec (ops, base, exponent);
	      gimple_set_visited (binrhsdef, true);
	    }
	  else
	    add_to_ops_vec (ops, binrhs);

	  if (rhscode == MULT_EXPR
	      && TREE_CODE (binlhs) == SSA_NAME
	      && acceptable_pow_call (binlhsdef, &base, &exponent))
	    {
	      add_repeat_to_ops_vec (ops, base, exponent);
	      gimple_set_visited (binlhsdef, true);
	    }
	  else
	    add_to_ops_vec (ops, binlhs);

	  return;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "swapping operands of ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}

      swap_ssa_operands (stmt,
			 gimple_assign_rhs1_ptr (stmt),
			 gimple_assign_rhs2_ptr (stmt));
      update_stmt (stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " is now ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}

      /* We want to make it so the lhs is always the reassociative op,
	 so swap.  */
      temp = binlhs;
      binlhs = binrhs;
      binrhs = temp;
    }
  else if (binrhsisreassoc)
    {
      /* Both sides are reassociable: flatten the rhs first.  */
      linearize_expr (stmt);
      binlhs = gimple_assign_rhs1 (stmt);
      binrhs = gimple_assign_rhs2 (stmt);
    }

  gcc_assert (TREE_CODE (binrhs) != SSA_NAME
	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
				      rhscode, loop));
  linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
		       is_associative, set_visited);

  if (rhscode == MULT_EXPR
      && TREE_CODE (binrhs) == SSA_NAME
      && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
    {
      add_repeat_to_ops_vec (ops, base, exponent);
      gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
    }
  else
    add_to_ops_vec (ops, binrhs);
}
/* Repropagate the negates back into subtracts, since no other pass
   currently does it.

   Walks the plus_negates vector (filled by break_up_subtract_bb and
   re-filled below) and folds each NEGATE into its single using
   PLUS_EXPR or MINUS_EXPR statement.  */

static void
repropagate_negates (void)
{
  unsigned int i = 0;
  tree negate;

  FOR_EACH_VEC_ELT (plus_negates, i, negate)
    {
      gimple user = get_single_immediate_use (negate);

      if (!user || !is_gimple_assign (user))
	continue;

      /* The negate operand can be either operand of a PLUS_EXPR
	 (it can be the LHS if the RHS is a constant for example).

	 Force the negate operand to the RHS of the PLUS_EXPR, then
	 transform the PLUS_EXPR into a MINUS_EXPR.  */
      if (gimple_assign_rhs_code (user) == PLUS_EXPR)
	{
	  /* If the negated operand appears on the LHS of the
	     PLUS_EXPR, exchange the operands of the PLUS_EXPR
	     to force the negated operand to the RHS of the PLUS_EXPR.  */
	  if (gimple_assign_rhs1 (user) == negate)
	    {
	      swap_ssa_operands (user,
				 gimple_assign_rhs1_ptr (user),
				 gimple_assign_rhs2_ptr (user));
	    }

	  /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
	     the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
	  if (gimple_assign_rhs2 (user) == negate)
	    {
	      tree rhs1 = gimple_assign_rhs1 (user);
	      tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
	      update_stmt (user);
	    }
	}
      else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
	{
	  if (gimple_assign_rhs1 (user) == negate)
	    {
	      /* We have
		 x = -a
		 y = x - b
		 which we transform into
		 x = a + b
		 y = -x .
		 This pushes down the negate which we possibly can merge
		 into some other operation, hence insert it into the
		 plus_negates vector.  */
	      gimple feed = SSA_NAME_DEF_STMT (negate);
	      tree a = gimple_assign_rhs1 (feed);
	      tree b = gimple_assign_rhs2 (user);
	      gimple_stmt_iterator gsi = gsi_for_stmt (feed);
	      gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
	      tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
	      gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
	      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
	      gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
	      user = gsi_stmt (gsi2);
	      update_stmt (user);
	      gsi_remove (&gsi, true);
	      release_defs (feed);
	      plus_negates.safe_push (gimple_assign_lhs (user));
	    }
	  else
	    {
	      /* Transform "x = -a; y = b - x" into "y = b + a", getting
		 rid of one operation.  */
	      gimple feed = SSA_NAME_DEF_STMT (negate);
	      tree a = gimple_assign_rhs1 (feed);
	      tree rhs1 = gimple_assign_rhs1 (user);
	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
	      update_stmt (gsi_stmt (gsi));
	    }
	}
    }
}
3891 /* Returns true if OP is of a type for which we can do reassociation.
3892 That is for integral or non-saturating fixed-point types, and for
3893 floating point type when associative-math is enabled. */
3895 static bool
3896 can_reassociate_p (tree op)
3898 tree type = TREE_TYPE (op);
3899 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3900 || NON_SAT_FIXED_POINT_TYPE_P (type)
3901 || (flag_associative_math && FLOAT_TYPE_P (type)))
3902 return true;
3903 return false;
/* Break up subtract operations in block BB.

   We do this top down because we don't know whether the subtract is
   part of a possible chain of reassociation except at the top.

   IE given
   d = f + g
   c = a + e
   b = c - d
   q = b - r
   k = t - q

   we want to break up k = t - q, but we won't until we've transformed q
   = b - r, which won't be broken up until we transform b = c - d.

   En passant, clear the GIMPLE visited flag on every statement
   and set UIDs within each basic block.  Recurses over the dominator
   tree so dominating blocks are processed first.  */

static void
break_up_subtract_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  unsigned int uid = 1;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_visited (stmt, false);
      gimple_set_uid (stmt, uid++);

      if (!is_gimple_assign (stmt)
	  || !can_reassociate_p (gimple_assign_lhs (stmt)))
	continue;

      /* Look for simple gimple subtract operations.  */
      if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
	{
	  if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
	      || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
	    continue;

	  /* Check for a subtract used only in an addition.  If this
	     is the case, transform it into add of a negate for better
	     reassociation.  IE transform C = A-B into C = A + -B if C
	     is only used in an addition.  */
	  if (should_break_up_subtract (stmt))
	    break_up_subtract (stmt, &gsi);
	}
      else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
	       && can_reassociate_p (gimple_assign_rhs1 (stmt)))
	/* Remember negates for repropagate_negates.  */
	plus_negates.safe_push (gimple_assign_lhs (stmt));
    }
  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    break_up_subtract_bb (son);
}
/* Used for repeated factor analysis.  */
struct repeat_factor_d
{
  /* An SSA name that occurs in a multiply chain.  */
  tree factor;

  /* Cached rank of the factor.  */
  unsigned rank;

  /* Number of occurrences of the factor in the chain.  */
  HOST_WIDE_INT count;

  /* An SSA name representing the product of this factor and
     all factors appearing later in the repeated factor vector.  */
  tree repr;
};

typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
typedef const struct repeat_factor_d *const_repeat_factor_t;

/* Per-chain scratch vector of repeated factors, built and released by
   attempt_builtin_powi.  */
static vec<repeat_factor> repeat_factor_vec;
3988 /* Used for sorting the repeat factor vector. Sort primarily by
3989 ascending occurrence count, secondarily by descending rank. */
3991 static int
3992 compare_repeat_factors (const void *x1, const void *x2)
3994 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3995 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3997 if (rf1->count != rf2->count)
3998 return rf1->count - rf2->count;
4000 return rf2->rank - rf1->rank;
/* Look for repeated operands in OPS in the multiply tree rooted at
   STMT.  Replace them with an optimal sequence of multiplies and powi
   builtin calls, and remove the used operands from OPS.  Return an
   SSA name representing the value of the replacement sequence.
   Returns NULL_TREE if the target has no __builtin_powi for the type
   or no repeated factor was usable.  */

static tree
attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
{
  unsigned i, j, vec_len;
  int ii;
  operand_entry_t oe;
  repeat_factor_t rf1, rf2;
  repeat_factor rfnew;
  tree result = NULL_TREE;
  tree target_ssa, iter_result;
  tree type = TREE_TYPE (gimple_get_lhs (stmt));
  tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gimple mul_stmt, pow_stmt;

  /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
     target.  */
  if (!powi_fndecl)
    return NULL_TREE;

  /* Allocate the repeated factor vector.  */
  repeat_factor_vec.create (10);

  /* Scan the OPS vector for all SSA names in the product and build
     up a vector of occurrence counts for each factor.  */
  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
	{
	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	    {
	      if (rf1->factor == oe->op)
		{
		  rf1->count += oe->count;
		  break;
		}
	    }

	  if (j >= repeat_factor_vec.length ())
	    {
	      rfnew.factor = oe->op;
	      rfnew.rank = oe->rank;
	      rfnew.count = oe->count;
	      rfnew.repr = NULL_TREE;
	      repeat_factor_vec.safe_push (rfnew);
	    }
	}
    }

  /* Sort the repeated factor vector by (a) increasing occurrence count,
     and (b) decreasing rank.  */
  repeat_factor_vec.qsort (compare_repeat_factors);

  /* It is generally best to combine as many base factors as possible
     into a product before applying __builtin_powi to the result.
     However, the sort order chosen for the repeated factor vector
     allows us to cache partial results for the product of the base
     factors for subsequent use.  When we already have a cached partial
     result from a previous iteration, it is best to make use of it
     before looking for another __builtin_pow opportunity.

     As an example, consider x * x * y * y * y * z * z * z * z.
     We want to first compose the product x * y * z, raise it to the
     second power, then multiply this by y * z, and finally multiply
     by z.  This can be done in 5 multiplies provided we cache y * z
     for use in both expressions:

	t1 = y * z
	t2 = t1 * x
	t3 = t2 * t2
	t4 = t1 * t3
	result = t4 * z

     If we instead ignored the cached y * z and first multiplied by
     the __builtin_pow opportunity z * z, we would get the inferior:

	t1 = y * z
	t2 = t1 * x
	t3 = t2 * t2
	t4 = z * z
	t5 = t3 * t4
	result = t5 * y  */

  vec_len = repeat_factor_vec.length ();

  /* Repeatedly look for opportunities to create a builtin_powi call.  */
  while (true)
    {
      HOST_WIDE_INT power;

      /* First look for the largest cached product of factors from
	 preceding iterations.  If found, create a builtin_powi for
	 it if the minimum occurrence count for its factors is at
	 least 2, or just use this cached product as our next
	 multiplicand if the minimum occurrence count is 1.  */
      FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	{
	  if (rf1->repr && rf1->count > 0)
	    break;
	}

      if (j < vec_len)
	{
	  power = rf1->count;

	  if (power == 1)
	    {
	      iter_result = rf1->repr;

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  unsigned elt;
		  repeat_factor_t rf;
		  fputs ("Multiplying by cached product ", dump_file);
		  for (elt = j; elt < vec_len; elt++)
		    {
		      rf = &repeat_factor_vec[elt];
		      print_generic_expr (dump_file, rf->factor, 0);
		      if (elt < vec_len - 1)
			fputs (" * ", dump_file);
		    }
		  fputs ("\n", dump_file);
		}
	    }
	  else
	    {
	      iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
					    build_int_cst (integer_type_node,
							   power));
	      gimple_call_set_lhs (pow_stmt, iter_result);
	      gimple_set_location (pow_stmt, gimple_location (stmt));
	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  unsigned elt;
		  repeat_factor_t rf;
		  fputs ("Building __builtin_pow call for cached product (",
			 dump_file);
		  for (elt = j; elt < vec_len; elt++)
		    {
		      rf = &repeat_factor_vec[elt];
		      print_generic_expr (dump_file, rf->factor, 0);
		      if (elt < vec_len - 1)
			fputs (" * ", dump_file);
		    }
		  fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
			   power);
		}
	    }
	}
      else
	{
	  /* Otherwise, find the first factor in the repeated factor
	     vector whose occurrence count is at least 2.  If no such
	     factor exists, there are no builtin_powi opportunities
	     remaining.  */
	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	    {
	      if (rf1->count >= 2)
		break;
	    }

	  if (j >= vec_len)
	    break;

	  power = rf1->count;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      unsigned elt;
	      repeat_factor_t rf;
	      fputs ("Building __builtin_pow call for (", dump_file);
	      for (elt = j; elt < vec_len; elt++)
		{
		  rf = &repeat_factor_vec[elt];
		  print_generic_expr (dump_file, rf->factor, 0);
		  if (elt < vec_len - 1)
		    fputs (" * ", dump_file);
		}
	      fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
	    }

	  reassociate_stats.pows_created++;

	  /* Visit each element of the vector in reverse order (so that
	     high-occurrence elements are visited first, and within the
	     same occurrence count, lower-ranked elements are visited
	     first).  Form a linear product of all elements in this order
	     whose occurrencce count is at least that of element J.
	     Record the SSA name representing the product of each element
	     with all subsequent elements in the vector.  */
	  if (j == vec_len - 1)
	    rf1->repr = rf1->factor;
	  else
	    {
	      for (ii = vec_len - 2; ii >= (int)j; ii--)
		{
		  tree op1, op2;

		  rf1 = &repeat_factor_vec[ii];
		  rf2 = &repeat_factor_vec[ii + 1];

		  /* Init the last factor's representative to be itself.  */
		  if (!rf2->repr)
		    rf2->repr = rf2->factor;

		  op1 = rf1->factor;
		  op2 = rf2->repr;

		  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
		  mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
							   target_ssa,
							   op1, op2);
		  gimple_set_location (mul_stmt, gimple_location (stmt));
		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
		  rf1->repr = target_ssa;

		  /* Don't reprocess the multiply we just introduced.  */
		  gimple_set_visited (mul_stmt, true);
		}
	    }

	  /* Form a call to __builtin_powi for the maximum product
	     just formed, raised to the power obtained earlier.  */
	  rf1 = &repeat_factor_vec[j];
	  iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
	  pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
					build_int_cst (integer_type_node,
						       power));
	  gimple_call_set_lhs (pow_stmt, iter_result);
	  gimple_set_location (pow_stmt, gimple_location (stmt));
	  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
	}

      /* If we previously formed at least one other builtin_powi call,
	 form the product of this one and those others.  */
      if (result)
	{
	  tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
	  mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
						   result, iter_result);
	  gimple_set_location (mul_stmt, gimple_location (stmt));
	  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
	  gimple_set_visited (mul_stmt, true);
	  result = new_result;
	}
      else
	result = iter_result;

      /* Decrement the occurrence count of each element in the product
	 by the count found above, and remove this many copies of each
	 factor from OPS.  */
      for (i = j; i < vec_len; i++)
	{
	  unsigned k = power;
	  unsigned n;

	  rf1 = &repeat_factor_vec[i];
	  rf1->count -= power;

	  FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
	    {
	      if (oe->op == rf1->factor)
		{
		  if (oe->count <= k)
		    {
		      ops->ordered_remove (n);
		      k -= oe->count;

		      if (k == 0)
			break;
		    }
		  else
		    {
		      oe->count -= k;
		      break;
		    }
		}
	    }
	}
    }

  /* At this point all elements in the repeated factor vector have a
     remaining occurrence count of 0 or 1, and those with a count of 1
     don't have cached representatives.  Re-sort the ops vector and
     clean up.  */
  ops->qsort (sort_by_operand_rank);
  repeat_factor_vec.release ();

  /* Return the final product computed herein.  Note that there may
     still be some elements with single occurrence count left in OPS;
     those will be handled by the normal reassociation logic.  */
  return result;
}
4305 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4307 static void
4308 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4310 tree rhs1;
4312 if (dump_file && (dump_flags & TDF_DETAILS))
4314 fprintf (dump_file, "Transforming ");
4315 print_gimple_stmt (dump_file, stmt, 0, 0);
4318 rhs1 = gimple_assign_rhs1 (stmt);
4319 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4320 update_stmt (stmt);
4321 remove_visited_stmt_chain (rhs1);
4323 if (dump_file && (dump_flags & TDF_DETAILS))
4325 fprintf (dump_file, " into ");
4326 print_gimple_stmt (dump_file, stmt, 0, 0);
4330 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4332 static void
4333 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4334 tree rhs1, tree rhs2)
4336 if (dump_file && (dump_flags & TDF_DETAILS))
4338 fprintf (dump_file, "Transforming ");
4339 print_gimple_stmt (dump_file, stmt, 0, 0);
4342 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4343 update_stmt (gsi_stmt (*gsi));
4344 remove_visited_stmt_chain (rhs1);
4346 if (dump_file && (dump_flags & TDF_DETAILS))
4348 fprintf (dump_file, " into ");
4349 print_gimple_stmt (dump_file, stmt, 0, 0);
4353 /* Reassociate expressions in basic block BB and its post-dominator as
4354 children. */
4356 static void
4357 reassociate_bb (basic_block bb)
4359 gimple_stmt_iterator gsi;
4360 basic_block son;
4361 gimple stmt = last_stmt (bb);
4363 if (stmt && !gimple_visited_p (stmt))
4364 maybe_optimize_range_tests (stmt);
4366 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4368 stmt = gsi_stmt (gsi);
4370 if (is_gimple_assign (stmt)
4371 && !stmt_could_throw_p (stmt))
4373 tree lhs, rhs1, rhs2;
4374 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4376 /* If this is not a gimple binary expression, there is
4377 nothing for us to do with it. */
4378 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4379 continue;
4381 /* If this was part of an already processed statement,
4382 we don't need to touch it again. */
4383 if (gimple_visited_p (stmt))
4385 /* This statement might have become dead because of previous
4386 reassociations. */
4387 if (has_zero_uses (gimple_get_lhs (stmt)))
4389 gsi_remove (&gsi, true);
4390 release_defs (stmt);
4391 /* We might end up removing the last stmt above which
4392 places the iterator to the end of the sequence.
4393 Reset it to the last stmt in this case which might
4394 be the end of the sequence as well if we removed
4395 the last statement of the sequence. In which case
4396 we need to bail out. */
4397 if (gsi_end_p (gsi))
4399 gsi = gsi_last_bb (bb);
4400 if (gsi_end_p (gsi))
4401 break;
4404 continue;
4407 lhs = gimple_assign_lhs (stmt);
4408 rhs1 = gimple_assign_rhs1 (stmt);
4409 rhs2 = gimple_assign_rhs2 (stmt);
4411 /* For non-bit or min/max operations we can't associate
4412 all types. Verify that here. */
4413 if (rhs_code != BIT_IOR_EXPR
4414 && rhs_code != BIT_AND_EXPR
4415 && rhs_code != BIT_XOR_EXPR
4416 && rhs_code != MIN_EXPR
4417 && rhs_code != MAX_EXPR
4418 && (!can_reassociate_p (lhs)
4419 || !can_reassociate_p (rhs1)
4420 || !can_reassociate_p (rhs2)))
4421 continue;
4423 if (associative_tree_code (rhs_code))
4425 auto_vec<operand_entry_t> ops;
4426 tree powi_result = NULL_TREE;
4428 /* There may be no immediate uses left by the time we
4429 get here because we may have eliminated them all. */
4430 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4431 continue;
4433 gimple_set_visited (stmt, true);
4434 linearize_expr_tree (&ops, stmt, true, true);
4435 ops.qsort (sort_by_operand_rank);
4436 optimize_ops_list (rhs_code, &ops);
4437 if (undistribute_ops_list (rhs_code, &ops,
4438 loop_containing_stmt (stmt)))
4440 ops.qsort (sort_by_operand_rank);
4441 optimize_ops_list (rhs_code, &ops);
4444 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4445 optimize_range_tests (rhs_code, &ops);
4447 if (first_pass_instance
4448 && rhs_code == MULT_EXPR
4449 && flag_unsafe_math_optimizations)
4450 powi_result = attempt_builtin_powi (stmt, &ops);
4452 /* If the operand vector is now empty, all operands were
4453 consumed by the __builtin_powi optimization. */
4454 if (ops.length () == 0)
4455 transform_stmt_to_copy (&gsi, stmt, powi_result);
4456 else if (ops.length () == 1)
4458 tree last_op = ops.last ()->op;
4460 if (powi_result)
4461 transform_stmt_to_multiply (&gsi, stmt, last_op,
4462 powi_result);
4463 else
4464 transform_stmt_to_copy (&gsi, stmt, last_op);
4466 else
4468 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4469 int ops_num = ops.length ();
4470 int width = get_reassociation_width (ops_num, rhs_code, mode);
4471 tree new_lhs = lhs;
4473 if (dump_file && (dump_flags & TDF_DETAILS))
4474 fprintf (dump_file,
4475 "Width = %d was chosen for reassociation\n", width);
4477 if (width > 1
4478 && ops.length () > 3)
4479 rewrite_expr_tree_parallel (stmt, width, ops);
4480 else
4482 /* When there are three operands left, we want
4483 to make sure the ones that get the double
4484 binary op are chosen wisely. */
4485 int len = ops.length ();
4486 if (len >= 3)
4487 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4489 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4490 powi_result != NULL);
4493 /* If we combined some repeated factors into a
4494 __builtin_powi call, multiply that result by the
4495 reassociated operands. */
4496 if (powi_result)
4498 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4499 tree type = TREE_TYPE (lhs);
4500 tree target_ssa = make_temp_ssa_name (type, NULL,
4501 "reassocpow");
4502 gimple_set_lhs (lhs_stmt, target_ssa);
4503 update_stmt (lhs_stmt);
4504 if (lhs != new_lhs)
4505 target_ssa = new_lhs;
4506 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4507 powi_result,
4508 target_ssa);
4509 gimple_set_location (mul_stmt, gimple_location (stmt));
4510 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4516 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4517 son;
4518 son = next_dom_son (CDI_POST_DOMINATORS, son))
4519 reassociate_bb (son);
4522 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4523 void debug_ops_vector (vec<operand_entry_t> ops);
4525 /* Dump the operand entry vector OPS to FILE. */
4527 void
4528 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4530 operand_entry_t oe;
4531 unsigned int i;
4533 FOR_EACH_VEC_ELT (ops, i, oe)
4535 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4536 print_generic_expr (file, oe->op, 0);
4540 /* Dump the operand entry vector OPS to STDERR. */
4542 DEBUG_FUNCTION void
4543 debug_ops_vector (vec<operand_entry_t> ops)
4545 dump_ops_vector (stderr, ops);
4548 static void
4549 do_reassoc (void)
4551 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4552 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4555 /* Initialize the reassociation pass. */
4557 static void
4558 init_reassoc (void)
4560 int i;
4561 long rank = 2;
4562 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4564 /* Find the loops, so that we can prevent moving calculations in
4565 them. */
4566 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4568 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4570 operand_entry_pool = create_alloc_pool ("operand entry pool",
4571 sizeof (struct operand_entry), 30);
4572 next_operand_entry_id = 0;
4574 /* Reverse RPO (Reverse Post Order) will give us something where
4575 deeper loops come later. */
4576 pre_and_rev_post_order_compute (NULL, bbs, false);
4577 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
4578 operand_rank = pointer_map_create ();
4580 /* Give each default definition a distinct rank. This includes
4581 parameters and the static chain. Walk backwards over all
4582 SSA names so that we get proper rank ordering according
4583 to tree_swap_operands_p. */
4584 for (i = num_ssa_names - 1; i > 0; --i)
4586 tree name = ssa_name (i);
4587 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4588 insert_operand_rank (name, ++rank);
4591 /* Set up rank for each BB */
4592 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
4593 bb_rank[bbs[i]] = ++rank << 16;
4595 free (bbs);
4596 calculate_dominance_info (CDI_POST_DOMINATORS);
4597 plus_negates = vNULL;
4600 /* Cleanup after the reassociation pass, and print stats if
4601 requested. */
4603 static void
4604 fini_reassoc (void)
4606 statistics_counter_event (cfun, "Linearized",
4607 reassociate_stats.linearized);
4608 statistics_counter_event (cfun, "Constants eliminated",
4609 reassociate_stats.constants_eliminated);
4610 statistics_counter_event (cfun, "Ops eliminated",
4611 reassociate_stats.ops_eliminated);
4612 statistics_counter_event (cfun, "Statements rewritten",
4613 reassociate_stats.rewritten);
4614 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4615 reassociate_stats.pows_encountered);
4616 statistics_counter_event (cfun, "Built-in powi calls created",
4617 reassociate_stats.pows_created);
4619 pointer_map_destroy (operand_rank);
4620 free_alloc_pool (operand_entry_pool);
4621 free (bb_rank);
4622 plus_negates.release ();
4623 free_dominance_info (CDI_POST_DOMINATORS);
4624 loop_optimizer_finalize ();
4627 /* Gate and execute functions for Reassociation. */
4629 static unsigned int
4630 execute_reassoc (void)
4632 init_reassoc ();
4634 do_reassoc ();
4635 repropagate_negates ();
4637 fini_reassoc ();
4638 return 0;
4641 static bool
4642 gate_tree_ssa_reassoc (void)
4644 return flag_tree_reassoc != 0;
4647 namespace {
4649 const pass_data pass_data_reassoc =
4651 GIMPLE_PASS, /* type */
4652 "reassoc", /* name */
4653 OPTGROUP_NONE, /* optinfo_flags */
4654 true, /* has_gate */
4655 true, /* has_execute */
4656 TV_TREE_REASSOC, /* tv_id */
4657 ( PROP_cfg | PROP_ssa ), /* properties_required */
4658 0, /* properties_provided */
4659 0, /* properties_destroyed */
4660 0, /* todo_flags_start */
4661 ( TODO_verify_ssa
4662 | TODO_update_ssa_only_virtuals
4663 | TODO_verify_flow ), /* todo_flags_finish */
4666 class pass_reassoc : public gimple_opt_pass
4668 public:
4669 pass_reassoc (gcc::context *ctxt)
4670 : gimple_opt_pass (pass_data_reassoc, ctxt)
4673 /* opt_pass methods: */
4674 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
4675 bool gate () { return gate_tree_ssa_reassoc (); }
4676 unsigned int execute () { return execute_reassoc (); }
4678 }; // class pass_reassoc
4680 } // anon namespace
4682 gimple_opt_pass *
4683 make_pass_reassoc (gcc::context *ctxt)
4685 return new pass_reassoc (ctxt);