/* Reassociation for trees.
   Copyright (C) 2005-2014 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "hash-table.h"
#include "tm.h"
#include "rtl.h"
#include "tm_p.h"
#include "tree.h"
#include "stor-layout.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-inline.h"
#include "pointer-set.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "alloc-pool.h"
#include "langhooks.h"
#include "cfgloop.h"
#include "flags.h"
#include "target.h"
#include "params.h"
#include "diagnostic-core.h"

/*  This is a simple global reassociation pass.  It is, in part, based
    on the LLVM pass of the same name (They do some things more/less
    than we do, in different orders, etc).

    It consists of five steps:

    1. Breaking up subtract operations into addition + negate, where
    it would promote the reassociation of adds.

    2. Left linearization of the expression trees, so that (A+B)+(C+D)
    becomes (((A+B)+C)+D), which is easier for us to rewrite later.
    During linearization, we place the operands of the binary
    expressions into a vector of operand_entry_t

    3. Optimization of the operand lists, eliminating things like a +
    -a, a & a, etc.

    3a. Combine repeated factors with the same occurrence counts
    into a __builtin_powi call that will later be optimized into
    an optimal number of multiplies.

    4. Rewrite the expression trees we linearized and optimized so
    they are in proper rank order.

    5. Repropagate negates, as nothing else will clean it up ATM.

    A bit of theory on #4, since nobody seems to write anything down
    about why it makes sense to do it the way they do it:

    We could do this much nicer theoretically, but don't (for reasons
    explained after how to do it theoretically nice :P).

    In order to promote the most redundancy elimination, you want
    binary expressions whose operands are the same rank (or
    preferably, the same value) exposed to the redundancy eliminator,
    for possible elimination.

    So the way to do this if we really cared, is to build the new op
    tree from the leaves to the roots, merging as you go, and putting the
    new op on the end of the worklist, until you are left with one
    thing on the worklist.

    IE if you have to rewrite the following set of operands (listed with
    rank in parentheses), with opcode PLUS_EXPR:

    a (1),  b (1),  c (1),  d (2), e (2)

    We start with our merge worklist empty, and the ops list with all of
    those on it.

    You want to first merge all leaves of the same rank, as much as
    possible.

    So first build a binary op of

    mergetmp = a + b, and put "mergetmp" on the merge worklist.

    Because there is no three operand form of PLUS_EXPR, c is not going to
    be exposed to redundancy elimination as a rank 1 operand.

    So you might as well throw it on the merge worklist (you could also
    consider it to now be a rank two operand, and merge it with d and e,
    but in this case, you then have evicted e from a binary op.  So at
    least in this situation, you can't win.)

    Then build a binary op of d + e
    mergetmp2 = d + e

    and put mergetmp2 on the merge worklist.

    so merge worklist = {mergetmp, c, mergetmp2}

    Continue building binary ops of these operations until you have only
    one operation left on the worklist.

    So we have

    build binary op
    mergetmp3 = mergetmp + c

    worklist = {mergetmp2, mergetmp3}

    mergetmp4 = mergetmp2 + mergetmp3

    worklist = {mergetmp4}

    because we have one operation left, we can now just set the original
    statement equal to the result of that operation.

    This will at least expose a + b and d + e to redundancy elimination
    as binary operations.

    For extra points, you can reuse the old statements to build the
    mergetmps, since you shouldn't run out.

    So why don't we do this?

    Because it's expensive, and rarely will help.  Most trees we are
    reassociating have 3 or less ops.  If they have 2 ops, they already
    will be written into a nice single binary op.  If you have 3 ops, a
    single simple check suffices to tell you whether the first two are of the
    same rank.  If so, you know to order it

    mergetmp = op1 + op2
    newstmt = mergetmp + op3

    instead of
    mergetmp = op2 + op3
    newstmt = mergetmp + op1

    If all three are of the same rank, you can't expose them all in a
    single binary operator anyway, so the above is *still* the best you
    can do.

    Thus, this is what we do.  When we have three ops left, we check to see
    what order to put them in, and call it a day.  As a nod to vector sum
    reduction, we check if any of the ops are really a phi node that is a
    destructive update for the associating op, and keep the destructive
    update together for vector sum reduction recognition.  */
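
/* As a small end-to-end illustration of the five steps (a sketch of the
   intent only, not the exact GIMPLE the pass produces; the names are
   invented), consider reassociating

     e = (a - b) + (c + d);

   Step 1 breaks the subtract:         e = (a + (-b)) + (c + d);
   Step 2 linearizes it into a chain:  t1 = a + (-b); t2 = t1 + c; e = t2 + d;
   and collects the operand vector     { a, -b, c, d }.
   Steps 3 and 4 optimize and sort that vector by rank and rewrite the
   chain in rank order, and step 5 turns a surviving negate back into a
   subtract, e.g.                      e = ((a + c) + d) - b;  */
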
/* Statistics */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rewritten;
  int pows_encountered;
  int pows_created;
} reassociate_stats;

/* Operator, rank pair.  */
typedef struct operand_entry
{
  unsigned int rank;
  int id;
  tree op;
  unsigned int count;
} *operand_entry_t;

static alloc_pool operand_entry_pool;

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static long *bb_rank;

/* Operand->rank hashtable.  */
static struct pointer_map_t *operand_rank;

/* Forward decls.  */
static long get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);


/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)
230 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
231 an innermost loop, and the phi has only a single use which is inside
232 the loop, then the rank is the block rank of the loop latch plus an
233 extra bias for the loop-carried dependence. This causes expressions
234 calculated into an accumulator variable to be independent for each
235 iteration of the loop. If STMT is some other phi, the rank is the
236 block rank of its containing block. */
237 static long
238 phi_rank (gimple stmt)
240 basic_block bb = gimple_bb (stmt);
241 struct loop *father = bb->loop_father;
242 tree res;
243 unsigned i;
244 use_operand_p use;
245 gimple use_stmt;
247 /* We only care about real loops (those with a latch). */
248 if (!father->latch)
249 return bb_rank[bb->index];
251 /* Interesting phis must be in headers of innermost loops. */
252 if (bb != father->header
253 || father->inner)
254 return bb_rank[bb->index];
256 /* Ignore virtual SSA_NAMEs. */
257 res = gimple_phi_result (stmt);
258 if (virtual_operand_p (res))
259 return bb_rank[bb->index];
261 /* The phi definition must have a single use, and that use must be
262 within the loop. Otherwise this isn't an accumulator pattern. */
263 if (!single_imm_use (res, &use, &use_stmt)
264 || gimple_bb (use_stmt)->loop_father != father)
265 return bb_rank[bb->index];
267 /* Look for phi arguments from within the loop. If found, bias this phi. */
268 for (i = 0; i < gimple_phi_num_args (stmt); i++)
270 tree arg = gimple_phi_arg_def (stmt, i);
271 if (TREE_CODE (arg) == SSA_NAME
272 && !SSA_NAME_IS_DEFAULT_DEF (arg))
274 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
275 if (gimple_bb (def_stmt)->loop_father == father)
276 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
280 /* Must be an uninteresting phi. */
281 return bb_rank[bb->index];
284 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
285 loop-carried dependence of an innermost loop, return TRUE; else
286 return FALSE. */
287 static bool
288 loop_carried_phi (tree exp)
290 gimple phi_stmt;
291 long block_rank;
293 if (TREE_CODE (exp) != SSA_NAME
294 || SSA_NAME_IS_DEFAULT_DEF (exp))
295 return false;
297 phi_stmt = SSA_NAME_DEF_STMT (exp);
299 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
300 return false;
302 /* Non-loop-carried phis have block rank. Loop-carried phis have
303 an additional bias added in. If this phi doesn't have block rank,
304 it's biased and should not be propagated. */
305 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
307 if (phi_rank (phi_stmt) != block_rank)
308 return true;
310 return false;
313 /* Return the maximum of RANK and the rank that should be propagated
314 from expression OP. For most operands, this is just the rank of OP.
315 For loop-carried phis, the value is zero to avoid undoing the bias
316 in favor of the phi. */
317 static long
318 propagate_rank (long rank, tree op)
320 long op_rank;
322 if (loop_carried_phi (op))
323 return rank;
325 op_rank = get_rank (op);
327 return MAX (rank, op_rank);
330 /* Look up the operand rank structure for expression E. */
332 static inline long
333 find_operand_rank (tree e)
335 void **slot = pointer_map_contains (operand_rank, e);
336 return slot ? (long) (intptr_t) *slot : -1;
339 /* Insert {E,RANK} into the operand rank hashtable. */
341 static inline void
342 insert_operand_rank (tree e, long rank)
344 void **slot;
345 gcc_assert (rank > 0);
346 slot = pointer_map_insert (operand_rank, e);
347 gcc_assert (!*slot);
348 *slot = (void *) (intptr_t) rank;
351 /* Given an expression E, return the rank of the expression. */
353 static long
354 get_rank (tree e)
356 /* Constants have rank 0. */
357 if (is_gimple_min_invariant (e))
358 return 0;
  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */
397 if (TREE_CODE (e) == SSA_NAME)
399 gimple stmt;
400 long rank;
401 int i, n;
402 tree op;
404 if (SSA_NAME_IS_DEFAULT_DEF (e))
405 return find_operand_rank (e);
407 stmt = SSA_NAME_DEF_STMT (e);
408 if (gimple_code (stmt) == GIMPLE_PHI)
409 return phi_rank (stmt);
411 if (!is_gimple_assign (stmt)
412 || gimple_vdef (stmt))
413 return bb_rank[gimple_bb (stmt)->index];
415 /* If we already have a rank for this expression, use that. */
416 rank = find_operand_rank (e);
417 if (rank != -1)
418 return rank;
420 /* Otherwise, find the maximum rank for the operands. As an
421 exception, remove the bias from loop-carried phis when propagating
422 the rank so that dependent operations are not also biased. */
423 rank = 0;
424 if (gimple_assign_single_p (stmt))
426 tree rhs = gimple_assign_rhs1 (stmt);
427 n = TREE_OPERAND_LENGTH (rhs);
428 if (n == 0)
429 rank = propagate_rank (rank, rhs);
430 else
432 for (i = 0; i < n; i++)
434 op = TREE_OPERAND (rhs, i);
436 if (op != NULL_TREE)
437 rank = propagate_rank (rank, op);
441 else
443 n = gimple_num_ops (stmt);
444 for (i = 1; i < n; i++)
446 op = gimple_op (stmt, i);
447 gcc_assert (op);
448 rank = propagate_rank (rank, op);
452 if (dump_file && (dump_flags & TDF_DETAILS))
454 fprintf (dump_file, "Rank for ");
455 print_generic_expr (dump_file, e, 0);
456 fprintf (dump_file, " is %ld\n", (rank + 1));
459 /* Note the rank in the hashtable so we don't recompute it. */
460 insert_operand_rank (e, (rank + 1));
461 return (rank + 1);
464 /* Globals, etc, are rank 0 */
465 return 0;
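
/* A small illustration of the resulting ranks (the base numbers are
   arbitrary and the names invented; function arguments get the rank
   pre-assigned at pass initialization):

     x = argument;        rank (x)  = pre-set argument rank, say 9
     t1 = x + 1;          rank (t1) = MAX (rank (x), rank (1)) + 1 = 10
     t2 = t1 * x;         rank (t2) = MAX (rank (t1), rank (x)) + 1 = 11

   Values with larger ranks become available later, so sorting operands
   by rank keeps operands that are computed together next to each
   other.  */
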
469 /* We want integer ones to end up last no matter what, since they are
470 the ones we can do the most with. */
471 #define INTEGER_CONST_TYPE 1 << 3
472 #define FLOAT_CONST_TYPE 1 << 2
473 #define OTHER_CONST_TYPE 1 << 1
475 /* Classify an invariant tree into integer, float, or other, so that
476 we can sort them to be near other constants of the same type. */
477 static inline int
478 constant_type (tree t)
480 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
481 return INTEGER_CONST_TYPE;
482 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
483 return FLOAT_CONST_TYPE;
484 else
485 return OTHER_CONST_TYPE;
488 /* qsort comparison function to sort operand entries PA and PB by rank
489 so that the sorted array is ordered by rank in decreasing order. */
490 static int
491 sort_by_operand_rank (const void *pa, const void *pb)
493 const operand_entry_t oea = *(const operand_entry_t *)pa;
494 const operand_entry_t oeb = *(const operand_entry_t *)pb;
496 /* It's nicer for optimize_expression if constants that are likely
497 to fold when added/multiplied//whatever are put next to each
498 other. Since all constants have rank 0, order them by type. */
499 if (oeb->rank == 0 && oea->rank == 0)
501 if (constant_type (oeb->op) != constant_type (oea->op))
502 return constant_type (oeb->op) - constant_type (oea->op);
503 else
504 /* To make sorting result stable, we use unique IDs to determine
505 order. */
506 return oeb->id - oea->id;
509 /* Lastly, make sure the versions that are the same go next to each
510 other. */
511 if ((oeb->rank - oea->rank == 0)
512 && TREE_CODE (oea->op) == SSA_NAME
513 && TREE_CODE (oeb->op) == SSA_NAME)
515 /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
516 versions of removed SSA_NAMEs, so if possible, prefer to sort
517 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
518 See PR60418. */
519 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
520 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
521 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
523 gimple stmta = SSA_NAME_DEF_STMT (oea->op);
524 gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
525 basic_block bba = gimple_bb (stmta);
526 basic_block bbb = gimple_bb (stmtb);
527 if (bbb != bba)
529 if (bb_rank[bbb->index] != bb_rank[bba->index])
530 return bb_rank[bbb->index] - bb_rank[bba->index];
532 else
534 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
535 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
536 if (da != db)
537 return da ? 1 : -1;
541 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
542 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
543 else
544 return oeb->id - oea->id;
547 if (oeb->rank != oea->rank)
548 return oeb->rank - oea->rank;
549 else
550 return oeb->id - oea->id;
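
/* For instance (a sketch), for a PLUS_EXPR operand vector { a, 3, b, 5 }
   the SSA_NAMEs sort first in decreasing rank order and the two
   constants, both of rank 0 and of the same constant_type, end up next
   to each other at the end of the vector.  That adjacency is what later
   lets optimize_ops_list fold them into a single constant 8.  */
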
553 /* Add an operand entry to *OPS for the tree operand OP. */
555 static void
556 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
558 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
560 oe->op = op;
561 oe->rank = get_rank (op);
562 oe->id = next_operand_entry_id++;
563 oe->count = 1;
564 ops->safe_push (oe);
567 /* Add an operand entry to *OPS for the tree operand OP with repeat
568 count REPEAT. */
570 static void
571 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
572 HOST_WIDE_INT repeat)
574 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
576 oe->op = op;
577 oe->rank = get_rank (op);
578 oe->id = next_operand_entry_id++;
579 oe->count = repeat;
580 ops->safe_push (oe);
582 reassociate_stats.pows_encountered++;
585 /* Return true if STMT is reassociable operation containing a binary
586 operation with tree code CODE, and is inside LOOP. */
588 static bool
589 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
591 basic_block bb = gimple_bb (stmt);
593 if (gimple_bb (stmt) == NULL)
594 return false;
596 if (!flow_bb_inside_loop_p (loop, bb))
597 return false;
599 if (is_gimple_assign (stmt)
600 && gimple_assign_rhs_code (stmt) == code
601 && has_single_use (gimple_assign_lhs (stmt)))
602 return true;
604 return false;
608 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
609 operand of the negate operation. Otherwise, return NULL. */
611 static tree
612 get_unary_op (tree name, enum tree_code opcode)
614 gimple stmt = SSA_NAME_DEF_STMT (name);
616 if (!is_gimple_assign (stmt))
617 return NULL_TREE;
619 if (gimple_assign_rhs_code (stmt) == opcode)
620 return gimple_assign_rhs1 (stmt);
621 return NULL_TREE;
624 /* If CURR and LAST are a pair of ops that OPCODE allows us to
625 eliminate through equivalences, do so, remove them from OPS, and
626 return true. Otherwise, return false. */
628 static bool
629 eliminate_duplicate_pair (enum tree_code opcode,
630 vec<operand_entry_t> *ops,
631 bool *all_done,
632 unsigned int i,
633 operand_entry_t curr,
634 operand_entry_t last)
637 /* If we have two of the same op, and the opcode is & |, min, or max,
638 we can eliminate one of them.
639 If we have two of the same op, and the opcode is ^, we can
640 eliminate both of them. */
642 if (last && last->op == curr->op)
644 switch (opcode)
646 case MAX_EXPR:
647 case MIN_EXPR:
648 case BIT_IOR_EXPR:
649 case BIT_AND_EXPR:
650 if (dump_file && (dump_flags & TDF_DETAILS))
652 fprintf (dump_file, "Equivalence: ");
653 print_generic_expr (dump_file, curr->op, 0);
654 fprintf (dump_file, " [&|minmax] ");
655 print_generic_expr (dump_file, last->op, 0);
656 fprintf (dump_file, " -> ");
657 print_generic_stmt (dump_file, last->op, 0);
660 ops->ordered_remove (i);
661 reassociate_stats.ops_eliminated ++;
663 return true;
665 case BIT_XOR_EXPR:
666 if (dump_file && (dump_flags & TDF_DETAILS))
668 fprintf (dump_file, "Equivalence: ");
669 print_generic_expr (dump_file, curr->op, 0);
670 fprintf (dump_file, " ^ ");
671 print_generic_expr (dump_file, last->op, 0);
672 fprintf (dump_file, " -> nothing\n");
675 reassociate_stats.ops_eliminated += 2;
677 if (ops->length () == 2)
679 ops->create (0);
680 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
681 *all_done = true;
683 else
685 ops->ordered_remove (i-1);
686 ops->ordered_remove (i-1);
689 return true;
691 default:
692 break;
695 return false;
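
/* Sketches of the duplicate pairs this catches (after sorting, the two
   copies of the operand sit next to each other in the vector):

     a & b & a   -> a & b    one copy of an AND/IOR/MIN/MAX duplicate is dropped
     a ^ b ^ a   -> b        both XOR copies cancel; if nothing else were left,
                             a zero of the right type is pushed instead  */
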
698 static vec<tree> plus_negates;
700 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
701 expression, look in OPS for a corresponding positive operation to cancel
702 it out. If we find one, remove the other from OPS, replace
703 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
704 return false. */
706 static bool
707 eliminate_plus_minus_pair (enum tree_code opcode,
708 vec<operand_entry_t> *ops,
709 unsigned int currindex,
710 operand_entry_t curr)
712 tree negateop;
713 tree notop;
714 unsigned int i;
715 operand_entry_t oe;
717 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
718 return false;
720 negateop = get_unary_op (curr->op, NEGATE_EXPR);
721 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
722 if (negateop == NULL_TREE && notop == NULL_TREE)
723 return false;
725 /* Any non-negated version will have a rank that is one less than
726 the current rank. So once we hit those ranks, if we don't find
727 one, we can stop. */
729 for (i = currindex + 1;
730 ops->iterate (i, &oe)
731 && oe->rank >= curr->rank - 1 ;
732 i++)
734 if (oe->op == negateop)
737 if (dump_file && (dump_flags & TDF_DETAILS))
739 fprintf (dump_file, "Equivalence: ");
740 print_generic_expr (dump_file, negateop, 0);
741 fprintf (dump_file, " + -");
742 print_generic_expr (dump_file, oe->op, 0);
743 fprintf (dump_file, " -> 0\n");
746 ops->ordered_remove (i);
747 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
748 ops->ordered_remove (currindex);
749 reassociate_stats.ops_eliminated ++;
751 return true;
753 else if (oe->op == notop)
755 tree op_type = TREE_TYPE (oe->op);
757 if (dump_file && (dump_flags & TDF_DETAILS))
759 fprintf (dump_file, "Equivalence: ");
760 print_generic_expr (dump_file, notop, 0);
761 fprintf (dump_file, " + ~");
762 print_generic_expr (dump_file, oe->op, 0);
763 fprintf (dump_file, " -> -1\n");
766 ops->ordered_remove (i);
767 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
768 ops->ordered_remove (currindex);
769 reassociate_stats.ops_eliminated ++;
771 return true;
775 /* CURR->OP is a negate expr in a plus expr: save it for later
776 inspection in repropagate_negates(). */
777 if (negateop != NULL_TREE)
778 plus_negates.safe_push (curr->op);
780 return false;
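
/* Sketches of the pairs this catches in a PLUS_EXPR chain:

     b + a + (-a)  -> b + 0      the negate cancels the plain copy of a
     b + a + (~a)  -> b + (-1)   since a + ~a is all ones in two's complement  */
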
783 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
784 bitwise not expression, look in OPS for a corresponding operand to
785 cancel it out. If we find one, remove the other from OPS, replace
786 OPS[CURRINDEX] with 0, and return true. Otherwise, return
787 false. */
789 static bool
790 eliminate_not_pairs (enum tree_code opcode,
791 vec<operand_entry_t> *ops,
792 unsigned int currindex,
793 operand_entry_t curr)
795 tree notop;
796 unsigned int i;
797 operand_entry_t oe;
799 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
800 || TREE_CODE (curr->op) != SSA_NAME)
801 return false;
803 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
804 if (notop == NULL_TREE)
805 return false;
807 /* Any non-not version will have a rank that is one less than
808 the current rank. So once we hit those ranks, if we don't find
809 one, we can stop. */
811 for (i = currindex + 1;
812 ops->iterate (i, &oe)
813 && oe->rank >= curr->rank - 1;
814 i++)
816 if (oe->op == notop)
818 if (dump_file && (dump_flags & TDF_DETAILS))
820 fprintf (dump_file, "Equivalence: ");
821 print_generic_expr (dump_file, notop, 0);
822 if (opcode == BIT_AND_EXPR)
823 fprintf (dump_file, " & ~");
824 else if (opcode == BIT_IOR_EXPR)
825 fprintf (dump_file, " | ~");
826 print_generic_expr (dump_file, oe->op, 0);
827 if (opcode == BIT_AND_EXPR)
828 fprintf (dump_file, " -> 0\n");
829 else if (opcode == BIT_IOR_EXPR)
830 fprintf (dump_file, " -> -1\n");
833 if (opcode == BIT_AND_EXPR)
834 oe->op = build_zero_cst (TREE_TYPE (oe->op));
835 else if (opcode == BIT_IOR_EXPR)
836 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
838 reassociate_stats.ops_eliminated += ops->length () - 1;
839 ops->truncate (0);
840 ops->quick_push (oe);
841 return true;
845 return false;
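
/* Sketches of the not-pairs this catches; note that the whole operand
   list collapses to the single constant:

     a & b & ~a  -> 0
     a | b | ~a  -> -1 (all ones of the type)  */
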
848 /* Use constant value that may be present in OPS to try to eliminate
849 operands. Note that this function is only really used when we've
850 eliminated ops for other reasons, or merged constants. Across
851 single statements, fold already does all of this, plus more. There
852 is little point in duplicating logic, so I've only included the
853 identities that I could ever construct testcases to trigger. */
855 static void
856 eliminate_using_constants (enum tree_code opcode,
857 vec<operand_entry_t> *ops)
859 operand_entry_t oelast = ops->last ();
860 tree type = TREE_TYPE (oelast->op);
862 if (oelast->rank == 0
863 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
865 switch (opcode)
867 case BIT_AND_EXPR:
868 if (integer_zerop (oelast->op))
870 if (ops->length () != 1)
872 if (dump_file && (dump_flags & TDF_DETAILS))
873 fprintf (dump_file, "Found & 0, removing all other ops\n");
875 reassociate_stats.ops_eliminated += ops->length () - 1;
877 ops->truncate (0);
878 ops->quick_push (oelast);
879 return;
882 else if (integer_all_onesp (oelast->op))
884 if (ops->length () != 1)
886 if (dump_file && (dump_flags & TDF_DETAILS))
887 fprintf (dump_file, "Found & -1, removing\n");
888 ops->pop ();
889 reassociate_stats.ops_eliminated++;
892 break;
893 case BIT_IOR_EXPR:
894 if (integer_all_onesp (oelast->op))
896 if (ops->length () != 1)
898 if (dump_file && (dump_flags & TDF_DETAILS))
899 fprintf (dump_file, "Found | -1, removing all other ops\n");
901 reassociate_stats.ops_eliminated += ops->length () - 1;
903 ops->truncate (0);
904 ops->quick_push (oelast);
905 return;
908 else if (integer_zerop (oelast->op))
910 if (ops->length () != 1)
912 if (dump_file && (dump_flags & TDF_DETAILS))
913 fprintf (dump_file, "Found | 0, removing\n");
914 ops->pop ();
915 reassociate_stats.ops_eliminated++;
918 break;
919 case MULT_EXPR:
920 if (integer_zerop (oelast->op)
921 || (FLOAT_TYPE_P (type)
922 && !HONOR_NANS (TYPE_MODE (type))
923 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
924 && real_zerop (oelast->op)))
926 if (ops->length () != 1)
928 if (dump_file && (dump_flags & TDF_DETAILS))
929 fprintf (dump_file, "Found * 0, removing all other ops\n");
931 reassociate_stats.ops_eliminated += ops->length () - 1;
932 ops->truncate (1);
933 ops->quick_push (oelast);
934 return;
937 else if (integer_onep (oelast->op)
938 || (FLOAT_TYPE_P (type)
939 && !HONOR_SNANS (TYPE_MODE (type))
940 && real_onep (oelast->op)))
942 if (ops->length () != 1)
944 if (dump_file && (dump_flags & TDF_DETAILS))
945 fprintf (dump_file, "Found * 1, removing\n");
946 ops->pop ();
947 reassociate_stats.ops_eliminated++;
948 return;
951 break;
952 case BIT_XOR_EXPR:
953 case PLUS_EXPR:
954 case MINUS_EXPR:
955 if (integer_zerop (oelast->op)
956 || (FLOAT_TYPE_P (type)
957 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
958 && fold_real_zero_addition_p (type, oelast->op,
959 opcode == MINUS_EXPR)))
961 if (ops->length () != 1)
963 if (dump_file && (dump_flags & TDF_DETAILS))
964 fprintf (dump_file, "Found [|^+] 0, removing\n");
965 ops->pop ();
966 reassociate_stats.ops_eliminated++;
967 return;
970 break;
971 default:
972 break;
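
/* In source form, the identities handled above are roughly (the
   floating-point cases are guarded by the HONOR_* checks):

     x & 0  -> 0       x & -1 -> x
     x | -1 -> -1      x | 0  -> x
     x * 0  -> 0       x * 1  -> x
     x + 0  -> x       x ^ 0  -> x       x - 0 -> x  */
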
978 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
979 bool, bool);
981 /* Structure for tracking and counting operands. */
982 typedef struct oecount_s {
983 int cnt;
984 int id;
985 enum tree_code oecode;
986 tree op;
987 } oecount;
990 /* The heap for the oecount hashtable and the sorted list of operands. */
991 static vec<oecount> cvec;
994 /* Oecount hashtable helpers. */
996 struct oecount_hasher : typed_noop_remove <void>
998 /* Note that this hash table stores integers, not pointers.
999 So, observe the casting in the member functions. */
1000 typedef void value_type;
1001 typedef void compare_type;
1002 static inline hashval_t hash (const value_type *);
1003 static inline bool equal (const value_type *, const compare_type *);
1006 /* Hash function for oecount. */
1008 inline hashval_t
1009 oecount_hasher::hash (const value_type *p)
1011 const oecount *c = &cvec[(size_t)p - 42];
1012 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1015 /* Comparison function for oecount. */
1017 inline bool
1018 oecount_hasher::equal (const value_type *p1, const compare_type *p2)
1020 const oecount *c1 = &cvec[(size_t)p1 - 42];
1021 const oecount *c2 = &cvec[(size_t)p2 - 42];
1022 return (c1->oecode == c2->oecode
1023 && c1->op == c2->op);
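
/* The "pointers" stored in this table are really small integers: the
   index of an oecount in CVEC plus 42, cast to a pointer-sized value.
   The hash and equality functions above subtract the offset again to
   reach the real entry; the offset itself just keeps the encoded
   values away from the table's reserved empty/deleted markers.  */
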
1026 /* Comparison function for qsort sorting oecount elements by count. */
1028 static int
1029 oecount_cmp (const void *p1, const void *p2)
1031 const oecount *c1 = (const oecount *)p1;
1032 const oecount *c2 = (const oecount *)p2;
1033 if (c1->cnt != c2->cnt)
1034 return c1->cnt - c2->cnt;
1035 else
1036 /* If counts are identical, use unique IDs to stabilize qsort. */
1037 return c1->id - c2->id;
1040 /* Return TRUE iff STMT represents a builtin call that raises OP
1041 to some exponent. */
1043 static bool
1044 stmt_is_power_of_op (gimple stmt, tree op)
1046 tree fndecl;
1048 if (!is_gimple_call (stmt))
1049 return false;
1051 fndecl = gimple_call_fndecl (stmt);
1053 if (!fndecl
1054 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1055 return false;
1057 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1059 CASE_FLT_FN (BUILT_IN_POW):
1060 CASE_FLT_FN (BUILT_IN_POWI):
1061 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1063 default:
1064 return false;
1068 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1069 in place and return the result. Assumes that stmt_is_power_of_op
1070 was previously called for STMT and returned TRUE. */
1072 static HOST_WIDE_INT
1073 decrement_power (gimple stmt)
1075 REAL_VALUE_TYPE c, cint;
1076 HOST_WIDE_INT power;
1077 tree arg1;
1079 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1081 CASE_FLT_FN (BUILT_IN_POW):
1082 arg1 = gimple_call_arg (stmt, 1);
1083 c = TREE_REAL_CST (arg1);
1084 power = real_to_integer (&c) - 1;
1085 real_from_integer (&cint, VOIDmode, power, 0, 0);
1086 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1087 return power;
1089 CASE_FLT_FN (BUILT_IN_POWI):
1090 arg1 = gimple_call_arg (stmt, 1);
1091 power = TREE_INT_CST_LOW (arg1) - 1;
1092 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1093 return power;
1095 default:
1096 gcc_unreachable ();
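
/* For instance (a sketch), for a statement t = __builtin_powi (x, 5)
   this rewrites the call in place to __builtin_powi (x, 4) and returns
   4.  zero_one_operation below relies on this when it has to strip a
   single factor of x out of a multiplication chain.  */
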
1100 /* Find the single immediate use of STMT's LHS, and replace it
1101 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1102 replace *DEF with OP as well. */
1104 static void
1105 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1107 tree lhs;
1108 gimple use_stmt;
1109 use_operand_p use;
1110 gimple_stmt_iterator gsi;
1112 if (is_gimple_call (stmt))
1113 lhs = gimple_call_lhs (stmt);
1114 else
1115 lhs = gimple_assign_lhs (stmt);
1117 gcc_assert (has_single_use (lhs));
1118 single_imm_use (lhs, &use, &use_stmt);
1119 if (lhs == *def)
1120 *def = op;
1121 SET_USE (use, op);
1122 if (TREE_CODE (op) != SSA_NAME)
1123 update_stmt (use_stmt);
1124 gsi = gsi_for_stmt (stmt);
1125 unlink_stmt_vdef (stmt);
1126 gsi_remove (&gsi, true);
1127 release_defs (stmt);
1130 /* Walks the linear chain with result *DEF searching for an operation
1131 with operand OP and code OPCODE removing that from the chain. *DEF
1132 is updated if there is only one operand but no operation left. */
1134 static void
1135 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1137 gimple stmt = SSA_NAME_DEF_STMT (*def);
1141 tree name;
1143 if (opcode == MULT_EXPR
1144 && stmt_is_power_of_op (stmt, op))
1146 if (decrement_power (stmt) == 1)
1147 propagate_op_to_single_use (op, stmt, def);
1148 return;
1151 name = gimple_assign_rhs1 (stmt);
1153 /* If this is the operation we look for and one of the operands
1154 is ours simply propagate the other operand into the stmts
1155 single use. */
1156 if (gimple_assign_rhs_code (stmt) == opcode
1157 && (name == op
1158 || gimple_assign_rhs2 (stmt) == op))
1160 if (name == op)
1161 name = gimple_assign_rhs2 (stmt);
1162 propagate_op_to_single_use (name, stmt, def);
1163 return;
1166 /* We might have a multiply of two __builtin_pow* calls, and
1167 the operand might be hiding in the rightmost one. */
1168 if (opcode == MULT_EXPR
1169 && gimple_assign_rhs_code (stmt) == opcode
1170 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1171 && has_single_use (gimple_assign_rhs2 (stmt)))
1173 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1174 if (stmt_is_power_of_op (stmt2, op))
1176 if (decrement_power (stmt2) == 1)
1177 propagate_op_to_single_use (op, stmt2, def);
1178 return;
1182 /* Continue walking the chain. */
1183 gcc_assert (name != op
1184 && TREE_CODE (name) == SSA_NAME);
1185 stmt = SSA_NAME_DEF_STMT (name);
1187 while (1);
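
/* A small sketch of zero_one_operation on a MULT_EXPR chain (names
   invented):

     t1 = a * b;   t2 = t1 * c;   with *DEF == t2 and OP == b

   The walk finds b as an operand of the t1 statement, forwards the
   other operand a into t1's single use and removes t1, leaving

     t2 = a * c;

   If the factor is instead hidden in a __builtin_pow* call, its
   exponent is decremented rather than removing a statement.  */
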
1190 /* Returns true if statement S1 dominates statement S2. Like
1191 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1193 static bool
1194 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1196 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1198 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1199 SSA_NAME. Assume it lives at the beginning of function and
1200 thus dominates everything. */
1201 if (!bb1 || s1 == s2)
1202 return true;
1204 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1205 if (!bb2)
1206 return false;
1208 if (bb1 == bb2)
1210 /* PHIs in the same basic block are assumed to be
1211 executed all in parallel, if only one stmt is a PHI,
1212 it dominates the other stmt in the same basic block. */
1213 if (gimple_code (s1) == GIMPLE_PHI)
1214 return true;
1216 if (gimple_code (s2) == GIMPLE_PHI)
1217 return false;
1219 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1221 if (gimple_uid (s1) < gimple_uid (s2))
1222 return true;
1224 if (gimple_uid (s1) > gimple_uid (s2))
1225 return false;
1227 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1228 unsigned int uid = gimple_uid (s1);
1229 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1231 gimple s = gsi_stmt (gsi);
1232 if (gimple_uid (s) != uid)
1233 break;
1234 if (s == s2)
1235 return true;
1238 return false;
1241 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1244 /* Insert STMT after INSERT_POINT. */
1246 static void
1247 insert_stmt_after (gimple stmt, gimple insert_point)
1249 gimple_stmt_iterator gsi;
1250 basic_block bb;
1252 if (gimple_code (insert_point) == GIMPLE_PHI)
1253 bb = gimple_bb (insert_point);
1254 else if (!stmt_ends_bb_p (insert_point))
1256 gsi = gsi_for_stmt (insert_point);
1257 gimple_set_uid (stmt, gimple_uid (insert_point));
1258 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1259 return;
1261 else
1262 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1263 thus if it must end a basic block, it should be a call that can
1264 throw, or some assignment that can throw. If it throws, the LHS
1265 of it will not be initialized though, so only valid places using
1266 the SSA_NAME should be dominated by the fallthru edge. */
1267 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1268 gsi = gsi_after_labels (bb);
1269 if (gsi_end_p (gsi))
1271 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1272 gimple_set_uid (stmt,
1273 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1275 else
1276 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1277 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1280 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1281 the result. Places the statement after the definition of either
1282 OP1 or OP2. Returns the new statement. */
1284 static gimple
1285 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1287 gimple op1def = NULL, op2def = NULL;
1288 gimple_stmt_iterator gsi;
1289 tree op;
1290 gimple sum;
1292 /* Create the addition statement. */
1293 op = make_ssa_name (type, NULL);
1294 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1296 /* Find an insertion place and insert. */
1297 if (TREE_CODE (op1) == SSA_NAME)
1298 op1def = SSA_NAME_DEF_STMT (op1);
1299 if (TREE_CODE (op2) == SSA_NAME)
1300 op2def = SSA_NAME_DEF_STMT (op2);
1301 if ((!op1def || gimple_nop_p (op1def))
1302 && (!op2def || gimple_nop_p (op2def)))
1304 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1305 if (gsi_end_p (gsi))
1307 gimple_stmt_iterator gsi2
1308 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1309 gimple_set_uid (sum,
1310 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1312 else
1313 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1314 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1316 else
1318 gimple insert_point;
1319 if ((!op1def || gimple_nop_p (op1def))
1320 || (op2def && !gimple_nop_p (op2def)
1321 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1322 insert_point = op2def;
1323 else
1324 insert_point = op1def;
1325 insert_stmt_after (sum, insert_point);
1327 update_stmt (sum);
1329 return sum;
/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

    - First we walk the addition chain *OPS looking for summands that
      are defined by a multiplication or a real division.  This results
      in the candidates bitmap with relevant indices into *OPS.

    - Second we build the chains of multiplications or divisions for
      these candidates, counting the number of occurrences of (operand, code)
      pairs in all of the candidates chains.

    - Third we sort the (operand, code) pairs by number of occurrence and
      process them starting with the pair with the most uses.

      * For each such pair we walk the candidates again to build a
        second candidate bitmap noting all multiplication/division chains
        that have at least one occurrence of (operand, code).

      * We build an alternate addition chain only covering these
        candidates with one (operand, code) operation removed from their
        multiplication/division chain.

      * The first candidate gets replaced by the alternate addition chain
        multiplied/divided by the operand.

      * All candidate chains get disabled for further processing and
        processing of (operand, code) pairs continues.

   The alternate addition chains built are re-processed by the main
   reassociation algorithm which allows optimizing a * x * y + b * y * x
   to (a + b ) * x * y in one invocation of the reassociation pass.  */
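
/* A small example of the transform (a sketch; the temporaries are
   invented):

     ops = { a * x, b * x, c }           PLUS_EXPR chain

   The pair (x, MULT_EXPR) occurs in two candidate chains, so one factor
   of x is stripped from each and the stripped chains are summed first:

     t = a + b;    ops = { t * x, 0, c }

   which the main reassociation pass then rewrites to the equivalent of
   (a + b) * x + c.  */
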
1367 static bool
1368 undistribute_ops_list (enum tree_code opcode,
1369 vec<operand_entry_t> *ops, struct loop *loop)
1371 unsigned int length = ops->length ();
1372 operand_entry_t oe1;
1373 unsigned i, j;
1374 sbitmap candidates, candidates2;
1375 unsigned nr_candidates, nr_candidates2;
1376 sbitmap_iterator sbi0;
1377 vec<operand_entry_t> *subops;
1378 hash_table <oecount_hasher> ctable;
1379 bool changed = false;
1380 int next_oecount_id = 0;
1382 if (length <= 1
1383 || opcode != PLUS_EXPR)
1384 return false;
1386 /* Build a list of candidates to process. */
1387 candidates = sbitmap_alloc (length);
1388 bitmap_clear (candidates);
1389 nr_candidates = 0;
1390 FOR_EACH_VEC_ELT (*ops, i, oe1)
1392 enum tree_code dcode;
1393 gimple oe1def;
1395 if (TREE_CODE (oe1->op) != SSA_NAME)
1396 continue;
1397 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1398 if (!is_gimple_assign (oe1def))
1399 continue;
1400 dcode = gimple_assign_rhs_code (oe1def);
1401 if ((dcode != MULT_EXPR
1402 && dcode != RDIV_EXPR)
1403 || !is_reassociable_op (oe1def, dcode, loop))
1404 continue;
1406 bitmap_set_bit (candidates, i);
1407 nr_candidates++;
1410 if (nr_candidates < 2)
1412 sbitmap_free (candidates);
1413 return false;
1416 if (dump_file && (dump_flags & TDF_DETAILS))
1418 fprintf (dump_file, "searching for un-distribute opportunities ");
1419 print_generic_expr (dump_file,
1420 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1421 fprintf (dump_file, " %d\n", nr_candidates);
1424 /* Build linearized sub-operand lists and the counting table. */
1425 cvec.create (0);
1426 ctable.create (15);
1427 /* ??? Macro arguments cannot have multi-argument template types in
1428 them. This typedef is needed to workaround that limitation. */
1429 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1430 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1431 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1433 gimple oedef;
1434 enum tree_code oecode;
1435 unsigned j;
1437 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1438 oecode = gimple_assign_rhs_code (oedef);
1439 linearize_expr_tree (&subops[i], oedef,
1440 associative_tree_code (oecode), false);
1442 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1444 oecount c;
1445 void **slot;
1446 size_t idx;
1447 c.oecode = oecode;
1448 c.cnt = 1;
1449 c.id = next_oecount_id++;
1450 c.op = oe1->op;
1451 cvec.safe_push (c);
1452 idx = cvec.length () + 41;
1453 slot = ctable.find_slot ((void *)idx, INSERT);
1454 if (!*slot)
1456 *slot = (void *)idx;
1458 else
1460 cvec.pop ();
1461 cvec[(size_t)*slot - 42].cnt++;
1465 ctable.dispose ();
1467 /* Sort the counting table. */
1468 cvec.qsort (oecount_cmp);
1470 if (dump_file && (dump_flags & TDF_DETAILS))
1472 oecount *c;
1473 fprintf (dump_file, "Candidates:\n");
1474 FOR_EACH_VEC_ELT (cvec, j, c)
1476 fprintf (dump_file, " %u %s: ", c->cnt,
1477 c->oecode == MULT_EXPR
1478 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1479 print_generic_expr (dump_file, c->op, 0);
1480 fprintf (dump_file, "\n");
1484 /* Process the (operand, code) pairs in order of most occurrence. */
1485 candidates2 = sbitmap_alloc (length);
1486 while (!cvec.is_empty ())
1488 oecount *c = &cvec.last ();
1489 if (c->cnt < 2)
1490 break;
1492 /* Now collect the operands in the outer chain that contain
1493 the common operand in their inner chain. */
1494 bitmap_clear (candidates2);
1495 nr_candidates2 = 0;
1496 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1498 gimple oedef;
1499 enum tree_code oecode;
1500 unsigned j;
1501 tree op = (*ops)[i]->op;
1503 /* If we undistributed in this chain already this may be
1504 a constant. */
1505 if (TREE_CODE (op) != SSA_NAME)
1506 continue;
1508 oedef = SSA_NAME_DEF_STMT (op);
1509 oecode = gimple_assign_rhs_code (oedef);
1510 if (oecode != c->oecode)
1511 continue;
1513 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1515 if (oe1->op == c->op)
1517 bitmap_set_bit (candidates2, i);
1518 ++nr_candidates2;
1519 break;
1524 if (nr_candidates2 >= 2)
1526 operand_entry_t oe1, oe2;
1527 gimple prod;
1528 int first = bitmap_first_set_bit (candidates2);
1530 /* Build the new addition chain. */
1531 oe1 = (*ops)[first];
1532 if (dump_file && (dump_flags & TDF_DETAILS))
1534 fprintf (dump_file, "Building (");
1535 print_generic_expr (dump_file, oe1->op, 0);
1537 zero_one_operation (&oe1->op, c->oecode, c->op);
1538 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1540 gimple sum;
1541 oe2 = (*ops)[i];
1542 if (dump_file && (dump_flags & TDF_DETAILS))
1544 fprintf (dump_file, " + ");
1545 print_generic_expr (dump_file, oe2->op, 0);
1547 zero_one_operation (&oe2->op, c->oecode, c->op);
1548 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1549 oe1->op, oe2->op, opcode);
1550 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1551 oe2->rank = 0;
1552 oe1->op = gimple_get_lhs (sum);
1555 /* Apply the multiplication/division. */
1556 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1557 oe1->op, c->op, c->oecode);
1558 if (dump_file && (dump_flags & TDF_DETAILS))
1560 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1561 print_generic_expr (dump_file, c->op, 0);
1562 fprintf (dump_file, "\n");
1565 /* Record it in the addition chain and disable further
1566 undistribution with this op. */
1567 oe1->op = gimple_assign_lhs (prod);
1568 oe1->rank = get_rank (oe1->op);
1569 subops[first].release ();
1571 changed = true;
1574 cvec.pop ();
1577 for (i = 0; i < ops->length (); ++i)
1578 subops[i].release ();
1579 free (subops);
1580 cvec.release ();
1581 sbitmap_free (candidates);
1582 sbitmap_free (candidates2);
1584 return changed;
1587 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1588 expression, examine the other OPS to see if any of them are comparisons
1589 of the same values, which we may be able to combine or eliminate.
1590 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1592 static bool
1593 eliminate_redundant_comparison (enum tree_code opcode,
1594 vec<operand_entry_t> *ops,
1595 unsigned int currindex,
1596 operand_entry_t curr)
1598 tree op1, op2;
1599 enum tree_code lcode, rcode;
1600 gimple def1, def2;
1601 int i;
1602 operand_entry_t oe;
1604 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1605 return false;
1607 /* Check that CURR is a comparison. */
1608 if (TREE_CODE (curr->op) != SSA_NAME)
1609 return false;
1610 def1 = SSA_NAME_DEF_STMT (curr->op);
1611 if (!is_gimple_assign (def1))
1612 return false;
1613 lcode = gimple_assign_rhs_code (def1);
1614 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1615 return false;
1616 op1 = gimple_assign_rhs1 (def1);
1617 op2 = gimple_assign_rhs2 (def1);
1619 /* Now look for a similar comparison in the remaining OPS. */
1620 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1622 tree t;
1624 if (TREE_CODE (oe->op) != SSA_NAME)
1625 continue;
1626 def2 = SSA_NAME_DEF_STMT (oe->op);
1627 if (!is_gimple_assign (def2))
1628 continue;
1629 rcode = gimple_assign_rhs_code (def2);
1630 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1631 continue;
1633 /* If we got here, we have a match. See if we can combine the
1634 two comparisons. */
1635 if (opcode == BIT_IOR_EXPR)
1636 t = maybe_fold_or_comparisons (lcode, op1, op2,
1637 rcode, gimple_assign_rhs1 (def2),
1638 gimple_assign_rhs2 (def2));
1639 else
1640 t = maybe_fold_and_comparisons (lcode, op1, op2,
1641 rcode, gimple_assign_rhs1 (def2),
1642 gimple_assign_rhs2 (def2));
1643 if (!t)
1644 continue;
1646 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1647 always give us a boolean_type_node value back. If the original
1648 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1649 we need to convert. */
1650 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1651 t = fold_convert (TREE_TYPE (curr->op), t);
1653 if (TREE_CODE (t) != INTEGER_CST
1654 && !operand_equal_p (t, curr->op, 0))
1656 enum tree_code subcode;
1657 tree newop1, newop2;
1658 if (!COMPARISON_CLASS_P (t))
1659 continue;
1660 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1661 STRIP_USELESS_TYPE_CONVERSION (newop1);
1662 STRIP_USELESS_TYPE_CONVERSION (newop2);
1663 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1664 continue;
1667 if (dump_file && (dump_flags & TDF_DETAILS))
1669 fprintf (dump_file, "Equivalence: ");
1670 print_generic_expr (dump_file, curr->op, 0);
1671 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1672 print_generic_expr (dump_file, oe->op, 0);
1673 fprintf (dump_file, " -> ");
1674 print_generic_expr (dump_file, t, 0);
1675 fprintf (dump_file, "\n");
1678 /* Now we can delete oe, as it has been subsumed by the new combined
1679 expression t. */
1680 ops->ordered_remove (i);
1681 reassociate_stats.ops_eliminated ++;
1683 /* If t is the same as curr->op, we're done. Otherwise we must
1684 replace curr->op with t. Special case is if we got a constant
1685 back, in which case we add it to the end instead of in place of
1686 the current entry. */
1687 if (TREE_CODE (t) == INTEGER_CST)
1689 ops->ordered_remove (currindex);
1690 add_to_ops_vec (ops, t);
1692 else if (!operand_equal_p (t, curr->op, 0))
1694 gimple sum;
1695 enum tree_code subcode;
1696 tree newop1;
1697 tree newop2;
1698 gcc_assert (COMPARISON_CLASS_P (t));
1699 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1700 STRIP_USELESS_TYPE_CONVERSION (newop1);
1701 STRIP_USELESS_TYPE_CONVERSION (newop2);
1702 gcc_checking_assert (is_gimple_val (newop1)
1703 && is_gimple_val (newop2));
1704 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1705 curr->op = gimple_get_lhs (sum);
1707 return true;
1710 return false;
1713 /* Perform various identities and other optimizations on the list of
1714 operand entries, stored in OPS. The tree code for the binary
1715 operation between all the operands is OPCODE. */
1717 static void
1718 optimize_ops_list (enum tree_code opcode,
1719 vec<operand_entry_t> *ops)
1721 unsigned int length = ops->length ();
1722 unsigned int i;
1723 operand_entry_t oe;
1724 operand_entry_t oelast = NULL;
1725 bool iterate = false;
1727 if (length == 1)
1728 return;
1730 oelast = ops->last ();
1732 /* If the last two are constants, pop the constants off, merge them
1733 and try the next two. */
1734 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1736 operand_entry_t oelm1 = (*ops)[length - 2];
1738 if (oelm1->rank == 0
1739 && is_gimple_min_invariant (oelm1->op)
1740 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1741 TREE_TYPE (oelast->op)))
1743 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1744 oelm1->op, oelast->op);
1746 if (folded && is_gimple_min_invariant (folded))
1748 if (dump_file && (dump_flags & TDF_DETAILS))
1749 fprintf (dump_file, "Merging constants\n");
1751 ops->pop ();
1752 ops->pop ();
1754 add_to_ops_vec (ops, folded);
1755 reassociate_stats.constants_eliminated++;
1757 optimize_ops_list (opcode, ops);
1758 return;
1763 eliminate_using_constants (opcode, ops);
1764 oelast = NULL;
1766 for (i = 0; ops->iterate (i, &oe);)
1768 bool done = false;
1770 if (eliminate_not_pairs (opcode, ops, i, oe))
1771 return;
1772 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1773 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1774 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1776 if (done)
1777 return;
1778 iterate = true;
1779 oelast = NULL;
1780 continue;
1782 oelast = oe;
1783 i++;
1786 length = ops->length ();
1787 oelast = ops->last ();
1789 if (iterate)
1790 optimize_ops_list (opcode, ops);
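
/* For example (a sketch), for the PLUS_EXPR operand list { a, b, 3, 5 }
   the two trailing constants fold into 8, the list becomes { a, b, 8 },
   and the elimination helpers above are re-run on the shorter list in
   case the merge exposed further opportunities.  */
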
/* The following functions are subroutines to optimize_range_tests and allow
   it to try to change a logical combination of comparisons into a range
   test.

   For example, both
	X == 2 || X == 5 || X == 3 || X == 4
   and
	X >= 2 && X <= 5
   are converted to
	(unsigned) (X - 2) <= 3

   For more information see comments above fold_test_range in fold-const.c,
   this implementation is for GIMPLE.  */

struct range_entry
{
  tree exp;
  tree low;
  tree high;
  bool in_p;
  bool strict_overflow_p;
  unsigned int idx, next;
};
1817 /* This is similar to make_range in fold-const.c, but on top of
1818 GIMPLE instead of trees. If EXP is non-NULL, it should be
1819 an SSA_NAME and STMT argument is ignored, otherwise STMT
1820 argument should be a GIMPLE_COND. */
1822 static void
1823 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1825 int in_p;
1826 tree low, high;
1827 bool is_bool, strict_overflow_p;
1829 r->exp = NULL_TREE;
1830 r->in_p = false;
1831 r->strict_overflow_p = false;
1832 r->low = NULL_TREE;
1833 r->high = NULL_TREE;
1834 if (exp != NULL_TREE
1835 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1836 return;
1838 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1839 and see if we can refine the range. Some of the cases below may not
1840 happen, but it doesn't seem worth worrying about this. We "continue"
1841 the outer loop when we've changed something; otherwise we "break"
1842 the switch, which will "break" the while. */
1843 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1844 high = low;
1845 in_p = 0;
1846 strict_overflow_p = false;
1847 is_bool = false;
1848 if (exp == NULL_TREE)
1849 is_bool = true;
1850 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1852 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1853 is_bool = true;
1854 else
1855 return;
1857 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1858 is_bool = true;
1860 while (1)
1862 enum tree_code code;
1863 tree arg0, arg1, exp_type;
1864 tree nexp;
1865 location_t loc;
1867 if (exp != NULL_TREE)
1869 if (TREE_CODE (exp) != SSA_NAME
1870 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1871 break;
1873 stmt = SSA_NAME_DEF_STMT (exp);
1874 if (!is_gimple_assign (stmt))
1875 break;
1877 code = gimple_assign_rhs_code (stmt);
1878 arg0 = gimple_assign_rhs1 (stmt);
1879 arg1 = gimple_assign_rhs2 (stmt);
1880 exp_type = TREE_TYPE (exp);
1882 else
1884 code = gimple_cond_code (stmt);
1885 arg0 = gimple_cond_lhs (stmt);
1886 arg1 = gimple_cond_rhs (stmt);
1887 exp_type = boolean_type_node;
1890 if (TREE_CODE (arg0) != SSA_NAME)
1891 break;
1892 loc = gimple_location (stmt);
1893 switch (code)
1895 case BIT_NOT_EXPR:
1896 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1897 /* Ensure the range is either +[-,0], +[0,0],
1898 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1899 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1900 or similar expression of unconditional true or
1901 false, it should not be negated. */
1902 && ((high && integer_zerop (high))
1903 || (low && integer_onep (low))))
1905 in_p = !in_p;
1906 exp = arg0;
1907 continue;
1909 break;
1910 case SSA_NAME:
1911 exp = arg0;
1912 continue;
1913 CASE_CONVERT:
1914 if (is_bool)
1915 goto do_default;
1916 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1918 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1919 is_bool = true;
1920 else
1921 return;
1923 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1924 is_bool = true;
1925 goto do_default;
1926 case EQ_EXPR:
1927 case NE_EXPR:
1928 case LT_EXPR:
1929 case LE_EXPR:
1930 case GE_EXPR:
1931 case GT_EXPR:
1932 is_bool = true;
1933 /* FALLTHRU */
1934 default:
1935 if (!is_bool)
1936 return;
1937 do_default:
1938 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1939 &low, &high, &in_p,
1940 &strict_overflow_p);
1941 if (nexp != NULL_TREE)
1943 exp = nexp;
1944 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1945 continue;
1947 break;
1949 break;
1951 if (is_bool)
1953 r->exp = exp;
1954 r->in_p = in_p;
1955 r->low = low;
1956 r->high = high;
1957 r->strict_overflow_p = strict_overflow_p;
1961 /* Comparison function for qsort. Sort entries
1962 without SSA_NAME exp first, then with SSA_NAMEs sorted
1963 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1964 by increasing ->low and if ->low is the same, by increasing
1965 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1966 maximum. */
1968 static int
1969 range_entry_cmp (const void *a, const void *b)
1971 const struct range_entry *p = (const struct range_entry *) a;
1972 const struct range_entry *q = (const struct range_entry *) b;
1974 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1976 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1978 /* Group range_entries for the same SSA_NAME together. */
1979 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1980 return -1;
1981 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1982 return 1;
1983 /* If ->low is different, NULL low goes first, then by
1984 ascending low. */
1985 if (p->low != NULL_TREE)
1987 if (q->low != NULL_TREE)
1989 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1990 p->low, q->low);
1991 if (tem && integer_onep (tem))
1992 return -1;
1993 tem = fold_binary (GT_EXPR, boolean_type_node,
1994 p->low, q->low);
1995 if (tem && integer_onep (tem))
1996 return 1;
1998 else
1999 return 1;
2001 else if (q->low != NULL_TREE)
2002 return -1;
2003 /* If ->high is different, NULL high goes last, before that by
2004 ascending high. */
2005 if (p->high != NULL_TREE)
2007 if (q->high != NULL_TREE)
2009 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2010 p->high, q->high);
2011 if (tem && integer_onep (tem))
2012 return -1;
2013 tem = fold_binary (GT_EXPR, boolean_type_node,
2014 p->high, q->high);
2015 if (tem && integer_onep (tem))
2016 return 1;
2018 else
2019 return -1;
2021 else if (p->high != NULL_TREE)
2022 return 1;
2023 /* If both ranges are the same, sort below by ascending idx. */
2025 else
2026 return 1;
2028 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2029 return -1;
2031 if (p->idx < q->idx)
2032 return -1;
2033 else
2035 gcc_checking_assert (p->idx > q->idx);
2036 return 1;
2040 /* Helper routine of optimize_range_test.
2041 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2042 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2043 OPCODE and OPS are arguments of optimize_range_tests. Return
2044 true if the range merge has been successful.
2045 If OPCODE is ERROR_MARK, this is called from within
2046 maybe_optimize_range_tests and is performing inter-bb range optimization.
2047 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2048 oe->rank. */
2050 static bool
2051 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2052 unsigned int count, enum tree_code opcode,
2053 vec<operand_entry_t> *ops, tree exp, bool in_p,
2054 tree low, tree high, bool strict_overflow_p)
2056 operand_entry_t oe = (*ops)[range->idx];
2057 tree op = oe->op;
2058 gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2059 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2060 location_t loc = gimple_location (stmt);
2061 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2062 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
2063 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2064 gimple_stmt_iterator gsi;
2066 if (tem == NULL_TREE)
2067 return false;
2069 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2070 warning_at (loc, OPT_Wstrict_overflow,
2071 "assuming signed overflow does not occur "
2072 "when simplifying range test");
2074 if (dump_file && (dump_flags & TDF_DETAILS))
2076 struct range_entry *r;
2077 fprintf (dump_file, "Optimizing range tests ");
2078 print_generic_expr (dump_file, range->exp, 0);
2079 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2080 print_generic_expr (dump_file, range->low, 0);
2081 fprintf (dump_file, ", ");
2082 print_generic_expr (dump_file, range->high, 0);
2083 fprintf (dump_file, "]");
2084 for (r = otherrange; r < otherrange + count; r++)
2086 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2087 print_generic_expr (dump_file, r->low, 0);
2088 fprintf (dump_file, ", ");
2089 print_generic_expr (dump_file, r->high, 0);
2090 fprintf (dump_file, "]");
2092 fprintf (dump_file, "\n into ");
2093 print_generic_expr (dump_file, tem, 0);
2094 fprintf (dump_file, "\n");
2097 if (opcode == BIT_IOR_EXPR
2098 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2099 tem = invert_truthvalue_loc (loc, tem);
2101 tem = fold_convert_loc (loc, optype, tem);
2102 gsi = gsi_for_stmt (stmt);
2103 /* In rare cases range->exp can be equal to lhs of stmt.
2104    In that case we have to insert after the stmt rather than before
2105 it. */
2106 if (op == range->exp)
2107 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2108 GSI_CONTINUE_LINKING);
2109 else
2111 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2112 GSI_SAME_STMT);
2113 gsi_prev (&gsi);
2115 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2116 if (gimple_uid (gsi_stmt (gsi)))
2117 break;
2118 else
2119 gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2121 oe->op = tem;
2122 range->exp = exp;
2123 range->low = low;
2124 range->high = high;
2125 range->in_p = in_p;
2126 range->strict_overflow_p = false;
2128 for (range = otherrange; range < otherrange + count; range++)
2130 oe = (*ops)[range->idx];
2131 /* Now change all the other range test immediate uses, so that
2132 those tests will be optimized away. */
2133 if (opcode == ERROR_MARK)
2135 if (oe->op)
2136 oe->op = build_int_cst (TREE_TYPE (oe->op),
2137 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2138 else
2139 oe->op = (oe->rank == BIT_IOR_EXPR
2140 ? boolean_false_node : boolean_true_node);
2142 else
2143 oe->op = error_mark_node;
2144 range->exp = NULL_TREE;
2146 return true;
2149 /* Optimize X == CST1 || X == CST2
2150 if popcount (CST1 ^ CST2) == 1 into
2151 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2152 Similarly for ranges. E.g.
2153 X != 2 && X != 3 && X != 10 && X != 11
2154 will be transformed by the previous optimization into
2155 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2156 and this loop can transform that into
2157 !(((X & ~8) - 2U) <= 1U). */
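/* As a standalone sanity check of the identity above (illustrative only;
   the program below is not part of this pass and its names are made up),
   one can verify exhaustively over a small domain that, when
   popcount (CST1 ^ CST2) == 1, the two equality tests collapse into a
   single masked comparison:  */
#if 0
#include <assert.h>

/* Return nonzero iff the rewritten form agrees with the original test.  */
static int
range_test_xor_equiv (unsigned x, unsigned cst1, unsigned cst2)
{
  unsigned diff = cst1 ^ cst2;
  int original, rewritten;

  /* Precondition of the transform: exactly one differing bit.  */
  assert (__builtin_popcount (diff) == 1);
  original = (x == cst1) || (x == cst2);
  rewritten = (x & ~diff) == (cst1 & ~diff);
  return original == rewritten;
}

int
main (void)
{
  unsigned x;
  for (x = 0; x < 256; x++)
    assert (range_test_xor_equiv (x, 43u, 47u));  /* 43 ^ 47 == 4.  */
  return 0;
}
#endif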
2159 static bool
2160 optimize_range_tests_xor (enum tree_code opcode, tree type,
2161 tree lowi, tree lowj, tree highi, tree highj,
2162 vec<operand_entry_t> *ops,
2163 struct range_entry *rangei,
2164 struct range_entry *rangej)
2166 tree lowxor, highxor, tem, exp;
2167   /* Check lowi ^ lowj == highi ^ highj and
2168      popcount (lowi ^ lowj) == 1.  */
2169 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2170 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2171 return false;
2172 if (tree_log2 (lowxor) < 0)
2173 return false;
2174 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2175 if (!tree_int_cst_equal (lowxor, highxor))
2176 return false;
2178 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2179 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2180 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2181 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2182 if (update_range_test (rangei, rangej, 1, opcode, ops, exp,
2183 rangei->in_p, lowj, highj,
2184 rangei->strict_overflow_p
2185 || rangej->strict_overflow_p))
2186 return true;
2187 return false;
2190 /* Optimize X == CST1 || X == CST2
2191 if popcount (CST2 - CST1) == 1 into
2192 ((X - CST1) & ~(CST2 - CST1)) == 0.
2193 Similarly for ranges. E.g.
2194 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2195 || X == 75 || X == 45
2196 will be transformed by the previous optimization into
2197 (X - 43U) <= 3U || (X - 75U) <= 3U
2198 and this loop can transform that into
2199 ((X - 43U) & ~(75U - 43U)) <= 3U. */
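/* Likewise, a standalone sanity check (illustrative only, not part of the
   pass) that when popcount (CST2 - CST1) == 1 both the equality form and
   the range form above are equivalent to the masked tests shown:  */
#if 0
#include <assert.h>

int
main (void)
{
  const unsigned cst1 = 43u, cst2 = 75u;  /* 75 - 43 == 32, a power of two.  */
  unsigned x;

  for (x = 0; x < 512; x++)
    {
      int orig_eq = (x == cst1) || (x == cst2);
      int new_eq = ((x - cst1) & ~(cst2 - cst1)) == 0;
      int orig_range = (x - 43u) <= 3u || (x - 75u) <= 3u;
      int new_range = ((x - 43u) & ~(75u - 43u)) <= 3u;

      assert (orig_eq == new_eq);
      assert (orig_range == new_range);
    }
  return 0;
}
#endif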
2200 static bool
2201 optimize_range_tests_diff (enum tree_code opcode, tree type,
2202 tree lowi, tree lowj, tree highi, tree highj,
2203 vec<operand_entry_t> *ops,
2204 struct range_entry *rangei,
2205 struct range_entry *rangej)
2207 tree tem1, tem2, mask;
2208 /* Check highi - lowi == highj - lowj. */
2209 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2210 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2211 return false;
2212 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2213 if (!tree_int_cst_equal (tem1, tem2))
2214 return false;
2215 /* Check popcount (lowj - lowi) == 1. */
2216 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2217 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2218 return false;
2219 if (tree_log2 (tem1) < 0)
2220 return false;
2222 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2223 tem1 = fold_binary (MINUS_EXPR, type, rangei->exp, lowi);
2224 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2225 lowj = build_int_cst (type, 0);
2226 if (update_range_test (rangei, rangej, 1, opcode, ops, tem1,
2227 rangei->in_p, lowj, tem2,
2228 rangei->strict_overflow_p
2229 || rangej->strict_overflow_p))
2230 return true;
2231 return false;
2234 /* Perform checks common to optimize_range_tests_xor and
2235    optimize_range_tests_diff.
2236 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2237 Else it calls optimize_range_tests_diff. */
2239 static bool
2240 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2241 bool optimize_xor, vec<operand_entry_t> *ops,
2242 struct range_entry *ranges)
2244 int i, j;
2245 bool any_changes = false;
2246 for (i = first; i < length; i++)
2248 tree lowi, highi, lowj, highj, type, tem;
2250 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2251 continue;
2252 type = TREE_TYPE (ranges[i].exp);
2253 if (!INTEGRAL_TYPE_P (type))
2254 continue;
2255 lowi = ranges[i].low;
2256 if (lowi == NULL_TREE)
2257 lowi = TYPE_MIN_VALUE (type);
2258 highi = ranges[i].high;
2259 if (highi == NULL_TREE)
2260 continue;
2261 for (j = i + 1; j < length && j < i + 64; j++)
2263 bool changes;
2264 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2265 continue;
2266 lowj = ranges[j].low;
2267 if (lowj == NULL_TREE)
2268 continue;
2269 highj = ranges[j].high;
2270 if (highj == NULL_TREE)
2271 highj = TYPE_MAX_VALUE (type);
2272 /* Check lowj > highi. */
2273 tem = fold_binary (GT_EXPR, boolean_type_node,
2274 lowj, highi);
2275 if (tem == NULL_TREE || !integer_onep (tem))
2276 continue;
2277 if (optimize_xor)
2278 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2279 highi, highj, ops,
2280 ranges + i, ranges + j);
2281 else
2282 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2283 highi, highj, ops,
2284 ranges + i, ranges + j);
2285 if (changes)
2287 any_changes = true;
2288 break;
2292 return any_changes;
2295 /* Optimize range tests, similarly to how fold_range_test optimizes
2296    them on trees.  The tree code for the binary
2297 operation between all the operands is OPCODE.
2298 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2299 maybe_optimize_range_tests for inter-bb range optimization.
2300 In that case if oe->op is NULL, oe->id is bb->index whose
2301 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2302 the actual opcode. */
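/* For example (illustrative only), two tests of the same SSA_NAME such as
   x_1 == 0 || x_1 == 1 become the ranges +[0, 0] and +[1, 1]; merge_ranges
   combines them into the single range +[0, 1], and update_range_test then
   replaces both operands with one range check along the lines of
   x_1 <= 1.  */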
2304 static bool
2305 optimize_range_tests (enum tree_code opcode,
2306 vec<operand_entry_t> *ops)
2308 unsigned int length = ops->length (), i, j, first;
2309 operand_entry_t oe;
2310 struct range_entry *ranges;
2311 bool any_changes = false;
2313 if (length == 1)
2314 return false;
2316 ranges = XNEWVEC (struct range_entry, length);
2317 for (i = 0; i < length; i++)
2319 oe = (*ops)[i];
2320 ranges[i].idx = i;
2321 init_range_entry (ranges + i, oe->op,
2322 oe->op ? NULL :
2323 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2324 /* For | invert it now, we will invert it again before emitting
2325 the optimized expression. */
2326 if (opcode == BIT_IOR_EXPR
2327 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2328 ranges[i].in_p = !ranges[i].in_p;
2331 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2332 for (i = 0; i < length; i++)
2333 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2334 break;
2336 /* Try to merge ranges. */
2337 for (first = i; i < length; i++)
2339 tree low = ranges[i].low;
2340 tree high = ranges[i].high;
2341 int in_p = ranges[i].in_p;
2342 bool strict_overflow_p = ranges[i].strict_overflow_p;
2343 int update_fail_count = 0;
2345 for (j = i + 1; j < length; j++)
2347 if (ranges[i].exp != ranges[j].exp)
2348 break;
2349 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2350 ranges[j].in_p, ranges[j].low, ranges[j].high))
2351 break;
2352 strict_overflow_p |= ranges[j].strict_overflow_p;
2355 if (j == i + 1)
2356 continue;
2358 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2359 ops, ranges[i].exp, in_p, low, high,
2360 strict_overflow_p))
2362 i = j - 1;
2363 any_changes = true;
2365 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2366 while update_range_test would fail. */
2367 else if (update_fail_count == 64)
2368 i = j - 1;
2369 else
2370 ++update_fail_count;
2373 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2374 ops, ranges);
2376 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2377 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2378 ops, ranges);
2380 if (any_changes && opcode != ERROR_MARK)
2382 j = 0;
2383 FOR_EACH_VEC_ELT (*ops, i, oe)
2385 if (oe->op == error_mark_node)
2386 continue;
2387 else if (i != j)
2388 (*ops)[j] = oe;
2389 j++;
2391 ops->truncate (j);
2394 XDELETEVEC (ranges);
2395 return any_changes;
2398 /* Return true if STMT is a cast like:
2399 <bb N>:
2401 _123 = (int) _234;
2403 <bb M>:
2404 # _345 = PHI <_123(N), 1(...), 1(...)>
2405 where _234 has bool type, _123 has single use and
2406 bb N has a single successor M. This is commonly used in
2407 the last block of a range test. */
2409 static bool
2410 final_range_test_p (gimple stmt)
2412 basic_block bb, rhs_bb;
2413 edge e;
2414 tree lhs, rhs;
2415 use_operand_p use_p;
2416 gimple use_stmt;
2418 if (!gimple_assign_cast_p (stmt))
2419 return false;
2420 bb = gimple_bb (stmt);
2421 if (!single_succ_p (bb))
2422 return false;
2423 e = single_succ_edge (bb);
2424 if (e->flags & EDGE_COMPLEX)
2425 return false;
2427 lhs = gimple_assign_lhs (stmt);
2428 rhs = gimple_assign_rhs1 (stmt);
2429 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2430 || TREE_CODE (rhs) != SSA_NAME
2431 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2432 return false;
2434 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2435 if (!single_imm_use (lhs, &use_p, &use_stmt))
2436 return false;
2438 if (gimple_code (use_stmt) != GIMPLE_PHI
2439 || gimple_bb (use_stmt) != e->dest)
2440 return false;
2442 /* And that the rhs is defined in the same loop. */
2443 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2444 if (rhs_bb == NULL
2445 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2446 return false;
2448 return true;
2451 /* Return true if BB is a suitable basic block for inter-bb range test
2452    optimization.  If BACKWARD is true, BB should be the only predecessor
2453    of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2454    or compared against to find a common basic block to which all conditions
2455    branch if true resp. false.  If BACKWARD is false, TEST_BB should
2456 be the only predecessor of BB. */
2458 static bool
2459 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2460 bool backward)
2462 edge_iterator ei, ei2;
2463 edge e, e2;
2464 gimple stmt;
2465 gimple_stmt_iterator gsi;
2466 bool other_edge_seen = false;
2467 bool is_cond;
2469 if (test_bb == bb)
2470 return false;
2471 /* Check last stmt first. */
2472 stmt = last_stmt (bb);
2473 if (stmt == NULL
2474 || (gimple_code (stmt) != GIMPLE_COND
2475 && (backward || !final_range_test_p (stmt)))
2476 || gimple_visited_p (stmt)
2477 || stmt_could_throw_p (stmt)
2478 || *other_bb == bb)
2479 return false;
2480 is_cond = gimple_code (stmt) == GIMPLE_COND;
2481 if (is_cond)
2483 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2484 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2485 to *OTHER_BB (if not set yet, try to find it out). */
2486 if (EDGE_COUNT (bb->succs) != 2)
2487 return false;
2488 FOR_EACH_EDGE (e, ei, bb->succs)
2490 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2491 return false;
2492 if (e->dest == test_bb)
2494 if (backward)
2495 continue;
2496 else
2497 return false;
2499 if (e->dest == bb)
2500 return false;
2501 if (*other_bb == NULL)
2503 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2504 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2505 return false;
2506 else if (e->dest == e2->dest)
2507 *other_bb = e->dest;
2508 if (*other_bb == NULL)
2509 return false;
2511 if (e->dest == *other_bb)
2512 other_edge_seen = true;
2513 else if (backward)
2514 return false;
2516 if (*other_bb == NULL || !other_edge_seen)
2517 return false;
2519 else if (single_succ (bb) != *other_bb)
2520 return false;
2522 /* Now check all PHIs of *OTHER_BB. */
2523 e = find_edge (bb, *other_bb);
2524 e2 = find_edge (test_bb, *other_bb);
2525 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2527 gimple phi = gsi_stmt (gsi);
2528 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2529 corresponding to BB and TEST_BB predecessor must be the same. */
2530 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2531 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2533 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2534 one of the PHIs should have the lhs of the last stmt in
2535 that block as PHI arg and that PHI should have 0 or 1
2536 corresponding to it in all other range test basic blocks
2537 considered. */
2538 if (!is_cond)
2540 if (gimple_phi_arg_def (phi, e->dest_idx)
2541 == gimple_assign_lhs (stmt)
2542 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2543 || integer_onep (gimple_phi_arg_def (phi,
2544 e2->dest_idx))))
2545 continue;
2547 else
2549 gimple test_last = last_stmt (test_bb);
2550 if (gimple_code (test_last) != GIMPLE_COND
2551 && gimple_phi_arg_def (phi, e2->dest_idx)
2552 == gimple_assign_lhs (test_last)
2553 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2554 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2555 continue;
2558 return false;
2561 return true;
2564 /* Return true if BB doesn't have side-effects that would disallow
2565 range test optimization, all SSA_NAMEs set in the bb are consumed
2566 in the bb and there are no PHIs. */
2568 static bool
2569 no_side_effect_bb (basic_block bb)
2571 gimple_stmt_iterator gsi;
2572 gimple last;
2574 if (!gimple_seq_empty_p (phi_nodes (bb)))
2575 return false;
2576 last = last_stmt (bb);
2577 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2579 gimple stmt = gsi_stmt (gsi);
2580 tree lhs;
2581 imm_use_iterator imm_iter;
2582 use_operand_p use_p;
2584 if (is_gimple_debug (stmt))
2585 continue;
2586 if (gimple_has_side_effects (stmt))
2587 return false;
2588 if (stmt == last)
2589 return true;
2590 if (!is_gimple_assign (stmt))
2591 return false;
2592 lhs = gimple_assign_lhs (stmt);
2593 if (TREE_CODE (lhs) != SSA_NAME)
2594 return false;
2595 if (gimple_assign_rhs_could_trap_p (stmt))
2596 return false;
2597 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2599 gimple use_stmt = USE_STMT (use_p);
2600 if (is_gimple_debug (use_stmt))
2601 continue;
2602 if (gimple_bb (use_stmt) != bb)
2603 return false;
2606 return false;
2609 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2610 return true and fill in *OPS recursively. */
2612 static bool
2613 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2614 struct loop *loop)
2616 gimple stmt = SSA_NAME_DEF_STMT (var);
2617 tree rhs[2];
2618 int i;
2620 if (!is_reassociable_op (stmt, code, loop))
2621 return false;
2623 rhs[0] = gimple_assign_rhs1 (stmt);
2624 rhs[1] = gimple_assign_rhs2 (stmt);
2625 gimple_set_visited (stmt, true);
2626 for (i = 0; i < 2; i++)
2627 if (TREE_CODE (rhs[i]) == SSA_NAME
2628 && !get_ops (rhs[i], code, ops, loop)
2629 && has_single_use (rhs[i]))
2631 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2633 oe->op = rhs[i];
2634 oe->rank = code;
2635 oe->id = 0;
2636 oe->count = 1;
2637 ops->safe_push (oe);
2639 return true;
2642 /* Find the ops that were added by get_ops starting from VAR, see if
2643 they were changed during update_range_test and if yes, create new
2644 stmts. */
2646 static tree
2647 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2648 unsigned int *pidx, struct loop *loop)
2650 gimple stmt = SSA_NAME_DEF_STMT (var);
2651 tree rhs[4];
2652 int i;
2654 if (!is_reassociable_op (stmt, code, loop))
2655 return NULL;
2657 rhs[0] = gimple_assign_rhs1 (stmt);
2658 rhs[1] = gimple_assign_rhs2 (stmt);
2659 rhs[2] = rhs[0];
2660 rhs[3] = rhs[1];
2661 for (i = 0; i < 2; i++)
2662 if (TREE_CODE (rhs[i]) == SSA_NAME)
2664 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2665 if (rhs[2 + i] == NULL_TREE)
2667 if (has_single_use (rhs[i]))
2668 rhs[2 + i] = ops[(*pidx)++]->op;
2669 else
2670 rhs[2 + i] = rhs[i];
2673 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2674 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2676 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2677 var = make_ssa_name (TREE_TYPE (var), NULL);
2678 gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
2679 var, rhs[2], rhs[3]);
2680 gimple_set_uid (g, gimple_uid (stmt));
2681 gimple_set_visited (g, true);
2682 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2684 return var;
2687 /* Structure to track the initial value passed to get_ops and
2688 the range in the ops vector for each basic block. */
2690 struct inter_bb_range_test_entry
2692 tree op;
2693 unsigned int first_idx, last_idx;
2696 /* Inter-bb range test optimization. */
2698 static void
2699 maybe_optimize_range_tests (gimple stmt)
2701 basic_block first_bb = gimple_bb (stmt);
2702 basic_block last_bb = first_bb;
2703 basic_block other_bb = NULL;
2704 basic_block bb;
2705 edge_iterator ei;
2706 edge e;
2707 auto_vec<operand_entry_t> ops;
2708 auto_vec<inter_bb_range_test_entry> bbinfo;
2709 bool any_changes = false;
2711 /* Consider only basic blocks that end with GIMPLE_COND or
2712 a cast statement satisfying final_range_test_p. All
2713 but the last bb in the first_bb .. last_bb range
2714 should end with GIMPLE_COND. */
2715 if (gimple_code (stmt) == GIMPLE_COND)
2717 if (EDGE_COUNT (first_bb->succs) != 2)
2718 return;
2720 else if (final_range_test_p (stmt))
2721 other_bb = single_succ (first_bb);
2722 else
2723 return;
2725 if (stmt_could_throw_p (stmt))
2726 return;
2728 /* As relative ordering of post-dominator sons isn't fixed,
2729 maybe_optimize_range_tests can be called first on any
2730 bb in the range we want to optimize. So, start searching
2731      backwards, checking whether first_bb can be set to a predecessor.  */
2732 while (single_pred_p (first_bb))
2734 basic_block pred_bb = single_pred (first_bb);
2735 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2736 break;
2737 if (!no_side_effect_bb (first_bb))
2738 break;
2739 first_bb = pred_bb;
2741 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2742 Before starting forward search in last_bb successors, find
2743 out the other_bb. */
2744 if (first_bb == last_bb)
2746 other_bb = NULL;
2747      /* As a non-GIMPLE_COND last stmt always terminates the range,
2748         forward search would not discover anything either, so just give up.  */
2749 if (gimple_code (stmt) != GIMPLE_COND)
2750 return;
2751 /* Look at both successors. Either it ends with a GIMPLE_COND
2752 and satisfies suitable_cond_bb, or ends with a cast and
2753 other_bb is that cast's successor. */
2754 FOR_EACH_EDGE (e, ei, first_bb->succs)
2755 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2756 || e->dest == first_bb)
2757 return;
2758 else if (single_pred_p (e->dest))
2760 stmt = last_stmt (e->dest);
2761 if (stmt
2762 && gimple_code (stmt) == GIMPLE_COND
2763 && EDGE_COUNT (e->dest->succs) == 2)
2765 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2766 break;
2767 else
2768 other_bb = NULL;
2770 else if (stmt
2771 && final_range_test_p (stmt)
2772 && find_edge (first_bb, single_succ (e->dest)))
2774 other_bb = single_succ (e->dest);
2775 if (other_bb == first_bb)
2776 other_bb = NULL;
2779 if (other_bb == NULL)
2780 return;
2782 /* Now do the forward search, moving last_bb to successor bbs
2783 that aren't other_bb. */
2784 while (EDGE_COUNT (last_bb->succs) == 2)
2786 FOR_EACH_EDGE (e, ei, last_bb->succs)
2787 if (e->dest != other_bb)
2788 break;
2789 if (e == NULL)
2790 break;
2791 if (!single_pred_p (e->dest))
2792 break;
2793 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2794 break;
2795 if (!no_side_effect_bb (e->dest))
2796 break;
2797 last_bb = e->dest;
2799 if (first_bb == last_bb)
2800 return;
2801 /* Here basic blocks first_bb through last_bb's predecessor
2802 end with GIMPLE_COND, all of them have one of the edges to
2803 other_bb and another to another block in the range,
2804 all blocks except first_bb don't have side-effects and
2805 last_bb ends with either GIMPLE_COND, or cast satisfying
2806 final_range_test_p. */
2807 for (bb = last_bb; ; bb = single_pred (bb))
2809 enum tree_code code;
2810 tree lhs, rhs;
2811 inter_bb_range_test_entry bb_ent;
2813 bb_ent.op = NULL_TREE;
2814 bb_ent.first_idx = ops.length ();
2815 bb_ent.last_idx = bb_ent.first_idx;
2816 e = find_edge (bb, other_bb);
2817 stmt = last_stmt (bb);
2818 gimple_set_visited (stmt, true);
2819 if (gimple_code (stmt) != GIMPLE_COND)
2821 use_operand_p use_p;
2822 gimple phi;
2823 edge e2;
2824 unsigned int d;
2826 lhs = gimple_assign_lhs (stmt);
2827 rhs = gimple_assign_rhs1 (stmt);
2828 gcc_assert (bb == last_bb);
2830 /* stmt is
2831 _123 = (int) _234;
2833 followed by:
2834 <bb M>:
2835 # _345 = PHI <_123(N), 1(...), 1(...)>
2837 or 0 instead of 1. If it is 0, the _234
2838 range test is anded together with all the
2839 other range tests, if it is 1, it is ored with
2840 them. */
2841 single_imm_use (lhs, &use_p, &phi);
2842 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2843 e2 = find_edge (first_bb, other_bb);
2844 d = e2->dest_idx;
2845 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2846 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2847 code = BIT_AND_EXPR;
2848 else
2850 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2851 code = BIT_IOR_EXPR;
2854 /* If _234 SSA_NAME_DEF_STMT is
2855 _234 = _567 | _789;
2856         (or &, corresponding to 1/0 in the phi arguments),
2857 push into ops the individual range test arguments
2858 of the bitwise or resp. and, recursively. */
2859 if (!get_ops (rhs, code, &ops,
2860 loop_containing_stmt (stmt))
2861 && has_single_use (rhs))
2863 /* Otherwise, push the _234 range test itself. */
2864 operand_entry_t oe
2865 = (operand_entry_t) pool_alloc (operand_entry_pool);
2867 oe->op = rhs;
2868 oe->rank = code;
2869 oe->id = 0;
2870 oe->count = 1;
2871 ops.safe_push (oe);
2872 bb_ent.last_idx++;
2874 else
2875 bb_ent.last_idx = ops.length ();
2876 bb_ent.op = rhs;
2877 bbinfo.safe_push (bb_ent);
2878 continue;
2880 /* Otherwise stmt is GIMPLE_COND. */
2881 code = gimple_cond_code (stmt);
2882 lhs = gimple_cond_lhs (stmt);
2883 rhs = gimple_cond_rhs (stmt);
2884 if (TREE_CODE (lhs) == SSA_NAME
2885 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2886 && ((code != EQ_EXPR && code != NE_EXPR)
2887 || rhs != boolean_false_node
2888 /* Either push into ops the individual bitwise
2889 or resp. and operands, depending on which
2890 edge is other_bb. */
2891 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2892 ^ (code == EQ_EXPR))
2893 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2894 loop_containing_stmt (stmt))))
2896 /* Or push the GIMPLE_COND stmt itself. */
2897 operand_entry_t oe
2898 = (operand_entry_t) pool_alloc (operand_entry_pool);
2900 oe->op = NULL;
2901 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2902 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2903          /* oe->op = NULL signals that there is no SSA_NAME
2904             for the range test, and oe->id instead is the
2905             number of the basic block at whose end the
2906             GIMPLE_COND is.  */
2907 oe->id = bb->index;
2908 oe->count = 1;
2909 ops.safe_push (oe);
2910 bb_ent.op = NULL;
2911 bb_ent.last_idx++;
2913 else if (ops.length () > bb_ent.first_idx)
2915 bb_ent.op = lhs;
2916 bb_ent.last_idx = ops.length ();
2918 bbinfo.safe_push (bb_ent);
2919 if (bb == first_bb)
2920 break;
2922 if (ops.length () > 1)
2923 any_changes = optimize_range_tests (ERROR_MARK, &ops);
2924 if (any_changes)
2926 unsigned int idx;
2927 /* update_ops relies on has_single_use predicates returning the
2928 same values as it did during get_ops earlier. Additionally it
2929 never removes statements, only adds new ones and it should walk
2930 from the single imm use and check the predicate already before
2931 making those changes.
2932         On the other hand, the handling of GIMPLE_COND directly can turn
2933 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
2934 it needs to be done in a separate loop afterwards. */
2935 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2937 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2938 && bbinfo[idx].op != NULL_TREE)
2940 tree new_op;
2942 stmt = last_stmt (bb);
2943 new_op = update_ops (bbinfo[idx].op,
2944 (enum tree_code)
2945 ops[bbinfo[idx].first_idx]->rank,
2946 ops, &bbinfo[idx].first_idx,
2947 loop_containing_stmt (stmt));
2948 if (new_op == NULL_TREE)
2950 gcc_assert (bb == last_bb);
2951 new_op = ops[bbinfo[idx].first_idx++]->op;
2953 if (bbinfo[idx].op != new_op)
2955 imm_use_iterator iter;
2956 use_operand_p use_p;
2957 gimple use_stmt, cast_stmt = NULL;
2959 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
2960 if (is_gimple_debug (use_stmt))
2961 continue;
2962 else if (gimple_code (use_stmt) == GIMPLE_COND
2963 || gimple_code (use_stmt) == GIMPLE_PHI)
2964 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2965 SET_USE (use_p, new_op);
2966 else if (gimple_assign_cast_p (use_stmt))
2967 cast_stmt = use_stmt;
2968 else
2969 gcc_unreachable ();
2970 if (cast_stmt)
2972 gcc_assert (bb == last_bb);
2973 tree lhs = gimple_assign_lhs (cast_stmt);
2974 tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
2975 enum tree_code rhs_code
2976 = gimple_assign_rhs_code (cast_stmt);
2977 gimple g;
2978 if (is_gimple_min_invariant (new_op))
2980 new_op = fold_convert (TREE_TYPE (lhs), new_op);
2981 g = gimple_build_assign (new_lhs, new_op);
2983 else
2984 g = gimple_build_assign_with_ops (rhs_code, new_lhs,
2985 new_op, NULL_TREE);
2986 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
2987 gimple_set_uid (g, gimple_uid (cast_stmt));
2988 gimple_set_visited (g, true);
2989 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2990 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2991 if (is_gimple_debug (use_stmt))
2992 continue;
2993 else if (gimple_code (use_stmt) == GIMPLE_COND
2994 || gimple_code (use_stmt) == GIMPLE_PHI)
2995 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2996 SET_USE (use_p, new_lhs);
2997 else
2998 gcc_unreachable ();
3002 if (bb == first_bb)
3003 break;
3005 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3007 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3008 && bbinfo[idx].op == NULL_TREE
3009 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3011 stmt = last_stmt (bb);
3012 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3013 gimple_cond_make_false (stmt);
3014 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3015 gimple_cond_make_true (stmt);
3016 else
3018 gimple_cond_set_code (stmt, NE_EXPR);
3019 gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
3020 gimple_cond_set_rhs (stmt, boolean_false_node);
3022 update_stmt (stmt);
3024 if (bb == first_bb)
3025 break;
3030 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3031    of STMT in its operands.  This is also known as a "destructive
3032 update" operation. */
3034 static bool
3035 is_phi_for_stmt (gimple stmt, tree operand)
3037 gimple def_stmt;
3038 tree lhs;
3039 use_operand_p arg_p;
3040 ssa_op_iter i;
3042 if (TREE_CODE (operand) != SSA_NAME)
3043 return false;
3045 lhs = gimple_assign_lhs (stmt);
3047 def_stmt = SSA_NAME_DEF_STMT (operand);
3048 if (gimple_code (def_stmt) != GIMPLE_PHI)
3049 return false;
3051 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
3052 if (lhs == USE_FROM_PTR (arg_p))
3053 return true;
3054 return false;
3057 /* Remove def stmt of VAR if VAR has zero uses and recurse
3058 on rhs1 operand if so. */
3060 static void
3061 remove_visited_stmt_chain (tree var)
3063 gimple stmt;
3064 gimple_stmt_iterator gsi;
3066 while (1)
3068 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3069 return;
3070 stmt = SSA_NAME_DEF_STMT (var);
3071 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3073 var = gimple_assign_rhs1 (stmt);
3074 gsi = gsi_for_stmt (stmt);
3075 gsi_remove (&gsi, true);
3076 release_defs (stmt);
3078 else
3079 return;
3083 /* This function checks three consecutive operands in
3084    the passed operand vector OPS starting from OPINDEX and
3085    swaps two operands if it is profitable for the binary operation
3086    consuming operands OPINDEX + 1 and OPINDEX + 2.
3088 We pair ops with the same rank if possible.
3090 The alternative we try is to see if STMT is a destructive
3091 update style statement, which is like:
3092 b = phi (a, ...)
3093 a = c + b;
3094 In that case, we want to use the destructive update form to
3095 expose the possible vectorizer sum reduction opportunity.
3096 In that case, the third operand will be the phi node. This
3097 check is not performed if STMT is null.
3099 We could, of course, try to be better as noted above, and do a
3100 lot of work to try to find these opportunities in >3 operand
3101 cases, but it is unlikely to be worth it. */
3103 static void
3104 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3105 unsigned int opindex, gimple stmt)
3107 operand_entry_t oe1, oe2, oe3;
3109 oe1 = ops[opindex];
3110 oe2 = ops[opindex + 1];
3111 oe3 = ops[opindex + 2];
3113 if ((oe1->rank == oe2->rank
3114 && oe2->rank != oe3->rank)
3115 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3116 && !is_phi_for_stmt (stmt, oe1->op)
3117 && !is_phi_for_stmt (stmt, oe2->op)))
3119 struct operand_entry temp = *oe3;
3120 oe3->op = oe1->op;
3121 oe3->rank = oe1->rank;
3122 oe1->op = temp.op;
3123 oe1->rank= temp.rank;
3125 else if ((oe1->rank == oe3->rank
3126 && oe2->rank != oe3->rank)
3127 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3128 && !is_phi_for_stmt (stmt, oe1->op)
3129 && !is_phi_for_stmt (stmt, oe3->op)))
3131 struct operand_entry temp = *oe2;
3132 oe2->op = oe1->op;
3133 oe2->rank = oe1->rank;
3134 oe1->op = temp.op;
3135 oe1->rank = temp.rank;
3139 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3140 two definitions, otherwise return STMT. */
3142 static inline gimple
3143 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3145 if (TREE_CODE (rhs1) == SSA_NAME
3146 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3147 stmt = SSA_NAME_DEF_STMT (rhs1);
3148 if (TREE_CODE (rhs2) == SSA_NAME
3149 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3150 stmt = SSA_NAME_DEF_STMT (rhs2);
3151 return stmt;
3154 /* Recursively rewrite our linearized statements so that the operators
3155 match those in OPS[OPINDEX], putting the computation in rank
3156 order. Return new lhs. */
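/* Illustrative sketch (not taken from the sources): given the linearized
   chain
     x_5 = a_1 + b_2;
     x_6 = x_5 + c_3;
   and a sorted OPS vector, the recursion below rewrites each statement's
   operands to match OPS, e.g. it might produce
     _7 = c_3 + b_2;
     x_6 = _7 + a_1;
   creating a fresh SSA name (and statement) whenever the set of operands
   feeding a statement changes.  */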
3158 static tree
3159 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3160 vec<operand_entry_t> ops, bool changed)
3162 tree rhs1 = gimple_assign_rhs1 (stmt);
3163 tree rhs2 = gimple_assign_rhs2 (stmt);
3164 tree lhs = gimple_assign_lhs (stmt);
3165 operand_entry_t oe;
3167 /* The final recursion case for this function is that you have
3168 exactly two operations left.
3169      If we had exactly one op in the entire list to start with, we
3170 would have never called this function, and the tail recursion
3171 rewrites them one at a time. */
3172 if (opindex + 2 == ops.length ())
3174 operand_entry_t oe1, oe2;
3176 oe1 = ops[opindex];
3177 oe2 = ops[opindex + 1];
3179 if (rhs1 != oe1->op || rhs2 != oe2->op)
3181 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3182 unsigned int uid = gimple_uid (stmt);
3184 if (dump_file && (dump_flags & TDF_DETAILS))
3186 fprintf (dump_file, "Transforming ");
3187 print_gimple_stmt (dump_file, stmt, 0, 0);
3190 if (changed)
3192 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3193 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3194 stmt
3195 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3196 lhs, oe1->op, oe2->op);
3197 gimple_set_uid (stmt, uid);
3198 gimple_set_visited (stmt, true);
3199 if (insert_point == gsi_stmt (gsi))
3200 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3201 else
3202 insert_stmt_after (stmt, insert_point);
3204 else
3206 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3207 == stmt);
3208 gimple_assign_set_rhs1 (stmt, oe1->op);
3209 gimple_assign_set_rhs2 (stmt, oe2->op);
3210 update_stmt (stmt);
3213 if (rhs1 != oe1->op && rhs1 != oe2->op)
3214 remove_visited_stmt_chain (rhs1);
3216 if (dump_file && (dump_flags & TDF_DETAILS))
3218 fprintf (dump_file, " into ");
3219 print_gimple_stmt (dump_file, stmt, 0, 0);
3222 return lhs;
3225 /* If we hit here, we should have 3 or more ops left. */
3226 gcc_assert (opindex + 2 < ops.length ());
3228 /* Rewrite the next operator. */
3229 oe = ops[opindex];
3231 /* Recurse on the LHS of the binary operator, which is guaranteed to
3232 be the non-leaf side. */
3233 tree new_rhs1
3234 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3235 changed || oe->op != rhs2);
3237 if (oe->op != rhs2 || new_rhs1 != rhs1)
3239 if (dump_file && (dump_flags & TDF_DETAILS))
3241 fprintf (dump_file, "Transforming ");
3242 print_gimple_stmt (dump_file, stmt, 0, 0);
3245 /* If changed is false, this is either opindex == 0
3246 or all outer rhs2's were equal to corresponding oe->op,
3247 and powi_result is NULL.
3248 That means lhs is equivalent before and after reassociation.
3249 Otherwise ensure the old lhs SSA_NAME is not reused and
3250 create a new stmt as well, so that any debug stmts will be
3251 properly adjusted. */
3252 if (changed)
3254 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3255 unsigned int uid = gimple_uid (stmt);
3256 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3258 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3259 stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3260 lhs, new_rhs1, oe->op);
3261 gimple_set_uid (stmt, uid);
3262 gimple_set_visited (stmt, true);
3263 if (insert_point == gsi_stmt (gsi))
3264 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3265 else
3266 insert_stmt_after (stmt, insert_point);
3268 else
3270 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3271 == stmt);
3272 gimple_assign_set_rhs1 (stmt, new_rhs1);
3273 gimple_assign_set_rhs2 (stmt, oe->op);
3274 update_stmt (stmt);
3277 if (dump_file && (dump_flags & TDF_DETAILS))
3279 fprintf (dump_file, " into ");
3280 print_gimple_stmt (dump_file, stmt, 0, 0);
3283 return lhs;
3286 /* Find out how many cycles we need to compute the statement chain.
3287    OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is the
3288    maximum number of independent statements we may execute per cycle.  */
3290 static int
3291 get_required_cycles (int ops_num, int cpu_width)
3293 int res;
3294 int elog;
3295 unsigned int rest;
3297 /* While we have more than 2 * cpu_width operands
3298      we may reduce the number of operands by cpu_width
3299 per cycle. */
3300 res = ops_num / (2 * cpu_width);
3302   /* The remaining operand count may be halved each cycle
3303      until we have only one operand.  */
3304 rest = (unsigned)(ops_num - res * cpu_width);
3305 elog = exact_log2 (rest);
3306 if (elog >= 0)
3307 res += elog;
3308 else
3309 res += floor_log2 (rest) + 1;
3311 return res;
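/* For instance, ops_num == 5 with cpu_width == 1 gives res = 5 / 2 = 2,
   rest = 3, floor_log2 (3) + 1 = 2, i.e. 4 cycles -- the serial chain of
   four additions.  ops_num == 8 with cpu_width == 4 gives res = 1, rest = 4,
   exact_log2 (4) = 2, i.e. 3 cycles -- a balanced tree of depth three.  */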
3314 /* Returns an optimal number of registers to use for computation of the
3315    given statements.  */
3317 static int
3318 get_reassociation_width (int ops_num, enum tree_code opc,
3319 enum machine_mode mode)
3321 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3322 int width;
3323 int width_min;
3324 int cycles_best;
3326 if (param_width > 0)
3327 width = param_width;
3328 else
3329 width = targetm.sched.reassociation_width (opc, mode);
3331 if (width == 1)
3332 return width;
3334 /* Get the minimal time required for sequence computation. */
3335 cycles_best = get_required_cycles (ops_num, width);
3337   /* Check if we may use less width and still compute the sequence in
3338      the same time.  This will allow us to reduce register usage.
3339 get_required_cycles is monotonically increasing with lower width
3340 so we can perform a binary search for the minimal width that still
3341 results in the optimal cycle count. */
3342 width_min = 1;
3343 while (width > width_min)
3345 int width_mid = (width + width_min) / 2;
3347 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3348 width = width_mid;
3349 else if (width_min < width_mid)
3350 width_min = width_mid;
3351 else
3352 break;
3355 return width;
3358 /* Recursively rewrite our linearized statements so that the operators
3359 match those in OPS[OPINDEX], putting the computation in rank
3360 order and trying to allow operations to be executed in
3361 parallel. */
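/* Illustrative sketch (not taken from the sources): with five operands
   op1 .. op5 and width 2, the loop below would emit roughly
     _t1 = op5 + op4;
     _t2 = op3 + op2;
     _t3 = _t1 + _t2;
     lhs = _t3 + op1;
   so that _t1 and _t2 are independent and can execute in the same cycle.  */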
3363 static void
3364 rewrite_expr_tree_parallel (gimple stmt, int width,
3365 vec<operand_entry_t> ops)
3367 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3368 int op_num = ops.length ();
3369 int stmt_num = op_num - 1;
3370 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3371 int op_index = op_num - 1;
3372 int stmt_index = 0;
3373 int ready_stmts_end = 0;
3374 int i = 0;
3375 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3377 /* We start expression rewriting from the top statements.
3378 So, in this loop we create a full list of statements
3379 we will work with. */
3380 stmts[stmt_num - 1] = stmt;
3381 for (i = stmt_num - 2; i >= 0; i--)
3382 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3384 for (i = 0; i < stmt_num; i++)
3386 tree op1, op2;
3388 /* Determine whether we should use results of
3389 already handled statements or not. */
3390 if (ready_stmts_end == 0
3391 && (i - stmt_index >= width || op_index < 1))
3392 ready_stmts_end = i;
3394       /* Now we choose operands for the next statement.  A nonzero
3395          value in ready_stmts_end means that we should use
3396          the result of already generated statements as a new operand.  */
3397 if (ready_stmts_end > 0)
3399 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3400 if (ready_stmts_end > stmt_index)
3401 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3402 else if (op_index >= 0)
3403 op2 = ops[op_index--]->op;
3404 else
3406 gcc_assert (stmt_index < i);
3407 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3410 if (stmt_index >= ready_stmts_end)
3411 ready_stmts_end = 0;
3413 else
3415 if (op_index > 1)
3416 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3417 op2 = ops[op_index--]->op;
3418 op1 = ops[op_index--]->op;
3421 /* If we emit the last statement then we should put
3422 operands into the last statement. It will also
3423 break the loop. */
3424 if (op_index < 0 && stmt_index == i)
3425 i = stmt_num - 1;
3427 if (dump_file && (dump_flags & TDF_DETAILS))
3429 fprintf (dump_file, "Transforming ");
3430 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3433 /* We keep original statement only for the last one. All
3434 others are recreated. */
3435 if (i == stmt_num - 1)
3437 gimple_assign_set_rhs1 (stmts[i], op1);
3438 gimple_assign_set_rhs2 (stmts[i], op2);
3439 update_stmt (stmts[i]);
3441 else
3442 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3444 if (dump_file && (dump_flags & TDF_DETAILS))
3446 fprintf (dump_file, " into ");
3447 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3451 remove_visited_stmt_chain (last_rhs1);
3454 /* Transform STMT, which is really (A + B) + (C + D), into the
3455    left-linear form ((A+B)+C)+D.
3456 Recurse on D if necessary. */
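/* Illustrative sketch (exact operand order may differ): for
     x_1 = a + b;  y_2 = c + d;  z_3 = x_1 + y_2;
   the rewrite below produces a left-linear chain such as
     x_1 = a + b;  _4 = x_1 + d;  z_3 = _4 + c;  */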
3458 static void
3459 linearize_expr (gimple stmt)
3461 gimple_stmt_iterator gsi;
3462 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3463 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3464 gimple oldbinrhs = binrhs;
3465 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3466 gimple newbinrhs = NULL;
3467 struct loop *loop = loop_containing_stmt (stmt);
3468 tree lhs = gimple_assign_lhs (stmt);
3470 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3471 && is_reassociable_op (binrhs, rhscode, loop));
3473 gsi = gsi_for_stmt (stmt);
3475 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3476 binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
3477 make_ssa_name (TREE_TYPE (lhs), NULL),
3478 gimple_assign_lhs (binlhs),
3479 gimple_assign_rhs2 (binrhs));
3480 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3481 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3482 gimple_set_uid (binrhs, gimple_uid (stmt));
3484 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3485 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3487 if (dump_file && (dump_flags & TDF_DETAILS))
3489 fprintf (dump_file, "Linearized: ");
3490 print_gimple_stmt (dump_file, stmt, 0, 0);
3493 reassociate_stats.linearized++;
3494 update_stmt (stmt);
3496 gsi = gsi_for_stmt (oldbinrhs);
3497 gsi_remove (&gsi, true);
3498 release_defs (oldbinrhs);
3500 gimple_set_visited (stmt, true);
3501 gimple_set_visited (binlhs, true);
3502 gimple_set_visited (binrhs, true);
3504 /* Tail recurse on the new rhs if it still needs reassociation. */
3505 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3506 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3507 want to change the algorithm while converting to tuples. */
3508 linearize_expr (stmt);
3511 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3512 it. Otherwise, return NULL. */
3514 static gimple
3515 get_single_immediate_use (tree lhs)
3517 use_operand_p immuse;
3518 gimple immusestmt;
3520 if (TREE_CODE (lhs) == SSA_NAME
3521 && single_imm_use (lhs, &immuse, &immusestmt)
3522 && is_gimple_assign (immusestmt))
3523 return immusestmt;
3525 return NULL;
3528 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3529 representing the negated value. Insertions of any necessary
3530 instructions go before GSI.
3531 This function is recursive in that, if you hand it "a_5" as the
3532 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3533 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3535 static tree
3536 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3538 gimple negatedefstmt = NULL;
3539 tree resultofnegate;
3540 gimple_stmt_iterator gsi;
3541 unsigned int uid;
3543 /* If we are trying to negate a name, defined by an add, negate the
3544 add operands instead. */
3545 if (TREE_CODE (tonegate) == SSA_NAME)
3546 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3547 if (TREE_CODE (tonegate) == SSA_NAME
3548 && is_gimple_assign (negatedefstmt)
3549 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3550 && has_single_use (gimple_assign_lhs (negatedefstmt))
3551 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3553 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3554 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3555 tree lhs = gimple_assign_lhs (negatedefstmt);
3556 gimple g;
3558 gsi = gsi_for_stmt (negatedefstmt);
3559 rhs1 = negate_value (rhs1, &gsi);
3561 gsi = gsi_for_stmt (negatedefstmt);
3562 rhs2 = negate_value (rhs2, &gsi);
3564 gsi = gsi_for_stmt (negatedefstmt);
3565 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3566 gimple_set_visited (negatedefstmt, true);
3567 g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
3568 gimple_set_uid (g, gimple_uid (negatedefstmt));
3569 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3570 return lhs;
3573 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3574 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3575 NULL_TREE, true, GSI_SAME_STMT);
3576 gsi = *gsip;
3577 uid = gimple_uid (gsi_stmt (gsi));
3578 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3580 gimple stmt = gsi_stmt (gsi);
3581 if (gimple_uid (stmt) != 0)
3582 break;
3583 gimple_set_uid (stmt, uid);
3585 return resultofnegate;
3588 /* Return true if we should break up the subtract in STMT into an add
3589    with negate.  This is true when the subtract operands are really
3590 adds, or the subtract itself is used in an add expression. In
3591 either case, breaking up the subtract into an add with negate
3592 exposes the adds to reassociation. */
3594 static bool
3595 should_break_up_subtract (gimple stmt)
3597 tree lhs = gimple_assign_lhs (stmt);
3598 tree binlhs = gimple_assign_rhs1 (stmt);
3599 tree binrhs = gimple_assign_rhs2 (stmt);
3600 gimple immusestmt;
3601 struct loop *loop = loop_containing_stmt (stmt);
3603 if (TREE_CODE (binlhs) == SSA_NAME
3604 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3605 return true;
3607 if (TREE_CODE (binrhs) == SSA_NAME
3608 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3609 return true;
3611 if (TREE_CODE (lhs) == SSA_NAME
3612 && (immusestmt = get_single_immediate_use (lhs))
3613 && is_gimple_assign (immusestmt)
3614 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3615 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3616 return true;
3617 return false;
3620 /* Transform STMT from A - B into A + -B. */
3622 static void
3623 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3625 tree rhs1 = gimple_assign_rhs1 (stmt);
3626 tree rhs2 = gimple_assign_rhs2 (stmt);
3628 if (dump_file && (dump_flags & TDF_DETAILS))
3630 fprintf (dump_file, "Breaking up subtract ");
3631 print_gimple_stmt (dump_file, stmt, 0, 0);
3634 rhs2 = negate_value (rhs2, gsip);
3635 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3636 update_stmt (stmt);
3639 /* Determine whether STMT is a builtin call that raises an SSA name
3640 to an integer power and has only one use. If so, and this is early
3641 reassociation and unsafe math optimizations are permitted, place
3642 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3643 If any of these conditions does not hold, return FALSE. */
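/* For example, _1 = __builtin_pow (x_2, 3.0) with a singly used lhs yields
   *BASE = x_2 and *EXPONENT = 3, whereas __builtin_pow (x_2, 2.5) is
   rejected because 2.5 does not convert exactly to an integer.  */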
3645 static bool
3646 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3648 tree fndecl, arg1;
3649 REAL_VALUE_TYPE c, cint;
3651 if (!first_pass_instance
3652 || !flag_unsafe_math_optimizations
3653 || !is_gimple_call (stmt)
3654 || !has_single_use (gimple_call_lhs (stmt)))
3655 return false;
3657 fndecl = gimple_call_fndecl (stmt);
3659 if (!fndecl
3660 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3661 return false;
3663 switch (DECL_FUNCTION_CODE (fndecl))
3665 CASE_FLT_FN (BUILT_IN_POW):
3666 *base = gimple_call_arg (stmt, 0);
3667 arg1 = gimple_call_arg (stmt, 1);
3669 if (TREE_CODE (arg1) != REAL_CST)
3670 return false;
3672 c = TREE_REAL_CST (arg1);
3674 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3675 return false;
3677 *exponent = real_to_integer (&c);
3678 real_from_integer (&cint, VOIDmode, *exponent,
3679 *exponent < 0 ? -1 : 0, 0);
3680 if (!real_identical (&c, &cint))
3681 return false;
3683 break;
3685 CASE_FLT_FN (BUILT_IN_POWI):
3686 *base = gimple_call_arg (stmt, 0);
3687 arg1 = gimple_call_arg (stmt, 1);
3689 if (!tree_fits_shwi_p (arg1))
3690 return false;
3692 *exponent = tree_to_shwi (arg1);
3693 break;
3695 default:
3696 return false;
3699 /* Expanding negative exponents is generally unproductive, so we don't
3700 complicate matters with those. Exponents of zero and one should
3701 have been handled by expression folding. */
3702 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3703 return false;
3705 return true;
3708 /* Recursively linearize a binary expression that is the RHS of STMT.
3709 Place the operands of the expression tree in the vector named OPS. */
3711 static void
3712 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3713 bool is_associative, bool set_visited)
3715 tree binlhs = gimple_assign_rhs1 (stmt);
3716 tree binrhs = gimple_assign_rhs2 (stmt);
3717 gimple binlhsdef = NULL, binrhsdef = NULL;
3718 bool binlhsisreassoc = false;
3719 bool binrhsisreassoc = false;
3720 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3721 struct loop *loop = loop_containing_stmt (stmt);
3722 tree base = NULL_TREE;
3723 HOST_WIDE_INT exponent = 0;
3725 if (set_visited)
3726 gimple_set_visited (stmt, true);
3728 if (TREE_CODE (binlhs) == SSA_NAME)
3730 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3731 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3732 && !stmt_could_throw_p (binlhsdef));
3735 if (TREE_CODE (binrhs) == SSA_NAME)
3737 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3738 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3739 && !stmt_could_throw_p (binrhsdef));
3742 /* If the LHS is not reassociable, but the RHS is, we need to swap
3743 them. If neither is reassociable, there is nothing we can do, so
3744 just put them in the ops vector. If the LHS is reassociable,
3745 linearize it. If both are reassociable, then linearize the RHS
3746 and the LHS. */
3748 if (!binlhsisreassoc)
3750 tree temp;
3752       /* If this is not an associative operation, like division, give up.  */
3753 if (!is_associative)
3755 add_to_ops_vec (ops, binrhs);
3756 return;
3759 if (!binrhsisreassoc)
3761 if (rhscode == MULT_EXPR
3762 && TREE_CODE (binrhs) == SSA_NAME
3763 && acceptable_pow_call (binrhsdef, &base, &exponent))
3765 add_repeat_to_ops_vec (ops, base, exponent);
3766 gimple_set_visited (binrhsdef, true);
3768 else
3769 add_to_ops_vec (ops, binrhs);
3771 if (rhscode == MULT_EXPR
3772 && TREE_CODE (binlhs) == SSA_NAME
3773 && acceptable_pow_call (binlhsdef, &base, &exponent))
3775 add_repeat_to_ops_vec (ops, base, exponent);
3776 gimple_set_visited (binlhsdef, true);
3778 else
3779 add_to_ops_vec (ops, binlhs);
3781 return;
3784 if (dump_file && (dump_flags & TDF_DETAILS))
3786 fprintf (dump_file, "swapping operands of ");
3787 print_gimple_stmt (dump_file, stmt, 0, 0);
3790 swap_ssa_operands (stmt,
3791 gimple_assign_rhs1_ptr (stmt),
3792 gimple_assign_rhs2_ptr (stmt));
3793 update_stmt (stmt);
3795 if (dump_file && (dump_flags & TDF_DETAILS))
3797 fprintf (dump_file, " is now ");
3798 print_gimple_stmt (dump_file, stmt, 0, 0);
3801 /* We want to make it so the lhs is always the reassociative op,
3802 so swap. */
3803 temp = binlhs;
3804 binlhs = binrhs;
3805 binrhs = temp;
3807 else if (binrhsisreassoc)
3809 linearize_expr (stmt);
3810 binlhs = gimple_assign_rhs1 (stmt);
3811 binrhs = gimple_assign_rhs2 (stmt);
3814 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3815 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3816 rhscode, loop));
3817 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3818 is_associative, set_visited);
3820 if (rhscode == MULT_EXPR
3821 && TREE_CODE (binrhs) == SSA_NAME
3822 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3824 add_repeat_to_ops_vec (ops, base, exponent);
3825 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3827 else
3828 add_to_ops_vec (ops, binrhs);
3831 /* Repropagate the negates back into subtracts, since no other pass
3832 currently does it. */
3834 static void
3835 repropagate_negates (void)
3837 unsigned int i = 0;
3838 tree negate;
3840 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3842 gimple user = get_single_immediate_use (negate);
3844 if (!user || !is_gimple_assign (user))
3845 continue;
3847 /* The negate operand can be either operand of a PLUS_EXPR
3848 (it can be the LHS if the RHS is a constant for example).
3850 Force the negate operand to the RHS of the PLUS_EXPR, then
3851 transform the PLUS_EXPR into a MINUS_EXPR. */
3852 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3854 /* If the negated operand appears on the LHS of the
3855 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3856 to force the negated operand to the RHS of the PLUS_EXPR. */
3857 if (gimple_assign_rhs1 (user) == negate)
3859 swap_ssa_operands (user,
3860 gimple_assign_rhs1_ptr (user),
3861 gimple_assign_rhs2_ptr (user));
3864 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3865 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3866 if (gimple_assign_rhs2 (user) == negate)
3868 tree rhs1 = gimple_assign_rhs1 (user);
3869 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3870 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3871 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3872 update_stmt (user);
3875 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3877 if (gimple_assign_rhs1 (user) == negate)
3879 /* We have
3880 x = -a
3881 y = x - b
3882 which we transform into
3883 x = a + b
3884 y = -x .
3885 This pushes down the negate which we possibly can merge
3886 into some other operation, hence insert it into the
3887 plus_negates vector. */
3888 gimple feed = SSA_NAME_DEF_STMT (negate);
3889 tree a = gimple_assign_rhs1 (feed);
3890 tree b = gimple_assign_rhs2 (user);
3891 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
3892 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
3893 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
3894 gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
3895 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3896 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
3897 user = gsi_stmt (gsi2);
3898 update_stmt (user);
3899 gsi_remove (&gsi, true);
3900 release_defs (feed);
3901 plus_negates.safe_push (gimple_assign_lhs (user));
3903 else
3905 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3906 rid of one operation. */
3907 gimple feed = SSA_NAME_DEF_STMT (negate);
3908 tree a = gimple_assign_rhs1 (feed);
3909 tree rhs1 = gimple_assign_rhs1 (user);
3910 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3911 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3912 update_stmt (gsi_stmt (gsi));
3918 /* Returns true if OP is of a type for which we can do reassociation.
3919 That is for integral or non-saturating fixed-point types, and for
3920    floating-point types when associative-math is enabled.  */
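/* For example, operands of type unsigned int (wrapping overflow) or of a
   non-saturating fixed-point type qualify, as does double when
   -fassociative-math is in effect; a plain signed int whose overflow is
   undefined does not.  */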
3922 static bool
3923 can_reassociate_p (tree op)
3925 tree type = TREE_TYPE (op);
3926 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3927 || NON_SAT_FIXED_POINT_TYPE_P (type)
3928 || (flag_associative_math && FLOAT_TYPE_P (type)))
3929 return true;
3930 return false;
3933 /* Break up subtract operations in block BB.
3935 We do this top down because we don't know whether the subtract is
3936 part of a possible chain of reassociation except at the top.
3938 IE given
3939 d = f + g
3940 c = a + e
3941 b = c - d
3942 q = b - r
3943 k = t - q
3945 we want to break up k = t - q, but we won't until we've transformed q
3946 = b - r, which won't be broken up until we transform b = c - d.
3948 En passant, clear the GIMPLE visited flag on every statement
3949 and set UIDs within each basic block. */
3951 static void
3952 break_up_subtract_bb (basic_block bb)
3954 gimple_stmt_iterator gsi;
3955 basic_block son;
3956 unsigned int uid = 1;
3958 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3960 gimple stmt = gsi_stmt (gsi);
3961 gimple_set_visited (stmt, false);
3962 gimple_set_uid (stmt, uid++);
3964 if (!is_gimple_assign (stmt)
3965 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3966 continue;
3968 /* Look for simple gimple subtract operations. */
3969 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3971 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3972 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3973 continue;
3975 /* Check for a subtract used only in an addition. If this
3976 is the case, transform it into add of a negate for better
3977 reassociation. IE transform C = A-B into C = A + -B if C
3978 is only used in an addition. */
3979 if (should_break_up_subtract (stmt))
3980 break_up_subtract (stmt, &gsi);
3982 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3983 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3984 plus_negates.safe_push (gimple_assign_lhs (stmt));
3986 for (son = first_dom_son (CDI_DOMINATORS, bb);
3987 son;
3988 son = next_dom_son (CDI_DOMINATORS, son))
3989 break_up_subtract_bb (son);
3992 /* Used for repeated factor analysis. */
3993 struct repeat_factor_d
3995 /* An SSA name that occurs in a multiply chain. */
3996 tree factor;
3998 /* Cached rank of the factor. */
3999 unsigned rank;
4001 /* Number of occurrences of the factor in the chain. */
4002 HOST_WIDE_INT count;
4004 /* An SSA name representing the product of this factor and
4005 all factors appearing later in the repeated factor vector. */
4006 tree repr;
4009 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4010 typedef const struct repeat_factor_d *const_repeat_factor_t;
4013 static vec<repeat_factor> repeat_factor_vec;
4015 /* Used for sorting the repeat factor vector. Sort primarily by
4016 ascending occurrence count, secondarily by descending rank. */
4018 static int
4019 compare_repeat_factors (const void *x1, const void *x2)
4021 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4022 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4024 if (rf1->count != rf2->count)
4025 return rf1->count - rf2->count;
4027 return rf2->rank - rf1->rank;
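/* For instance, the factors of x * x * y * y * y * z * z * z * z sort
   as { x (count 2), y (count 3), z (count 4) }; factors with equal
   counts are ordered by decreasing rank.  */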
4030 /* Look for repeated operands in OPS in the multiply tree rooted at
4031 STMT. Replace them with an optimal sequence of multiplies and powi
4032 builtin calls, and remove the used operands from OPS. Return an
4033 SSA name representing the value of the replacement sequence. */
4035 static tree
4036 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4038 unsigned i, j, vec_len;
4039 int ii;
4040 operand_entry_t oe;
4041 repeat_factor_t rf1, rf2;
4042 repeat_factor rfnew;
4043 tree result = NULL_TREE;
4044 tree target_ssa, iter_result;
4045 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4046 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4047 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4048 gimple mul_stmt, pow_stmt;
4050 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4051 target. */
4052 if (!powi_fndecl)
4053 return NULL_TREE;
4055 /* Allocate the repeated factor vector. */
4056 repeat_factor_vec.create (10);
4058 /* Scan the OPS vector for all SSA names in the product and build
4059 up a vector of occurrence counts for each factor. */
4060 FOR_EACH_VEC_ELT (*ops, i, oe)
4062 if (TREE_CODE (oe->op) == SSA_NAME)
4064 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4066 if (rf1->factor == oe->op)
4068 rf1->count += oe->count;
4069 break;
4073 if (j >= repeat_factor_vec.length ())
4075 rfnew.factor = oe->op;
4076 rfnew.rank = oe->rank;
4077 rfnew.count = oe->count;
4078 rfnew.repr = NULL_TREE;
4079 repeat_factor_vec.safe_push (rfnew);
4084 /* Sort the repeated factor vector by (a) increasing occurrence count,
4085 and (b) decreasing rank. */
4086 repeat_factor_vec.qsort (compare_repeat_factors);
4088 /* It is generally best to combine as many base factors as possible
4089 into a product before applying __builtin_powi to the result.
4090 However, the sort order chosen for the repeated factor vector
4091 allows us to cache partial results for the product of the base
4092 factors for subsequent use. When we already have a cached partial
4093 result from a previous iteration, it is best to make use of it
4094 before looking for another __builtin_powi opportunity.
4096 As an example, consider x * x * y * y * y * z * z * z * z.
4097 We want to first compose the product x * y * z, raise it to the
4098 second power, then multiply this by y * z, and finally multiply
4099 by z. This can be done in 5 multiplies provided we cache y * z
4100 for use in both expressions:
4102 t1 = y * z
4103 t2 = t1 * x
4104 t3 = t2 * t2
4105 t4 = t1 * t3
4106 result = t4 * z
4108 If we instead ignored the cached y * z and first multiplied by
4109 the __builtin_powi opportunity z * z, we would get the inferior:
4111 t1 = y * z
4112 t2 = t1 * x
4113 t3 = t2 * t2
4114 t4 = z * z
4115 t5 = t3 * t4
4116 result = t5 * y */
4118 vec_len = repeat_factor_vec.length ();
4120 /* Repeatedly look for opportunities to create a builtin_powi call. */
4121 while (true)
4123 HOST_WIDE_INT power;
4125 /* First look for the largest cached product of factors from
4126 preceding iterations. If found, create a builtin_powi for
4127 it if the minimum occurrence count for its factors is at
4128 least 2, or just use this cached product as our next
4129 multiplicand if the minimum occurrence count is 1. */
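/* In the example above, after the first __builtin_powi call the cached
   product y * z still has an occurrence count of 1, so it is simply
   multiplied into the result rather than raised to a power again.  */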
4130 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4132 if (rf1->repr && rf1->count > 0)
4133 break;
4136 if (j < vec_len)
4138 power = rf1->count;
4140 if (power == 1)
4142 iter_result = rf1->repr;
4144 if (dump_file && (dump_flags & TDF_DETAILS))
4146 unsigned elt;
4147 repeat_factor_t rf;
4148 fputs ("Multiplying by cached product ", dump_file);
4149 for (elt = j; elt < vec_len; elt++)
4151 rf = &repeat_factor_vec[elt];
4152 print_generic_expr (dump_file, rf->factor, 0);
4153 if (elt < vec_len - 1)
4154 fputs (" * ", dump_file);
4156 fputs ("\n", dump_file);
4159 else
4161 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4162 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4163 build_int_cst (integer_type_node,
4164 power));
4165 gimple_call_set_lhs (pow_stmt, iter_result);
4166 gimple_set_location (pow_stmt, gimple_location (stmt));
4167 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4169 if (dump_file && (dump_flags & TDF_DETAILS))
4171 unsigned elt;
4172 repeat_factor_t rf;
4173 fputs ("Building __builtin_pow call for cached product (",
4174 dump_file);
4175 for (elt = j; elt < vec_len; elt++)
4177 rf = &repeat_factor_vec[elt];
4178 print_generic_expr (dump_file, rf->factor, 0);
4179 if (elt < vec_len - 1)
4180 fputs (" * ", dump_file);
4182 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4183 power);
4187 else
4189 /* Otherwise, find the first factor in the repeated factor
4190 vector whose occurrence count is at least 2. If no such
4191 factor exists, there are no builtin_powi opportunities
4192 remaining. */
4193 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4195 if (rf1->count >= 2)
4196 break;
4199 if (j >= vec_len)
4200 break;
4202 power = rf1->count;
4204 if (dump_file && (dump_flags & TDF_DETAILS))
4206 unsigned elt;
4207 repeat_factor_t rf;
4208 fputs ("Building __builtin_pow call for (", dump_file);
4209 for (elt = j; elt < vec_len; elt++)
4211 rf = &repeat_factor_vec[elt];
4212 print_generic_expr (dump_file, rf->factor, 0);
4213 if (elt < vec_len - 1)
4214 fputs (" * ", dump_file);
4216 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4219 reassociate_stats.pows_created++;
4221 /* Visit each element of the vector in reverse order (so that
4222 high-occurrence elements are visited first, and within the
4223 same occurrence count, lower-ranked elements are visited
4224 first). Form a linear product of all elements in this order
4225 whose occurrence count is at least that of element J.
4226 Record the SSA name representing the product of each element
4227 with all subsequent elements in the vector. */
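/* For the example above this sets z's representative to z itself, then
   builds t1 = y * z and t2 = t1 * x, caching each partial product in
   the corresponding repr field.  */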
4228 if (j == vec_len - 1)
4229 rf1->repr = rf1->factor;
4230 else
4232 for (ii = vec_len - 2; ii >= (int)j; ii--)
4234 tree op1, op2;
4236 rf1 = &repeat_factor_vec[ii];
4237 rf2 = &repeat_factor_vec[ii + 1];
4239 /* Init the last factor's representative to be itself. */
4240 if (!rf2->repr)
4241 rf2->repr = rf2->factor;
4243 op1 = rf1->factor;
4244 op2 = rf2->repr;
4246 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4247 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4248 target_ssa,
4249 op1, op2);
4250 gimple_set_location (mul_stmt, gimple_location (stmt));
4251 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4252 rf1->repr = target_ssa;
4254 /* Don't reprocess the multiply we just introduced. */
4255 gimple_set_visited (mul_stmt, true);
4259 /* Form a call to __builtin_powi for the maximum product
4260 just formed, raised to the power obtained earlier. */
4261 rf1 = &repeat_factor_vec[j];
4262 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4263 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4264 build_int_cst (integer_type_node,
4265 power));
4266 gimple_call_set_lhs (pow_stmt, iter_result);
4267 gimple_set_location (pow_stmt, gimple_location (stmt));
4268 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4271 /* If we previously formed at least one other builtin_powi call,
4272 form the product of this one and those others. */
4273 if (result)
4275 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4276 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4277 result, iter_result);
4278 gimple_set_location (mul_stmt, gimple_location (stmt));
4279 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4280 gimple_set_visited (mul_stmt, true);
4281 result = new_result;
4283 else
4284 result = iter_result;
4286 /* Decrement the occurrence count of each element in the product
4287 by the count found above, and remove this many copies of each
4288 factor from OPS. */
4289 for (i = j; i < vec_len; i++)
4291 unsigned k = power;
4292 unsigned n;
4294 rf1 = &repeat_factor_vec[i];
4295 rf1->count -= power;
4297 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4299 if (oe->op == rf1->factor)
4301 if (oe->count <= k)
4303 ops->ordered_remove (n);
4304 k -= oe->count;
4306 if (k == 0)
4307 break;
4309 else
4311 oe->count -= k;
4312 break;
4319 /* At this point all elements in the repeated factor vector have a
4320 remaining occurrence count of 0 or 1, and those with a count of 1
4321 don't have cached representatives. Re-sort the ops vector and
4322 clean up. */
4323 ops->qsort (sort_by_operand_rank);
4324 repeat_factor_vec.release ();
4326 /* Return the final product computed herein. Note that there may
4327 still be some elements with single occurrence count left in OPS;
4328 those will be handled by the normal reassociation logic. */
4329 return result;
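/* A minimal sketch (not part of the pass) of the statement sequence the
   code above produces for x * x * y * y * y * z * z * z * z, assuming
   __builtin_powi is available for the type; the temporary names are
   hypothetical stand-ins for the "reassocpow" SSA names.  */
#if 0
static double
attempt_builtin_powi_example (double x, double y, double z)
{
  double t1 = y * z;                  /* cached partial product */
  double t2 = t1 * x;                 /* x * y * z */
  double t3 = __builtin_powi (t2, 2); /* (x*y*z)^2 */
  double t4 = t3 * t1;                /* reuse the cached y * z */
  return t4 * z;                      /* final factor of z */
}
#endif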
4332 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4334 static void
4335 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4337 tree rhs1;
4339 if (dump_file && (dump_flags & TDF_DETAILS))
4341 fprintf (dump_file, "Transforming ");
4342 print_gimple_stmt (dump_file, stmt, 0, 0);
4345 rhs1 = gimple_assign_rhs1 (stmt);
4346 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4347 update_stmt (stmt);
4348 remove_visited_stmt_chain (rhs1);
4350 if (dump_file && (dump_flags & TDF_DETAILS))
4352 fprintf (dump_file, " into ");
4353 print_gimple_stmt (dump_file, stmt, 0, 0);
4357 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4359 static void
4360 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4361 tree rhs1, tree rhs2)
4363 if (dump_file && (dump_flags & TDF_DETAILS))
4365 fprintf (dump_file, "Transforming ");
4366 print_gimple_stmt (dump_file, stmt, 0, 0);
4369 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4370 update_stmt (gsi_stmt (*gsi));
4371 remove_visited_stmt_chain (rhs1);
4373 if (dump_file && (dump_flags & TDF_DETAILS))
4375 fprintf (dump_file, " into ");
4376 print_gimple_stmt (dump_file, stmt, 0, 0);
4380 /* Reassociate expressions in basic block BB and in its children in
4381 the post-dominator tree. */
4383 static void
4384 reassociate_bb (basic_block bb)
4386 gimple_stmt_iterator gsi;
4387 basic_block son;
4388 gimple stmt = last_stmt (bb);
4390 if (stmt && !gimple_visited_p (stmt))
4391 maybe_optimize_range_tests (stmt);
4393 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4395 stmt = gsi_stmt (gsi);
4397 if (is_gimple_assign (stmt)
4398 && !stmt_could_throw_p (stmt))
4400 tree lhs, rhs1, rhs2;
4401 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4403 /* If this is not a gimple binary expression, there is
4404 nothing for us to do with it. */
4405 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4406 continue;
4408 /* If this was part of an already processed statement,
4409 we don't need to touch it again. */
4410 if (gimple_visited_p (stmt))
4412 /* This statement might have become dead because of previous
4413 reassociations. */
4414 if (has_zero_uses (gimple_get_lhs (stmt)))
4416 gsi_remove (&gsi, true);
4417 release_defs (stmt);
4418 /* We might have removed the last stmt above, which
4419 leaves the iterator at the end of the sequence.
4420 In that case reset it to the last stmt of the block;
4421 if the block is now empty because we removed its only
4422 statement, the iterator is still at the end and we
4423 need to bail out. */
4424 if (gsi_end_p (gsi))
4426 gsi = gsi_last_bb (bb);
4427 if (gsi_end_p (gsi))
4428 break;
4431 continue;
4434 lhs = gimple_assign_lhs (stmt);
4435 rhs1 = gimple_assign_rhs1 (stmt);
4436 rhs2 = gimple_assign_rhs2 (stmt);
4438 /* For non-bit or min/max operations we can't associate
4439 all types. Verify that here. */
4440 if (rhs_code != BIT_IOR_EXPR
4441 && rhs_code != BIT_AND_EXPR
4442 && rhs_code != BIT_XOR_EXPR
4443 && rhs_code != MIN_EXPR
4444 && rhs_code != MAX_EXPR
4445 && (!can_reassociate_p (lhs)
4446 || !can_reassociate_p (rhs1)
4447 || !can_reassociate_p (rhs2)))
4448 continue;
4450 if (associative_tree_code (rhs_code))
4452 auto_vec<operand_entry_t> ops;
4453 tree powi_result = NULL_TREE;
4455 /* There may be no immediate uses left by the time we
4456 get here because we may have eliminated them all. */
4457 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4458 continue;
4460 gimple_set_visited (stmt, true);
4461 linearize_expr_tree (&ops, stmt, true, true);
4462 ops.qsort (sort_by_operand_rank);
4463 optimize_ops_list (rhs_code, &ops);
4464 if (undistribute_ops_list (rhs_code, &ops,
4465 loop_containing_stmt (stmt)))
4467 ops.qsort (sort_by_operand_rank);
4468 optimize_ops_list (rhs_code, &ops);
4471 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4472 optimize_range_tests (rhs_code, &ops);
4474 if (first_pass_instance
4475 && rhs_code == MULT_EXPR
4476 && flag_unsafe_math_optimizations)
4477 powi_result = attempt_builtin_powi (stmt, &ops);
4479 /* If the operand vector is now empty, all operands were
4480 consumed by the __builtin_powi optimization. */
4481 if (ops.length () == 0)
4482 transform_stmt_to_copy (&gsi, stmt, powi_result);
4483 else if (ops.length () == 1)
4485 tree last_op = ops.last ()->op;
4487 if (powi_result)
4488 transform_stmt_to_multiply (&gsi, stmt, last_op,
4489 powi_result);
4490 else
4491 transform_stmt_to_copy (&gsi, stmt, last_op);
4493 else
4495 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4496 int ops_num = ops.length ();
4497 int width = get_reassociation_width (ops_num, rhs_code, mode);
4498 tree new_lhs = lhs;
4500 if (dump_file && (dump_flags & TDF_DETAILS))
4501 fprintf (dump_file,
4502 "Width = %d was chosen for reassociation\n", width);
4504 if (width > 1
4505 && ops.length () > 3)
4506 rewrite_expr_tree_parallel (stmt, width, ops);
4507 else
4509 /* When there are three operands left, we want
4510 to make sure the ones that get the double
4511 binary op are chosen wisely. */
4512 int len = ops.length ();
4513 if (len >= 3)
4514 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4516 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4517 powi_result != NULL);
4520 /* If we combined some repeated factors into a
4521 __builtin_powi call, multiply that result by the
4522 reassociated operands. */
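/* That is, the rewritten statement computes the reassociated product
   into a fresh SSA name, and a new multiply of the __builtin_powi
   result by that name is emitted to define the original LHS.  */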
4523 if (powi_result)
4525 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4526 tree type = TREE_TYPE (lhs);
4527 tree target_ssa = make_temp_ssa_name (type, NULL,
4528 "reassocpow");
4529 gimple_set_lhs (lhs_stmt, target_ssa);
4530 update_stmt (lhs_stmt);
4531 if (lhs != new_lhs)
4532 target_ssa = new_lhs;
4533 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4534 powi_result,
4535 target_ssa);
4536 gimple_set_location (mul_stmt, gimple_location (stmt));
4537 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4543 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4544 son;
4545 son = next_dom_son (CDI_POST_DOMINATORS, son))
4546 reassociate_bb (son);
4549 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4550 void debug_ops_vector (vec<operand_entry_t> ops);
4552 /* Dump the operand entry vector OPS to FILE. */
4554 void
4555 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4557 operand_entry_t oe;
4558 unsigned int i;
4560 FOR_EACH_VEC_ELT (ops, i, oe)
4562 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4563 print_generic_expr (file, oe->op, 0);
4567 /* Dump the operand entry vector OPS to STDERR. */
4569 DEBUG_FUNCTION void
4570 debug_ops_vector (vec<operand_entry_t> ops)
4572 dump_ops_vector (stderr, ops);
4575 static void
4576 do_reassoc (void)
4578 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4579 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4582 /* Initialize the reassociation pass. */
4584 static void
4585 init_reassoc (void)
4587 int i;
4588 long rank = 2;
4589 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4591 /* Find the loops, so that we can prevent moving calculations in
4592 them. */
4593 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4595 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4597 operand_entry_pool = create_alloc_pool ("operand entry pool",
4598 sizeof (struct operand_entry), 30);
4599 next_operand_entry_id = 0;
4601 /* Reverse RPO (Reverse Post Order) will give us something where
4602 deeper loops come later. */
4603 pre_and_rev_post_order_compute (NULL, bbs, false);
4604 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
4605 operand_rank = pointer_map_create ();
4607 /* Give each default definition a distinct rank. This includes
4608 parameters and the static chain. Walk backwards over all
4609 SSA names so that we get proper rank ordering according
4610 to tree_swap_operands_p. */
4611 for (i = num_ssa_names - 1; i > 0; --i)
4613 tree name = ssa_name (i);
4614 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4615 insert_operand_rank (name, ++rank);
4618 /* Set up rank for each BB */
4619 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
4620 bb_rank[bbs[i]] = ++rank << 16;
4622 free (bbs);
4623 calculate_dominance_info (CDI_POST_DOMINATORS);
4624 plus_negates = vNULL;
4627 /* Cleanup after the reassociation pass, and print stats if
4628 requested. */
4630 static void
4631 fini_reassoc (void)
4633 statistics_counter_event (cfun, "Linearized",
4634 reassociate_stats.linearized);
4635 statistics_counter_event (cfun, "Constants eliminated",
4636 reassociate_stats.constants_eliminated);
4637 statistics_counter_event (cfun, "Ops eliminated",
4638 reassociate_stats.ops_eliminated);
4639 statistics_counter_event (cfun, "Statements rewritten",
4640 reassociate_stats.rewritten);
4641 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4642 reassociate_stats.pows_encountered);
4643 statistics_counter_event (cfun, "Built-in powi calls created",
4644 reassociate_stats.pows_created);
4646 pointer_map_destroy (operand_rank);
4647 free_alloc_pool (operand_entry_pool);
4648 free (bb_rank);
4649 plus_negates.release ();
4650 free_dominance_info (CDI_POST_DOMINATORS);
4651 loop_optimizer_finalize ();
4654 /* Gate and execute functions for Reassociation. */
4656 static unsigned int
4657 execute_reassoc (void)
4659 init_reassoc ();
4661 do_reassoc ();
4662 repropagate_negates ();
4664 fini_reassoc ();
4665 return 0;
4668 static bool
4669 gate_tree_ssa_reassoc (void)
4671 return flag_tree_reassoc != 0;
4674 namespace {
4676 const pass_data pass_data_reassoc =
4678 GIMPLE_PASS, /* type */
4679 "reassoc", /* name */
4680 OPTGROUP_NONE, /* optinfo_flags */
4681 true, /* has_gate */
4682 true, /* has_execute */
4683 TV_TREE_REASSOC, /* tv_id */
4684 ( PROP_cfg | PROP_ssa ), /* properties_required */
4685 0, /* properties_provided */
4686 0, /* properties_destroyed */
4687 0, /* todo_flags_start */
4688 ( TODO_verify_ssa
4689 | TODO_update_ssa_only_virtuals
4690 | TODO_verify_flow ), /* todo_flags_finish */
4693 class pass_reassoc : public gimple_opt_pass
4695 public:
4696 pass_reassoc (gcc::context *ctxt)
4697 : gimple_opt_pass (pass_data_reassoc, ctxt)
4700 /* opt_pass methods: */
4701 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
4702 bool gate () { return gate_tree_ssa_reassoc (); }
4703 unsigned int execute () { return execute_reassoc (); }
4705 }; // class pass_reassoc
4707 } // anon namespace
4709 gimple_opt_pass *
4710 make_pass_reassoc (gcc::context *ctxt)
4712 return new pass_reassoc (ctxt);