PR sanitizer/59009
gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "tree.h"
29 #include "basic-block.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-inline.h"
32 #include "gimplify.h"
33 #include "gimple-ssa.h"
34 #include "tree-cfg.h"
35 #include "tree-phinodes.h"
36 #include "ssa-iterators.h"
37 #include "tree-ssanames.h"
38 #include "tree-ssa-loop-niter.h"
39 #include "tree-ssa-loop.h"
40 #include "tree-dfa.h"
41 #include "tree-ssa.h"
42 #include "tree-iterator.h"
43 #include "tree-pass.h"
44 #include "alloc-pool.h"
45 #include "vec.h"
46 #include "langhooks.h"
47 #include "pointer-set.h"
48 #include "cfgloop.h"
49 #include "flags.h"
50 #include "target.h"
51 #include "params.h"
52 #include "diagnostic-core.h"
54 /* This is a simple global reassociation pass. It is, in part, based
55 on the LLVM pass of the same name (They do some things more/less
56 than we do, in different orders, etc).
58 It consists of five steps:
60 1. Breaking up subtract operations into addition + negate, where
61 it would promote the reassociation of adds.
63 2. Left linearization of the expression trees, so that (A+B)+(C+D)
64 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
65 During linearization, we place the operands of the binary
66 expressions into a vector of operand_entry_t
68 3. Optimization of the operand lists, eliminating things like a +
69 -a, a & a, etc.
71 3a. Combine repeated factors with the same occurrence counts
72 into a __builtin_powi call that will later be optimized into
73 an optimal number of multiplies.
75 4. Rewrite the expression trees we linearized and optimized so
76 they are in proper rank order.
78 5. Repropagate negates, as nothing else will clean it up ATM.
80 A bit of theory on #4, since nobody seems to write anything down
81 about why it makes sense to do it the way they do it:
83 We could do this much nicer theoretically, but don't (for reasons
84 explained after how to do it theoretically nice :P).
86 In order to promote the most redundancy elimination, you want
87 binary expressions whose operands are the same rank (or
88 preferably, the same value) exposed to the redundancy eliminator,
89 for possible elimination.
91 So the way to do this if we really cared, is to build the new op
92 tree from the leaves to the roots, merging as you go, and putting the
93 new op on the end of the worklist, until you are left with one
94 thing on the worklist.
96 IE if you have to rewrite the following set of operands (listed with
97 rank in parentheses), with opcode PLUS_EXPR:
99 a (1), b (1), c (1), d (2), e (2)
102 We start with our merge worklist empty, and the ops list with all of
103 those on it.
105 You want to first merge all leaves of the same rank, as much as
106 possible.
108 So first build a binary op of
110 mergetmp = a + b, and put "mergetmp" on the merge worklist.
112 Because there is no three operand form of PLUS_EXPR, c is not going to
113 be exposed to redundancy elimination as a rank 1 operand.
115 So you might as well throw it on the merge worklist (you could also
116 consider it to now be a rank two operand, and merge it with d and e,
117 but in this case, you then have evicted e from a binary op. So at
118 least in this situation, you can't win.)
120 Then build a binary op of d + e
121 mergetmp2 = d + e
123 and put mergetmp2 on the merge worklist.
125 so merge worklist = {mergetmp, c, mergetmp2}
127 Continue building binary ops of these operations until you have only
128 one operation left on the worklist.
130 So we have
132 build binary op
133 mergetmp3 = mergetmp + c
135 worklist = {mergetmp2, mergetmp3}
137 mergetmp4 = mergetmp2 + mergetmp3
139 worklist = {mergetmp4}
141 because we have one operation left, we can now just set the original
142 statement equal to the result of that operation.
144 This will at least expose a + b and d + e to redundancy elimination
145 as binary operations.
147 For extra points, you can reuse the old statements to build the
148 mergetmps, since you shouldn't run out.
150 So why don't we do this?
152 Because it's expensive, and rarely will help. Most trees we are
153 reassociating have 3 or fewer ops. If they have 2 ops, they already
154 will be written into a nice single binary op. If you have 3 ops, a
155 single simple check suffices to tell you whether the first two are of the
156 same rank. If so, you know to order it
158 mergetmp = op1 + op2
159 newstmt = mergetmp + op3
161 instead of
162 mergetmp = op2 + op3
163 newstmt = mergetmp + op1
165 If all three are of the same rank, you can't expose them all in a
166 single binary operator anyway, so the above is *still* the best you
167 can do.
169 Thus, this is what we do. When we have three ops left, we check to see
170 what order to put them in, and call it a day. As a nod to vector sum
171 reduction, we check if any of the ops are really a phi node that is a
172 destructive update for the associating op, and keep the destructive
173 update together for vector sum reduction recognition. */
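/* Illustrative walk-through (added for exposition; not part of the
   original sources): how the five steps above act on a small fragment,
   assuming a, b and c are incoming values of equal rank.

     x = a - b;          step 1:  x = a + (-b);
     y = c - a;                   y = c + (-a);
     r = x + y;          step 2:  linearize into ops = { a, -b, c, -a }
                         step 3:  a and -a cancel  ->  ops = { -b, c }
                         step 4:  rewrite in rank order:  r = -b + c
                         step 5:  repropagate the negate: r = c - b  */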
176 /* Statistics */
177 static struct
178 {
179 int linearized;
180 int constants_eliminated;
181 int ops_eliminated;
182 int rewritten;
183 int pows_encountered;
184 int pows_created;
185 } reassociate_stats;
187 /* Operator, rank pair. */
188 typedef struct operand_entry
189 {
190 unsigned int rank;
191 int id;
192 tree op;
193 unsigned int count;
194 } *operand_entry_t;
196 static alloc_pool operand_entry_pool;
198 /* This is used to assign a unique ID to each struct operand_entry
199 so that qsort results are identical on different hosts. */
200 static int next_operand_entry_id;
202 /* Starting rank number for a given basic block, so that we can rank
203 operations using unmovable instructions in that BB based on the bb
204 depth. */
205 static long *bb_rank;
207 /* Operand->rank hashtable. */
208 static struct pointer_map_t *operand_rank;
210 /* Forward decls. */
211 static long get_rank (tree);
214 /* Bias amount for loop-carried phis. We want this to be larger than
215 the depth of any reassociation tree we can see, but not larger than
216 the rank difference between two blocks. */
217 #define PHI_LOOP_BIAS (1 << 15)
219 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
220 an innermost loop, and the phi has only a single use which is inside
221 the loop, then the rank is the block rank of the loop latch plus an
222 extra bias for the loop-carried dependence. This causes expressions
223 calculated into an accumulator variable to be independent for each
224 iteration of the loop. If STMT is some other phi, the rank is the
225 block rank of its containing block. */
226 static long
227 phi_rank (gimple stmt)
229 basic_block bb = gimple_bb (stmt);
230 struct loop *father = bb->loop_father;
231 tree res;
232 unsigned i;
233 use_operand_p use;
234 gimple use_stmt;
236 /* We only care about real loops (those with a latch). */
237 if (!father->latch)
238 return bb_rank[bb->index];
240 /* Interesting phis must be in headers of innermost loops. */
241 if (bb != father->header
242 || father->inner)
243 return bb_rank[bb->index];
245 /* Ignore virtual SSA_NAMEs. */
246 res = gimple_phi_result (stmt);
247 if (virtual_operand_p (res))
248 return bb_rank[bb->index];
250 /* The phi definition must have a single use, and that use must be
251 within the loop. Otherwise this isn't an accumulator pattern. */
252 if (!single_imm_use (res, &use, &use_stmt)
253 || gimple_bb (use_stmt)->loop_father != father)
254 return bb_rank[bb->index];
256 /* Look for phi arguments from within the loop. If found, bias this phi. */
257 for (i = 0; i < gimple_phi_num_args (stmt); i++)
259 tree arg = gimple_phi_arg_def (stmt, i);
260 if (TREE_CODE (arg) == SSA_NAME
261 && !SSA_NAME_IS_DEFAULT_DEF (arg))
263 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
264 if (gimple_bb (def_stmt)->loop_father == father)
265 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
269 /* Must be an uninteresting phi. */
270 return bb_rank[bb->index];
273 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
274 loop-carried dependence of an innermost loop, return TRUE; else
275 return FALSE. */
276 static bool
277 loop_carried_phi (tree exp)
279 gimple phi_stmt;
280 long block_rank;
282 if (TREE_CODE (exp) != SSA_NAME
283 || SSA_NAME_IS_DEFAULT_DEF (exp))
284 return false;
286 phi_stmt = SSA_NAME_DEF_STMT (exp);
288 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
289 return false;
291 /* Non-loop-carried phis have block rank. Loop-carried phis have
292 an additional bias added in. If this phi doesn't have block rank,
293 it's biased and should not be propagated. */
294 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
296 if (phi_rank (phi_stmt) != block_rank)
297 return true;
299 return false;
302 /* Return the maximum of RANK and the rank that should be propagated
303 from expression OP. For most operands, this is just the rank of OP.
304 For loop-carried phis, the value is zero to avoid undoing the bias
305 in favor of the phi. */
306 static long
307 propagate_rank (long rank, tree op)
309 long op_rank;
311 if (loop_carried_phi (op))
312 return rank;
314 op_rank = get_rank (op);
316 return MAX (rank, op_rank);
319 /* Look up the operand rank structure for expression E. */
321 static inline long
322 find_operand_rank (tree e)
324 void **slot = pointer_map_contains (operand_rank, e);
325 return slot ? (long) (intptr_t) *slot : -1;
328 /* Insert {E,RANK} into the operand rank hashtable. */
330 static inline void
331 insert_operand_rank (tree e, long rank)
333 void **slot;
334 gcc_assert (rank > 0);
335 slot = pointer_map_insert (operand_rank, e);
336 gcc_assert (!*slot);
337 *slot = (void *) (intptr_t) rank;
340 /* Given an expression E, return the rank of the expression. */
342 static long
343 get_rank (tree e)
345 /* Constants have rank 0. */
346 if (is_gimple_min_invariant (e))
347 return 0;
349 /* SSA_NAME's have the rank of the expression they are the result of.
351 For globals and uninitialized values, the rank is 0.
352 For function arguments, use the pre-setup rank.
353 For PHI nodes, stores, asm statements, etc, we use the rank of
354 the BB.
355 For simple operations, the rank is the maximum rank of any of
356 its operands, or the bb_rank, whichever is less.
357 I make no claims that this is optimal; however, it gives good
358 results. */
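/* A small worked example of the rules above (illustrative only; it
   assumes the incoming arguments a and b were pre-assigned rank 1
   during pass setup):

     t1 = a + b;    rank = MAX (1, 1) + 1 = 2
     t2 = t1 + 5;   rank = MAX (2, 0) + 1 = 3   (constants have rank 0)

   so t2 sorts ahead of t1 when operands are ordered by decreasing
   rank.  */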
360 /* We make an exception to the normal ranking system to break
361 dependences of accumulator variables in loops. Suppose we
362 have a simple one-block loop containing:
364 x_1 = phi(x_0, x_2)
365 b = a + x_1
366 c = b + d
367 x_2 = c + e
369 As shown, each iteration of the calculation into x is fully
370 dependent upon the iteration before it. We would prefer to
371 see this in the form:
373 x_1 = phi(x_0, x_2)
374 b = a + d
375 c = b + e
376 x_2 = c + x_1
378 If the loop is unrolled, the calculations of b and c from
379 different iterations can be interleaved.
381 To obtain this result during reassociation, we bias the rank
382 of the phi definition x_1 upward, when it is recognized as an
383 accumulator pattern. The artificial rank causes it to be
384 added last, providing the desired independence. */
386 if (TREE_CODE (e) == SSA_NAME)
388 gimple stmt;
389 long rank;
390 int i, n;
391 tree op;
393 if (SSA_NAME_IS_DEFAULT_DEF (e))
394 return find_operand_rank (e);
396 stmt = SSA_NAME_DEF_STMT (e);
397 if (gimple_code (stmt) == GIMPLE_PHI)
398 return phi_rank (stmt);
400 if (!is_gimple_assign (stmt)
401 || gimple_vdef (stmt))
402 return bb_rank[gimple_bb (stmt)->index];
404 /* If we already have a rank for this expression, use that. */
405 rank = find_operand_rank (e);
406 if (rank != -1)
407 return rank;
409 /* Otherwise, find the maximum rank for the operands. As an
410 exception, remove the bias from loop-carried phis when propagating
411 the rank so that dependent operations are not also biased. */
412 rank = 0;
413 if (gimple_assign_single_p (stmt))
415 tree rhs = gimple_assign_rhs1 (stmt);
416 n = TREE_OPERAND_LENGTH (rhs);
417 if (n == 0)
418 rank = propagate_rank (rank, rhs);
419 else
421 for (i = 0; i < n; i++)
423 op = TREE_OPERAND (rhs, i);
425 if (op != NULL_TREE)
426 rank = propagate_rank (rank, op);
430 else
432 n = gimple_num_ops (stmt);
433 for (i = 1; i < n; i++)
435 op = gimple_op (stmt, i);
436 gcc_assert (op);
437 rank = propagate_rank (rank, op);
441 if (dump_file && (dump_flags & TDF_DETAILS))
443 fprintf (dump_file, "Rank for ");
444 print_generic_expr (dump_file, e, 0);
445 fprintf (dump_file, " is %ld\n", (rank + 1));
448 /* Note the rank in the hashtable so we don't recompute it. */
449 insert_operand_rank (e, (rank + 1));
450 return (rank + 1);
453 /* Globals, etc, are rank 0 */
454 return 0;
458 /* We want integer ones to end up last no matter what, since they are
459 the ones we can do the most with. */
460 #define INTEGER_CONST_TYPE 1 << 3
461 #define FLOAT_CONST_TYPE 1 << 2
462 #define OTHER_CONST_TYPE 1 << 1
464 /* Classify an invariant tree into integer, float, or other, so that
465 we can sort them to be near other constants of the same type. */
466 static inline int
467 constant_type (tree t)
469 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
470 return INTEGER_CONST_TYPE;
471 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
472 return FLOAT_CONST_TYPE;
473 else
474 return OTHER_CONST_TYPE;
477 /* qsort comparison function to sort operand entries PA and PB by rank
478 so that the sorted array is ordered by rank in decreasing order. */
479 static int
480 sort_by_operand_rank (const void *pa, const void *pb)
482 const operand_entry_t oea = *(const operand_entry_t *)pa;
483 const operand_entry_t oeb = *(const operand_entry_t *)pb;
485 /* It's nicer for optimize_expression if constants that are likely
486 to fold when added/multiplied/whatever are put next to each
487 other. Since all constants have rank 0, order them by type. */
488 if (oeb->rank == 0 && oea->rank == 0)
490 if (constant_type (oeb->op) != constant_type (oea->op))
491 return constant_type (oeb->op) - constant_type (oea->op);
492 else
493 /* To make sorting result stable, we use unique IDs to determine
494 order. */
495 return oeb->id - oea->id;
498 /* Lastly, make sure the versions that are the same go next to each
499 other. We use SSA_NAME_VERSION because it's stable. */
500 if ((oeb->rank - oea->rank == 0)
501 && TREE_CODE (oea->op) == SSA_NAME
502 && TREE_CODE (oeb->op) == SSA_NAME)
504 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
505 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
506 else
507 return oeb->id - oea->id;
510 if (oeb->rank != oea->rank)
511 return oeb->rank - oea->rank;
512 else
513 return oeb->id - oea->id;
516 /* Add an operand entry to *OPS for the tree operand OP. */
518 static void
519 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
521 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
523 oe->op = op;
524 oe->rank = get_rank (op);
525 oe->id = next_operand_entry_id++;
526 oe->count = 1;
527 ops->safe_push (oe);
530 /* Add an operand entry to *OPS for the tree operand OP with repeat
531 count REPEAT. */
533 static void
534 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
535 HOST_WIDE_INT repeat)
537 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
539 oe->op = op;
540 oe->rank = get_rank (op);
541 oe->id = next_operand_entry_id++;
542 oe->count = repeat;
543 ops->safe_push (oe);
545 reassociate_stats.pows_encountered++;
548 /* Return true if STMT is a reassociable operation containing a binary
549 operation with tree code CODE, and is inside LOOP. */
551 static bool
552 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
554 basic_block bb = gimple_bb (stmt);
556 if (gimple_bb (stmt) == NULL)
557 return false;
559 if (!flow_bb_inside_loop_p (loop, bb))
560 return false;
562 if (is_gimple_assign (stmt)
563 && gimple_assign_rhs_code (stmt) == code
564 && has_single_use (gimple_assign_lhs (stmt)))
565 return true;
567 return false;
571 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
572 operand of that operation. Otherwise, return NULL_TREE. */
574 static tree
575 get_unary_op (tree name, enum tree_code opcode)
577 gimple stmt = SSA_NAME_DEF_STMT (name);
579 if (!is_gimple_assign (stmt))
580 return NULL_TREE;
582 if (gimple_assign_rhs_code (stmt) == opcode)
583 return gimple_assign_rhs1 (stmt);
584 return NULL_TREE;
587 /* If CURR and LAST are a pair of ops that OPCODE allows us to
588 eliminate through equivalences, do so, remove them from OPS, and
589 return true. Otherwise, return false. */
591 static bool
592 eliminate_duplicate_pair (enum tree_code opcode,
593 vec<operand_entry_t> *ops,
594 bool *all_done,
595 unsigned int i,
596 operand_entry_t curr,
597 operand_entry_t last)
600 /* If we have two of the same op, and the opcode is &, |, min, or max,
601 we can eliminate one of them.
602 If we have two of the same op, and the opcode is ^, we can
603 eliminate both of them. */
605 if (last && last->op == curr->op)
607 switch (opcode)
609 case MAX_EXPR:
610 case MIN_EXPR:
611 case BIT_IOR_EXPR:
612 case BIT_AND_EXPR:
613 if (dump_file && (dump_flags & TDF_DETAILS))
615 fprintf (dump_file, "Equivalence: ");
616 print_generic_expr (dump_file, curr->op, 0);
617 fprintf (dump_file, " [&|minmax] ");
618 print_generic_expr (dump_file, last->op, 0);
619 fprintf (dump_file, " -> ");
620 print_generic_stmt (dump_file, last->op, 0);
623 ops->ordered_remove (i);
624 reassociate_stats.ops_eliminated ++;
626 return true;
628 case BIT_XOR_EXPR:
629 if (dump_file && (dump_flags & TDF_DETAILS))
631 fprintf (dump_file, "Equivalence: ");
632 print_generic_expr (dump_file, curr->op, 0);
633 fprintf (dump_file, " ^ ");
634 print_generic_expr (dump_file, last->op, 0);
635 fprintf (dump_file, " -> nothing\n");
638 reassociate_stats.ops_eliminated += 2;
640 if (ops->length () == 2)
642 ops->create (0);
643 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
644 *all_done = true;
646 else
648 ops->ordered_remove (i-1);
649 ops->ordered_remove (i-1);
652 return true;
654 default:
655 break;
658 return false;
661 static vec<tree> plus_negates;
663 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
664 expression, look in OPS for a corresponding positive operation to cancel
665 it out. If we find one, remove the other from OPS, replace
666 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
667 return false. */
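/* For example (illustrative): with OPS = { t, x, y } where t is defined
   as t = -x, the pair t/x cancels and OPS ends up as { y, 0 }; likewise
   if t = ~x, then t + x folds to -1, since ~x is -x - 1 in two's
   complement arithmetic.  */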
669 static bool
670 eliminate_plus_minus_pair (enum tree_code opcode,
671 vec<operand_entry_t> *ops,
672 unsigned int currindex,
673 operand_entry_t curr)
675 tree negateop;
676 tree notop;
677 unsigned int i;
678 operand_entry_t oe;
680 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
681 return false;
683 negateop = get_unary_op (curr->op, NEGATE_EXPR);
684 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
685 if (negateop == NULL_TREE && notop == NULL_TREE)
686 return false;
688 /* Any non-negated version will have a rank that is one less than
689 the current rank. So once we hit those ranks, if we don't find
690 one, we can stop. */
692 for (i = currindex + 1;
693 ops->iterate (i, &oe)
694 && oe->rank >= curr->rank - 1 ;
695 i++)
697 if (oe->op == negateop)
700 if (dump_file && (dump_flags & TDF_DETAILS))
702 fprintf (dump_file, "Equivalence: ");
703 print_generic_expr (dump_file, negateop, 0);
704 fprintf (dump_file, " + -");
705 print_generic_expr (dump_file, oe->op, 0);
706 fprintf (dump_file, " -> 0\n");
709 ops->ordered_remove (i);
710 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
711 ops->ordered_remove (currindex);
712 reassociate_stats.ops_eliminated ++;
714 return true;
716 else if (oe->op == notop)
718 tree op_type = TREE_TYPE (oe->op);
720 if (dump_file && (dump_flags & TDF_DETAILS))
722 fprintf (dump_file, "Equivalence: ");
723 print_generic_expr (dump_file, notop, 0);
724 fprintf (dump_file, " + ~");
725 print_generic_expr (dump_file, oe->op, 0);
726 fprintf (dump_file, " -> -1\n");
729 ops->ordered_remove (i);
730 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
731 ops->ordered_remove (currindex);
732 reassociate_stats.ops_eliminated ++;
734 return true;
738 /* CURR->OP is a negate expr in a plus expr: save it for later
739 inspection in repropagate_negates(). */
740 if (negateop != NULL_TREE)
741 plus_negates.safe_push (curr->op);
743 return false;
746 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
747 bitwise not expression, look in OPS for a corresponding operand to
748 cancel it out. If we find one, remove the other from OPS, replace
749 OPS[CURRINDEX] with 0, and return true. Otherwise, return
750 false. */
752 static bool
753 eliminate_not_pairs (enum tree_code opcode,
754 vec<operand_entry_t> *ops,
755 unsigned int currindex,
756 operand_entry_t curr)
758 tree notop;
759 unsigned int i;
760 operand_entry_t oe;
762 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
763 || TREE_CODE (curr->op) != SSA_NAME)
764 return false;
766 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
767 if (notop == NULL_TREE)
768 return false;
770 /* Any non-not version will have a rank that is one less than
771 the current rank. So once we hit those ranks, if we don't find
772 one, we can stop. */
774 for (i = currindex + 1;
775 ops->iterate (i, &oe)
776 && oe->rank >= curr->rank - 1;
777 i++)
779 if (oe->op == notop)
781 if (dump_file && (dump_flags & TDF_DETAILS))
783 fprintf (dump_file, "Equivalence: ");
784 print_generic_expr (dump_file, notop, 0);
785 if (opcode == BIT_AND_EXPR)
786 fprintf (dump_file, " & ~");
787 else if (opcode == BIT_IOR_EXPR)
788 fprintf (dump_file, " | ~");
789 print_generic_expr (dump_file, oe->op, 0);
790 if (opcode == BIT_AND_EXPR)
791 fprintf (dump_file, " -> 0\n");
792 else if (opcode == BIT_IOR_EXPR)
793 fprintf (dump_file, " -> -1\n");
796 if (opcode == BIT_AND_EXPR)
797 oe->op = build_zero_cst (TREE_TYPE (oe->op));
798 else if (opcode == BIT_IOR_EXPR)
799 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
800 TYPE_PRECISION (TREE_TYPE (oe->op)));
802 reassociate_stats.ops_eliminated += ops->length () - 1;
803 ops->truncate (0);
804 ops->quick_push (oe);
805 return true;
809 return false;
812 /* Use constant value that may be present in OPS to try to eliminate
813 operands. Note that this function is only really used when we've
814 eliminated ops for other reasons, or merged constants. Across
815 single statements, fold already does all of this, plus more. There
816 is little point in duplicating logic, so I've only included the
817 identities that I could ever construct testcases to trigger. */
819 static void
820 eliminate_using_constants (enum tree_code opcode,
821 vec<operand_entry_t> *ops)
823 operand_entry_t oelast = ops->last ();
824 tree type = TREE_TYPE (oelast->op);
826 if (oelast->rank == 0
827 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
829 switch (opcode)
831 case BIT_AND_EXPR:
832 if (integer_zerop (oelast->op))
834 if (ops->length () != 1)
836 if (dump_file && (dump_flags & TDF_DETAILS))
837 fprintf (dump_file, "Found & 0, removing all other ops\n");
839 reassociate_stats.ops_eliminated += ops->length () - 1;
841 ops->truncate (0);
842 ops->quick_push (oelast);
843 return;
846 else if (integer_all_onesp (oelast->op))
848 if (ops->length () != 1)
850 if (dump_file && (dump_flags & TDF_DETAILS))
851 fprintf (dump_file, "Found & -1, removing\n");
852 ops->pop ();
853 reassociate_stats.ops_eliminated++;
856 break;
857 case BIT_IOR_EXPR:
858 if (integer_all_onesp (oelast->op))
860 if (ops->length () != 1)
862 if (dump_file && (dump_flags & TDF_DETAILS))
863 fprintf (dump_file, "Found | -1, removing all other ops\n");
865 reassociate_stats.ops_eliminated += ops->length () - 1;
867 ops->truncate (0);
868 ops->quick_push (oelast);
869 return;
872 else if (integer_zerop (oelast->op))
874 if (ops->length () != 1)
876 if (dump_file && (dump_flags & TDF_DETAILS))
877 fprintf (dump_file, "Found | 0, removing\n");
878 ops->pop ();
879 reassociate_stats.ops_eliminated++;
882 break;
883 case MULT_EXPR:
884 if (integer_zerop (oelast->op)
885 || (FLOAT_TYPE_P (type)
886 && !HONOR_NANS (TYPE_MODE (type))
887 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
888 && real_zerop (oelast->op)))
890 if (ops->length () != 1)
892 if (dump_file && (dump_flags & TDF_DETAILS))
893 fprintf (dump_file, "Found * 0, removing all other ops\n");
895 reassociate_stats.ops_eliminated += ops->length () - 1;
896 ops->truncate (1);
897 ops->quick_push (oelast);
898 return;
901 else if (integer_onep (oelast->op)
902 || (FLOAT_TYPE_P (type)
903 && !HONOR_SNANS (TYPE_MODE (type))
904 && real_onep (oelast->op)))
906 if (ops->length () != 1)
908 if (dump_file && (dump_flags & TDF_DETAILS))
909 fprintf (dump_file, "Found * 1, removing\n");
910 ops->pop ();
911 reassociate_stats.ops_eliminated++;
912 return;
915 break;
916 case BIT_XOR_EXPR:
917 case PLUS_EXPR:
918 case MINUS_EXPR:
919 if (integer_zerop (oelast->op)
920 || (FLOAT_TYPE_P (type)
921 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
922 && fold_real_zero_addition_p (type, oelast->op,
923 opcode == MINUS_EXPR)))
925 if (ops->length () != 1)
927 if (dump_file && (dump_flags & TDF_DETAILS))
928 fprintf (dump_file, "Found [|^+] 0, removing\n");
929 ops->pop ();
930 reassociate_stats.ops_eliminated++;
931 return;
934 break;
935 default:
936 break;
942 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
943 bool, bool);
945 /* Structure for tracking and counting operands. */
946 typedef struct oecount_s {
947 int cnt;
948 int id;
949 enum tree_code oecode;
950 tree op;
951 } oecount;
954 /* The heap for the oecount hashtable and the sorted list of operands. */
955 static vec<oecount> cvec;
958 /* Oecount hashtable helpers. */
960 struct oecount_hasher : typed_noop_remove <void>
961 {
962 /* Note that this hash table stores integers, not pointers.
963 So, observe the casting in the member functions. */
964 typedef void value_type;
965 typedef void compare_type;
966 static inline hashval_t hash (const value_type *);
967 static inline bool equal (const value_type *, const compare_type *);
968 };
970 /* Hash function for oecount. */
972 inline hashval_t
973 oecount_hasher::hash (const value_type *p)
975 const oecount *c = &cvec[(size_t)p - 42];
976 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
979 /* Comparison function for oecount. */
981 inline bool
982 oecount_hasher::equal (const value_type *p1, const compare_type *p2)
984 const oecount *c1 = &cvec[(size_t)p1 - 42];
985 const oecount *c2 = &cvec[(size_t)p2 - 42];
986 return (c1->oecode == c2->oecode
987 && c1->op == c2->op);
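/* Note on the encoding used by the two members above (comment added
   for exposition): the table stores small integers disguised as
   pointers rather than real addresses.  The entry for cvec index I is
   stored as the value I + 42, and hash () and equal () subtract 42
   again; the bias keeps stored values clear of the small values the
   hash table reserves for empty and deleted slots.  */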
990 /* Comparison function for qsort sorting oecount elements by count. */
992 static int
993 oecount_cmp (const void *p1, const void *p2)
995 const oecount *c1 = (const oecount *)p1;
996 const oecount *c2 = (const oecount *)p2;
997 if (c1->cnt != c2->cnt)
998 return c1->cnt - c2->cnt;
999 else
1000 /* If counts are identical, use unique IDs to stabilize qsort. */
1001 return c1->id - c2->id;
1004 /* Return TRUE iff STMT represents a builtin call that raises OP
1005 to some exponent. */
1007 static bool
1008 stmt_is_power_of_op (gimple stmt, tree op)
1010 tree fndecl;
1012 if (!is_gimple_call (stmt))
1013 return false;
1015 fndecl = gimple_call_fndecl (stmt);
1017 if (!fndecl
1018 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1019 return false;
1021 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1023 CASE_FLT_FN (BUILT_IN_POW):
1024 CASE_FLT_FN (BUILT_IN_POWI):
1025 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1027 default:
1028 return false;
1032 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1033 in place and return the result. Assumes that stmt_is_power_of_op
1034 was previously called for STMT and returned TRUE. */
1036 static HOST_WIDE_INT
1037 decrement_power (gimple stmt)
1039 REAL_VALUE_TYPE c, cint;
1040 HOST_WIDE_INT power;
1041 tree arg1;
1043 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1045 CASE_FLT_FN (BUILT_IN_POW):
1046 arg1 = gimple_call_arg (stmt, 1);
1047 c = TREE_REAL_CST (arg1);
1048 power = real_to_integer (&c) - 1;
1049 real_from_integer (&cint, VOIDmode, power, 0, 0);
1050 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1051 return power;
1053 CASE_FLT_FN (BUILT_IN_POWI):
1054 arg1 = gimple_call_arg (stmt, 1);
1055 power = TREE_INT_CST_LOW (arg1) - 1;
1056 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1057 return power;
1059 default:
1060 gcc_unreachable ();
1064 /* Find the single immediate use of STMT's LHS, and replace it
1065 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1066 replace *DEF with OP as well. */
1068 static void
1069 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1071 tree lhs;
1072 gimple use_stmt;
1073 use_operand_p use;
1074 gimple_stmt_iterator gsi;
1076 if (is_gimple_call (stmt))
1077 lhs = gimple_call_lhs (stmt);
1078 else
1079 lhs = gimple_assign_lhs (stmt);
1081 gcc_assert (has_single_use (lhs));
1082 single_imm_use (lhs, &use, &use_stmt);
1083 if (lhs == *def)
1084 *def = op;
1085 SET_USE (use, op);
1086 if (TREE_CODE (op) != SSA_NAME)
1087 update_stmt (use_stmt);
1088 gsi = gsi_for_stmt (stmt);
1089 unlink_stmt_vdef (stmt);
1090 gsi_remove (&gsi, true);
1091 release_defs (stmt);
1094 /* Walks the linear chain with result *DEF searching for an operation
1095 with operand OP and code OPCODE, removing that operation from the chain. *DEF
1096 is updated if there is only one operand but no operation left. */
1098 static void
1099 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1101 gimple stmt = SSA_NAME_DEF_STMT (*def);
1105 tree name;
1107 if (opcode == MULT_EXPR
1108 && stmt_is_power_of_op (stmt, op))
1110 if (decrement_power (stmt) == 1)
1111 propagate_op_to_single_use (op, stmt, def);
1112 return;
1115 name = gimple_assign_rhs1 (stmt);
1117 /* If this is the operation we look for and one of the operands
1118 is ours, simply propagate the other operand into the stmt's
1119 single use. */
1120 if (gimple_assign_rhs_code (stmt) == opcode
1121 && (name == op
1122 || gimple_assign_rhs2 (stmt) == op))
1124 if (name == op)
1125 name = gimple_assign_rhs2 (stmt);
1126 propagate_op_to_single_use (name, stmt, def);
1127 return;
1130 /* We might have a multiply of two __builtin_pow* calls, and
1131 the operand might be hiding in the rightmost one. */
1132 if (opcode == MULT_EXPR
1133 && gimple_assign_rhs_code (stmt) == opcode
1134 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1135 && has_single_use (gimple_assign_rhs2 (stmt)))
1137 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1138 if (stmt_is_power_of_op (stmt2, op))
1140 if (decrement_power (stmt2) == 1)
1141 propagate_op_to_single_use (op, stmt2, def);
1142 return;
1146 /* Continue walking the chain. */
1147 gcc_assert (name != op
1148 && TREE_CODE (name) == SSA_NAME);
1149 stmt = SSA_NAME_DEF_STMT (name);
1151 while (1);
1154 /* Returns true if statement S1 dominates statement S2. Like
1155 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1157 static bool
1158 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1160 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1162 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1163 SSA_NAME. Assume it lives at the beginning of the function and
1164 thus dominates everything. */
1165 if (!bb1 || s1 == s2)
1166 return true;
1168 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1169 if (!bb2)
1170 return false;
1172 if (bb1 == bb2)
1174 /* PHIs in the same basic block are assumed to be
1175 executed all in parallel; if only one stmt is a PHI,
1176 it dominates the other stmt in the same basic block. */
1177 if (gimple_code (s1) == GIMPLE_PHI)
1178 return true;
1180 if (gimple_code (s2) == GIMPLE_PHI)
1181 return false;
1183 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1185 if (gimple_uid (s1) < gimple_uid (s2))
1186 return true;
1188 if (gimple_uid (s1) > gimple_uid (s2))
1189 return false;
1191 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1192 unsigned int uid = gimple_uid (s1);
1193 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1195 gimple s = gsi_stmt (gsi);
1196 if (gimple_uid (s) != uid)
1197 break;
1198 if (s == s2)
1199 return true;
1202 return false;
1205 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1208 /* Insert STMT after INSERT_POINT. */
1210 static void
1211 insert_stmt_after (gimple stmt, gimple insert_point)
1213 gimple_stmt_iterator gsi;
1214 basic_block bb;
1216 if (gimple_code (insert_point) == GIMPLE_PHI)
1217 bb = gimple_bb (insert_point);
1218 else if (!stmt_ends_bb_p (insert_point))
1220 gsi = gsi_for_stmt (insert_point);
1221 gimple_set_uid (stmt, gimple_uid (insert_point));
1222 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1223 return;
1225 else
1226 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1227 thus if it must end a basic block, it should be a call that can
1228 throw, or some assignment that can throw. If it throws, the LHS
1229 of it will not be initialized though, so only valid places using
1230 the SSA_NAME should be dominated by the fallthru edge. */
1231 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1232 gsi = gsi_after_labels (bb);
1233 if (gsi_end_p (gsi))
1235 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1236 gimple_set_uid (stmt,
1237 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1239 else
1240 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1241 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1244 /* Builds one statement performing OP1 OPCODE OP2, creating a new SSA
1245 name of type TYPE for the result. Places the statement after the definition of either
1246 OP1 or OP2. Returns the new statement. */
1248 static gimple
1249 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1251 gimple op1def = NULL, op2def = NULL;
1252 gimple_stmt_iterator gsi;
1253 tree op;
1254 gimple sum;
1256 /* Create the addition statement. */
1257 op = make_ssa_name (type, NULL);
1258 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1260 /* Find an insertion place and insert. */
1261 if (TREE_CODE (op1) == SSA_NAME)
1262 op1def = SSA_NAME_DEF_STMT (op1);
1263 if (TREE_CODE (op2) == SSA_NAME)
1264 op2def = SSA_NAME_DEF_STMT (op2);
1265 if ((!op1def || gimple_nop_p (op1def))
1266 && (!op2def || gimple_nop_p (op2def)))
1268 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1269 if (gsi_end_p (gsi))
1271 gimple_stmt_iterator gsi2
1272 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR));
1273 gimple_set_uid (sum,
1274 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1276 else
1277 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1278 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1280 else
1282 gimple insert_point;
1283 if ((!op1def || gimple_nop_p (op1def))
1284 || (op2def && !gimple_nop_p (op2def)
1285 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1286 insert_point = op2def;
1287 else
1288 insert_point = op1def;
1289 insert_stmt_after (sum, insert_point);
1291 update_stmt (sum);
1293 return sum;
1296 /* Perform un-distribution of divisions and multiplications.
1297 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1298 to (A + B) / X for real X.
1300 The algorithm is organized as follows.
1302 - First we walk the addition chain *OPS looking for summands that
1303 are defined by a multiplication or a real division. This results
1304 in the candidates bitmap with relevant indices into *OPS.
1306 - Second we build the chains of multiplications or divisions for
1307 these candidates, counting the number of occurrences of (operand, code)
1308 pairs in all of the candidates chains.
1310 - Third we sort the (operand, code) pairs by number of occurrence and
1311 process them starting with the pair with the most uses.
1313 * For each such pair we walk the candidates again to build a
1314 second candidate bitmap noting all multiplication/division chains
1315 that have at least one occurrence of (operand, code).
1317 * We build an alternate addition chain only covering these
1318 candidates with one (operand, code) operation removed from their
1319 multiplication/division chain.
1321 * The first candidate gets replaced by the alternate addition chain
1322 multiplied/divided by the operand.
1324 * All candidate chains get disabled for further processing and
1325 processing of (operand, code) pairs continues.
1327 The alternate addition chains built are re-processed by the main
1328 reassociation algorithm which allows optimizing a * x * y + b * y * x
1329 to (a + b) * x * y in one invocation of the reassociation pass. */
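/* Concrete illustration (added for exposition): for the addition chain

     t1 = a * x;  t2 = b * x;  r = t1 + t2;

   the pair (x, MULT_EXPR) occurs in both summands, so both chains become
   candidates.  One occurrence of x is stripped from each, the stripped
   summands are added together, and the sum is multiplied by x again:

     s = a + b;  r = s * x;

   which the main reassociation machinery then processes further.  */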
1331 static bool
1332 undistribute_ops_list (enum tree_code opcode,
1333 vec<operand_entry_t> *ops, struct loop *loop)
1335 unsigned int length = ops->length ();
1336 operand_entry_t oe1;
1337 unsigned i, j;
1338 sbitmap candidates, candidates2;
1339 unsigned nr_candidates, nr_candidates2;
1340 sbitmap_iterator sbi0;
1341 vec<operand_entry_t> *subops;
1342 hash_table <oecount_hasher> ctable;
1343 bool changed = false;
1344 int next_oecount_id = 0;
1346 if (length <= 1
1347 || opcode != PLUS_EXPR)
1348 return false;
1350 /* Build a list of candidates to process. */
1351 candidates = sbitmap_alloc (length);
1352 bitmap_clear (candidates);
1353 nr_candidates = 0;
1354 FOR_EACH_VEC_ELT (*ops, i, oe1)
1356 enum tree_code dcode;
1357 gimple oe1def;
1359 if (TREE_CODE (oe1->op) != SSA_NAME)
1360 continue;
1361 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1362 if (!is_gimple_assign (oe1def))
1363 continue;
1364 dcode = gimple_assign_rhs_code (oe1def);
1365 if ((dcode != MULT_EXPR
1366 && dcode != RDIV_EXPR)
1367 || !is_reassociable_op (oe1def, dcode, loop))
1368 continue;
1370 bitmap_set_bit (candidates, i);
1371 nr_candidates++;
1374 if (nr_candidates < 2)
1376 sbitmap_free (candidates);
1377 return false;
1380 if (dump_file && (dump_flags & TDF_DETAILS))
1382 fprintf (dump_file, "searching for un-distribute opportunities ");
1383 print_generic_expr (dump_file,
1384 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1385 fprintf (dump_file, " %d\n", nr_candidates);
1388 /* Build linearized sub-operand lists and the counting table. */
1389 cvec.create (0);
1390 ctable.create (15);
1391 /* ??? Macro arguments cannot have multi-argument template types in
1392 them. This typedef is needed to work around that limitation. */
1393 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1394 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1395 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1397 gimple oedef;
1398 enum tree_code oecode;
1399 unsigned j;
1401 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1402 oecode = gimple_assign_rhs_code (oedef);
1403 linearize_expr_tree (&subops[i], oedef,
1404 associative_tree_code (oecode), false);
1406 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1408 oecount c;
1409 void **slot;
1410 size_t idx;
1411 c.oecode = oecode;
1412 c.cnt = 1;
1413 c.id = next_oecount_id++;
1414 c.op = oe1->op;
1415 cvec.safe_push (c);
1416 idx = cvec.length () + 41;
1417 slot = ctable.find_slot ((void *)idx, INSERT);
1418 if (!*slot)
1420 *slot = (void *)idx;
1422 else
1424 cvec.pop ();
1425 cvec[(size_t)*slot - 42].cnt++;
1429 ctable.dispose ();
1431 /* Sort the counting table. */
1432 cvec.qsort (oecount_cmp);
1434 if (dump_file && (dump_flags & TDF_DETAILS))
1436 oecount *c;
1437 fprintf (dump_file, "Candidates:\n");
1438 FOR_EACH_VEC_ELT (cvec, j, c)
1440 fprintf (dump_file, " %u %s: ", c->cnt,
1441 c->oecode == MULT_EXPR
1442 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1443 print_generic_expr (dump_file, c->op, 0);
1444 fprintf (dump_file, "\n");
1448 /* Process the (operand, code) pairs in order of most occurrence. */
1449 candidates2 = sbitmap_alloc (length);
1450 while (!cvec.is_empty ())
1452 oecount *c = &cvec.last ();
1453 if (c->cnt < 2)
1454 break;
1456 /* Now collect the operands in the outer chain that contain
1457 the common operand in their inner chain. */
1458 bitmap_clear (candidates2);
1459 nr_candidates2 = 0;
1460 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1462 gimple oedef;
1463 enum tree_code oecode;
1464 unsigned j;
1465 tree op = (*ops)[i]->op;
1467 /* If we undistributed in this chain already this may be
1468 a constant. */
1469 if (TREE_CODE (op) != SSA_NAME)
1470 continue;
1472 oedef = SSA_NAME_DEF_STMT (op);
1473 oecode = gimple_assign_rhs_code (oedef);
1474 if (oecode != c->oecode)
1475 continue;
1477 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1479 if (oe1->op == c->op)
1481 bitmap_set_bit (candidates2, i);
1482 ++nr_candidates2;
1483 break;
1488 if (nr_candidates2 >= 2)
1490 operand_entry_t oe1, oe2;
1491 gimple prod;
1492 int first = bitmap_first_set_bit (candidates2);
1494 /* Build the new addition chain. */
1495 oe1 = (*ops)[first];
1496 if (dump_file && (dump_flags & TDF_DETAILS))
1498 fprintf (dump_file, "Building (");
1499 print_generic_expr (dump_file, oe1->op, 0);
1501 zero_one_operation (&oe1->op, c->oecode, c->op);
1502 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1504 gimple sum;
1505 oe2 = (*ops)[i];
1506 if (dump_file && (dump_flags & TDF_DETAILS))
1508 fprintf (dump_file, " + ");
1509 print_generic_expr (dump_file, oe2->op, 0);
1511 zero_one_operation (&oe2->op, c->oecode, c->op);
1512 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1513 oe1->op, oe2->op, opcode);
1514 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1515 oe2->rank = 0;
1516 oe1->op = gimple_get_lhs (sum);
1519 /* Apply the multiplication/division. */
1520 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1521 oe1->op, c->op, c->oecode);
1522 if (dump_file && (dump_flags & TDF_DETAILS))
1524 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1525 print_generic_expr (dump_file, c->op, 0);
1526 fprintf (dump_file, "\n");
1529 /* Record it in the addition chain and disable further
1530 undistribution with this op. */
1531 oe1->op = gimple_assign_lhs (prod);
1532 oe1->rank = get_rank (oe1->op);
1533 subops[first].release ();
1535 changed = true;
1538 cvec.pop ();
1541 for (i = 0; i < ops->length (); ++i)
1542 subops[i].release ();
1543 free (subops);
1544 cvec.release ();
1545 sbitmap_free (candidates);
1546 sbitmap_free (candidates2);
1548 return changed;
1551 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1552 expression, examine the other OPS to see if any of them are comparisons
1553 of the same values, which we may be able to combine or eliminate.
1554 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1556 static bool
1557 eliminate_redundant_comparison (enum tree_code opcode,
1558 vec<operand_entry_t> *ops,
1559 unsigned int currindex,
1560 operand_entry_t curr)
1562 tree op1, op2;
1563 enum tree_code lcode, rcode;
1564 gimple def1, def2;
1565 int i;
1566 operand_entry_t oe;
1568 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1569 return false;
1571 /* Check that CURR is a comparison. */
1572 if (TREE_CODE (curr->op) != SSA_NAME)
1573 return false;
1574 def1 = SSA_NAME_DEF_STMT (curr->op);
1575 if (!is_gimple_assign (def1))
1576 return false;
1577 lcode = gimple_assign_rhs_code (def1);
1578 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1579 return false;
1580 op1 = gimple_assign_rhs1 (def1);
1581 op2 = gimple_assign_rhs2 (def1);
1583 /* Now look for a similar comparison in the remaining OPS. */
1584 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1586 tree t;
1588 if (TREE_CODE (oe->op) != SSA_NAME)
1589 continue;
1590 def2 = SSA_NAME_DEF_STMT (oe->op);
1591 if (!is_gimple_assign (def2))
1592 continue;
1593 rcode = gimple_assign_rhs_code (def2);
1594 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1595 continue;
1597 /* If we got here, we have a match. See if we can combine the
1598 two comparisons. */
1599 if (opcode == BIT_IOR_EXPR)
1600 t = maybe_fold_or_comparisons (lcode, op1, op2,
1601 rcode, gimple_assign_rhs1 (def2),
1602 gimple_assign_rhs2 (def2));
1603 else
1604 t = maybe_fold_and_comparisons (lcode, op1, op2,
1605 rcode, gimple_assign_rhs1 (def2),
1606 gimple_assign_rhs2 (def2));
1607 if (!t)
1608 continue;
1610 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1611 always give us a boolean_type_node value back. If the original
1612 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1613 we need to convert. */
1614 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1615 t = fold_convert (TREE_TYPE (curr->op), t);
1617 if (TREE_CODE (t) != INTEGER_CST
1618 && !operand_equal_p (t, curr->op, 0))
1620 enum tree_code subcode;
1621 tree newop1, newop2;
1622 if (!COMPARISON_CLASS_P (t))
1623 continue;
1624 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1625 STRIP_USELESS_TYPE_CONVERSION (newop1);
1626 STRIP_USELESS_TYPE_CONVERSION (newop2);
1627 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1628 continue;
1631 if (dump_file && (dump_flags & TDF_DETAILS))
1633 fprintf (dump_file, "Equivalence: ");
1634 print_generic_expr (dump_file, curr->op, 0);
1635 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1636 print_generic_expr (dump_file, oe->op, 0);
1637 fprintf (dump_file, " -> ");
1638 print_generic_expr (dump_file, t, 0);
1639 fprintf (dump_file, "\n");
1642 /* Now we can delete oe, as it has been subsumed by the new combined
1643 expression t. */
1644 ops->ordered_remove (i);
1645 reassociate_stats.ops_eliminated ++;
1647 /* If t is the same as curr->op, we're done. Otherwise we must
1648 replace curr->op with t. Special case is if we got a constant
1649 back, in which case we add it to the end instead of in place of
1650 the current entry. */
1651 if (TREE_CODE (t) == INTEGER_CST)
1653 ops->ordered_remove (currindex);
1654 add_to_ops_vec (ops, t);
1656 else if (!operand_equal_p (t, curr->op, 0))
1658 gimple sum;
1659 enum tree_code subcode;
1660 tree newop1;
1661 tree newop2;
1662 gcc_assert (COMPARISON_CLASS_P (t));
1663 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1664 STRIP_USELESS_TYPE_CONVERSION (newop1);
1665 STRIP_USELESS_TYPE_CONVERSION (newop2);
1666 gcc_checking_assert (is_gimple_val (newop1)
1667 && is_gimple_val (newop2));
1668 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1669 curr->op = gimple_get_lhs (sum);
1671 return true;
1674 return false;
1677 /* Perform various identities and other optimizations on the list of
1678 operand entries, stored in OPS. The tree code for the binary
1679 operation between all the operands is OPCODE. */
1681 static void
1682 optimize_ops_list (enum tree_code opcode,
1683 vec<operand_entry_t> *ops)
1685 unsigned int length = ops->length ();
1686 unsigned int i;
1687 operand_entry_t oe;
1688 operand_entry_t oelast = NULL;
1689 bool iterate = false;
1691 if (length == 1)
1692 return;
1694 oelast = ops->last ();
1696 /* If the last two are constants, pop the constants off, merge them
1697 and try the next two. */
1698 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1700 operand_entry_t oelm1 = (*ops)[length - 2];
1702 if (oelm1->rank == 0
1703 && is_gimple_min_invariant (oelm1->op)
1704 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1705 TREE_TYPE (oelast->op)))
1707 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1708 oelm1->op, oelast->op);
1710 if (folded && is_gimple_min_invariant (folded))
1712 if (dump_file && (dump_flags & TDF_DETAILS))
1713 fprintf (dump_file, "Merging constants\n");
1715 ops->pop ();
1716 ops->pop ();
1718 add_to_ops_vec (ops, folded);
1719 reassociate_stats.constants_eliminated++;
1721 optimize_ops_list (opcode, ops);
1722 return;
1727 eliminate_using_constants (opcode, ops);
1728 oelast = NULL;
1730 for (i = 0; ops->iterate (i, &oe);)
1732 bool done = false;
1734 if (eliminate_not_pairs (opcode, ops, i, oe))
1735 return;
1736 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1737 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1738 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1740 if (done)
1741 return;
1742 iterate = true;
1743 oelast = NULL;
1744 continue;
1746 oelast = oe;
1747 i++;
1750 length = ops->length ();
1751 oelast = ops->last ();
1753 if (iterate)
1754 optimize_ops_list (opcode, ops);
1757 /* The following functions are subroutines to optimize_range_tests and allow
1758 it to try to change a logical combination of comparisons into a range
1759 test.
1761 For example, both
1762 X == 2 || X == 5 || X == 3 || X == 4
1763 and
1764 X >= 2 && X <= 5
1765 are converted to
1766 (unsigned) (X - 2) <= 3
1768 For more information see comments above fold_test_range in fold-const.c,
1769 this implementation is for GIMPLE. */
1771 struct range_entry
1772 {
1773 tree exp;
1774 tree low;
1775 tree high;
1776 bool in_p;
1777 bool strict_overflow_p;
1778 unsigned int idx, next;
1779 };
1781 /* This is similar to make_range in fold-const.c, but on top of
1782 GIMPLE instead of trees. If EXP is non-NULL, it should be
1783 an SSA_NAME and STMT argument is ignored, otherwise STMT
1784 argument should be a GIMPLE_COND. */
1786 static void
1787 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1789 int in_p;
1790 tree low, high;
1791 bool is_bool, strict_overflow_p;
1793 r->exp = NULL_TREE;
1794 r->in_p = false;
1795 r->strict_overflow_p = false;
1796 r->low = NULL_TREE;
1797 r->high = NULL_TREE;
1798 if (exp != NULL_TREE
1799 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1800 return;
1802 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1803 and see if we can refine the range. Some of the cases below may not
1804 happen, but it doesn't seem worth worrying about this. We "continue"
1805 the outer loop when we've changed something; otherwise we "break"
1806 the switch, which will "break" the while. */
1807 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1808 high = low;
1809 in_p = 0;
1810 strict_overflow_p = false;
1811 is_bool = false;
1812 if (exp == NULL_TREE)
1813 is_bool = true;
1814 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1816 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1817 is_bool = true;
1818 else
1819 return;
1821 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1822 is_bool = true;
1824 while (1)
1826 enum tree_code code;
1827 tree arg0, arg1, exp_type;
1828 tree nexp;
1829 location_t loc;
1831 if (exp != NULL_TREE)
1833 if (TREE_CODE (exp) != SSA_NAME)
1834 break;
1836 stmt = SSA_NAME_DEF_STMT (exp);
1837 if (!is_gimple_assign (stmt))
1838 break;
1840 code = gimple_assign_rhs_code (stmt);
1841 arg0 = gimple_assign_rhs1 (stmt);
1842 arg1 = gimple_assign_rhs2 (stmt);
1843 exp_type = TREE_TYPE (exp);
1845 else
1847 code = gimple_cond_code (stmt);
1848 arg0 = gimple_cond_lhs (stmt);
1849 arg1 = gimple_cond_rhs (stmt);
1850 exp_type = boolean_type_node;
1853 if (TREE_CODE (arg0) != SSA_NAME)
1854 break;
1855 loc = gimple_location (stmt);
1856 switch (code)
1858 case BIT_NOT_EXPR:
1859 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1860 /* Ensure the range is either +[-,0], +[0,0],
1861 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1862 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1863 or similar expression of unconditional true or
1864 false, it should not be negated. */
1865 && ((high && integer_zerop (high))
1866 || (low && integer_onep (low))))
1868 in_p = !in_p;
1869 exp = arg0;
1870 continue;
1872 break;
1873 case SSA_NAME:
1874 exp = arg0;
1875 continue;
1876 CASE_CONVERT:
1877 if (is_bool)
1878 goto do_default;
1879 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1881 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1882 is_bool = true;
1883 else
1884 return;
1886 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1887 is_bool = true;
1888 goto do_default;
1889 case EQ_EXPR:
1890 case NE_EXPR:
1891 case LT_EXPR:
1892 case LE_EXPR:
1893 case GE_EXPR:
1894 case GT_EXPR:
1895 is_bool = true;
1896 /* FALLTHRU */
1897 default:
1898 if (!is_bool)
1899 return;
1900 do_default:
1901 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1902 &low, &high, &in_p,
1903 &strict_overflow_p);
1904 if (nexp != NULL_TREE)
1906 exp = nexp;
1907 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1908 continue;
1910 break;
1912 break;
1914 if (is_bool)
1916 r->exp = exp;
1917 r->in_p = in_p;
1918 r->low = low;
1919 r->high = high;
1920 r->strict_overflow_p = strict_overflow_p;
1924 /* Comparison function for qsort. Sort entries
1925 without SSA_NAME exp first, then with SSA_NAMEs sorted
1926 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1927 by increasing ->low and if ->low is the same, by increasing
1928 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1929 maximum. */
1931 static int
1932 range_entry_cmp (const void *a, const void *b)
1934 const struct range_entry *p = (const struct range_entry *) a;
1935 const struct range_entry *q = (const struct range_entry *) b;
1937 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1939 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1941 /* Group range_entries for the same SSA_NAME together. */
1942 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1943 return -1;
1944 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1945 return 1;
1946 /* If ->low is different, NULL low goes first, then by
1947 ascending low. */
1948 if (p->low != NULL_TREE)
1950 if (q->low != NULL_TREE)
1952 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1953 p->low, q->low);
1954 if (tem && integer_onep (tem))
1955 return -1;
1956 tem = fold_binary (GT_EXPR, boolean_type_node,
1957 p->low, q->low);
1958 if (tem && integer_onep (tem))
1959 return 1;
1961 else
1962 return 1;
1964 else if (q->low != NULL_TREE)
1965 return -1;
1966 /* If ->high is different, NULL high goes last, before that by
1967 ascending high. */
1968 if (p->high != NULL_TREE)
1970 if (q->high != NULL_TREE)
1972 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1973 p->high, q->high);
1974 if (tem && integer_onep (tem))
1975 return -1;
1976 tem = fold_binary (GT_EXPR, boolean_type_node,
1977 p->high, q->high);
1978 if (tem && integer_onep (tem))
1979 return 1;
1981 else
1982 return -1;
1984 else if (p->high != NULL_TREE)
1985 return 1;
1986 /* If both ranges are the same, sort below by ascending idx. */
1988 else
1989 return 1;
1991 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1992 return -1;
1994 if (p->idx < q->idx)
1995 return -1;
1996 else
1998 gcc_checking_assert (p->idx > q->idx);
1999 return 1;
2003 /* Helper routine of optimize_range_tests.
2004 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2005 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2006 OPCODE and OPS are arguments of optimize_range_tests. Return
2007 true if the range merge has been successful.
2008 If OPCODE is ERROR_MARK, this is called from within
2009 maybe_optimize_range_tests and is performing inter-bb range optimization.
2010 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2011 oe->rank. */
2013 static bool
2014 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2015 unsigned int count, enum tree_code opcode,
2016 vec<operand_entry_t> *ops, tree exp, bool in_p,
2017 tree low, tree high, bool strict_overflow_p)
2019 operand_entry_t oe = (*ops)[range->idx];
2020 tree op = oe->op;
2021 gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id));
2022 location_t loc = gimple_location (stmt);
2023 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2024 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
2025 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2026 gimple_stmt_iterator gsi;
2028 if (tem == NULL_TREE)
2029 return false;
2031 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2032 warning_at (loc, OPT_Wstrict_overflow,
2033 "assuming signed overflow does not occur "
2034 "when simplifying range test");
2036 if (dump_file && (dump_flags & TDF_DETAILS))
2038 struct range_entry *r;
2039 fprintf (dump_file, "Optimizing range tests ");
2040 print_generic_expr (dump_file, range->exp, 0);
2041 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2042 print_generic_expr (dump_file, range->low, 0);
2043 fprintf (dump_file, ", ");
2044 print_generic_expr (dump_file, range->high, 0);
2045 fprintf (dump_file, "]");
2046 for (r = otherrange; r < otherrange + count; r++)
2048 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2049 print_generic_expr (dump_file, r->low, 0);
2050 fprintf (dump_file, ", ");
2051 print_generic_expr (dump_file, r->high, 0);
2052 fprintf (dump_file, "]");
2054 fprintf (dump_file, "\n into ");
2055 print_generic_expr (dump_file, tem, 0);
2056 fprintf (dump_file, "\n");
2059 if (opcode == BIT_IOR_EXPR
2060 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2061 tem = invert_truthvalue_loc (loc, tem);
2063 tem = fold_convert_loc (loc, optype, tem);
2064 gsi = gsi_for_stmt (stmt);
2065 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2066 GSI_SAME_STMT);
2067 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
2068 if (gimple_uid (gsi_stmt (gsi)))
2069 break;
2070 else
2071 gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2073 oe->op = tem;
2074 range->exp = exp;
2075 range->low = low;
2076 range->high = high;
2077 range->in_p = in_p;
2078 range->strict_overflow_p = false;
2080 for (range = otherrange; range < otherrange + count; range++)
2082 oe = (*ops)[range->idx];
2083 /* Now change all the other range test immediate uses, so that
2084 those tests will be optimized away. */
2085 if (opcode == ERROR_MARK)
2087 if (oe->op)
2088 oe->op = build_int_cst (TREE_TYPE (oe->op),
2089 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2090 else
2091 oe->op = (oe->rank == BIT_IOR_EXPR
2092 ? boolean_false_node : boolean_true_node);
2094 else
2095 oe->op = error_mark_node;
2096 range->exp = NULL_TREE;
2098 return true;
2101 /* Optimize X == CST1 || X == CST2
2102 if popcount (CST1 ^ CST2) == 1 into
2103 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2104 Similarly for ranges. E.g.
2105 X != 2 && X != 3 && X != 10 && X != 11
2106 will be transformed by the previous optimization into
2107 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2108 and this loop can transform that into
2109 !(((X & ~8) - 2U) <= 1U). */
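/* Illustrative instance (hypothetical values): for X == 4 || X == 6 we have
   CST1 ^ CST2 == 2, a single bit, so the test becomes
   (X & ~2) == (4 & ~2), i.e. (X & ~2) == 4, which holds exactly for
   X in { 4, 6 }.  */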
2111 static bool
2112 optimize_range_tests_xor (enum tree_code opcode, tree type,
2113 tree lowi, tree lowj, tree highi, tree highj,
2114 vec<operand_entry_t> *ops,
2115 struct range_entry *rangei,
2116 struct range_entry *rangej)
2118 tree lowxor, highxor, tem, exp;
2119 /* Check highi ^ lowi == highj ^ lowj and
2120 popcount (highi ^ lowi) == 1. */
2121 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2122 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2123 return false;
2124 if (tree_log2 (lowxor) < 0)
2125 return false;
2126 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2127 if (!tree_int_cst_equal (lowxor, highxor))
2128 return false;
2130 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2131 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2132 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2133 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2134 if (update_range_test (rangei, rangej, 1, opcode, ops, exp,
2135 rangei->in_p, lowj, highj,
2136 rangei->strict_overflow_p
2137 || rangej->strict_overflow_p))
2138 return true;
2139 return false;
2142 /* Optimize X == CST1 || X == CST2
2143 if popcount (CST2 - CST1) == 1 into
2144 ((X - CST1) & ~(CST2 - CST1)) == 0.
2145 Similarly for ranges. E.g.
2146 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2147 || X == 75 || X == 45
2148 will be transformed by the previous optimization into
2149 (X - 43U) <= 3U || (X - 75U) <= 3U
2150 and this loop can transform that into
2151 ((X - 43U) & ~(75U - 43U)) <= 3U. */
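/* Illustrative instance (hypothetical values): for X == 43 || X == 75 we
   have CST2 - CST1 == 32, a single bit, so the test becomes
   ((X - 43U) & ~32U) == 0: X == 43 gives 0 & ~32 == 0, X == 75 gives
   32 & ~32 == 0, and any other value leaves some bit set.  */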
2152 static bool
2153 optimize_range_tests_diff (enum tree_code opcode, tree type,
2154 tree lowi, tree lowj, tree highi, tree highj,
2155 vec<operand_entry_t> *ops,
2156 struct range_entry *rangei,
2157 struct range_entry *rangej)
2159 tree tem1, tem2, mask;
2160 /* Check highi - lowi == highj - lowj. */
2161 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2162 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2163 return false;
2164 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2165 if (!tree_int_cst_equal (tem1, tem2))
2166 return false;
2167 /* Check popcount (lowj - lowi) == 1. */
2168 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2169 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2170 return false;
2171 if (tree_log2 (tem1) < 0)
2172 return false;
2174 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2175 tem1 = fold_binary (MINUS_EXPR, type, rangei->exp, lowi);
2176 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2177 lowj = build_int_cst (type, 0);
2178 if (update_range_test (rangei, rangej, 1, opcode, ops, tem1,
2179 rangei->in_p, lowj, tem2,
2180 rangei->strict_overflow_p
2181 || rangej->strict_overflow_p))
2182 return true;
2183 return false;
2186 /* Helper of optimize_range_tests.  Performs the checks common to
2187    optimize_range_tests_xor and optimize_range_tests_diff.
2188    If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2189    Else it calls optimize_range_tests_diff.  */
2191 static bool
2192 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2193 bool optimize_xor, vec<operand_entry_t> *ops,
2194 struct range_entry *ranges)
2196 int i, j;
2197 bool any_changes = false;
2198 for (i = first; i < length; i++)
2200 tree lowi, highi, lowj, highj, type, tem;
2202 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2203 continue;
2204 type = TREE_TYPE (ranges[i].exp);
2205 if (!INTEGRAL_TYPE_P (type))
2206 continue;
2207 lowi = ranges[i].low;
2208 if (lowi == NULL_TREE)
2209 lowi = TYPE_MIN_VALUE (type);
2210 highi = ranges[i].high;
2211 if (highi == NULL_TREE)
2212 continue;
2213 for (j = i + 1; j < length && j < i + 64; j++)
2215 bool changes;
2216 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2217 continue;
2218 lowj = ranges[j].low;
2219 if (lowj == NULL_TREE)
2220 continue;
2221 highj = ranges[j].high;
2222 if (highj == NULL_TREE)
2223 highj = TYPE_MAX_VALUE (type);
2224 /* Check lowj > highi. */
2225 tem = fold_binary (GT_EXPR, boolean_type_node,
2226 lowj, highi);
2227 if (tem == NULL_TREE || !integer_onep (tem))
2228 continue;
2229 if (optimize_xor)
2230 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2231 highi, highj, ops,
2232 ranges + i, ranges + j);
2233 else
2234 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2235 highi, highj, ops,
2236 ranges + i, ranges + j);
2237 if (changes)
2239 any_changes = true;
2240 break;
2244 return any_changes;
2247 /* Optimize range tests, similarly to how fold_range_test optimizes
2248    them on trees.  The tree code for the binary
2249 operation between all the operands is OPCODE.
2250 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2251 maybe_optimize_range_tests for inter-bb range optimization.
2252 In that case if oe->op is NULL, oe->id is bb->index whose
2253 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2254 the actual opcode. */
2256 static bool
2257 optimize_range_tests (enum tree_code opcode,
2258 vec<operand_entry_t> *ops)
2260 unsigned int length = ops->length (), i, j, first;
2261 operand_entry_t oe;
2262 struct range_entry *ranges;
2263 bool any_changes = false;
2265 if (length == 1)
2266 return false;
2268 ranges = XNEWVEC (struct range_entry, length);
2269 for (i = 0; i < length; i++)
2271 oe = (*ops)[i];
2272 ranges[i].idx = i;
2273 init_range_entry (ranges + i, oe->op,
2274 oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id)));
2275       /* For |, invert it now; we will invert it again before emitting
2276 	 the optimized expression.  */
2277 if (opcode == BIT_IOR_EXPR
2278 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2279 ranges[i].in_p = !ranges[i].in_p;
2282 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2283 for (i = 0; i < length; i++)
2284 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2285 break;
2287 /* Try to merge ranges. */
2288 for (first = i; i < length; i++)
2290 tree low = ranges[i].low;
2291 tree high = ranges[i].high;
2292 int in_p = ranges[i].in_p;
2293 bool strict_overflow_p = ranges[i].strict_overflow_p;
2294 int update_fail_count = 0;
2296 for (j = i + 1; j < length; j++)
2298 if (ranges[i].exp != ranges[j].exp)
2299 break;
2300 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2301 ranges[j].in_p, ranges[j].low, ranges[j].high))
2302 break;
2303 strict_overflow_p |= ranges[j].strict_overflow_p;
2306 if (j == i + 1)
2307 continue;
2309 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2310 ops, ranges[i].exp, in_p, low, high,
2311 strict_overflow_p))
2313 i = j - 1;
2314 any_changes = true;
2316 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2317 while update_range_test would fail. */
2318 else if (update_fail_count == 64)
2319 i = j - 1;
2320 else
2321 ++update_fail_count;
2324 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2325 ops, ranges);
2327 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2328 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2329 ops, ranges);
2331 if (any_changes && opcode != ERROR_MARK)
2333 j = 0;
2334 FOR_EACH_VEC_ELT (*ops, i, oe)
2336 if (oe->op == error_mark_node)
2337 continue;
2338 else if (i != j)
2339 (*ops)[j] = oe;
2340 j++;
2342 ops->truncate (j);
2345 XDELETEVEC (ranges);
2346 return any_changes;
2349 /* Return true if STMT is a cast like:
2350 <bb N>:
2352 _123 = (int) _234;
2354 <bb M>:
2355 # _345 = PHI <_123(N), 1(...), 1(...)>
2356    where _234 has bool type, _123 has a single use and
2357 bb N has a single successor M. This is commonly used in
2358 the last block of a range test. */
2360 static bool
2361 final_range_test_p (gimple stmt)
2363 basic_block bb, rhs_bb;
2364 edge e;
2365 tree lhs, rhs;
2366 use_operand_p use_p;
2367 gimple use_stmt;
2369 if (!gimple_assign_cast_p (stmt))
2370 return false;
2371 bb = gimple_bb (stmt);
2372 if (!single_succ_p (bb))
2373 return false;
2374 e = single_succ_edge (bb);
2375 if (e->flags & EDGE_COMPLEX)
2376 return false;
2378 lhs = gimple_assign_lhs (stmt);
2379 rhs = gimple_assign_rhs1 (stmt);
2380 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2381 || TREE_CODE (rhs) != SSA_NAME
2382 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2383 return false;
2385 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2386 if (!single_imm_use (lhs, &use_p, &use_stmt))
2387 return false;
2389 if (gimple_code (use_stmt) != GIMPLE_PHI
2390 || gimple_bb (use_stmt) != e->dest)
2391 return false;
2393 /* And that the rhs is defined in the same loop. */
2394 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2395 if (rhs_bb == NULL
2396 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2397 return false;
2399 return true;
2402 /* Return true if BB is a suitable basic block for inter-bb range test
2403    optimization.  If BACKWARD is true, BB should be the only predecessor
2404    of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2405    or is compared against to find the common basic block to which all
2406    conditions branch if true resp. false.  If BACKWARD is false, TEST_BB
2407    should be the only predecessor of BB.  */
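/* A rough sketch of the CFG shape this recognizes (hypothetical block
   numbers and names):

     <bb 2>:  if (x_1 == 1) goto <bb 5>; else goto <bb 3>;
     <bb 3>:  if (x_1 == 3) goto <bb 5>; else goto <bb 4>;
     <bb 4>:  if (x_1 == 5) goto <bb 5>; else goto <bb 6>;

   Each block in the chain has one edge to the common *OTHER_BB (bb 5 here)
   and one edge to the next test, and the PHI arguments in *OTHER_BB coming
   from these blocks must agree as checked below.  */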
2409 static bool
2410 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2411 bool backward)
2413 edge_iterator ei, ei2;
2414 edge e, e2;
2415 gimple stmt;
2416 gimple_stmt_iterator gsi;
2417 bool other_edge_seen = false;
2418 bool is_cond;
2420 if (test_bb == bb)
2421 return false;
2422 /* Check last stmt first. */
2423 stmt = last_stmt (bb);
2424 if (stmt == NULL
2425 || (gimple_code (stmt) != GIMPLE_COND
2426 && (backward || !final_range_test_p (stmt)))
2427 || gimple_visited_p (stmt)
2428 || stmt_could_throw_p (stmt)
2429 || *other_bb == bb)
2430 return false;
2431 is_cond = gimple_code (stmt) == GIMPLE_COND;
2432 if (is_cond)
2434 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2435 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2436 to *OTHER_BB (if not set yet, try to find it out). */
2437 if (EDGE_COUNT (bb->succs) != 2)
2438 return false;
2439 FOR_EACH_EDGE (e, ei, bb->succs)
2441 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2442 return false;
2443 if (e->dest == test_bb)
2445 if (backward)
2446 continue;
2447 else
2448 return false;
2450 if (e->dest == bb)
2451 return false;
2452 if (*other_bb == NULL)
2454 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2455 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2456 return false;
2457 else if (e->dest == e2->dest)
2458 *other_bb = e->dest;
2459 if (*other_bb == NULL)
2460 return false;
2462 if (e->dest == *other_bb)
2463 other_edge_seen = true;
2464 else if (backward)
2465 return false;
2467 if (*other_bb == NULL || !other_edge_seen)
2468 return false;
2470 else if (single_succ (bb) != *other_bb)
2471 return false;
2473 /* Now check all PHIs of *OTHER_BB. */
2474 e = find_edge (bb, *other_bb);
2475 e2 = find_edge (test_bb, *other_bb);
2476 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2478 gimple phi = gsi_stmt (gsi);
2479 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2480 corresponding to BB and TEST_BB predecessor must be the same. */
2481 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2482 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2484 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2485 one of the PHIs should have the lhs of the last stmt in
2486 that block as PHI arg and that PHI should have 0 or 1
2487 corresponding to it in all other range test basic blocks
2488 considered. */
2489 if (!is_cond)
2491 if (gimple_phi_arg_def (phi, e->dest_idx)
2492 == gimple_assign_lhs (stmt)
2493 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2494 || integer_onep (gimple_phi_arg_def (phi,
2495 e2->dest_idx))))
2496 continue;
2498 else
2500 gimple test_last = last_stmt (test_bb);
2501 if (gimple_code (test_last) != GIMPLE_COND
2502 && gimple_phi_arg_def (phi, e2->dest_idx)
2503 == gimple_assign_lhs (test_last)
2504 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2505 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2506 continue;
2509 return false;
2512 return true;
2515 /* Return true if BB doesn't have side-effects that would disallow
2516 range test optimization, all SSA_NAMEs set in the bb are consumed
2517 in the bb and there are no PHIs. */
2519 static bool
2520 no_side_effect_bb (basic_block bb)
2522 gimple_stmt_iterator gsi;
2523 gimple last;
2525 if (!gimple_seq_empty_p (phi_nodes (bb)))
2526 return false;
2527 last = last_stmt (bb);
2528 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2530 gimple stmt = gsi_stmt (gsi);
2531 tree lhs;
2532 imm_use_iterator imm_iter;
2533 use_operand_p use_p;
2535 if (is_gimple_debug (stmt))
2536 continue;
2537 if (gimple_has_side_effects (stmt))
2538 return false;
2539 if (stmt == last)
2540 return true;
2541 if (!is_gimple_assign (stmt))
2542 return false;
2543 lhs = gimple_assign_lhs (stmt);
2544 if (TREE_CODE (lhs) != SSA_NAME)
2545 return false;
2546 if (gimple_assign_rhs_could_trap_p (stmt))
2547 return false;
2548 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2550 gimple use_stmt = USE_STMT (use_p);
2551 if (is_gimple_debug (use_stmt))
2552 continue;
2553 if (gimple_bb (use_stmt) != bb)
2554 return false;
2557 return false;
2560 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2561 return true and fill in *OPS recursively. */
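/* A small sketch (hypothetical SSA names), assuming each name below has a
   single use:
     _3 = a_1 < 10;
     _4 = b_2 != 0;
     _5 = _3 | _4;
   get_ops (_5, BIT_IOR_EXPR, ...) pushes _3 and _4 into *OPS, since neither
   is itself a reassociable |; _5 itself is not pushed.  */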
2563 static bool
2564 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2565 struct loop *loop)
2567 gimple stmt = SSA_NAME_DEF_STMT (var);
2568 tree rhs[2];
2569 int i;
2571 if (!is_reassociable_op (stmt, code, loop))
2572 return false;
2574 rhs[0] = gimple_assign_rhs1 (stmt);
2575 rhs[1] = gimple_assign_rhs2 (stmt);
2576 gimple_set_visited (stmt, true);
2577 for (i = 0; i < 2; i++)
2578 if (TREE_CODE (rhs[i]) == SSA_NAME
2579 && !get_ops (rhs[i], code, ops, loop)
2580 && has_single_use (rhs[i]))
2582 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2584 oe->op = rhs[i];
2585 oe->rank = code;
2586 oe->id = 0;
2587 oe->count = 1;
2588 ops->safe_push (oe);
2590 return true;
2593 /* Find the ops that were added by get_ops starting from VAR, see if
2594 they were changed during update_range_test and if yes, create new
2595 stmts. */
2597 static tree
2598 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2599 unsigned int *pidx, struct loop *loop)
2601 gimple stmt = SSA_NAME_DEF_STMT (var);
2602 tree rhs[4];
2603 int i;
2605 if (!is_reassociable_op (stmt, code, loop))
2606 return NULL;
2608 rhs[0] = gimple_assign_rhs1 (stmt);
2609 rhs[1] = gimple_assign_rhs2 (stmt);
2610 rhs[2] = rhs[0];
2611 rhs[3] = rhs[1];
2612 for (i = 0; i < 2; i++)
2613 if (TREE_CODE (rhs[i]) == SSA_NAME)
2615 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2616 if (rhs[2 + i] == NULL_TREE)
2618 if (has_single_use (rhs[i]))
2619 rhs[2 + i] = ops[(*pidx)++]->op;
2620 else
2621 rhs[2 + i] = rhs[i];
2624 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2625 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2627 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2628 var = make_ssa_name (TREE_TYPE (var), NULL);
2629 gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
2630 var, rhs[2], rhs[3]);
2631 gimple_set_uid (g, gimple_uid (stmt));
2632 gimple_set_visited (g, true);
2633 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2635 return var;
2638 /* Structure to track the initial value passed to get_ops and
2639 the range in the ops vector for each basic block. */
2641 struct inter_bb_range_test_entry
2643 tree op;
2644 unsigned int first_idx, last_idx;
2647 /* Inter-bb range test optimization. */
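/* A sketch of the overall effect (hypothetical source): a chain of blocks
   testing x_1 == 0, x_1 == 1, x_1 == 2 and x_1 == 3, each branching to the
   same destination, can be merged into a single test of the range [0, 3]
   emitted in one of the blocks, while the remaining GIMPLE_CONDs are folded
   to constant true/false.  */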
2649 static void
2650 maybe_optimize_range_tests (gimple stmt)
2652 basic_block first_bb = gimple_bb (stmt);
2653 basic_block last_bb = first_bb;
2654 basic_block other_bb = NULL;
2655 basic_block bb;
2656 edge_iterator ei;
2657 edge e;
2658 vec<operand_entry_t> ops = vNULL;
2659 vec<inter_bb_range_test_entry> bbinfo = vNULL;
2660 bool any_changes = false;
2662 /* Consider only basic blocks that end with GIMPLE_COND or
2663 a cast statement satisfying final_range_test_p. All
2664 but the last bb in the first_bb .. last_bb range
2665 should end with GIMPLE_COND. */
2666 if (gimple_code (stmt) == GIMPLE_COND)
2668 if (EDGE_COUNT (first_bb->succs) != 2)
2669 return;
2671 else if (final_range_test_p (stmt))
2672 other_bb = single_succ (first_bb);
2673 else
2674 return;
2676 if (stmt_could_throw_p (stmt))
2677 return;
2679 /* As relative ordering of post-dominator sons isn't fixed,
2680 maybe_optimize_range_tests can be called first on any
2681 bb in the range we want to optimize. So, start searching
2682      backwards, to see if first_bb can be set to a predecessor.  */
2683 while (single_pred_p (first_bb))
2685 basic_block pred_bb = single_pred (first_bb);
2686 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2687 break;
2688 if (!no_side_effect_bb (first_bb))
2689 break;
2690 first_bb = pred_bb;
2692 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2693 Before starting forward search in last_bb successors, find
2694 out the other_bb. */
2695 if (first_bb == last_bb)
2697 other_bb = NULL;
2698 /* As non-GIMPLE_COND last stmt always terminates the range,
2699 if forward search didn't discover anything, just give up. */
2700 if (gimple_code (stmt) != GIMPLE_COND)
2701 return;
2702 /* Look at both successors. Either it ends with a GIMPLE_COND
2703 and satisfies suitable_cond_bb, or ends with a cast and
2704 other_bb is that cast's successor. */
2705 FOR_EACH_EDGE (e, ei, first_bb->succs)
2706 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2707 || e->dest == first_bb)
2708 return;
2709 else if (single_pred_p (e->dest))
2711 stmt = last_stmt (e->dest);
2712 if (stmt
2713 && gimple_code (stmt) == GIMPLE_COND
2714 && EDGE_COUNT (e->dest->succs) == 2)
2716 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2717 break;
2718 else
2719 other_bb = NULL;
2721 else if (stmt
2722 && final_range_test_p (stmt)
2723 && find_edge (first_bb, single_succ (e->dest)))
2725 other_bb = single_succ (e->dest);
2726 if (other_bb == first_bb)
2727 other_bb = NULL;
2730 if (other_bb == NULL)
2731 return;
2733 /* Now do the forward search, moving last_bb to successor bbs
2734 that aren't other_bb. */
2735 while (EDGE_COUNT (last_bb->succs) == 2)
2737 FOR_EACH_EDGE (e, ei, last_bb->succs)
2738 if (e->dest != other_bb)
2739 break;
2740 if (e == NULL)
2741 break;
2742 if (!single_pred_p (e->dest))
2743 break;
2744 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2745 break;
2746 if (!no_side_effect_bb (e->dest))
2747 break;
2748 last_bb = e->dest;
2750 if (first_bb == last_bb)
2751 return;
2752   /* Here basic blocks first_bb through last_bb's predecessor
2753      all end with GIMPLE_COND, each of them has one edge to
2754      other_bb and another to the next block in the range,
2755      no block except first_bb has side-effects, and
2756      last_bb ends with either a GIMPLE_COND or a cast satisfying
2757      final_range_test_p.  */
2758 for (bb = last_bb; ; bb = single_pred (bb))
2760 enum tree_code code;
2761 tree lhs, rhs;
2762 inter_bb_range_test_entry bb_ent;
2764 bb_ent.op = NULL_TREE;
2765 bb_ent.first_idx = ops.length ();
2766 bb_ent.last_idx = bb_ent.first_idx;
2767 e = find_edge (bb, other_bb);
2768 stmt = last_stmt (bb);
2769 gimple_set_visited (stmt, true);
2770 if (gimple_code (stmt) != GIMPLE_COND)
2772 use_operand_p use_p;
2773 gimple phi;
2774 edge e2;
2775 unsigned int d;
2777 lhs = gimple_assign_lhs (stmt);
2778 rhs = gimple_assign_rhs1 (stmt);
2779 gcc_assert (bb == last_bb);
2781 /* stmt is
2782 _123 = (int) _234;
2784 followed by:
2785 <bb M>:
2786 # _345 = PHI <_123(N), 1(...), 1(...)>
2788 	     or 0 instead of 1.  If it is 0, the _234
2789 	     range test is anded together with all the
2790 	     other range tests; if it is 1, it is ored with
2791 	     them.  */
2792 single_imm_use (lhs, &use_p, &phi);
2793 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2794 e2 = find_edge (first_bb, other_bb);
2795 d = e2->dest_idx;
2796 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2797 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2798 code = BIT_AND_EXPR;
2799 else
2801 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2802 code = BIT_IOR_EXPR;
2805 /* If _234 SSA_NAME_DEF_STMT is
2806 _234 = _567 | _789;
2807 	     (or &, corresponding to 1/0 in the phi arguments),
2808 push into ops the individual range test arguments
2809 of the bitwise or resp. and, recursively. */
2810 if (!get_ops (rhs, code, &ops,
2811 loop_containing_stmt (stmt))
2812 && has_single_use (rhs))
2814 /* Otherwise, push the _234 range test itself. */
2815 operand_entry_t oe
2816 = (operand_entry_t) pool_alloc (operand_entry_pool);
2818 oe->op = rhs;
2819 oe->rank = code;
2820 oe->id = 0;
2821 oe->count = 1;
2822 ops.safe_push (oe);
2823 bb_ent.last_idx++;
2825 else
2826 bb_ent.last_idx = ops.length ();
2827 bb_ent.op = rhs;
2828 bbinfo.safe_push (bb_ent);
2829 continue;
2831 /* Otherwise stmt is GIMPLE_COND. */
2832 code = gimple_cond_code (stmt);
2833 lhs = gimple_cond_lhs (stmt);
2834 rhs = gimple_cond_rhs (stmt);
2835 if (TREE_CODE (lhs) == SSA_NAME
2836 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2837 && ((code != EQ_EXPR && code != NE_EXPR)
2838 || rhs != boolean_false_node
2839 /* Either push into ops the individual bitwise
2840 or resp. and operands, depending on which
2841 edge is other_bb. */
2842 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2843 ^ (code == EQ_EXPR))
2844 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2845 loop_containing_stmt (stmt))))
2847 /* Or push the GIMPLE_COND stmt itself. */
2848 operand_entry_t oe
2849 = (operand_entry_t) pool_alloc (operand_entry_pool);
2851 oe->op = NULL;
2852 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2853 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2854 	  /* oe->op = NULL signals that there is no SSA_NAME
2855 	     for the range test, and oe->id instead is the
2856 	     number of the basic block at whose end the
2857 	     GIMPLE_COND is.  */
2858 oe->id = bb->index;
2859 oe->count = 1;
2860 ops.safe_push (oe);
2861 bb_ent.op = NULL;
2862 bb_ent.last_idx++;
2864 else if (ops.length () > bb_ent.first_idx)
2866 bb_ent.op = lhs;
2867 bb_ent.last_idx = ops.length ();
2869 bbinfo.safe_push (bb_ent);
2870 if (bb == first_bb)
2871 break;
2873 if (ops.length () > 1)
2874 any_changes = optimize_range_tests (ERROR_MARK, &ops);
2875 if (any_changes)
2877 unsigned int idx;
2878       /* update_ops relies on the has_single_use predicates returning the
2879 	 same values as they did during get_ops earlier.  Additionally it
2880 	 never removes statements, only adds new ones, and it walks
2881 	 from the single imm use and checks the predicate before
2882 	 making those changes.
2883 	 On the other hand, handling GIMPLE_CONDs directly can turn
2884 	 previously multiply-used SSA_NAMEs into single-use SSA_NAMEs, so
2885 	 it needs to be done in a separate loop afterwards.  */
2886 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2888 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2889 && bbinfo[idx].op != NULL_TREE)
2891 tree new_op;
2893 stmt = last_stmt (bb);
2894 new_op = update_ops (bbinfo[idx].op,
2895 (enum tree_code)
2896 ops[bbinfo[idx].first_idx]->rank,
2897 ops, &bbinfo[idx].first_idx,
2898 loop_containing_stmt (stmt));
2899 if (new_op == NULL_TREE)
2901 gcc_assert (bb == last_bb);
2902 new_op = ops[bbinfo[idx].first_idx++]->op;
2904 if (bbinfo[idx].op != new_op)
2906 imm_use_iterator iter;
2907 use_operand_p use_p;
2908 gimple use_stmt, cast_stmt = NULL;
2910 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
2911 if (is_gimple_debug (use_stmt))
2912 continue;
2913 else if (gimple_code (use_stmt) == GIMPLE_COND
2914 || gimple_code (use_stmt) == GIMPLE_PHI)
2915 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2916 SET_USE (use_p, new_op);
2917 else if (gimple_assign_cast_p (use_stmt))
2918 cast_stmt = use_stmt;
2919 else
2920 gcc_unreachable ();
2921 if (cast_stmt)
2923 gcc_assert (bb == last_bb);
2924 tree lhs = gimple_assign_lhs (cast_stmt);
2925 tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
2926 enum tree_code rhs_code
2927 = gimple_assign_rhs_code (cast_stmt);
2928 gimple g
2929 = gimple_build_assign_with_ops (rhs_code, new_lhs,
2930 new_op, NULL_TREE);
2931 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
2932 gimple_set_uid (g, gimple_uid (cast_stmt));
2933 gimple_set_visited (g, true);
2934 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2935 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2936 if (is_gimple_debug (use_stmt))
2937 continue;
2938 else if (gimple_code (use_stmt) == GIMPLE_COND
2939 || gimple_code (use_stmt) == GIMPLE_PHI)
2940 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2941 SET_USE (use_p, new_lhs);
2942 else
2943 gcc_unreachable ();
2947 if (bb == first_bb)
2948 break;
2950 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2952 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2953 && bbinfo[idx].op == NULL_TREE
2954 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
2956 stmt = last_stmt (bb);
2957 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
2958 gimple_cond_make_false (stmt);
2959 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
2960 gimple_cond_make_true (stmt);
2961 else
2963 gimple_cond_set_code (stmt, NE_EXPR);
2964 gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
2965 gimple_cond_set_rhs (stmt, boolean_false_node);
2967 update_stmt (stmt);
2969 if (bb == first_bb)
2970 break;
2973 bbinfo.release ();
2974 ops.release ();
2977 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2978    of STMT in its operands.  This is also known as a "destructive
2979 update" operation. */
2981 static bool
2982 is_phi_for_stmt (gimple stmt, tree operand)
2984 gimple def_stmt;
2985 tree lhs;
2986 use_operand_p arg_p;
2987 ssa_op_iter i;
2989 if (TREE_CODE (operand) != SSA_NAME)
2990 return false;
2992 lhs = gimple_assign_lhs (stmt);
2994 def_stmt = SSA_NAME_DEF_STMT (operand);
2995 if (gimple_code (def_stmt) != GIMPLE_PHI)
2996 return false;
2998 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2999 if (lhs == USE_FROM_PTR (arg_p))
3000 return true;
3001 return false;
3004 /* Remove def stmt of VAR if VAR has zero uses and recurse
3005 on rhs1 operand if so. */
3007 static void
3008 remove_visited_stmt_chain (tree var)
3010 gimple stmt;
3011 gimple_stmt_iterator gsi;
3013 while (1)
3015 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3016 return;
3017 stmt = SSA_NAME_DEF_STMT (var);
3018 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3020 var = gimple_assign_rhs1 (stmt);
3021 gsi = gsi_for_stmt (stmt);
3022 gsi_remove (&gsi, true);
3023 release_defs (stmt);
3025 else
3026 return;
3030 /* This function checks three consecutive operands in
3031    the passed operands vector OPS starting from OPINDEX and
3032    swaps two operands if it is profitable for the binary operation
3033    consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3035 We pair ops with the same rank if possible.
3037 The alternative we try is to see if STMT is a destructive
3038 update style statement, which is like:
3039 b = phi (a, ...)
3040 a = c + b;
3041 In that case, we want to use the destructive update form to
3042 expose the possible vectorizer sum reduction opportunity.
3043 In that case, the third operand will be the phi node. This
3044 check is not performed if STMT is null.
3046 We could, of course, try to be better as noted above, and do a
3047 lot of work to try to find these opportunities in >3 operand
3048 cases, but it is unlikely to be worth it. */
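/* For example (hypothetical ranks): if OPS[OPINDEX..OPINDEX+2] have ranks
   (5, 5, 3), the first and third entries are swapped so that the binary
   operation consuming the last two entries sees the equal-rank pair
   (5, 5).  */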
3050 static void
3051 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3052 unsigned int opindex, gimple stmt)
3054 operand_entry_t oe1, oe2, oe3;
3056 oe1 = ops[opindex];
3057 oe2 = ops[opindex + 1];
3058 oe3 = ops[opindex + 2];
3060 if ((oe1->rank == oe2->rank
3061 && oe2->rank != oe3->rank)
3062 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3063 && !is_phi_for_stmt (stmt, oe1->op)
3064 && !is_phi_for_stmt (stmt, oe2->op)))
3066 struct operand_entry temp = *oe3;
3067 oe3->op = oe1->op;
3068 oe3->rank = oe1->rank;
3069 oe1->op = temp.op;
3070       oe1->rank = temp.rank;
3072 else if ((oe1->rank == oe3->rank
3073 && oe2->rank != oe3->rank)
3074 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3075 && !is_phi_for_stmt (stmt, oe1->op)
3076 && !is_phi_for_stmt (stmt, oe3->op)))
3078 struct operand_entry temp = *oe2;
3079 oe2->op = oe1->op;
3080 oe2->rank = oe1->rank;
3081 oe1->op = temp.op;
3082 oe1->rank = temp.rank;
3086 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3087 two definitions, otherwise return STMT. */
3089 static inline gimple
3090 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3092 if (TREE_CODE (rhs1) == SSA_NAME
3093 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3094 stmt = SSA_NAME_DEF_STMT (rhs1);
3095 if (TREE_CODE (rhs2) == SSA_NAME
3096 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3097 stmt = SSA_NAME_DEF_STMT (rhs2);
3098 return stmt;
3101 /* Recursively rewrite our linearized statements so that the operators
3102 match those in OPS[OPINDEX], putting the computation in rank
3103 order. Return new lhs. */
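/* An illustrative trace (hypothetical operands): for a linearized PLUS
   chain and OPS == { a, b, c } (indices 0, 1, 2) the rewrite produces
     t_1 = b + c;
     lhs = t_1 + a;
   i.e. the last two entries of OPS are combined deepest in the chain and
   OPS[0] ends up at the root statement.  */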
3105 static tree
3106 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3107 vec<operand_entry_t> ops, bool changed)
3109 tree rhs1 = gimple_assign_rhs1 (stmt);
3110 tree rhs2 = gimple_assign_rhs2 (stmt);
3111 tree lhs = gimple_assign_lhs (stmt);
3112 operand_entry_t oe;
3114 /* The final recursion case for this function is that you have
3115 exactly two operations left.
3116      If we had exactly one op in the entire list to start with, we
3117 would have never called this function, and the tail recursion
3118 rewrites them one at a time. */
3119 if (opindex + 2 == ops.length ())
3121 operand_entry_t oe1, oe2;
3123 oe1 = ops[opindex];
3124 oe2 = ops[opindex + 1];
3126 if (rhs1 != oe1->op || rhs2 != oe2->op)
3128 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3129 unsigned int uid = gimple_uid (stmt);
3131 if (dump_file && (dump_flags & TDF_DETAILS))
3133 fprintf (dump_file, "Transforming ");
3134 print_gimple_stmt (dump_file, stmt, 0, 0);
3137 if (changed)
3139 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3140 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3141 stmt
3142 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3143 lhs, oe1->op, oe2->op);
3144 gimple_set_uid (stmt, uid);
3145 gimple_set_visited (stmt, true);
3146 if (insert_point == gsi_stmt (gsi))
3147 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3148 else
3149 insert_stmt_after (stmt, insert_point);
3151 else
3153 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3154 == stmt);
3155 gimple_assign_set_rhs1 (stmt, oe1->op);
3156 gimple_assign_set_rhs2 (stmt, oe2->op);
3157 update_stmt (stmt);
3160 if (rhs1 != oe1->op && rhs1 != oe2->op)
3161 remove_visited_stmt_chain (rhs1);
3163 if (dump_file && (dump_flags & TDF_DETAILS))
3165 fprintf (dump_file, " into ");
3166 print_gimple_stmt (dump_file, stmt, 0, 0);
3169 return lhs;
3172 /* If we hit here, we should have 3 or more ops left. */
3173 gcc_assert (opindex + 2 < ops.length ());
3175 /* Rewrite the next operator. */
3176 oe = ops[opindex];
3178 /* Recurse on the LHS of the binary operator, which is guaranteed to
3179 be the non-leaf side. */
3180 tree new_rhs1
3181 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3182 changed || oe->op != rhs2);
3184 if (oe->op != rhs2 || new_rhs1 != rhs1)
3186 if (dump_file && (dump_flags & TDF_DETAILS))
3188 fprintf (dump_file, "Transforming ");
3189 print_gimple_stmt (dump_file, stmt, 0, 0);
3192 /* If changed is false, this is either opindex == 0
3193 or all outer rhs2's were equal to corresponding oe->op,
3194 and powi_result is NULL.
3195 That means lhs is equivalent before and after reassociation.
3196 Otherwise ensure the old lhs SSA_NAME is not reused and
3197 create a new stmt as well, so that any debug stmts will be
3198 properly adjusted. */
3199 if (changed)
3201 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3202 unsigned int uid = gimple_uid (stmt);
3203 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3205 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3206 stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3207 lhs, new_rhs1, oe->op);
3208 gimple_set_uid (stmt, uid);
3209 gimple_set_visited (stmt, true);
3210 if (insert_point == gsi_stmt (gsi))
3211 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3212 else
3213 insert_stmt_after (stmt, insert_point);
3215 else
3217 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3218 == stmt);
3219 gimple_assign_set_rhs1 (stmt, new_rhs1);
3220 gimple_assign_set_rhs2 (stmt, oe->op);
3221 update_stmt (stmt);
3224 if (dump_file && (dump_flags & TDF_DETAILS))
3226 fprintf (dump_file, " into ");
3227 print_gimple_stmt (dump_file, stmt, 0, 0);
3230 return lhs;
3233 /* Find out how many cycles we need to compute a chain of statements.
3234    OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is the
3235    maximum number of independent statements we may execute per cycle.  */
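/* A worked example of the computation below (hypothetical numbers): for
   OPS_NUM == 10 and CPU_WIDTH == 2 we first spend 10 / (2 * 2) == 2 cycles
   reducing by CPU_WIDTH each, leaving 10 - 2 * 2 == 6 operands; 6 is not a
   power of two, so another floor_log2 (6) + 1 == 3 cycles are needed,
   5 cycles in total.  */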
3237 static int
3238 get_required_cycles (int ops_num, int cpu_width)
3240 int res;
3241 int elog;
3242 unsigned int rest;
3244   /* While we have more than 2 * cpu_width operands
3245      we may reduce the number of operands by cpu_width
3246      per cycle.  */
3247 res = ops_num / (2 * cpu_width);
3249   /* The remaining operand count may be halved each cycle
3250      until we have only one operand.  */
3251 rest = (unsigned)(ops_num - res * cpu_width);
3252 elog = exact_log2 (rest);
3253 if (elog >= 0)
3254 res += elog;
3255 else
3256 res += floor_log2 (rest) + 1;
3258 return res;
3261 /* Returns an optimal number of registers to use for computation of
3262 given statements. */
3264 static int
3265 get_reassociation_width (int ops_num, enum tree_code opc,
3266 enum machine_mode mode)
3268 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3269 int width;
3270 int width_min;
3271 int cycles_best;
3273 if (param_width > 0)
3274 width = param_width;
3275 else
3276 width = targetm.sched.reassociation_width (opc, mode);
3278 if (width == 1)
3279 return width;
3281 /* Get the minimal time required for sequence computation. */
3282 cycles_best = get_required_cycles (ops_num, width);
3284 /* Check if we may use less width and still compute sequence for
3285 the same time. It will allow us to reduce registers usage.
3286 get_required_cycles is monotonically increasing with lower width
3287 so we can perform a binary search for the minimal width that still
3288 results in the optimal cycle count. */
3289 width_min = 1;
3290 while (width > width_min)
3292 int width_mid = (width + width_min) / 2;
3294 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3295 width = width_mid;
3296 else if (width_min < width_mid)
3297 width_min = width_mid;
3298 else
3299 break;
3302 return width;
3305 /* Recursively rewrite our linearized statements so that the operators
3306 match those in OPS[OPINDEX], putting the computation in rank
3307 order and trying to allow operations to be executed in
3308 parallel. */
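/* For instance (hypothetical operands), with WIDTH == 2 and
   OPS == { a, b, c, d } the loop below emits roughly
     t_1 = c + d;
     t_2 = a + b;
     lhs = t_1 + t_2;
   so the two partial sums can execute in parallel instead of forming one
   fully serial chain.  */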
3310 static void
3311 rewrite_expr_tree_parallel (gimple stmt, int width,
3312 vec<operand_entry_t> ops)
3314 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3315 int op_num = ops.length ();
3316 int stmt_num = op_num - 1;
3317 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3318 int op_index = op_num - 1;
3319 int stmt_index = 0;
3320 int ready_stmts_end = 0;
3321 int i = 0;
3322 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3324 /* We start expression rewriting from the top statements.
3325 So, in this loop we create a full list of statements
3326 we will work with. */
3327 stmts[stmt_num - 1] = stmt;
3328 for (i = stmt_num - 2; i >= 0; i--)
3329 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3331 for (i = 0; i < stmt_num; i++)
3333 tree op1, op2;
3335 /* Determine whether we should use results of
3336 already handled statements or not. */
3337 if (ready_stmts_end == 0
3338 && (i - stmt_index >= width || op_index < 1))
3339 ready_stmts_end = i;
3341       /* Now we choose operands for the next statement.  A nonzero
3342 	 value in ready_stmts_end means that we should use
3343 	 the results of already generated statements as new operands.  */
3344 if (ready_stmts_end > 0)
3346 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3347 if (ready_stmts_end > stmt_index)
3348 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3349 else if (op_index >= 0)
3350 op2 = ops[op_index--]->op;
3351 else
3353 gcc_assert (stmt_index < i);
3354 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3357 if (stmt_index >= ready_stmts_end)
3358 ready_stmts_end = 0;
3360 else
3362 if (op_index > 1)
3363 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3364 op2 = ops[op_index--]->op;
3365 op1 = ops[op_index--]->op;
3368 /* If we emit the last statement then we should put
3369 operands into the last statement. It will also
3370 break the loop. */
3371 if (op_index < 0 && stmt_index == i)
3372 i = stmt_num - 1;
3374 if (dump_file && (dump_flags & TDF_DETAILS))
3376 fprintf (dump_file, "Transforming ");
3377 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3380       /* We keep the original statement only for the last one.  All
3381 	 others are recreated.  */
3382 if (i == stmt_num - 1)
3384 gimple_assign_set_rhs1 (stmts[i], op1);
3385 gimple_assign_set_rhs2 (stmts[i], op2);
3386 update_stmt (stmts[i]);
3388 else
3389 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3391 if (dump_file && (dump_flags & TDF_DETAILS))
3393 fprintf (dump_file, " into ");
3394 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3398 remove_visited_stmt_chain (last_rhs1);
3401 /* Transform STMT, which is really (A + B) + (C + D), into the left
3402    linear form, ((A + B) + C) + D.
3403 Recurse on D if necessary. */
3405 static void
3406 linearize_expr (gimple stmt)
3408 gimple_stmt_iterator gsi;
3409 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3410 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3411 gimple oldbinrhs = binrhs;
3412 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3413 gimple newbinrhs = NULL;
3414 struct loop *loop = loop_containing_stmt (stmt);
3415 tree lhs = gimple_assign_lhs (stmt);
3417 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3418 && is_reassociable_op (binrhs, rhscode, loop));
3420 gsi = gsi_for_stmt (stmt);
3422 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3423 binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
3424 make_ssa_name (TREE_TYPE (lhs), NULL),
3425 gimple_assign_lhs (binlhs),
3426 gimple_assign_rhs2 (binrhs));
3427 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3428 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3429 gimple_set_uid (binrhs, gimple_uid (stmt));
3431 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3432 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3434 if (dump_file && (dump_flags & TDF_DETAILS))
3436 fprintf (dump_file, "Linearized: ");
3437 print_gimple_stmt (dump_file, stmt, 0, 0);
3440 reassociate_stats.linearized++;
3441 update_stmt (stmt);
3443 gsi = gsi_for_stmt (oldbinrhs);
3444 gsi_remove (&gsi, true);
3445 release_defs (oldbinrhs);
3447 gimple_set_visited (stmt, true);
3448 gimple_set_visited (binlhs, true);
3449 gimple_set_visited (binrhs, true);
3451 /* Tail recurse on the new rhs if it still needs reassociation. */
3452 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3453 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3454 want to change the algorithm while converting to tuples. */
3455 linearize_expr (stmt);
3458 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3459 it. Otherwise, return NULL. */
3461 static gimple
3462 get_single_immediate_use (tree lhs)
3464 use_operand_p immuse;
3465 gimple immusestmt;
3467 if (TREE_CODE (lhs) == SSA_NAME
3468 && single_imm_use (lhs, &immuse, &immusestmt)
3469 && is_gimple_assign (immusestmt))
3470 return immusestmt;
3472 return NULL;
3475 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3476 representing the negated value. Insertions of any necessary
3477 instructions go before GSI.
3478 This function is recursive in that, if you hand it "a_5" as the
3479 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3480 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3482 static tree
3483 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3485 gimple negatedefstmt = NULL;
3486 tree resultofnegate;
3487 gimple_stmt_iterator gsi;
3488 unsigned int uid;
3490 /* If we are trying to negate a name, defined by an add, negate the
3491 add operands instead. */
3492 if (TREE_CODE (tonegate) == SSA_NAME)
3493 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3494 if (TREE_CODE (tonegate) == SSA_NAME
3495 && is_gimple_assign (negatedefstmt)
3496 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3497 && has_single_use (gimple_assign_lhs (negatedefstmt))
3498 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3500 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3501 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3502 tree lhs = gimple_assign_lhs (negatedefstmt);
3503 gimple g;
3505 gsi = gsi_for_stmt (negatedefstmt);
3506 rhs1 = negate_value (rhs1, &gsi);
3508 gsi = gsi_for_stmt (negatedefstmt);
3509 rhs2 = negate_value (rhs2, &gsi);
3511 gsi = gsi_for_stmt (negatedefstmt);
3512 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3513 gimple_set_visited (negatedefstmt, true);
3514 g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
3515 gimple_set_uid (g, gimple_uid (negatedefstmt));
3516 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3517 return lhs;
3520 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3521 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3522 NULL_TREE, true, GSI_SAME_STMT);
3523 gsi = *gsip;
3524 uid = gimple_uid (gsi_stmt (gsi));
3525 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3527 gimple stmt = gsi_stmt (gsi);
3528 if (gimple_uid (stmt) != 0)
3529 break;
3530 gimple_set_uid (stmt, uid);
3532 return resultofnegate;
3535 /* Return true if we should break up the subtract in STMT into an add
3536    with negate.  This is true when the subtract's operands are really
3537 adds, or the subtract itself is used in an add expression. In
3538 either case, breaking up the subtract into an add with negate
3539 exposes the adds to reassociation. */
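/* E.g. (hypothetical names) for
     c_1 = a_2 - b_3;
     d_4 = c_1 + e_5;
   the subtract feeds an addition, so it is worth rewriting it as
   c_1 = a_2 + -b_3 to expose the whole sum to reassociation.  */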
3541 static bool
3542 should_break_up_subtract (gimple stmt)
3544 tree lhs = gimple_assign_lhs (stmt);
3545 tree binlhs = gimple_assign_rhs1 (stmt);
3546 tree binrhs = gimple_assign_rhs2 (stmt);
3547 gimple immusestmt;
3548 struct loop *loop = loop_containing_stmt (stmt);
3550 if (TREE_CODE (binlhs) == SSA_NAME
3551 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3552 return true;
3554 if (TREE_CODE (binrhs) == SSA_NAME
3555 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3556 return true;
3558 if (TREE_CODE (lhs) == SSA_NAME
3559 && (immusestmt = get_single_immediate_use (lhs))
3560 && is_gimple_assign (immusestmt)
3561 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3562 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3563 return true;
3564 return false;
3567 /* Transform STMT from A - B into A + -B. */
3569 static void
3570 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3572 tree rhs1 = gimple_assign_rhs1 (stmt);
3573 tree rhs2 = gimple_assign_rhs2 (stmt);
3575 if (dump_file && (dump_flags & TDF_DETAILS))
3577 fprintf (dump_file, "Breaking up subtract ");
3578 print_gimple_stmt (dump_file, stmt, 0, 0);
3581 rhs2 = negate_value (rhs2, gsip);
3582 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3583 update_stmt (stmt);
3586 /* Determine whether STMT is a builtin call that raises an SSA name
3587 to an integer power and has only one use. If so, and this is early
3588 reassociation and unsafe math optimizations are permitted, place
3589 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3590 If any of these conditions does not hold, return FALSE. */
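/* E.g. (hypothetical calls) t_1 = __builtin_pow (x_2, 4.0) yields
   *BASE == x_2 and *EXPONENT == 4, and t_3 = __builtin_powi (y_4, 5)
   yields *BASE == y_4 and *EXPONENT == 5, while __builtin_pow (x_2, 1.5)
   is rejected because 1.5 is not an exact integer.  */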
3592 static bool
3593 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3595 tree fndecl, arg1;
3596 REAL_VALUE_TYPE c, cint;
3598 if (!first_pass_instance
3599 || !flag_unsafe_math_optimizations
3600 || !is_gimple_call (stmt)
3601 || !has_single_use (gimple_call_lhs (stmt)))
3602 return false;
3604 fndecl = gimple_call_fndecl (stmt);
3606 if (!fndecl
3607 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3608 return false;
3610 switch (DECL_FUNCTION_CODE (fndecl))
3612 CASE_FLT_FN (BUILT_IN_POW):
3613 *base = gimple_call_arg (stmt, 0);
3614 arg1 = gimple_call_arg (stmt, 1);
3616 if (TREE_CODE (arg1) != REAL_CST)
3617 return false;
3619 c = TREE_REAL_CST (arg1);
3621 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3622 return false;
3624 *exponent = real_to_integer (&c);
3625 real_from_integer (&cint, VOIDmode, *exponent,
3626 *exponent < 0 ? -1 : 0, 0);
3627 if (!real_identical (&c, &cint))
3628 return false;
3630 break;
3632 CASE_FLT_FN (BUILT_IN_POWI):
3633 *base = gimple_call_arg (stmt, 0);
3634 arg1 = gimple_call_arg (stmt, 1);
3636 if (!host_integerp (arg1, 0))
3637 return false;
3639 *exponent = TREE_INT_CST_LOW (arg1);
3640 break;
3642 default:
3643 return false;
3646 /* Expanding negative exponents is generally unproductive, so we don't
3647 complicate matters with those. Exponents of zero and one should
3648 have been handled by expression folding. */
3649 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3650 return false;
3652 return true;
3655 /* Recursively linearize a binary expression that is the RHS of STMT.
3656 Place the operands of the expression tree in the vector named OPS. */
3658 static void
3659 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3660 bool is_associative, bool set_visited)
3662 tree binlhs = gimple_assign_rhs1 (stmt);
3663 tree binrhs = gimple_assign_rhs2 (stmt);
3664 gimple binlhsdef = NULL, binrhsdef = NULL;
3665 bool binlhsisreassoc = false;
3666 bool binrhsisreassoc = false;
3667 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3668 struct loop *loop = loop_containing_stmt (stmt);
3669 tree base = NULL_TREE;
3670 HOST_WIDE_INT exponent = 0;
3672 if (set_visited)
3673 gimple_set_visited (stmt, true);
3675 if (TREE_CODE (binlhs) == SSA_NAME)
3677 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3678 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3679 && !stmt_could_throw_p (binlhsdef));
3682 if (TREE_CODE (binrhs) == SSA_NAME)
3684 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3685 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3686 && !stmt_could_throw_p (binrhsdef));
3689 /* If the LHS is not reassociable, but the RHS is, we need to swap
3690 them. If neither is reassociable, there is nothing we can do, so
3691 just put them in the ops vector. If the LHS is reassociable,
3692 linearize it. If both are reassociable, then linearize the RHS
3693 and the LHS. */
3695 if (!binlhsisreassoc)
3697 tree temp;
3699       /* If this is not an associative operation (e.g. it is a division), give up.  */
3700 if (!is_associative)
3702 add_to_ops_vec (ops, binrhs);
3703 return;
3706 if (!binrhsisreassoc)
3708 if (rhscode == MULT_EXPR
3709 && TREE_CODE (binrhs) == SSA_NAME
3710 && acceptable_pow_call (binrhsdef, &base, &exponent))
3712 add_repeat_to_ops_vec (ops, base, exponent);
3713 gimple_set_visited (binrhsdef, true);
3715 else
3716 add_to_ops_vec (ops, binrhs);
3718 if (rhscode == MULT_EXPR
3719 && TREE_CODE (binlhs) == SSA_NAME
3720 && acceptable_pow_call (binlhsdef, &base, &exponent))
3722 add_repeat_to_ops_vec (ops, base, exponent);
3723 gimple_set_visited (binlhsdef, true);
3725 else
3726 add_to_ops_vec (ops, binlhs);
3728 return;
3731 if (dump_file && (dump_flags & TDF_DETAILS))
3733 fprintf (dump_file, "swapping operands of ");
3734 print_gimple_stmt (dump_file, stmt, 0, 0);
3737 swap_ssa_operands (stmt,
3738 gimple_assign_rhs1_ptr (stmt),
3739 gimple_assign_rhs2_ptr (stmt));
3740 update_stmt (stmt);
3742 if (dump_file && (dump_flags & TDF_DETAILS))
3744 fprintf (dump_file, " is now ");
3745 print_gimple_stmt (dump_file, stmt, 0, 0);
3748 /* We want to make it so the lhs is always the reassociative op,
3749 so swap. */
3750 temp = binlhs;
3751 binlhs = binrhs;
3752 binrhs = temp;
3754 else if (binrhsisreassoc)
3756 linearize_expr (stmt);
3757 binlhs = gimple_assign_rhs1 (stmt);
3758 binrhs = gimple_assign_rhs2 (stmt);
3761 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3762 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3763 rhscode, loop));
3764 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3765 is_associative, set_visited);
3767 if (rhscode == MULT_EXPR
3768 && TREE_CODE (binrhs) == SSA_NAME
3769 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3771 add_repeat_to_ops_vec (ops, base, exponent);
3772 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3774 else
3775 add_to_ops_vec (ops, binrhs);
3778 /* Repropagate the negates back into subtracts, since no other pass
3779 currently does it. */
3781 static void
3782 repropagate_negates (void)
3784 unsigned int i = 0;
3785 tree negate;
3787 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3789 gimple user = get_single_immediate_use (negate);
3791 if (!user || !is_gimple_assign (user))
3792 continue;
3794 /* The negate operand can be either operand of a PLUS_EXPR
3795 (it can be the LHS if the RHS is a constant for example).
3797 Force the negate operand to the RHS of the PLUS_EXPR, then
3798 transform the PLUS_EXPR into a MINUS_EXPR. */
3799 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3801 /* If the negated operand appears on the LHS of the
3802 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3803 to force the negated operand to the RHS of the PLUS_EXPR. */
3804 if (gimple_assign_rhs1 (user) == negate)
3806 swap_ssa_operands (user,
3807 gimple_assign_rhs1_ptr (user),
3808 gimple_assign_rhs2_ptr (user));
3811 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3812 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3813 if (gimple_assign_rhs2 (user) == negate)
3815 tree rhs1 = gimple_assign_rhs1 (user);
3816 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3817 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3818 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3819 update_stmt (user);
3822 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3824 if (gimple_assign_rhs1 (user) == negate)
3826 /* We have
3827 x = -a
3828 y = x - b
3829 which we transform into
3830 x = a + b
3831 y = -x .
3832 This pushes down the negate which we possibly can merge
3833 into some other operation, hence insert it into the
3834 plus_negates vector. */
3835 gimple feed = SSA_NAME_DEF_STMT (negate);
3836 tree a = gimple_assign_rhs1 (feed);
3837 tree b = gimple_assign_rhs2 (user);
3838 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
3839 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
3840 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
3841 gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
3842 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3843 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
3844 user = gsi_stmt (gsi2);
3845 update_stmt (user);
3846 gsi_remove (&gsi, true);
3847 release_defs (feed);
3848 plus_negates.safe_push (gimple_assign_lhs (user));
3850 else
3852 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3853 rid of one operation. */
3854 gimple feed = SSA_NAME_DEF_STMT (negate);
3855 tree a = gimple_assign_rhs1 (feed);
3856 tree rhs1 = gimple_assign_rhs1 (user);
3857 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3858 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3859 update_stmt (gsi_stmt (gsi));
3865 /* Returns true if OP is of a type for which we can do reassociation.
3866 That is for integral or non-saturating fixed-point types, and for
3867    floating point types when associative-math is enabled.  */
3869 static bool
3870 can_reassociate_p (tree op)
3872 tree type = TREE_TYPE (op);
3873 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3874 || NON_SAT_FIXED_POINT_TYPE_P (type)
3875 || (flag_associative_math && FLOAT_TYPE_P (type)))
3876 return true;
3877 return false;
3880 /* Break up subtract operations in block BB.
3882 We do this top down because we don't know whether the subtract is
3883 part of a possible chain of reassociation except at the top.
3885 IE given
3886 d = f + g
3887 c = a + e
3888 b = c - d
3889 q = b - r
3890 k = t - q
3892 we want to break up k = t - q, but we won't until we've transformed q
3893 = b - r, which won't be broken up until we transform b = c - d.
3895 En passant, clear the GIMPLE visited flag on every statement
3896 and set UIDs within each basic block. */
3898 static void
3899 break_up_subtract_bb (basic_block bb)
3901 gimple_stmt_iterator gsi;
3902 basic_block son;
3903 unsigned int uid = 1;
3905 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3907 gimple stmt = gsi_stmt (gsi);
3908 gimple_set_visited (stmt, false);
3909 gimple_set_uid (stmt, uid++);
3911 if (!is_gimple_assign (stmt)
3912 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3913 continue;
3915 /* Look for simple gimple subtract operations. */
3916 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3918 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3919 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3920 continue;
3922 /* Check for a subtract used only in an addition. If this
3923 is the case, transform it into add of a negate for better
3924 reassociation. IE transform C = A-B into C = A + -B if C
3925 is only used in an addition. */
3926 if (should_break_up_subtract (stmt))
3927 break_up_subtract (stmt, &gsi);
3929 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3930 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3931 plus_negates.safe_push (gimple_assign_lhs (stmt));
3933 for (son = first_dom_son (CDI_DOMINATORS, bb);
3934 son;
3935 son = next_dom_son (CDI_DOMINATORS, son))
3936 break_up_subtract_bb (son);
3939 /* Used for repeated factor analysis. */
3940 struct repeat_factor_d
3942 /* An SSA name that occurs in a multiply chain. */
3943 tree factor;
3945 /* Cached rank of the factor. */
3946 unsigned rank;
3948 /* Number of occurrences of the factor in the chain. */
3949 HOST_WIDE_INT count;
3951 /* An SSA name representing the product of this factor and
3952 all factors appearing later in the repeated factor vector. */
3953 tree repr;
3956 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3957 typedef const struct repeat_factor_d *const_repeat_factor_t;
3960 static vec<repeat_factor> repeat_factor_vec;
3962 /* Used for sorting the repeat factor vector. Sort primarily by
3963 ascending occurrence count, secondarily by descending rank. */
3965 static int
3966 compare_repeat_factors (const void *x1, const void *x2)
3968 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3969 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3971 if (rf1->count != rf2->count)
3972 return rf1->count - rf2->count;
3974 return rf2->rank - rf1->rank;
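/* For example, factors {x: count 4, rank 9}, {y: count 2, rank 7} and
   {z: count 2, rank 5} sort as y, z, x -- the count-2 factors first
   (higher-ranked y ahead of z), then the count-4 factor x.  */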
3977 /* Look for repeated operands in OPS in the multiply tree rooted at
3978 STMT. Replace them with an optimal sequence of multiplies and powi
3979 builtin calls, and remove the used operands from OPS. Return an
3980 SSA name representing the value of the replacement sequence. */
3982 static tree
3983 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
3985 unsigned i, j, vec_len;
3986 int ii;
3987 operand_entry_t oe;
3988 repeat_factor_t rf1, rf2;
3989 repeat_factor rfnew;
3990 tree result = NULL_TREE;
3991 tree target_ssa, iter_result;
3992 tree type = TREE_TYPE (gimple_get_lhs (stmt));
3993 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
3994 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3995 gimple mul_stmt, pow_stmt;
3997 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3998 target. */
3999 if (!powi_fndecl)
4000 return NULL_TREE;
4002 /* Allocate the repeated factor vector. */
4003 repeat_factor_vec.create (10);
4005 /* Scan the OPS vector for all SSA names in the product and build
4006 up a vector of occurrence counts for each factor. */
4007 FOR_EACH_VEC_ELT (*ops, i, oe)
4009 if (TREE_CODE (oe->op) == SSA_NAME)
4011 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4013 if (rf1->factor == oe->op)
4015 rf1->count += oe->count;
4016 break;
4020 if (j >= repeat_factor_vec.length ())
4022 rfnew.factor = oe->op;
4023 rfnew.rank = oe->rank;
4024 rfnew.count = oe->count;
4025 rfnew.repr = NULL_TREE;
4026 repeat_factor_vec.safe_push (rfnew);
4031 /* Sort the repeated factor vector by (a) increasing occurrence count,
4032 and (b) decreasing rank. */
4033 repeat_factor_vec.qsort (compare_repeat_factors);
4035 /* It is generally best to combine as many base factors as possible
4036 into a product before applying __builtin_powi to the result.
4037 However, the sort order chosen for the repeated factor vector
4038 allows us to cache partial results for the product of the base
4039 factors for subsequent use. When we already have a cached partial
4040 result from a previous iteration, it is best to make use of it
4041 before looking for another __builtin_powi opportunity.
4043 As an example, consider x * x * y * y * y * z * z * z * z.
4044 We want to first compose the product x * y * z, raise it to the
4045 second power, then multiply this by y * z, and finally multiply
4046 by z. This can be done in 5 multiplies provided we cache y * z
4047 for use in both expressions:
4049 t1 = y * z
4050 t2 = t1 * x
4051 t3 = t2 * t2
4052 t4 = t1 * t3
4053 result = t4 * z
4055 If we instead ignored the cached y * z and first multiplied by
4056 the __builtin_powi opportunity z * z, we would get the inferior:
4058 t1 = y * z
4059 t2 = t1 * x
4060 t3 = t2 * t2
4061 t4 = z * z
4062 t5 = t3 * t4
4063 result = t5 * y */
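/* Tracing the loop below on the example above: after sorting, the
   repeat factor vector is x (count 2), y (count 3), z (count 4).  The
   first pass picks x (the first factor with count >= 2), builds the
   cached representatives z, t1 = y * z and t2 = t1 * x, and emits
   t3 = __builtin_powi (t2, 2), leaving counts x:0, y:1, z:2.  The next
   two passes then consume the cached products t1 and z with the plain
   multiplies t4 = t1 * t3 and result = t4 * z.  */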
4065 vec_len = repeat_factor_vec.length ();
4067 /* Repeatedly look for opportunities to create a builtin_powi call. */
4068 while (true)
4070 HOST_WIDE_INT power;
4072 /* First look for the largest cached product of factors from
4073 preceding iterations. If found, create a builtin_powi for
4074 it if the minimum occurrence count for its factors is at
4075 least 2, or just use this cached product as our next
4076 multiplicand if the minimum occurrence count is 1. */
4077 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4079 if (rf1->repr && rf1->count > 0)
4080 break;
4083 if (j < vec_len)
4085 power = rf1->count;
4087 if (power == 1)
4089 iter_result = rf1->repr;
4091 if (dump_file && (dump_flags & TDF_DETAILS))
4093 unsigned elt;
4094 repeat_factor_t rf;
4095 fputs ("Multiplying by cached product ", dump_file);
4096 for (elt = j; elt < vec_len; elt++)
4098 rf = &repeat_factor_vec[elt];
4099 print_generic_expr (dump_file, rf->factor, 0);
4100 if (elt < vec_len - 1)
4101 fputs (" * ", dump_file);
4103 fputs ("\n", dump_file);
4106 else
4108 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4109 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4110 build_int_cst (integer_type_node,
4111 power));
4112 gimple_call_set_lhs (pow_stmt, iter_result);
4113 gimple_set_location (pow_stmt, gimple_location (stmt));
4114 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4116 if (dump_file && (dump_flags & TDF_DETAILS))
4118 unsigned elt;
4119 repeat_factor_t rf;
4120 fputs ("Building __builtin_pow call for cached product (",
4121 dump_file);
4122 for (elt = j; elt < vec_len; elt++)
4124 rf = &repeat_factor_vec[elt];
4125 print_generic_expr (dump_file, rf->factor, 0);
4126 if (elt < vec_len - 1)
4127 fputs (" * ", dump_file);
4129 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4130 power);
4134 else
4136 /* Otherwise, find the first factor in the repeated factor
4137 vector whose occurrence count is at least 2. If no such
4138 factor exists, there are no builtin_powi opportunities
4139 remaining. */
4140 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4142 if (rf1->count >= 2)
4143 break;
4146 if (j >= vec_len)
4147 break;
4149 power = rf1->count;
4151 if (dump_file && (dump_flags & TDF_DETAILS))
4153 unsigned elt;
4154 repeat_factor_t rf;
4155 fputs ("Building __builtin_pow call for (", dump_file);
4156 for (elt = j; elt < vec_len; elt++)
4158 rf = &repeat_factor_vec[elt];
4159 print_generic_expr (dump_file, rf->factor, 0);
4160 if (elt < vec_len - 1)
4161 fputs (" * ", dump_file);
4163 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4166 reassociate_stats.pows_created++;
4168 /* Visit each element of the vector in reverse order (so that
4169 high-occurrence elements are visited first, and within the
4170 same occurrence count, lower-ranked elements are visited
4171 first). Form a linear product of all elements in this order
4172 whose occurrence count is at least that of element J.
4173 Record the SSA name representing the product of each element
4174 with all subsequent elements in the vector. */
4175 if (j == vec_len - 1)
4176 rf1->repr = rf1->factor;
4177 else
4179 for (ii = vec_len - 2; ii >= (int)j; ii--)
4181 tree op1, op2;
4183 rf1 = &repeat_factor_vec[ii];
4184 rf2 = &repeat_factor_vec[ii + 1];
4186 /* Init the last factor's representative to be itself. */
4187 if (!rf2->repr)
4188 rf2->repr = rf2->factor;
4190 op1 = rf1->factor;
4191 op2 = rf2->repr;
4193 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4194 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4195 target_ssa,
4196 op1, op2);
4197 gimple_set_location (mul_stmt, gimple_location (stmt));
4198 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4199 rf1->repr = target_ssa;
4201 /* Don't reprocess the multiply we just introduced. */
4202 gimple_set_visited (mul_stmt, true);
4206 /* Form a call to __builtin_powi for the maximum product
4207 just formed, raised to the power obtained earlier. */
4208 rf1 = &repeat_factor_vec[j];
4209 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4210 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4211 build_int_cst (integer_type_node,
4212 power));
4213 gimple_call_set_lhs (pow_stmt, iter_result);
4214 gimple_set_location (pow_stmt, gimple_location (stmt));
4215 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4218 /* If we previously formed at least one other builtin_powi call,
4219 form the product of this one and those others. */
4220 if (result)
4222 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4223 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4224 result, iter_result);
4225 gimple_set_location (mul_stmt, gimple_location (stmt));
4226 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4227 gimple_set_visited (mul_stmt, true);
4228 result = new_result;
4230 else
4231 result = iter_result;
4233 /* Decrement the occurrence count of each element in the product
4234 by the count found above, and remove this many copies of each
4235 factor from OPS. */
4236 for (i = j; i < vec_len; i++)
4238 unsigned k = power;
4239 unsigned n;
4241 rf1 = &repeat_factor_vec[i];
4242 rf1->count -= power;
4244 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4246 if (oe->op == rf1->factor)
4248 if (oe->count <= k)
4250 ops->ordered_remove (n);
4251 k -= oe->count;
4253 if (k == 0)
4254 break;
4256 else
4258 oe->count -= k;
4259 break;
4266 /* At this point all elements in the repeated factor vector have a
4267 remaining occurrence count of 0 or 1, and those with a count of 1
4268 don't have cached representatives. Re-sort the ops vector and
4269 clean up. */
4270 ops->qsort (sort_by_operand_rank);
4271 repeat_factor_vec.release ();
4273 /* Return the final product computed herein. Note that there may
4274 still be some elements with single occurrence count left in OPS;
4275 those will be handled by the normal reassociation logic. */
4276 return result;
4279 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4281 static void
4282 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4284 tree rhs1;
4286 if (dump_file && (dump_flags & TDF_DETAILS))
4288 fprintf (dump_file, "Transforming ");
4289 print_gimple_stmt (dump_file, stmt, 0, 0);
4292 rhs1 = gimple_assign_rhs1 (stmt);
4293 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4294 update_stmt (stmt);
4295 remove_visited_stmt_chain (rhs1);
4297 if (dump_file && (dump_flags & TDF_DETAILS))
4299 fprintf (dump_file, " into ");
4300 print_gimple_stmt (dump_file, stmt, 0, 0);
4304 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4306 static void
4307 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4308 tree rhs1, tree rhs2)
4310 if (dump_file && (dump_flags & TDF_DETAILS))
4312 fprintf (dump_file, "Transforming ");
4313 print_gimple_stmt (dump_file, stmt, 0, 0);
4316 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4317 update_stmt (gsi_stmt (*gsi));
4318 remove_visited_stmt_chain (rhs1);
4320 if (dump_file && (dump_flags & TDF_DETAILS))
4322 fprintf (dump_file, " into ");
4323 print_gimple_stmt (dump_file, stmt, 0, 0);
4327 /* Reassociate expressions in basic block BB and its children in the
4328 post-dominator tree. */
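/* In outline, for each reassociable binary statement, visited bottom-up
   within the block: linearize the operand tree into OPS, sort by rank,
   optimize the operand list (and re-optimize after undistribution),
   optimize range tests for bitwise AND/OR, attempt the __builtin_powi
   transformation for multiply chains (first pass only, under unsafe
   math), and finally rewrite the statement as a copy, a single
   multiply, or a re-ranked (possibly parallel) expression tree.  */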
4330 static void
4331 reassociate_bb (basic_block bb)
4333 gimple_stmt_iterator gsi;
4334 basic_block son;
4335 gimple stmt = last_stmt (bb);
4337 if (stmt && !gimple_visited_p (stmt))
4338 maybe_optimize_range_tests (stmt);
4340 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4342 stmt = gsi_stmt (gsi);
4344 if (is_gimple_assign (stmt)
4345 && !stmt_could_throw_p (stmt))
4347 tree lhs, rhs1, rhs2;
4348 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4350 /* If this is not a gimple binary expression, there is
4351 nothing for us to do with it. */
4352 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4353 continue;
4355 /* If this was part of an already processed statement,
4356 we don't need to touch it again. */
4357 if (gimple_visited_p (stmt))
4359 /* This statement might have become dead because of previous
4360 reassociations. */
4361 if (has_zero_uses (gimple_get_lhs (stmt)))
4363 gsi_remove (&gsi, true);
4364 release_defs (stmt);
4365 /* We might have removed the last stmt above, which
4366 leaves the iterator at the end of the sequence.
4367 In that case, reset it to the last stmt of the
4368 block.  If the block has become empty, that
4369 last-stmt iterator is also the end iterator,
4370 and we need to bail out. */
4371 if (gsi_end_p (gsi))
4373 gsi = gsi_last_bb (bb);
4374 if (gsi_end_p (gsi))
4375 break;
4378 continue;
4381 lhs = gimple_assign_lhs (stmt);
4382 rhs1 = gimple_assign_rhs1 (stmt);
4383 rhs2 = gimple_assign_rhs2 (stmt);
4385 /* For operations other than bitwise AND/OR/XOR and min/max we
4386 cannot reassociate all types.  Verify that here. */
4387 if (rhs_code != BIT_IOR_EXPR
4388 && rhs_code != BIT_AND_EXPR
4389 && rhs_code != BIT_XOR_EXPR
4390 && rhs_code != MIN_EXPR
4391 && rhs_code != MAX_EXPR
4392 && (!can_reassociate_p (lhs)
4393 || !can_reassociate_p (rhs1)
4394 || !can_reassociate_p (rhs2)))
4395 continue;
4397 if (associative_tree_code (rhs_code))
4399 vec<operand_entry_t> ops = vNULL;
4400 tree powi_result = NULL_TREE;
4402 /* There may be no immediate uses left by the time we
4403 get here because we may have eliminated them all. */
4404 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4405 continue;
4407 gimple_set_visited (stmt, true);
4408 linearize_expr_tree (&ops, stmt, true, true);
4409 ops.qsort (sort_by_operand_rank);
4410 optimize_ops_list (rhs_code, &ops);
4411 if (undistribute_ops_list (rhs_code, &ops,
4412 loop_containing_stmt (stmt)))
4414 ops.qsort (sort_by_operand_rank);
4415 optimize_ops_list (rhs_code, &ops);
4418 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4419 optimize_range_tests (rhs_code, &ops);
4421 if (first_pass_instance
4422 && rhs_code == MULT_EXPR
4423 && flag_unsafe_math_optimizations)
4424 powi_result = attempt_builtin_powi (stmt, &ops);
4426 /* If the operand vector is now empty, all operands were
4427 consumed by the __builtin_powi optimization. */
4428 if (ops.length () == 0)
4429 transform_stmt_to_copy (&gsi, stmt, powi_result);
4430 else if (ops.length () == 1)
4432 tree last_op = ops.last ()->op;
4434 if (powi_result)
4435 transform_stmt_to_multiply (&gsi, stmt, last_op,
4436 powi_result);
4437 else
4438 transform_stmt_to_copy (&gsi, stmt, last_op);
4440 else
4442 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4443 int ops_num = ops.length ();
4444 int width = get_reassociation_width (ops_num, rhs_code, mode);
4445 tree new_lhs = lhs;
4447 if (dump_file && (dump_flags & TDF_DETAILS))
4448 fprintf (dump_file,
4449 "Width = %d was chosen for reassociation\n", width);
4451 if (width > 1
4452 && ops.length () > 3)
4453 rewrite_expr_tree_parallel (stmt, width, ops);
4454 else
4456 /* When there are three operands left, we want
4457 to make sure the ones that get the double
4458 binary op are chosen wisely. */
4459 int len = ops.length ();
4460 if (len >= 3)
4461 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4463 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4464 powi_result != NULL);
4467 /* If we combined some repeated factors into a
4468 __builtin_powi call, multiply that result by the
4469 reassociated operands. */
4470 if (powi_result)
4472 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4473 tree type = TREE_TYPE (lhs);
4474 tree target_ssa = make_temp_ssa_name (type, NULL,
4475 "reassocpow");
4476 gimple_set_lhs (lhs_stmt, target_ssa);
4477 update_stmt (lhs_stmt);
4478 if (lhs != new_lhs)
4479 target_ssa = new_lhs;
4480 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4481 powi_result,
4482 target_ssa);
4483 gimple_set_location (mul_stmt, gimple_location (stmt));
4484 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4488 ops.release ();
4492 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4493 son;
4494 son = next_dom_son (CDI_POST_DOMINATORS, son))
4495 reassociate_bb (son);
4498 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4499 void debug_ops_vector (vec<operand_entry_t> ops);
4501 /* Dump the operand entry vector OPS to FILE. */
4503 void
4504 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4506 operand_entry_t oe;
4507 unsigned int i;
4509 FOR_EACH_VEC_ELT (ops, i, oe)
4511 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4512 print_generic_expr (file, oe->op, 0);
4516 /* Dump the operand entry vector OPS to STDERR. */
4518 DEBUG_FUNCTION void
4519 debug_ops_vector (vec<operand_entry_t> ops)
4521 dump_ops_vector (stderr, ops);
4524 static void
4525 do_reassoc (void)
4527 break_up_subtract_bb (ENTRY_BLOCK_PTR);
4528 reassociate_bb (EXIT_BLOCK_PTR);
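/* Note the two traversal directions: break_up_subtract_bb walks the
   dominator tree from the entry block, so subtracts are broken up top
   down, while reassociate_bb walks the post-dominator tree from the
   exit block, so statements are reassociated bottom up.  */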
4531 /* Initialize the reassociation pass. */
4533 static void
4534 init_reassoc (void)
4536 int i;
4537 long rank = 2;
4538 int *bbs = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
4540 /* Find the loops, so that we can prevent moving calculations
4541 into them. */
4542 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4544 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4546 operand_entry_pool = create_alloc_pool ("operand entry pool",
4547 sizeof (struct operand_entry), 30);
4548 next_operand_entry_id = 0;
4550 /* Reverse RPO (Reverse Post Order) will give us something where
4551 deeper loops come later. */
4552 pre_and_rev_post_order_compute (NULL, bbs, false);
4553 bb_rank = XCNEWVEC (long, last_basic_block);
4554 operand_rank = pointer_map_create ();
4556 /* Give each default definition a distinct rank. This includes
4557 parameters and the static chain. Walk backwards over all
4558 SSA names so that we get proper rank ordering according
4559 to tree_swap_operands_p. */
4560 for (i = num_ssa_names - 1; i > 0; --i)
4562 tree name = ssa_name (i);
4563 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4564 insert_operand_rank (name, ++rank);
4567 /* Set up rank for each BB */
4568 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
4569 bb_rank[bbs[i]] = ++rank << 16;
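/* The shift by 16 spaces consecutive blocks 65536 ranks apart; ranks
   derived from statements inside one block (each a small increment
   above its operands) are assumed not to reach the base rank of the
   next block.  */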
4571 free (bbs);
4572 calculate_dominance_info (CDI_POST_DOMINATORS);
4573 plus_negates = vNULL;
4576 /* Cleanup after the reassociation pass, and print stats if
4577 requested. */
4579 static void
4580 fini_reassoc (void)
4582 statistics_counter_event (cfun, "Linearized",
4583 reassociate_stats.linearized);
4584 statistics_counter_event (cfun, "Constants eliminated",
4585 reassociate_stats.constants_eliminated);
4586 statistics_counter_event (cfun, "Ops eliminated",
4587 reassociate_stats.ops_eliminated);
4588 statistics_counter_event (cfun, "Statements rewritten",
4589 reassociate_stats.rewritten);
4590 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4591 reassociate_stats.pows_encountered);
4592 statistics_counter_event (cfun, "Built-in powi calls created",
4593 reassociate_stats.pows_created);
4595 pointer_map_destroy (operand_rank);
4596 free_alloc_pool (operand_entry_pool);
4597 free (bb_rank);
4598 plus_negates.release ();
4599 free_dominance_info (CDI_POST_DOMINATORS);
4600 loop_optimizer_finalize ();
4603 /* Gate and execute functions for Reassociation. */
4605 static unsigned int
4606 execute_reassoc (void)
4608 init_reassoc ();
4610 do_reassoc ();
4611 repropagate_negates ();
4613 fini_reassoc ();
4614 return 0;
4617 static bool
4618 gate_tree_ssa_reassoc (void)
4620 return flag_tree_reassoc != 0;
4623 namespace {
4625 const pass_data pass_data_reassoc =
4627 GIMPLE_PASS, /* type */
4628 "reassoc", /* name */
4629 OPTGROUP_NONE, /* optinfo_flags */
4630 true, /* has_gate */
4631 true, /* has_execute */
4632 TV_TREE_REASSOC, /* tv_id */
4633 ( PROP_cfg | PROP_ssa ), /* properties_required */
4634 0, /* properties_provided */
4635 0, /* properties_destroyed */
4636 0, /* todo_flags_start */
4637 ( TODO_verify_ssa
4638 | TODO_update_ssa_only_virtuals
4639 | TODO_verify_flow ), /* todo_flags_finish */
4642 class pass_reassoc : public gimple_opt_pass
4644 public:
4645 pass_reassoc (gcc::context *ctxt)
4646 : gimple_opt_pass (pass_data_reassoc, ctxt)
4649 /* opt_pass methods: */
4650 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
4651 bool gate () { return gate_tree_ssa_reassoc (); }
4652 unsigned int execute () { return execute_reassoc (); }
4654 }; // class pass_reassoc
4656 } // anon namespace
4658 gimple_opt_pass *
4659 make_pass_reassoc (gcc::context *ctxt)
4661 return new pass_reassoc (ctxt);