gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2016 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "tm_p.h"
33 #include "ssa.h"
34 #include "optabs-tree.h"
35 #include "gimple-pretty-print.h"
36 #include "diagnostic-core.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "cfganal.h"
40 #include "gimple-fold.h"
41 #include "tree-eh.h"
42 #include "gimple-iterator.h"
43 #include "gimplify-me.h"
44 #include "tree-cfg.h"
45 #include "tree-ssa-loop.h"
46 #include "flags.h"
47 #include "tree-ssa.h"
48 #include "langhooks.h"
49 #include "cfgloop.h"
50 #include "params.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
55 /* This is a simple global reassociation pass. It is, in part, based
56 on the LLVM pass of the same name (They do some things more/less
57 than we do, in different orders, etc).
59 It consists of five steps:
61 1. Breaking up subtract operations into addition + negate, where
62 it would promote the reassociation of adds.
64 2. Left linearization of the expression trees, so that (A+B)+(C+D)
65 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
66 During linearization, we place the operands of the binary
67 expressions into a vector of operand_entry_*
69 3. Optimization of the operand lists, eliminating things like a +
70 -a, a & a, etc.
72 3a. Combine repeated factors with the same occurrence counts
73 into a __builtin_powi call that will later be optimized into
74 an optimal number of multiplies.
76 4. Rewrite the expression trees we linearized and optimized so
77 they are in proper rank order.
79 5. Repropagate negates, as nothing else will clean it up ATM.
81 A bit of theory on #4, since nobody seems to write anything down
82 about why it makes sense to do it the way they do it:
84 We could do this much nicer theoretically, but don't (for reasons
85 explained after how to do it theoretically nice :P).
87 In order to promote the most redundancy elimination, you want
88 binary expressions whose operands are the same rank (or
89 preferably, the same value) exposed to the redundancy eliminator,
90 for possible elimination.
92 So the way to do this if we really cared, is to build the new op
93 tree from the leaves to the roots, merging as you go, and putting the
94 new op on the end of the worklist, until you are left with one
95 thing on the worklist.
97 IE if you have to rewrite the following set of operands (listed with
98 rank in parentheses), with opcode PLUS_EXPR:
100 a (1), b (1), c (1), d (2), e (2)
103 We start with our merge worklist empty, and the ops list with all of
104 those on it.
106 You want to first merge all leaves of the same rank, as much as
107 possible.
109 So first build a binary op of
111 mergetmp = a + b, and put "mergetmp" on the merge worklist.
113 Because there is no three operand form of PLUS_EXPR, c is not going to
114 be exposed to redundancy elimination as a rank 1 operand.
116 So you might as well throw it on the merge worklist (you could also
117 consider it to now be a rank two operand, and merge it with d and e,
118 but in this case, you then have evicted e from a binary op. So at
119 least in this situation, you can't win.)
121 Then build a binary op of d + e
122 mergetmp2 = d + e
124 and put mergetmp2 on the merge worklist.
126 so merge worklist = {mergetmp, c, mergetmp2}
128 Continue building binary ops of these operations until you have only
129 one operation left on the worklist.
131 So we have
133 build binary op
134 mergetmp3 = mergetmp + c
136 worklist = {mergetmp2, mergetmp3}
138 mergetmp4 = mergetmp2 + mergetmp3
140 worklist = {mergetmp4}
142 because we have one operation left, we can now just set the original
143 statement equal to the result of that operation.
145 This will at least expose a + b and d + e to redundancy elimination
146 as binary operations.
148 For extra points, you can reuse the old statements to build the
149 mergetmps, since you shouldn't run out.
151 So why don't we do this?
153 Because it's expensive, and rarely will help. Most trees we are
154 reassociating have 3 or fewer ops. If they have 2 ops, they already
155 will be written into a nice single binary op. If you have 3 ops, a
156 single simple check suffices to tell you whether the first two are of the
157 same rank. If so, you know to order it
159 mergetmp = op1 + op2
160 newstmt = mergetmp + op3
162 instead of
163 mergetmp = op2 + op3
164 newstmt = mergetmp + op1
166 If all three are of the same rank, you can't expose them all in a
167 single binary operator anyway, so the above is *still* the best you
168 can do.
170 Thus, this is what we do. When we have three ops left, we check to see
171 what order to put them in, and call it a day. As a nod to vector sum
172 reduction, we check if any of the ops are really a phi node that is a
173 destructive update for the associating op, and keep the destructive
174 update together for vector sum reduction recognition. */
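/* For illustration only (a sketch, not a verbatim testcase): given
   input GIMPLE along the lines of

       t1 = a - b;
       t2 = t1 + c;
       r  = t2 - d;

   step 1 turns the subtracts into additions of negates, step 2
   linearizes the chain into the operand vector {a, -b, c, -d}, steps 3
   and 4 optimize that vector and rewrite it in rank order, and step 5
   re-propagates the remaining negates back into subtracts.  */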
176 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
177 point 3a in the pass header comment. */
178 static bool reassoc_insert_powi_p;
180 /* Statistics */
181 static struct
183 int linearized;
184 int constants_eliminated;
185 int ops_eliminated;
186 int rewritten;
187 int pows_encountered;
188 int pows_created;
189 } reassociate_stats;
191 /* Operator, rank pair. */
192 struct operand_entry
194 unsigned int rank;
195 int id;
196 tree op;
197 unsigned int count;
200 static object_allocator<operand_entry> operand_entry_pool
201 ("operand entry pool");
203 /* This is used to assign a unique ID to each struct operand_entry
204 so that qsort results are identical on different hosts. */
205 static int next_operand_entry_id;
207 /* Starting rank number for a given basic block, so that we can rank
208 operations using unmovable instructions in that BB based on the bb
209 depth. */
210 static long *bb_rank;
212 /* Operand->rank hashtable. */
213 static hash_map<tree, long> *operand_rank;
215 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
216 all basic blocks, the CFG should be adjusted - the basic block
217 split right after that SSA_NAME's definition statement and before
218 its only use, which must be a bit ior. */
219 static vec<tree> reassoc_branch_fixups;
221 /* Forward decls. */
222 static long get_rank (tree);
223 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
225 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
226 possibly added by gsi_remove. */
228 bool
229 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
231 gimple *stmt = gsi_stmt (*gsi);
233 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
234 return gsi_remove (gsi, true);
236 gimple_stmt_iterator prev = *gsi;
237 gsi_prev (&prev);
238 unsigned uid = gimple_uid (stmt);
239 basic_block bb = gimple_bb (stmt);
240 bool ret = gsi_remove (gsi, true);
241 if (!gsi_end_p (prev))
242 gsi_next (&prev);
243 else
244 prev = gsi_start_bb (bb);
245 gimple *end_stmt = gsi_stmt (*gsi);
246 while ((stmt = gsi_stmt (prev)) != end_stmt)
248 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
249 gimple_set_uid (stmt, uid);
250 gsi_next (&prev);
252 return ret;
255 /* Bias amount for loop-carried phis. We want this to be larger than
256 the depth of any reassociation tree we can see, but not larger than
257 the rank difference between two blocks. */
258 #define PHI_LOOP_BIAS (1 << 15)
260 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
261 an innermost loop, and the phi has only a single use which is inside
262 the loop, then the rank is the block rank of the loop latch plus an
263 extra bias for the loop-carried dependence. This causes expressions
264 calculated into an accumulator variable to be independent for each
265 iteration of the loop. If STMT is some other phi, the rank is the
266 block rank of its containing block. */
267 static long
268 phi_rank (gimple *stmt)
270 basic_block bb = gimple_bb (stmt);
271 struct loop *father = bb->loop_father;
272 tree res;
273 unsigned i;
274 use_operand_p use;
275 gimple *use_stmt;
277 /* We only care about real loops (those with a latch). */
278 if (!father->latch)
279 return bb_rank[bb->index];
281 /* Interesting phis must be in headers of innermost loops. */
282 if (bb != father->header
283 || father->inner)
284 return bb_rank[bb->index];
286 /* Ignore virtual SSA_NAMEs. */
287 res = gimple_phi_result (stmt);
288 if (virtual_operand_p (res))
289 return bb_rank[bb->index];
291 /* The phi definition must have a single use, and that use must be
292 within the loop. Otherwise this isn't an accumulator pattern. */
293 if (!single_imm_use (res, &use, &use_stmt)
294 || gimple_bb (use_stmt)->loop_father != father)
295 return bb_rank[bb->index];
297 /* Look for phi arguments from within the loop. If found, bias this phi. */
298 for (i = 0; i < gimple_phi_num_args (stmt); i++)
300 tree arg = gimple_phi_arg_def (stmt, i);
301 if (TREE_CODE (arg) == SSA_NAME
302 && !SSA_NAME_IS_DEFAULT_DEF (arg))
304 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
305 if (gimple_bb (def_stmt)->loop_father == father)
306 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
310 /* Must be an uninteresting phi. */
311 return bb_rank[bb->index];
314 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
315 loop-carried dependence of an innermost loop, return TRUE; else
316 return FALSE. */
317 static bool
318 loop_carried_phi (tree exp)
320 gimple *phi_stmt;
321 long block_rank;
323 if (TREE_CODE (exp) != SSA_NAME
324 || SSA_NAME_IS_DEFAULT_DEF (exp))
325 return false;
327 phi_stmt = SSA_NAME_DEF_STMT (exp);
329 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
330 return false;
332 /* Non-loop-carried phis have block rank. Loop-carried phis have
333 an additional bias added in. If this phi doesn't have block rank,
334 it's biased and should not be propagated. */
335 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
337 if (phi_rank (phi_stmt) != block_rank)
338 return true;
340 return false;
343 /* Return the maximum of RANK and the rank that should be propagated
344 from expression OP. For most operands, this is just the rank of OP.
345 For loop-carried phis, the value is zero to avoid undoing the bias
346 in favor of the phi. */
347 static long
348 propagate_rank (long rank, tree op)
350 long op_rank;
352 if (loop_carried_phi (op))
353 return rank;
355 op_rank = get_rank (op);
357 return MAX (rank, op_rank);
360 /* Look up the operand rank structure for expression E. */
362 static inline long
363 find_operand_rank (tree e)
365 long *slot = operand_rank->get (e);
366 return slot ? *slot : -1;
369 /* Insert {E,RANK} into the operand rank hashtable. */
371 static inline void
372 insert_operand_rank (tree e, long rank)
374 gcc_assert (rank > 0);
375 gcc_assert (!operand_rank->put (e, rank));
378 /* Given an expression E, return the rank of the expression. */
380 static long
381 get_rank (tree e)
383 /* SSA_NAME's have the rank of the expression they are the result
384 of.
385 For globals and uninitialized values, the rank is 0.
386 For function arguments, use the pre-setup rank.
387 For PHI nodes, stores, asm statements, etc, we use the rank of
388 the BB.
389 For simple operations, the rank is the maximum rank of any of
390 its operands, or the bb_rank, whichever is less.
391 I make no claims that this is optimal, however, it gives good
392 results. */
394 /* We make an exception to the normal ranking system to break
395 dependences of accumulator variables in loops. Suppose we
396 have a simple one-block loop containing:
398 x_1 = phi(x_0, x_2)
399 b = a + x_1
400 c = b + d
401 x_2 = c + e
403 As shown, each iteration of the calculation into x is fully
404 dependent upon the iteration before it. We would prefer to
405 see this in the form:
407 x_1 = phi(x_0, x_2)
408 b = a + d
409 c = b + e
410 x_2 = c + x_1
412 If the loop is unrolled, the calculations of b and c from
413 different iterations can be interleaved.
415 To obtain this result during reassociation, we bias the rank
416 of the phi definition x_1 upward, when it is recognized as an
417 accumulator pattern. The artificial rank causes it to be
418 added last, providing the desired independence. */
420 if (TREE_CODE (e) == SSA_NAME)
422 ssa_op_iter iter;
423 gimple *stmt;
424 long rank;
425 tree op;
427 if (SSA_NAME_IS_DEFAULT_DEF (e))
428 return find_operand_rank (e);
430 stmt = SSA_NAME_DEF_STMT (e);
431 if (gimple_code (stmt) == GIMPLE_PHI)
432 return phi_rank (stmt);
434 if (!is_gimple_assign (stmt))
435 return bb_rank[gimple_bb (stmt)->index];
437 /* If we already have a rank for this expression, use that. */
438 rank = find_operand_rank (e);
439 if (rank != -1)
440 return rank;
442 /* Otherwise, find the maximum rank for the operands. As an
443 exception, remove the bias from loop-carried phis when propagating
444 the rank so that dependent operations are not also biased. */
445 /* Simply walk over all SSA uses - this takes advantage of the
446 fact that non-SSA operands are is_gimple_min_invariant and
447 thus have rank 0. */
448 rank = 0;
449 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
450 rank = propagate_rank (rank, op);
452 if (dump_file && (dump_flags & TDF_DETAILS))
454 fprintf (dump_file, "Rank for ");
455 print_generic_expr (dump_file, e, 0);
456 fprintf (dump_file, " is %ld\n", (rank + 1));
459 /* Note the rank in the hashtable so we don't recompute it. */
460 insert_operand_rank (e, (rank + 1));
461 return (rank + 1);
464 /* Constants, globals, etc., are rank 0 */
465 return 0;
469 /* We want integer ones to end up last no matter what, since they are
470 the ones we can do the most with. */
471 #define INTEGER_CONST_TYPE 1 << 3
472 #define FLOAT_CONST_TYPE 1 << 2
473 #define OTHER_CONST_TYPE 1 << 1
475 /* Classify an invariant tree into integer, float, or other, so that
476 we can sort them to be near other constants of the same type. */
477 static inline int
478 constant_type (tree t)
480 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
481 return INTEGER_CONST_TYPE;
482 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
483 return FLOAT_CONST_TYPE;
484 else
485 return OTHER_CONST_TYPE;
488 /* qsort comparison function to sort operand entries PA and PB by rank
489 so that the sorted array is ordered by rank in decreasing order. */
490 static int
491 sort_by_operand_rank (const void *pa, const void *pb)
493 const operand_entry *oea = *(const operand_entry *const *)pa;
494 const operand_entry *oeb = *(const operand_entry *const *)pb;
496 /* It's nicer for optimize_expression if constants that are likely
497 to fold when added/multiplied/whatever are put next to each
498 other. Since all constants have rank 0, order them by type. */
499 if (oeb->rank == 0 && oea->rank == 0)
501 if (constant_type (oeb->op) != constant_type (oea->op))
502 return constant_type (oeb->op) - constant_type (oea->op);
503 else
504 /* To make sorting result stable, we use unique IDs to determine
505 order. */
506 return oeb->id - oea->id;
509 /* Lastly, make sure the versions that are the same go next to each
510 other. */
511 if ((oeb->rank - oea->rank == 0)
512 && TREE_CODE (oea->op) == SSA_NAME
513 && TREE_CODE (oeb->op) == SSA_NAME)
515 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
516 versions of removed SSA_NAMEs, so if possible prefer to sort
517 based on the basic block and gimple_uid of the SSA_NAME_DEF_STMT.
518 See PR60418. */
519 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
520 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
521 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
523 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
524 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
525 basic_block bba = gimple_bb (stmta);
526 basic_block bbb = gimple_bb (stmtb);
527 if (bbb != bba)
529 if (bb_rank[bbb->index] != bb_rank[bba->index])
530 return bb_rank[bbb->index] - bb_rank[bba->index];
532 else
534 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
535 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
536 if (da != db)
537 return da ? 1 : -1;
541 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
542 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
543 else
544 return oeb->id - oea->id;
547 if (oeb->rank != oea->rank)
548 return oeb->rank - oea->rank;
549 else
550 return oeb->id - oea->id;
553 /* Add an operand entry to *OPS for the tree operand OP. */
555 static void
556 add_to_ops_vec (vec<operand_entry *> *ops, tree op)
558 operand_entry *oe = operand_entry_pool.allocate ();
560 oe->op = op;
561 oe->rank = get_rank (op);
562 oe->id = next_operand_entry_id++;
563 oe->count = 1;
564 ops->safe_push (oe);
567 /* Add an operand entry to *OPS for the tree operand OP with repeat
568 count REPEAT. */
570 static void
571 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
572 HOST_WIDE_INT repeat)
574 operand_entry *oe = operand_entry_pool.allocate ();
576 oe->op = op;
577 oe->rank = get_rank (op);
578 oe->id = next_operand_entry_id++;
579 oe->count = repeat;
580 ops->safe_push (oe);
582 reassociate_stats.pows_encountered++;
585 /* Return true if STMT is a reassociable operation containing a binary
586 operation with tree code CODE, and is inside LOOP. */
588 static bool
589 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
591 basic_block bb = gimple_bb (stmt);
593 if (gimple_bb (stmt) == NULL)
594 return false;
596 if (!flow_bb_inside_loop_p (loop, bb))
597 return false;
599 if (is_gimple_assign (stmt)
600 && gimple_assign_rhs_code (stmt) == code
601 && has_single_use (gimple_assign_lhs (stmt)))
602 return true;
604 return false;
608 /* Return true if STMT is a nop-conversion. */
610 static bool
611 gimple_nop_conversion_p (gimple *stmt)
613 if (gassign *ass = dyn_cast <gassign *> (stmt))
615 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
616 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
617 TREE_TYPE (gimple_assign_rhs1 (ass))))
618 return true;
620 return false;
623 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
624 operand of that operation. Otherwise, return NULL_TREE. */
626 static tree
627 get_unary_op (tree name, enum tree_code opcode)
629 gimple *stmt = SSA_NAME_DEF_STMT (name);
631 /* Look through nop conversions (sign changes). */
632 if (gimple_nop_conversion_p (stmt)
633 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
634 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
636 if (!is_gimple_assign (stmt))
637 return NULL_TREE;
639 if (gimple_assign_rhs_code (stmt) == opcode)
640 return gimple_assign_rhs1 (stmt);
641 return NULL_TREE;
644 /* Return true if OP1 and OP2 have the same value when cast to either type. */
646 static bool
647 ops_equal_values_p (tree op1, tree op2)
649 if (op1 == op2)
650 return true;
652 tree orig_op1 = op1;
653 if (TREE_CODE (op1) == SSA_NAME)
655 gimple *stmt = SSA_NAME_DEF_STMT (op1);
656 if (gimple_nop_conversion_p (stmt))
658 op1 = gimple_assign_rhs1 (stmt);
659 if (op1 == op2)
660 return true;
664 if (TREE_CODE (op2) == SSA_NAME)
666 gimple *stmt = SSA_NAME_DEF_STMT (op2);
667 if (gimple_nop_conversion_p (stmt))
669 op2 = gimple_assign_rhs1 (stmt);
670 if (op1 == op2
671 || orig_op1 == op2)
672 return true;
676 return false;
680 /* If CURR and LAST are a pair of ops that OPCODE allows us to
681 eliminate through equivalences, do so, remove them from OPS, and
682 return true. Otherwise, return false. */
684 static bool
685 eliminate_duplicate_pair (enum tree_code opcode,
686 vec<operand_entry *> *ops,
687 bool *all_done,
688 unsigned int i,
689 operand_entry *curr,
690 operand_entry *last)
693 /* If we have two of the same op, and the opcode is &, |, min, or max,
694 we can eliminate one of them.
695 If we have two of the same op, and the opcode is ^, we can
696 eliminate both of them. */
698 if (last && last->op == curr->op)
700 switch (opcode)
702 case MAX_EXPR:
703 case MIN_EXPR:
704 case BIT_IOR_EXPR:
705 case BIT_AND_EXPR:
706 if (dump_file && (dump_flags & TDF_DETAILS))
708 fprintf (dump_file, "Equivalence: ");
709 print_generic_expr (dump_file, curr->op, 0);
710 fprintf (dump_file, " [&|minmax] ");
711 print_generic_expr (dump_file, last->op, 0);
712 fprintf (dump_file, " -> ");
713 print_generic_stmt (dump_file, last->op, 0);
716 ops->ordered_remove (i);
717 reassociate_stats.ops_eliminated ++;
719 return true;
721 case BIT_XOR_EXPR:
722 if (dump_file && (dump_flags & TDF_DETAILS))
724 fprintf (dump_file, "Equivalence: ");
725 print_generic_expr (dump_file, curr->op, 0);
726 fprintf (dump_file, " ^ ");
727 print_generic_expr (dump_file, last->op, 0);
728 fprintf (dump_file, " -> nothing\n");
731 reassociate_stats.ops_eliminated += 2;
733 if (ops->length () == 2)
735 ops->create (0);
736 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
737 *all_done = true;
739 else
741 ops->ordered_remove (i-1);
742 ops->ordered_remove (i-1);
745 return true;
747 default:
748 break;
751 return false;
754 static vec<tree> plus_negates;
756 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
757 expression, look in OPS for a corresponding positive operation to cancel
758 it out. If we find one, remove the other from OPS, replace
759 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
760 return false. */
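/* An illustrative sketch: for PLUS_EXPR, if OPS contains {x, -x, y},
   the x and -x entries cancel and are replaced by a 0 constant; if OPS
   contains {x, ~x, y}, the x and ~x entries are replaced by -1 (all
   ones), matching the identity x + ~x == -1.  */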
762 static bool
763 eliminate_plus_minus_pair (enum tree_code opcode,
764 vec<operand_entry *> *ops,
765 unsigned int currindex,
766 operand_entry *curr)
768 tree negateop;
769 tree notop;
770 unsigned int i;
771 operand_entry *oe;
773 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
774 return false;
776 negateop = get_unary_op (curr->op, NEGATE_EXPR);
777 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
778 if (negateop == NULL_TREE && notop == NULL_TREE)
779 return false;
781 /* Any non-negated version will have a rank that is one less than
782 the current rank. So once we hit those ranks, if we don't find
783 one, we can stop. */
785 for (i = currindex + 1;
786 ops->iterate (i, &oe)
787 && oe->rank >= curr->rank - 1 ;
788 i++)
790 if (negateop
791 && ops_equal_values_p (oe->op, negateop))
793 if (dump_file && (dump_flags & TDF_DETAILS))
795 fprintf (dump_file, "Equivalence: ");
796 print_generic_expr (dump_file, negateop, 0);
797 fprintf (dump_file, " + -");
798 print_generic_expr (dump_file, oe->op, 0);
799 fprintf (dump_file, " -> 0\n");
802 ops->ordered_remove (i);
803 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
804 ops->ordered_remove (currindex);
805 reassociate_stats.ops_eliminated ++;
807 return true;
809 else if (notop
810 && ops_equal_values_p (oe->op, notop))
812 tree op_type = TREE_TYPE (oe->op);
814 if (dump_file && (dump_flags & TDF_DETAILS))
816 fprintf (dump_file, "Equivalence: ");
817 print_generic_expr (dump_file, notop, 0);
818 fprintf (dump_file, " + ~");
819 print_generic_expr (dump_file, oe->op, 0);
820 fprintf (dump_file, " -> -1\n");
823 ops->ordered_remove (i);
824 add_to_ops_vec (ops, build_all_ones_cst (op_type));
825 ops->ordered_remove (currindex);
826 reassociate_stats.ops_eliminated ++;
828 return true;
832 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
833 save it for later inspection in repropagate_negates(). */
834 if (negateop != NULL_TREE
835 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
836 plus_negates.safe_push (curr->op);
838 return false;
841 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
842 bitwise not expression, look in OPS for a corresponding operand to
843 cancel it out. If we find one, remove the other from OPS, replace
844 OPS[CURRINDEX] with 0, and return true. Otherwise, return
845 false. */
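/* An illustrative sketch: for BIT_AND_EXPR, finding both x and ~x in
   OPS collapses the whole operand list to the single constant 0; for
   BIT_IOR_EXPR it collapses to -1 (all ones), since x & ~x == 0 and
   x | ~x == -1.  */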
847 static bool
848 eliminate_not_pairs (enum tree_code opcode,
849 vec<operand_entry *> *ops,
850 unsigned int currindex,
851 operand_entry *curr)
853 tree notop;
854 unsigned int i;
855 operand_entry *oe;
857 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
858 || TREE_CODE (curr->op) != SSA_NAME)
859 return false;
861 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
862 if (notop == NULL_TREE)
863 return false;
865 /* Any non-not version will have a rank that is one less than
866 the current rank. So once we hit those ranks, if we don't find
867 one, we can stop. */
869 for (i = currindex + 1;
870 ops->iterate (i, &oe)
871 && oe->rank >= curr->rank - 1;
872 i++)
874 if (oe->op == notop)
876 if (dump_file && (dump_flags & TDF_DETAILS))
878 fprintf (dump_file, "Equivalence: ");
879 print_generic_expr (dump_file, notop, 0);
880 if (opcode == BIT_AND_EXPR)
881 fprintf (dump_file, " & ~");
882 else if (opcode == BIT_IOR_EXPR)
883 fprintf (dump_file, " | ~");
884 print_generic_expr (dump_file, oe->op, 0);
885 if (opcode == BIT_AND_EXPR)
886 fprintf (dump_file, " -> 0\n");
887 else if (opcode == BIT_IOR_EXPR)
888 fprintf (dump_file, " -> -1\n");
891 if (opcode == BIT_AND_EXPR)
892 oe->op = build_zero_cst (TREE_TYPE (oe->op));
893 else if (opcode == BIT_IOR_EXPR)
894 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
896 reassociate_stats.ops_eliminated += ops->length () - 1;
897 ops->truncate (0);
898 ops->quick_push (oe);
899 return true;
903 return false;
906 /* Use constant value that may be present in OPS to try to eliminate
907 operands. Note that this function is only really used when we've
908 eliminated ops for other reasons, or merged constants. Across
909 single statements, fold already does all of this, plus more. There
910 is little point in duplicating logic, so I've only included the
911 identities that I could ever construct testcases to trigger. */
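/* The identities handled below are, roughly: x & 0 -> 0, x & -1 -> x,
   x | -1 -> -1, x | 0 -> x, x * 0 -> 0 (for floats only when NaNs and
   signed zeros need not be honored), x * 1 -> x, and x [+^-] 0 -> x
   (for floats only when the zero addition is foldable).  */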
913 static void
914 eliminate_using_constants (enum tree_code opcode,
915 vec<operand_entry *> *ops)
917 operand_entry *oelast = ops->last ();
918 tree type = TREE_TYPE (oelast->op);
920 if (oelast->rank == 0
921 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
923 switch (opcode)
925 case BIT_AND_EXPR:
926 if (integer_zerop (oelast->op))
928 if (ops->length () != 1)
930 if (dump_file && (dump_flags & TDF_DETAILS))
931 fprintf (dump_file, "Found & 0, removing all other ops\n");
933 reassociate_stats.ops_eliminated += ops->length () - 1;
935 ops->truncate (0);
936 ops->quick_push (oelast);
937 return;
940 else if (integer_all_onesp (oelast->op))
942 if (ops->length () != 1)
944 if (dump_file && (dump_flags & TDF_DETAILS))
945 fprintf (dump_file, "Found & -1, removing\n");
946 ops->pop ();
947 reassociate_stats.ops_eliminated++;
950 break;
951 case BIT_IOR_EXPR:
952 if (integer_all_onesp (oelast->op))
954 if (ops->length () != 1)
956 if (dump_file && (dump_flags & TDF_DETAILS))
957 fprintf (dump_file, "Found | -1, removing all other ops\n");
959 reassociate_stats.ops_eliminated += ops->length () - 1;
961 ops->truncate (0);
962 ops->quick_push (oelast);
963 return;
966 else if (integer_zerop (oelast->op))
968 if (ops->length () != 1)
970 if (dump_file && (dump_flags & TDF_DETAILS))
971 fprintf (dump_file, "Found | 0, removing\n");
972 ops->pop ();
973 reassociate_stats.ops_eliminated++;
976 break;
977 case MULT_EXPR:
978 if (integer_zerop (oelast->op)
979 || (FLOAT_TYPE_P (type)
980 && !HONOR_NANS (type)
981 && !HONOR_SIGNED_ZEROS (type)
982 && real_zerop (oelast->op)))
984 if (ops->length () != 1)
986 if (dump_file && (dump_flags & TDF_DETAILS))
987 fprintf (dump_file, "Found * 0, removing all other ops\n");
989 reassociate_stats.ops_eliminated += ops->length () - 1;
990 ops->truncate (1);
991 ops->quick_push (oelast);
992 return;
995 else if (integer_onep (oelast->op)
996 || (FLOAT_TYPE_P (type)
997 && !HONOR_SNANS (type)
998 && real_onep (oelast->op)))
1000 if (ops->length () != 1)
1002 if (dump_file && (dump_flags & TDF_DETAILS))
1003 fprintf (dump_file, "Found * 1, removing\n");
1004 ops->pop ();
1005 reassociate_stats.ops_eliminated++;
1006 return;
1009 break;
1010 case BIT_XOR_EXPR:
1011 case PLUS_EXPR:
1012 case MINUS_EXPR:
1013 if (integer_zerop (oelast->op)
1014 || (FLOAT_TYPE_P (type)
1015 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1016 && fold_real_zero_addition_p (type, oelast->op,
1017 opcode == MINUS_EXPR)))
1019 if (ops->length () != 1)
1021 if (dump_file && (dump_flags & TDF_DETAILS))
1022 fprintf (dump_file, "Found [|^+] 0, removing\n");
1023 ops->pop ();
1024 reassociate_stats.ops_eliminated++;
1025 return;
1028 break;
1029 default:
1030 break;
1036 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1037 bool, bool);
1039 /* Structure for tracking and counting operands. */
1040 struct oecount {
1041 int cnt;
1042 int id;
1043 enum tree_code oecode;
1044 tree op;
1048 /* The heap for the oecount hashtable and the sorted list of operands. */
1049 static vec<oecount> cvec;
1052 /* Oecount hashtable helpers. */
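/* The hash table below does not store oecounts directly; it stores
   indices into CVEC biased by 42, because int_hash <int, 0, 1>
   reserves the values 0 and 1 for empty and deleted slots.  Hence the
   "- 42" when looking an entry up here, and the "cvec.length () + 41"
   (index of the just-pushed element plus 42) used when inserting in
   undistribute_ops_list.  */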
1054 struct oecount_hasher : int_hash <int, 0, 1>
1056 static inline hashval_t hash (int);
1057 static inline bool equal (int, int);
1060 /* Hash function for oecount. */
1062 inline hashval_t
1063 oecount_hasher::hash (int p)
1065 const oecount *c = &cvec[p - 42];
1066 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1069 /* Comparison function for oecount. */
1071 inline bool
1072 oecount_hasher::equal (int p1, int p2)
1074 const oecount *c1 = &cvec[p1 - 42];
1075 const oecount *c2 = &cvec[p2 - 42];
1076 return (c1->oecode == c2->oecode
1077 && c1->op == c2->op);
1080 /* Comparison function for qsort sorting oecount elements by count. */
1082 static int
1083 oecount_cmp (const void *p1, const void *p2)
1085 const oecount *c1 = (const oecount *)p1;
1086 const oecount *c2 = (const oecount *)p2;
1087 if (c1->cnt != c2->cnt)
1088 return c1->cnt - c2->cnt;
1089 else
1090 /* If counts are identical, use unique IDs to stabilize qsort. */
1091 return c1->id - c2->id;
1094 /* Return TRUE iff STMT represents a builtin call that raises OP
1095 to some exponent. */
1097 static bool
1098 stmt_is_power_of_op (gimple *stmt, tree op)
1100 if (!is_gimple_call (stmt))
1101 return false;
1103 switch (gimple_call_combined_fn (stmt))
1105 CASE_CFN_POW:
1106 CASE_CFN_POWI:
1107 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1109 default:
1110 return false;
1114 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1115 in place and return the result. Assumes that stmt_is_power_of_op
1116 was previously called for STMT and returned TRUE. */
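/* For example, a call __builtin_powi (x, 5) is rewritten in place to
   __builtin_powi (x, 4) and 4 is returned; likewise pow (x, 3.0)
   becomes pow (x, 2.0) and 2 is returned.  */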
1118 static HOST_WIDE_INT
1119 decrement_power (gimple *stmt)
1121 REAL_VALUE_TYPE c, cint;
1122 HOST_WIDE_INT power;
1123 tree arg1;
1125 switch (gimple_call_combined_fn (stmt))
1127 CASE_CFN_POW:
1128 arg1 = gimple_call_arg (stmt, 1);
1129 c = TREE_REAL_CST (arg1);
1130 power = real_to_integer (&c) - 1;
1131 real_from_integer (&cint, VOIDmode, power, SIGNED);
1132 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1133 return power;
1135 CASE_CFN_POWI:
1136 arg1 = gimple_call_arg (stmt, 1);
1137 power = TREE_INT_CST_LOW (arg1) - 1;
1138 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1139 return power;
1141 default:
1142 gcc_unreachable ();
1146 /* Find the single immediate use of STMT's LHS, and replace it
1147 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1148 replace *DEF with OP as well. */
1150 static void
1151 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1153 tree lhs;
1154 gimple *use_stmt;
1155 use_operand_p use;
1156 gimple_stmt_iterator gsi;
1158 if (is_gimple_call (stmt))
1159 lhs = gimple_call_lhs (stmt);
1160 else
1161 lhs = gimple_assign_lhs (stmt);
1163 gcc_assert (has_single_use (lhs));
1164 single_imm_use (lhs, &use, &use_stmt);
1165 if (lhs == *def)
1166 *def = op;
1167 SET_USE (use, op);
1168 if (TREE_CODE (op) != SSA_NAME)
1169 update_stmt (use_stmt);
1170 gsi = gsi_for_stmt (stmt);
1171 unlink_stmt_vdef (stmt);
1172 reassoc_remove_stmt (&gsi);
1173 release_defs (stmt);
1176 /* Walks the linear chain with result *DEF searching for an operation
1177 with operand OP and code OPCODE, removing that operation from the chain. *DEF
1178 is updated if there is only one operand but no operation left. */
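/* An illustrative sketch: if *DEF is t2 in the chain

       t1 = a * x;
       t2 = t1 * b;

   and we are asked to remove OP == x with OPCODE == MULT_EXPR, the
   statement defining t1 is deleted and its other operand a is
   propagated into the single use, leaving t2 = a * b.  */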
1180 static void
1181 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1183 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1187 tree name;
1189 if (opcode == MULT_EXPR
1190 && stmt_is_power_of_op (stmt, op))
1192 if (decrement_power (stmt) == 1)
1193 propagate_op_to_single_use (op, stmt, def);
1194 return;
1197 name = gimple_assign_rhs1 (stmt);
1199 /* If this is the operation we look for and one of the operands
1200 is ours simply propagate the other operand into the stmts
1201 single use. */
1202 if (gimple_assign_rhs_code (stmt) == opcode
1203 && (name == op
1204 || gimple_assign_rhs2 (stmt) == op))
1206 if (name == op)
1207 name = gimple_assign_rhs2 (stmt);
1208 propagate_op_to_single_use (name, stmt, def);
1209 return;
1212 /* We might have a multiply of two __builtin_pow* calls, and
1213 the operand might be hiding in the rightmost one. */
1214 if (opcode == MULT_EXPR
1215 && gimple_assign_rhs_code (stmt) == opcode
1216 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1217 && has_single_use (gimple_assign_rhs2 (stmt)))
1219 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1220 if (stmt_is_power_of_op (stmt2, op))
1222 if (decrement_power (stmt2) == 1)
1223 propagate_op_to_single_use (op, stmt2, def);
1224 return;
1228 /* Continue walking the chain. */
1229 gcc_assert (name != op
1230 && TREE_CODE (name) == SSA_NAME);
1231 stmt = SSA_NAME_DEF_STMT (name);
1233 while (1);
1236 /* Returns true if statement S1 dominates statement S2. Like
1237 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1239 static bool
1240 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1242 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1244 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of a (D)
1245 SSA_NAME. Assume it lives at the beginning of the function and
1246 thus dominates everything. */
1247 if (!bb1 || s1 == s2)
1248 return true;
1250 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1251 if (!bb2)
1252 return false;
1254 if (bb1 == bb2)
1256 /* PHIs in the same basic block are assumed to be
1257 executed all in parallel; if only one stmt is a PHI,
1258 it dominates the other stmt in the same basic block. */
1259 if (gimple_code (s1) == GIMPLE_PHI)
1260 return true;
1262 if (gimple_code (s2) == GIMPLE_PHI)
1263 return false;
1265 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1267 if (gimple_uid (s1) < gimple_uid (s2))
1268 return true;
1270 if (gimple_uid (s1) > gimple_uid (s2))
1271 return false;
1273 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1274 unsigned int uid = gimple_uid (s1);
1275 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1277 gimple *s = gsi_stmt (gsi);
1278 if (gimple_uid (s) != uid)
1279 break;
1280 if (s == s2)
1281 return true;
1284 return false;
1287 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1290 /* Insert STMT after INSERT_POINT. */
1292 static void
1293 insert_stmt_after (gimple *stmt, gimple *insert_point)
1295 gimple_stmt_iterator gsi;
1296 basic_block bb;
1298 if (gimple_code (insert_point) == GIMPLE_PHI)
1299 bb = gimple_bb (insert_point);
1300 else if (!stmt_ends_bb_p (insert_point))
1302 gsi = gsi_for_stmt (insert_point);
1303 gimple_set_uid (stmt, gimple_uid (insert_point));
1304 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1305 return;
1307 else
1308 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1309 thus if it must end a basic block, it should be a call that can
1310 throw, or some assignment that can throw. If it throws, the LHS
1311 of it will not be initialized though, so only valid places using
1312 the SSA_NAME should be dominated by the fallthru edge. */
1313 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1314 gsi = gsi_after_labels (bb);
1315 if (gsi_end_p (gsi))
1317 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1318 gimple_set_uid (stmt,
1319 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1321 else
1322 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1323 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1326 /* Builds one statement performing OP1 OPCODE OP2, creating a new
1327 SSA name of type TYPE for the result. Places the statement after
1328 the definition of either OP1 or OP2. Returns the new statement. */
1330 static gimple *
1331 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1333 gimple *op1def = NULL, *op2def = NULL;
1334 gimple_stmt_iterator gsi;
1335 tree op;
1336 gassign *sum;
1338 /* Create the addition statement. */
1339 op = make_ssa_name (type);
1340 sum = gimple_build_assign (op, opcode, op1, op2);
1342 /* Find an insertion place and insert. */
1343 if (TREE_CODE (op1) == SSA_NAME)
1344 op1def = SSA_NAME_DEF_STMT (op1);
1345 if (TREE_CODE (op2) == SSA_NAME)
1346 op2def = SSA_NAME_DEF_STMT (op2);
1347 if ((!op1def || gimple_nop_p (op1def))
1348 && (!op2def || gimple_nop_p (op2def)))
1350 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1351 if (gsi_end_p (gsi))
1353 gimple_stmt_iterator gsi2
1354 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1355 gimple_set_uid (sum,
1356 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1358 else
1359 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1360 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1362 else
1364 gimple *insert_point;
1365 if ((!op1def || gimple_nop_p (op1def))
1366 || (op2def && !gimple_nop_p (op2def)
1367 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1368 insert_point = op2def;
1369 else
1370 insert_point = op1def;
1371 insert_stmt_after (sum, insert_point);
1373 update_stmt (sum);
1375 return sum;
1378 /* Perform un-distribution of divisions and multiplications.
1379 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1380 to (A + B) / X for real X.
1382 The algorithm is organized as follows.
1384 - First we walk the addition chain *OPS looking for summands that
1385 are defined by a multiplication or a real division. This results
1386 in the candidates bitmap with relevant indices into *OPS.
1388 - Second we build the chains of multiplications or divisions for
1389 these candidates, counting the number of occurrences of (operand, code)
1390 pairs in all of the candidates chains.
1392 - Third we sort the (operand, code) pairs by number of occurrence and
1393 process them starting with the pair with the most uses.
1395 * For each such pair we walk the candidates again to build a
1396 second candidate bitmap noting all multiplication/division chains
1397 that have at least one occurrence of (operand, code).
1399 * We build an alternate addition chain only covering these
1400 candidates with one (operand, code) operation removed from their
1401 multiplication/division chain.
1403 * The first candidate gets replaced by the alternate addition chain
1404 multiplied/divided by the operand.
1406 * All candidate chains get disabled for further processing and
1407 processing of (operand, code) pairs continues.
1409 The alternate addition chains built are re-processed by the main
1410 reassociation algorithm which allows optimizing a * x * y + b * y * x
1411 to (a + b ) * x * y in one invocation of the reassociation pass. */
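/* An illustrative sketch: for the addition chain a * x + b * x + c the
   counting table records the pair (x, MULT_EXPR) with count 2 (and a,
   b, c with count 1 each).  Processing that pair removes the x factor
   from both candidate chains, builds the alternate chain a + b,
   multiplies it by x, and leaves the outer chain as (a + b) * x + 0 + c
   for the normal reassociation machinery to finish.  */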
1413 static bool
1414 undistribute_ops_list (enum tree_code opcode,
1415 vec<operand_entry *> *ops, struct loop *loop)
1417 unsigned int length = ops->length ();
1418 operand_entry *oe1;
1419 unsigned i, j;
1420 sbitmap candidates, candidates2;
1421 unsigned nr_candidates, nr_candidates2;
1422 sbitmap_iterator sbi0;
1423 vec<operand_entry *> *subops;
1424 bool changed = false;
1425 int next_oecount_id = 0;
1427 if (length <= 1
1428 || opcode != PLUS_EXPR)
1429 return false;
1431 /* Build a list of candidates to process. */
1432 candidates = sbitmap_alloc (length);
1433 bitmap_clear (candidates);
1434 nr_candidates = 0;
1435 FOR_EACH_VEC_ELT (*ops, i, oe1)
1437 enum tree_code dcode;
1438 gimple *oe1def;
1440 if (TREE_CODE (oe1->op) != SSA_NAME)
1441 continue;
1442 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1443 if (!is_gimple_assign (oe1def))
1444 continue;
1445 dcode = gimple_assign_rhs_code (oe1def);
1446 if ((dcode != MULT_EXPR
1447 && dcode != RDIV_EXPR)
1448 || !is_reassociable_op (oe1def, dcode, loop))
1449 continue;
1451 bitmap_set_bit (candidates, i);
1452 nr_candidates++;
1455 if (nr_candidates < 2)
1457 sbitmap_free (candidates);
1458 return false;
1461 if (dump_file && (dump_flags & TDF_DETAILS))
1463 fprintf (dump_file, "searching for un-distribute opportunities ");
1464 print_generic_expr (dump_file,
1465 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1466 fprintf (dump_file, " %d\n", nr_candidates);
1469 /* Build linearized sub-operand lists and the counting table. */
1470 cvec.create (0);
1472 hash_table<oecount_hasher> ctable (15);
1474 /* ??? Macro arguments cannot have multi-argument template types in
1475 them. This typedef is needed to work around that limitation. */
1476 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1477 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1478 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1480 gimple *oedef;
1481 enum tree_code oecode;
1482 unsigned j;
1484 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1485 oecode = gimple_assign_rhs_code (oedef);
1486 linearize_expr_tree (&subops[i], oedef,
1487 associative_tree_code (oecode), false);
1489 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1491 oecount c;
1492 int *slot;
1493 int idx;
1494 c.oecode = oecode;
1495 c.cnt = 1;
1496 c.id = next_oecount_id++;
1497 c.op = oe1->op;
1498 cvec.safe_push (c);
1499 idx = cvec.length () + 41;
1500 slot = ctable.find_slot (idx, INSERT);
1501 if (!*slot)
1503 *slot = idx;
1505 else
1507 cvec.pop ();
1508 cvec[*slot - 42].cnt++;
1513 /* Sort the counting table. */
1514 cvec.qsort (oecount_cmp);
1516 if (dump_file && (dump_flags & TDF_DETAILS))
1518 oecount *c;
1519 fprintf (dump_file, "Candidates:\n");
1520 FOR_EACH_VEC_ELT (cvec, j, c)
1522 fprintf (dump_file, " %u %s: ", c->cnt,
1523 c->oecode == MULT_EXPR
1524 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1525 print_generic_expr (dump_file, c->op, 0);
1526 fprintf (dump_file, "\n");
1530 /* Process the (operand, code) pairs in order of most occurrence. */
1531 candidates2 = sbitmap_alloc (length);
1532 while (!cvec.is_empty ())
1534 oecount *c = &cvec.last ();
1535 if (c->cnt < 2)
1536 break;
1538 /* Now collect the operands in the outer chain that contain
1539 the common operand in their inner chain. */
1540 bitmap_clear (candidates2);
1541 nr_candidates2 = 0;
1542 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1544 gimple *oedef;
1545 enum tree_code oecode;
1546 unsigned j;
1547 tree op = (*ops)[i]->op;
1549 /* If we undistributed in this chain already this may be
1550 a constant. */
1551 if (TREE_CODE (op) != SSA_NAME)
1552 continue;
1554 oedef = SSA_NAME_DEF_STMT (op);
1555 oecode = gimple_assign_rhs_code (oedef);
1556 if (oecode != c->oecode)
1557 continue;
1559 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1561 if (oe1->op == c->op)
1563 bitmap_set_bit (candidates2, i);
1564 ++nr_candidates2;
1565 break;
1570 if (nr_candidates2 >= 2)
1572 operand_entry *oe1, *oe2;
1573 gimple *prod;
1574 int first = bitmap_first_set_bit (candidates2);
1576 /* Build the new addition chain. */
1577 oe1 = (*ops)[first];
1578 if (dump_file && (dump_flags & TDF_DETAILS))
1580 fprintf (dump_file, "Building (");
1581 print_generic_expr (dump_file, oe1->op, 0);
1583 zero_one_operation (&oe1->op, c->oecode, c->op);
1584 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1586 gimple *sum;
1587 oe2 = (*ops)[i];
1588 if (dump_file && (dump_flags & TDF_DETAILS))
1590 fprintf (dump_file, " + ");
1591 print_generic_expr (dump_file, oe2->op, 0);
1593 zero_one_operation (&oe2->op, c->oecode, c->op);
1594 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1595 oe1->op, oe2->op, opcode);
1596 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1597 oe2->rank = 0;
1598 oe1->op = gimple_get_lhs (sum);
1601 /* Apply the multiplication/division. */
1602 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1603 oe1->op, c->op, c->oecode);
1604 if (dump_file && (dump_flags & TDF_DETAILS))
1606 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1607 print_generic_expr (dump_file, c->op, 0);
1608 fprintf (dump_file, "\n");
1611 /* Record it in the addition chain and disable further
1612 undistribution with this op. */
1613 oe1->op = gimple_assign_lhs (prod);
1614 oe1->rank = get_rank (oe1->op);
1615 subops[first].release ();
1617 changed = true;
1620 cvec.pop ();
1623 for (i = 0; i < ops->length (); ++i)
1624 subops[i].release ();
1625 free (subops);
1626 cvec.release ();
1627 sbitmap_free (candidates);
1628 sbitmap_free (candidates2);
1630 return changed;
1633 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1634 expression, examine the other OPS to see if any of them are comparisons
1635 of the same values, which we may be able to combine or eliminate.
1636 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1638 static bool
1639 eliminate_redundant_comparison (enum tree_code opcode,
1640 vec<operand_entry *> *ops,
1641 unsigned int currindex,
1642 operand_entry *curr)
1644 tree op1, op2;
1645 enum tree_code lcode, rcode;
1646 gimple *def1, *def2;
1647 int i;
1648 operand_entry *oe;
1650 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1651 return false;
1653 /* Check that CURR is a comparison. */
1654 if (TREE_CODE (curr->op) != SSA_NAME)
1655 return false;
1656 def1 = SSA_NAME_DEF_STMT (curr->op);
1657 if (!is_gimple_assign (def1))
1658 return false;
1659 lcode = gimple_assign_rhs_code (def1);
1660 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1661 return false;
1662 op1 = gimple_assign_rhs1 (def1);
1663 op2 = gimple_assign_rhs2 (def1);
1665 /* Now look for a similar comparison in the remaining OPS. */
1666 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1668 tree t;
1670 if (TREE_CODE (oe->op) != SSA_NAME)
1671 continue;
1672 def2 = SSA_NAME_DEF_STMT (oe->op);
1673 if (!is_gimple_assign (def2))
1674 continue;
1675 rcode = gimple_assign_rhs_code (def2);
1676 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1677 continue;
1679 /* If we got here, we have a match. See if we can combine the
1680 two comparisons. */
1681 if (opcode == BIT_IOR_EXPR)
1682 t = maybe_fold_or_comparisons (lcode, op1, op2,
1683 rcode, gimple_assign_rhs1 (def2),
1684 gimple_assign_rhs2 (def2));
1685 else
1686 t = maybe_fold_and_comparisons (lcode, op1, op2,
1687 rcode, gimple_assign_rhs1 (def2),
1688 gimple_assign_rhs2 (def2));
1689 if (!t)
1690 continue;
1692 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1693 always give us a boolean_type_node value back. If the original
1694 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1695 we need to convert. */
1696 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1697 t = fold_convert (TREE_TYPE (curr->op), t);
1699 if (TREE_CODE (t) != INTEGER_CST
1700 && !operand_equal_p (t, curr->op, 0))
1702 enum tree_code subcode;
1703 tree newop1, newop2;
1704 if (!COMPARISON_CLASS_P (t))
1705 continue;
1706 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1707 STRIP_USELESS_TYPE_CONVERSION (newop1);
1708 STRIP_USELESS_TYPE_CONVERSION (newop2);
1709 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1710 continue;
1713 if (dump_file && (dump_flags & TDF_DETAILS))
1715 fprintf (dump_file, "Equivalence: ");
1716 print_generic_expr (dump_file, curr->op, 0);
1717 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1718 print_generic_expr (dump_file, oe->op, 0);
1719 fprintf (dump_file, " -> ");
1720 print_generic_expr (dump_file, t, 0);
1721 fprintf (dump_file, "\n");
1724 /* Now we can delete oe, as it has been subsumed by the new combined
1725 expression t. */
1726 ops->ordered_remove (i);
1727 reassociate_stats.ops_eliminated ++;
1729 /* If t is the same as curr->op, we're done. Otherwise we must
1730 replace curr->op with t. The special case is when we got a constant
1731 back, in which case we add it to the end instead of in place of
1732 the current entry. */
1733 if (TREE_CODE (t) == INTEGER_CST)
1735 ops->ordered_remove (currindex);
1736 add_to_ops_vec (ops, t);
1738 else if (!operand_equal_p (t, curr->op, 0))
1740 gimple *sum;
1741 enum tree_code subcode;
1742 tree newop1;
1743 tree newop2;
1744 gcc_assert (COMPARISON_CLASS_P (t));
1745 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1746 STRIP_USELESS_TYPE_CONVERSION (newop1);
1747 STRIP_USELESS_TYPE_CONVERSION (newop2);
1748 gcc_checking_assert (is_gimple_val (newop1)
1749 && is_gimple_val (newop2));
1750 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1751 curr->op = gimple_get_lhs (sum);
1753 return true;
1756 return false;
1759 /* Perform various identities and other optimizations on the list of
1760 operand entries, stored in OPS. The tree code for the binary
1761 operation between all the operands is OPCODE. */
1763 static void
1764 optimize_ops_list (enum tree_code opcode,
1765 vec<operand_entry *> *ops)
1767 unsigned int length = ops->length ();
1768 unsigned int i;
1769 operand_entry *oe;
1770 operand_entry *oelast = NULL;
1771 bool iterate = false;
1773 if (length == 1)
1774 return;
1776 oelast = ops->last ();
1778 /* If the last two are constants, pop the constants off, merge them
1779 and try the next two. */
1780 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1782 operand_entry *oelm1 = (*ops)[length - 2];
1784 if (oelm1->rank == 0
1785 && is_gimple_min_invariant (oelm1->op)
1786 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1787 TREE_TYPE (oelast->op)))
1789 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1790 oelm1->op, oelast->op);
1792 if (folded && is_gimple_min_invariant (folded))
1794 if (dump_file && (dump_flags & TDF_DETAILS))
1795 fprintf (dump_file, "Merging constants\n");
1797 ops->pop ();
1798 ops->pop ();
1800 add_to_ops_vec (ops, folded);
1801 reassociate_stats.constants_eliminated++;
1803 optimize_ops_list (opcode, ops);
1804 return;
1809 eliminate_using_constants (opcode, ops);
1810 oelast = NULL;
1812 for (i = 0; ops->iterate (i, &oe);)
1814 bool done = false;
1816 if (eliminate_not_pairs (opcode, ops, i, oe))
1817 return;
1818 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1819 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1820 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1822 if (done)
1823 return;
1824 iterate = true;
1825 oelast = NULL;
1826 continue;
1828 oelast = oe;
1829 i++;
1832 length = ops->length ();
1833 oelast = ops->last ();
1835 if (iterate)
1836 optimize_ops_list (opcode, ops);
1839 /* The following functions are subroutines to optimize_range_tests and allow
1840 it to try to change a logical combination of comparisons into a range
1841 test.
1843 For example, both
1844 X == 2 || X == 5 || X == 3 || X == 4
1845 and
1846 X >= 2 && X <= 5
1847 are converted to
1848 (unsigned) (X - 2) <= 3
1850 For more information see comments above fold_test_range in fold-const.c,
1851 this implementation is for GIMPLE. */
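/* Roughly, as a sketch of the data flow: each comparison becomes a
   range_entry, e.g. X == 2 becomes the range +[2, 2] on X and X >= 2
   becomes +[2, -].  Entries on the same SSA_NAME are then merged, so
   X == 2 || X == 3 || X == 4 || X == 5 becomes +[2, 5], which
   build_range_check emits as (unsigned) (X - 2) <= 3.  */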
1853 struct range_entry
1855 tree exp;
1856 tree low;
1857 tree high;
1858 bool in_p;
1859 bool strict_overflow_p;
1860 unsigned int idx, next;
1863 /* This is similar to make_range in fold-const.c, but on top of
1864 GIMPLE instead of trees. If EXP is non-NULL, it should be
1865 an SSA_NAME and STMT argument is ignored, otherwise STMT
1866 argument should be a GIMPLE_COND. */
1868 static void
1869 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
1871 int in_p;
1872 tree low, high;
1873 bool is_bool, strict_overflow_p;
1875 r->exp = NULL_TREE;
1876 r->in_p = false;
1877 r->strict_overflow_p = false;
1878 r->low = NULL_TREE;
1879 r->high = NULL_TREE;
1880 if (exp != NULL_TREE
1881 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1882 return;
1884 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1885 and see if we can refine the range. Some of the cases below may not
1886 happen, but it doesn't seem worth worrying about this. We "continue"
1887 the outer loop when we've changed something; otherwise we "break"
1888 the switch, which will "break" the while. */
1889 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1890 high = low;
1891 in_p = 0;
1892 strict_overflow_p = false;
1893 is_bool = false;
1894 if (exp == NULL_TREE)
1895 is_bool = true;
1896 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1898 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1899 is_bool = true;
1900 else
1901 return;
1903 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1904 is_bool = true;
1906 while (1)
1908 enum tree_code code;
1909 tree arg0, arg1, exp_type;
1910 tree nexp;
1911 location_t loc;
1913 if (exp != NULL_TREE)
1915 if (TREE_CODE (exp) != SSA_NAME
1916 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1917 break;
1919 stmt = SSA_NAME_DEF_STMT (exp);
1920 if (!is_gimple_assign (stmt))
1921 break;
1923 code = gimple_assign_rhs_code (stmt);
1924 arg0 = gimple_assign_rhs1 (stmt);
1925 arg1 = gimple_assign_rhs2 (stmt);
1926 exp_type = TREE_TYPE (exp);
1928 else
1930 code = gimple_cond_code (stmt);
1931 arg0 = gimple_cond_lhs (stmt);
1932 arg1 = gimple_cond_rhs (stmt);
1933 exp_type = boolean_type_node;
1936 if (TREE_CODE (arg0) != SSA_NAME)
1937 break;
1938 loc = gimple_location (stmt);
1939 switch (code)
1941 case BIT_NOT_EXPR:
1942 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1943 /* Ensure the range is either +[-,0], +[0,0],
1944 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1945 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1946 or similar expression of unconditional true or
1947 false, it should not be negated. */
1948 && ((high && integer_zerop (high))
1949 || (low && integer_onep (low))))
1951 in_p = !in_p;
1952 exp = arg0;
1953 continue;
1955 break;
1956 case SSA_NAME:
1957 exp = arg0;
1958 continue;
1959 CASE_CONVERT:
1960 if (is_bool)
1961 goto do_default;
1962 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1964 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1965 is_bool = true;
1966 else
1967 return;
1969 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1970 is_bool = true;
1971 goto do_default;
1972 case EQ_EXPR:
1973 case NE_EXPR:
1974 case LT_EXPR:
1975 case LE_EXPR:
1976 case GE_EXPR:
1977 case GT_EXPR:
1978 is_bool = true;
1979 /* FALLTHRU */
1980 default:
1981 if (!is_bool)
1982 return;
1983 do_default:
1984 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1985 &low, &high, &in_p,
1986 &strict_overflow_p);
1987 if (nexp != NULL_TREE)
1989 exp = nexp;
1990 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1991 continue;
1993 break;
1995 break;
1997 if (is_bool)
1999 r->exp = exp;
2000 r->in_p = in_p;
2001 r->low = low;
2002 r->high = high;
2003 r->strict_overflow_p = strict_overflow_p;
2007 /* Comparison function for qsort. Sort entries
2008 without SSA_NAME exp first, then with SSA_NAMEs sorted
2009 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2010 by increasing ->low and if ->low is the same, by increasing
2011 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2012 maximum. */
2014 static int
2015 range_entry_cmp (const void *a, const void *b)
2017 const struct range_entry *p = (const struct range_entry *) a;
2018 const struct range_entry *q = (const struct range_entry *) b;
2020 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2022 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2024 /* Group range_entries for the same SSA_NAME together. */
2025 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2026 return -1;
2027 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2028 return 1;
2029 /* If ->low is different, NULL low goes first, then by
2030 ascending low. */
2031 if (p->low != NULL_TREE)
2033 if (q->low != NULL_TREE)
2035 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2036 p->low, q->low);
2037 if (tem && integer_onep (tem))
2038 return -1;
2039 tem = fold_binary (GT_EXPR, boolean_type_node,
2040 p->low, q->low);
2041 if (tem && integer_onep (tem))
2042 return 1;
2044 else
2045 return 1;
2047 else if (q->low != NULL_TREE)
2048 return -1;
2049 /* If ->high is different, NULL high goes last, before that by
2050 ascending high. */
2051 if (p->high != NULL_TREE)
2053 if (q->high != NULL_TREE)
2055 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2056 p->high, q->high);
2057 if (tem && integer_onep (tem))
2058 return -1;
2059 tem = fold_binary (GT_EXPR, boolean_type_node,
2060 p->high, q->high);
2061 if (tem && integer_onep (tem))
2062 return 1;
2064 else
2065 return -1;
2067 else if (q->high != NULL_TREE)
2068 return 1;
2069 /* If both ranges are the same, sort below by ascending idx. */
2071 else
2072 return 1;
2074 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2075 return -1;
2077 if (p->idx < q->idx)
2078 return -1;
2079 else
2081 gcc_checking_assert (p->idx > q->idx);
2082 return 1;
2086 /* Helper routine of optimize_range_test.
2087 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2088 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2089 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2090 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2091 an array of COUNT pointers to other ranges. Return
2092 true if the range merge has been successful.
2093 If OPCODE is ERROR_MARK, this is called from within
2094 maybe_optimize_range_tests and is performing inter-bb range optimization.
2095 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2096 oe->rank. */
2098 static bool
2099 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2100 struct range_entry **otherrangep,
2101 unsigned int count, enum tree_code opcode,
2102 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2103 bool in_p, tree low, tree high, bool strict_overflow_p)
2105 operand_entry *oe = (*ops)[range->idx];
2106 tree op = oe->op;
2107 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2108 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2109 location_t loc = gimple_location (stmt);
2110 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2111 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2112 in_p, low, high);
2113 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2114 gimple_stmt_iterator gsi;
2115 unsigned int i, uid;
2117 if (tem == NULL_TREE)
2118 return false;
2120 /* If op is default def SSA_NAME, there is no place to insert the
2121 new comparison. Give up, unless we can use OP itself as the
2122 range test. */
2123 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2125 if (op == range->exp
2126 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2127 || TREE_CODE (optype) == BOOLEAN_TYPE)
2128 && (op == tem
2129 || (TREE_CODE (tem) == EQ_EXPR
2130 && TREE_OPERAND (tem, 0) == op
2131 && integer_onep (TREE_OPERAND (tem, 1))))
2132 && opcode != BIT_IOR_EXPR
2133 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2135 stmt = NULL;
2136 tem = op;
2138 else
2139 return false;
2142 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2143 warning_at (loc, OPT_Wstrict_overflow,
2144 "assuming signed overflow does not occur "
2145 "when simplifying range test");
2147 if (dump_file && (dump_flags & TDF_DETAILS))
2149 struct range_entry *r;
2150 fprintf (dump_file, "Optimizing range tests ");
2151 print_generic_expr (dump_file, range->exp, 0);
2152 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2153 print_generic_expr (dump_file, range->low, 0);
2154 fprintf (dump_file, ", ");
2155 print_generic_expr (dump_file, range->high, 0);
2156 fprintf (dump_file, "]");
2157 for (i = 0; i < count; i++)
2159 if (otherrange)
2160 r = otherrange + i;
2161 else
2162 r = otherrangep[i];
2163 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2164 print_generic_expr (dump_file, r->low, 0);
2165 fprintf (dump_file, ", ");
2166 print_generic_expr (dump_file, r->high, 0);
2167 fprintf (dump_file, "]");
2169 fprintf (dump_file, "\n into ");
2170 print_generic_expr (dump_file, tem, 0);
2171 fprintf (dump_file, "\n");
2174 if (opcode == BIT_IOR_EXPR
2175 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2176 tem = invert_truthvalue_loc (loc, tem);
2178 tem = fold_convert_loc (loc, optype, tem);
2179 if (stmt)
2181 gsi = gsi_for_stmt (stmt);
2182 uid = gimple_uid (stmt);
2184 else
2186 gsi = gsi_none ();
2187 uid = 0;
2189 if (stmt == NULL)
2190 gcc_checking_assert (tem == op);
2191 /* In rare cases range->exp can be equal to lhs of stmt.
2192 In that case we have to insert after the stmt rather than before
2193 it. If stmt is a PHI, insert it at the start of the basic block. */
2194 else if (op != range->exp)
2196 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2197 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2198 GSI_SAME_STMT);
2199 gsi_prev (&gsi);
2201 else if (gimple_code (stmt) != GIMPLE_PHI)
2203 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2204 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2205 GSI_CONTINUE_LINKING);
2207 else
2209 gsi = gsi_after_labels (gimple_bb (stmt));
2210 if (!gsi_end_p (gsi))
2211 uid = gimple_uid (gsi_stmt (gsi));
2212 else
2214 gsi = gsi_start_bb (gimple_bb (stmt));
2215 uid = 1;
2216 while (!gsi_end_p (gsi))
2218 uid = gimple_uid (gsi_stmt (gsi));
2219 gsi_next (&gsi);
2222 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2223 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2224 GSI_SAME_STMT);
2225 if (gsi_end_p (gsi))
2226 gsi = gsi_last_bb (gimple_bb (stmt));
2227 else
2228 gsi_prev (&gsi);
2230 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2231 if (gimple_uid (gsi_stmt (gsi)))
2232 break;
2233 else
2234 gimple_set_uid (gsi_stmt (gsi), uid);
2236 oe->op = tem;
2237 range->exp = exp;
2238 range->low = low;
2239 range->high = high;
2240 range->in_p = in_p;
2241 range->strict_overflow_p = false;
2243 for (i = 0; i < count; i++)
2245 if (otherrange)
2246 range = otherrange + i;
2247 else
2248 range = otherrangep[i];
2249 oe = (*ops)[range->idx];
2250 /* Now change all the other range test immediate uses, so that
2251 those tests will be optimized away. */
2252 if (opcode == ERROR_MARK)
2254 if (oe->op)
2255 oe->op = build_int_cst (TREE_TYPE (oe->op),
2256 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2257 else
2258 oe->op = (oe->rank == BIT_IOR_EXPR
2259 ? boolean_false_node : boolean_true_node);
2261 else
2262 oe->op = error_mark_node;
2263 range->exp = NULL_TREE;
2265 return true;
2268 /* Optimize X == CST1 || X == CST2
2269 if popcount (CST1 ^ CST2) == 1 into
2270 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2271 Similarly for ranges. E.g.
2272 X != 2 && X != 3 && X != 10 && X != 11
2273 will be transformed by the previous optimization into
2274 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2275 and this loop can transform that into
2276 !(((X & ~8) - 2U) <= 1U). */
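/* As a concrete sketch of the single-pair case (values chosen here purely
   for illustration): with CST1 == 43 and CST2 == 47 we have
   CST1 ^ CST2 == 4, a power of two, so

     int before (unsigned x) { return x == 43 || x == 47; }
     int after (unsigned x) { return (x & ~4U) == (43U & ~4U); }

   compute the same value for every X, because the two constants differ
   only in the single bit that the mask clears.  */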
2278 static bool
2279 optimize_range_tests_xor (enum tree_code opcode, tree type,
2280 tree lowi, tree lowj, tree highi, tree highj,
2281 vec<operand_entry *> *ops,
2282 struct range_entry *rangei,
2283 struct range_entry *rangej)
2285 tree lowxor, highxor, tem, exp;
2286 /* Check lowi ^ lowj == highi ^ highj and
2287 popcount (lowi ^ lowj) == 1. */
2288 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2289 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2290 return false;
2291 if (!integer_pow2p (lowxor))
2292 return false;
2293 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2294 if (!tree_int_cst_equal (lowxor, highxor))
2295 return false;
2297 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2298 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2299 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2300 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2301 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2302 NULL, rangei->in_p, lowj, highj,
2303 rangei->strict_overflow_p
2304 || rangej->strict_overflow_p))
2305 return true;
2306 return false;
2309 /* Optimize X == CST1 || X == CST2
2310 if popcount (CST2 - CST1) == 1 into
2311 ((X - CST1) & ~(CST2 - CST1)) == 0.
2312 Similarly for ranges. E.g.
2313 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2314 || X == 75 || X == 45
2315 will be transformed by the previous optimization into
2316 (X - 43U) <= 3U || (X - 75U) <= 3U
2317 and this loop can transform that into
2318 ((X - 43U) & ~(75U - 43U)) <= 3U. */
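/* To make the arithmetic in the comment above concrete: in the second
   example lowi == 43, lowj == 75, highi - lowi == highj - lowj == 3,
   and lowj - lowi == 32 is a power of two, so

     int before (unsigned x) { return (x - 43U) <= 3U || (x - 75U) <= 3U; }
     int after (unsigned x) { return ((x - 43U) & ~32U) <= 3U; }

   agree for every X: subtracting 43 maps the two ranges to [0, 3] and
   [32, 35], which differ only in bit 5, the bit the mask clears.  */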
2319 static bool
2320 optimize_range_tests_diff (enum tree_code opcode, tree type,
2321 tree lowi, tree lowj, tree highi, tree highj,
2322 vec<operand_entry *> *ops,
2323 struct range_entry *rangei,
2324 struct range_entry *rangej)
2326 tree tem1, tem2, mask;
2327 /* Check highi - lowi == highj - lowj. */
2328 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2329 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2330 return false;
2331 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2332 if (!tree_int_cst_equal (tem1, tem2))
2333 return false;
2334 /* Check popcount (lowj - lowi) == 1. */
2335 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2336 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2337 return false;
2338 if (!integer_pow2p (tem1))
2339 return false;
2341 type = unsigned_type_for (type);
2342 tem1 = fold_convert (type, tem1);
2343 tem2 = fold_convert (type, tem2);
2344 lowi = fold_convert (type, lowi);
2345 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2346 tem1 = fold_binary (MINUS_EXPR, type,
2347 fold_convert (type, rangei->exp), lowi);
2348 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2349 lowj = build_int_cst (type, 0);
2350 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2351 NULL, rangei->in_p, lowj, tem2,
2352 rangei->strict_overflow_p
2353 || rangej->strict_overflow_p))
2354 return true;
2355 return false;
2358 /* Perform the checks common to optimize_range_tests_xor and
2359 optimize_range_tests_diff.
2360 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2361 Else it calls optimize_range_tests_diff. */
2363 static bool
2364 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2365 bool optimize_xor, vec<operand_entry *> *ops,
2366 struct range_entry *ranges)
2368 int i, j;
2369 bool any_changes = false;
2370 for (i = first; i < length; i++)
2372 tree lowi, highi, lowj, highj, type, tem;
2374 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2375 continue;
2376 type = TREE_TYPE (ranges[i].exp);
2377 if (!INTEGRAL_TYPE_P (type))
2378 continue;
2379 lowi = ranges[i].low;
2380 if (lowi == NULL_TREE)
2381 lowi = TYPE_MIN_VALUE (type);
2382 highi = ranges[i].high;
2383 if (highi == NULL_TREE)
2384 continue;
2385 for (j = i + 1; j < length && j < i + 64; j++)
2387 bool changes;
2388 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2389 continue;
2390 lowj = ranges[j].low;
2391 if (lowj == NULL_TREE)
2392 continue;
2393 highj = ranges[j].high;
2394 if (highj == NULL_TREE)
2395 highj = TYPE_MAX_VALUE (type);
2396 /* Check lowj > highi. */
2397 tem = fold_binary (GT_EXPR, boolean_type_node,
2398 lowj, highi);
2399 if (tem == NULL_TREE || !integer_onep (tem))
2400 continue;
2401 if (optimize_xor)
2402 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2403 highi, highj, ops,
2404 ranges + i, ranges + j);
2405 else
2406 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2407 highi, highj, ops,
2408 ranges + i, ranges + j);
2409 if (changes)
2411 any_changes = true;
2412 break;
2416 return any_changes;
2419 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2420 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2421 EXP on success, NULL otherwise. */
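/* For example, with a 64-bit PREC, TOTALLOW == 43, LOW == 75 and
   HIGH == 78 (the values from the example further below), the initial
   mask is 0xf (HIGH - LOW + 1 low bits) and is then shifted left by
   LOW - TOTALLOW == 32, giving 0xf00000000: the bits that a
   "1 << (X - 43)" style test needs to check for this subrange.  */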
2423 static tree
2424 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2425 wide_int *mask, tree *totallowp)
2427 tree tem = int_const_binop (MINUS_EXPR, high, low);
2428 if (tem == NULL_TREE
2429 || TREE_CODE (tem) != INTEGER_CST
2430 || TREE_OVERFLOW (tem)
2431 || tree_int_cst_sgn (tem) == -1
2432 || compare_tree_int (tem, prec) != -1)
2433 return NULL_TREE;
2435 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2436 *mask = wi::shifted_mask (0, max, false, prec);
2437 if (TREE_CODE (exp) == BIT_AND_EXPR
2438 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2440 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2441 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2442 if (wi::popcount (msk) == 1
2443 && wi::ltu_p (msk, prec - max))
2445 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2446 max += msk.to_uhwi ();
2447 exp = TREE_OPERAND (exp, 0);
2448 if (integer_zerop (low)
2449 && TREE_CODE (exp) == PLUS_EXPR
2450 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2452 tree ret = TREE_OPERAND (exp, 0);
2453 STRIP_NOPS (ret);
2454 widest_int bias
2455 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2456 TYPE_PRECISION (TREE_TYPE (low))));
2457 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2458 if (totallowp)
2460 *totallowp = tbias;
2461 return ret;
2463 else if (!tree_int_cst_lt (totallow, tbias))
2464 return NULL_TREE;
2465 bias = wi::to_widest (tbias);
2466 bias -= wi::to_widest (totallow);
2467 if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
2469 *mask = wi::lshift (*mask, bias);
2470 return ret;
2475 if (totallowp)
2476 return exp;
2477 if (!tree_int_cst_lt (totallow, low))
2478 return exp;
2479 tem = int_const_binop (MINUS_EXPR, low, totallow);
2480 if (tem == NULL_TREE
2481 || TREE_CODE (tem) != INTEGER_CST
2482 || TREE_OVERFLOW (tem)
2483 || compare_tree_int (tem, prec - max) == 1)
2484 return NULL_TREE;
2486 *mask = wi::lshift (*mask, wi::to_widest (tem));
2487 return exp;
2490 /* Attempt to optimize small range tests using bit test.
2491 E.g.
2492 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2493 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2494 has been optimized by earlier optimizations into:
2495 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2496 As all the 43 through 82 range is less than 64 numbers,
2497 for 64-bit word targets optimize that into:
2498 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
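/* In the example above the constant 0x8F0000004FULL simply has one bit
   set per tested constant, at position value - 43: bits 0-3 for 43..46,
   bit 6 for 49, bits 32-35 for 75..78 and bit 39 for 82, which is why a
   single shift, AND and compare can replace the whole chain of tests.  */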
2500 static bool
2501 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2502 vec<operand_entry *> *ops,
2503 struct range_entry *ranges)
2505 int i, j;
2506 bool any_changes = false;
2507 int prec = GET_MODE_BITSIZE (word_mode);
2508 auto_vec<struct range_entry *, 64> candidates;
2510 for (i = first; i < length - 2; i++)
2512 tree lowi, highi, lowj, highj, type;
2514 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2515 continue;
2516 type = TREE_TYPE (ranges[i].exp);
2517 if (!INTEGRAL_TYPE_P (type))
2518 continue;
2519 lowi = ranges[i].low;
2520 if (lowi == NULL_TREE)
2521 lowi = TYPE_MIN_VALUE (type);
2522 highi = ranges[i].high;
2523 if (highi == NULL_TREE)
2524 continue;
2525 wide_int mask;
2526 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2527 highi, &mask, &lowi);
2528 if (exp == NULL_TREE)
2529 continue;
2530 bool strict_overflow_p = ranges[i].strict_overflow_p;
2531 candidates.truncate (0);
2532 int end = MIN (i + 64, length);
2533 for (j = i + 1; j < end; j++)
2535 tree exp2;
2536 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2537 continue;
2538 if (ranges[j].exp == exp)
2540 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2542 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2543 if (exp2 == exp)
2545 else if (TREE_CODE (exp2) == PLUS_EXPR)
2547 exp2 = TREE_OPERAND (exp2, 0);
2548 STRIP_NOPS (exp2);
2549 if (exp2 != exp)
2550 continue;
2552 else
2553 continue;
2555 else
2556 continue;
2557 lowj = ranges[j].low;
2558 if (lowj == NULL_TREE)
2559 continue;
2560 highj = ranges[j].high;
2561 if (highj == NULL_TREE)
2562 highj = TYPE_MAX_VALUE (type);
2563 wide_int mask2;
2564 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2565 highj, &mask2, NULL);
2566 if (exp2 != exp)
2567 continue;
2568 mask |= mask2;
2569 strict_overflow_p |= ranges[j].strict_overflow_p;
2570 candidates.safe_push (&ranges[j]);
2573 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2574 if (candidates.length () >= 2)
2576 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2577 wi::to_widest (lowi)
2578 + prec - 1 - wi::clz (mask));
2579 operand_entry *oe = (*ops)[ranges[i].idx];
2580 tree op = oe->op;
2581 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2582 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2583 location_t loc = gimple_location (stmt);
2584 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2586 /* See if it isn't cheaper to pretend the minimum value of the
2587 range is 0, if the maximum value is small enough.
2588 We can then avoid the subtraction of the minimum value, but the
2589 mask constant could perhaps be more expensive. */
2590 if (compare_tree_int (lowi, 0) > 0
2591 && compare_tree_int (high, prec) < 0)
2593 int cost_diff;
2594 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2595 rtx reg = gen_raw_REG (word_mode, 10000);
2596 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2597 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2598 GEN_INT (-m)), speed_p);
2599 rtx r = immed_wide_int_const (mask, word_mode);
2600 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2601 word_mode, speed_p);
2602 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2603 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2604 word_mode, speed_p);
2605 if (cost_diff > 0)
2607 mask = wi::lshift (mask, m);
2608 lowi = build_zero_cst (TREE_TYPE (lowi));
2612 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2613 false, lowi, high);
2614 if (tem == NULL_TREE || is_gimple_val (tem))
2615 continue;
2616 tree etype = unsigned_type_for (TREE_TYPE (exp));
2617 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2618 fold_convert_loc (loc, etype, exp),
2619 fold_convert_loc (loc, etype, lowi));
2620 exp = fold_convert_loc (loc, integer_type_node, exp);
2621 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2622 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2623 build_int_cst (word_type, 1), exp);
2624 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2625 wide_int_to_tree (word_type, mask));
2626 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2627 build_zero_cst (word_type));
2628 if (is_gimple_val (exp))
2629 continue;
2631 /* The shift might have undefined behavior if TEM is true,
2632 but reassociate_bb isn't prepared to have basic blocks
2633 split when it is running. So, temporarily emit code
2634 with BIT_IOR_EXPR instead of &&, and fix it up in
2635 branch_fixup. */
2636 gimple_seq seq;
2637 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2638 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2639 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2640 gimple_seq seq2;
2641 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2642 gimple_seq_add_seq_without_update (&seq, seq2);
2643 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2644 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2645 gimple *g = gimple_build_assign (make_ssa_name (optype),
2646 BIT_IOR_EXPR, tem, exp);
2647 gimple_set_location (g, loc);
2648 gimple_seq_add_stmt_without_update (&seq, g);
2649 exp = gimple_assign_lhs (g);
2650 tree val = build_zero_cst (optype);
2651 if (update_range_test (&ranges[i], NULL, candidates.address (),
2652 candidates.length (), opcode, ops, exp,
2653 seq, false, val, val, strict_overflow_p))
2655 any_changes = true;
2656 reassoc_branch_fixups.safe_push (tem);
2658 else
2659 gimple_seq_discard (seq);
2662 return any_changes;
2665 /* Optimize range tests, similarly how fold_range_test optimizes
2666 it on trees. The tree code for the binary
2667 operation between all the operands is OPCODE.
2668 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2669 maybe_optimize_range_tests for inter-bb range optimization.
2670 In that case, if oe->op is NULL, oe->id is the index of the bb whose
2671 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2672 the actual opcode. */
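/* As an illustration of the overall effect (a hypothetical input, not
   taken from a testcase): a chain such as

     if (a == 3 || a == 4 || (a >= 7 && a <= 9))

   is represented here as three range_entries +[3, 3], +[4, 4] and
   +[7, 9] for the same SSA_NAME; merge_ranges can combine adjacent ones
   and update_range_test can rewrite them into fewer comparisons, e.g.
   "(unsigned) (a - 3) <= 1 || (unsigned) (a - 7) <= 2".  */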
2674 static bool
2675 optimize_range_tests (enum tree_code opcode,
2676 vec<operand_entry *> *ops)
2678 unsigned int length = ops->length (), i, j, first;
2679 operand_entry *oe;
2680 struct range_entry *ranges;
2681 bool any_changes = false;
2683 if (length == 1)
2684 return false;
2686 ranges = XNEWVEC (struct range_entry, length);
2687 for (i = 0; i < length; i++)
2689 oe = (*ops)[i];
2690 ranges[i].idx = i;
2691 init_range_entry (ranges + i, oe->op,
2692 oe->op
2693 ? NULL
2694 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2695 /* For | invert it now, we will invert it again before emitting
2696 the optimized expression. */
2697 if (opcode == BIT_IOR_EXPR
2698 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2699 ranges[i].in_p = !ranges[i].in_p;
2702 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2703 for (i = 0; i < length; i++)
2704 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2705 break;
2707 /* Try to merge ranges. */
2708 for (first = i; i < length; i++)
2710 tree low = ranges[i].low;
2711 tree high = ranges[i].high;
2712 int in_p = ranges[i].in_p;
2713 bool strict_overflow_p = ranges[i].strict_overflow_p;
2714 int update_fail_count = 0;
2716 for (j = i + 1; j < length; j++)
2718 if (ranges[i].exp != ranges[j].exp)
2719 break;
2720 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2721 ranges[j].in_p, ranges[j].low, ranges[j].high))
2722 break;
2723 strict_overflow_p |= ranges[j].strict_overflow_p;
2726 if (j == i + 1)
2727 continue;
2729 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2730 opcode, ops, ranges[i].exp, NULL, in_p,
2731 low, high, strict_overflow_p))
2733 i = j - 1;
2734 any_changes = true;
2736 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2737 while update_range_test would fail. */
2738 else if (update_fail_count == 64)
2739 i = j - 1;
2740 else
2741 ++update_fail_count;
2744 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2745 ops, ranges);
2747 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2748 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2749 ops, ranges);
2750 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2751 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2752 ops, ranges);
2754 if (any_changes && opcode != ERROR_MARK)
2756 j = 0;
2757 FOR_EACH_VEC_ELT (*ops, i, oe)
2759 if (oe->op == error_mark_node)
2760 continue;
2761 else if (i != j)
2762 (*ops)[j] = oe;
2763 j++;
2765 ops->truncate (j);
2768 XDELETEVEC (ranges);
2769 return any_changes;
2772 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
2773 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
2774 otherwise the comparison code. */
2776 static tree_code
2777 ovce_extract_ops (tree var, gassign **rets, bool *reti)
2779 if (TREE_CODE (var) != SSA_NAME)
2780 return ERROR_MARK;
2782 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
2783 if (stmt == NULL)
2784 return ERROR_MARK;
2786 /* ??? If we start creating more COND_EXPR, we could perform
2787 this same optimization with them. For now, simplify. */
2788 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
2789 return ERROR_MARK;
2791 tree cond = gimple_assign_rhs1 (stmt);
2792 tree_code cmp = TREE_CODE (cond);
2793 if (TREE_CODE_CLASS (cmp) != tcc_comparison)
2794 return ERROR_MARK;
2796 /* ??? For now, allow only canonical true and false result vectors.
2797 We could expand this to other constants should the need arise,
2798 but at the moment we don't create them. */
2799 tree t = gimple_assign_rhs2 (stmt);
2800 tree f = gimple_assign_rhs3 (stmt);
2801 bool inv;
2802 if (integer_all_onesp (t))
2803 inv = false;
2804 else if (integer_all_onesp (f))
2806 cmp = invert_tree_comparison (cmp, false);
2807 inv = true;
2809 else
2810 return ERROR_MARK;
2811 if (!integer_zerop (f))
2812 return ERROR_MARK;
2814 /* Success! */
2815 if (rets)
2816 *rets = stmt;
2817 if (reti)
2818 *reti = inv;
2819 return cmp;
2822 /* Optimize the condition of VEC_COND_EXPRs which have been combined
2823 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
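/* E.g. (a sketch of the intent, using scalar-looking syntax for the
   vector lanes): the combination

     (x < y ? -1 : 0) & (x <= y ? -1 : 0)

   feeds two VEC_COND_EXPRs into a BIT_AND_EXPR; maybe_fold_and_comparisons
   can fold the pair of comparisons to the single x < y, so only one
   VEC_COND_EXPR remains and the other operand is dropped from OPS.  */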
2825 static bool
2826 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
2828 unsigned int length = ops->length (), i, j;
2829 bool any_changes = false;
2831 if (length == 1)
2832 return false;
2834 for (i = 0; i < length; ++i)
2836 tree elt0 = (*ops)[i]->op;
2838 gassign *stmt0;
2839 bool invert;
2840 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
2841 if (cmp0 == ERROR_MARK)
2842 continue;
2844 for (j = i + 1; j < length; ++j)
2846 tree &elt1 = (*ops)[j]->op;
2848 gassign *stmt1;
2849 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
2850 if (cmp1 == ERROR_MARK)
2851 continue;
2853 tree cond0 = gimple_assign_rhs1 (stmt0);
2854 tree x0 = TREE_OPERAND (cond0, 0);
2855 tree y0 = TREE_OPERAND (cond0, 1);
2857 tree cond1 = gimple_assign_rhs1 (stmt1);
2858 tree x1 = TREE_OPERAND (cond1, 0);
2859 tree y1 = TREE_OPERAND (cond1, 1);
2861 tree comb;
2862 if (opcode == BIT_AND_EXPR)
2863 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
2864 else if (opcode == BIT_IOR_EXPR)
2865 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
2866 else
2867 gcc_unreachable ();
2868 if (comb == NULL)
2869 continue;
2871 /* Success! */
2872 if (dump_file && (dump_flags & TDF_DETAILS))
2874 fprintf (dump_file, "Transforming ");
2875 print_generic_expr (dump_file, cond0, 0);
2876 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
2877 print_generic_expr (dump_file, cond1, 0);
2878 fprintf (dump_file, " into ");
2879 print_generic_expr (dump_file, comb, 0);
2880 fputc ('\n', dump_file);
2883 gimple_assign_set_rhs1 (stmt0, comb);
2884 if (invert)
2885 std::swap (*gimple_assign_rhs2_ptr (stmt0),
2886 *gimple_assign_rhs3_ptr (stmt0));
2887 update_stmt (stmt0);
2889 elt1 = error_mark_node;
2890 any_changes = true;
2894 if (any_changes)
2896 operand_entry *oe;
2897 j = 0;
2898 FOR_EACH_VEC_ELT (*ops, i, oe)
2900 if (oe->op == error_mark_node)
2901 continue;
2902 else if (i != j)
2903 (*ops)[j] = oe;
2904 j++;
2906 ops->truncate (j);
2909 return any_changes;
2912 /* Return true if STMT is a cast like:
2913 <bb N>:
2915 _123 = (int) _234;
2917 <bb M>:
2918 # _345 = PHI <_123(N), 1(...), 1(...)>
2919 where _234 has bool type, _123 has single use and
2920 bb N has a single successor M. This is commonly used in
2921 the last block of a range test. */
2923 static bool
2924 final_range_test_p (gimple *stmt)
2926 basic_block bb, rhs_bb;
2927 edge e;
2928 tree lhs, rhs;
2929 use_operand_p use_p;
2930 gimple *use_stmt;
2932 if (!gimple_assign_cast_p (stmt))
2933 return false;
2934 bb = gimple_bb (stmt);
2935 if (!single_succ_p (bb))
2936 return false;
2937 e = single_succ_edge (bb);
2938 if (e->flags & EDGE_COMPLEX)
2939 return false;
2941 lhs = gimple_assign_lhs (stmt);
2942 rhs = gimple_assign_rhs1 (stmt);
2943 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2944 || TREE_CODE (rhs) != SSA_NAME
2945 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2946 return false;
2948 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2949 if (!single_imm_use (lhs, &use_p, &use_stmt))
2950 return false;
2952 if (gimple_code (use_stmt) != GIMPLE_PHI
2953 || gimple_bb (use_stmt) != e->dest)
2954 return false;
2956 /* And that the rhs is defined in the same loop. */
2957 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2958 if (rhs_bb == NULL
2959 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2960 return false;
2962 return true;
2965 /* Return true if BB is suitable basic block for inter-bb range test
2966 optimization. If BACKWARD is true, BB should be the only predecessor
2967 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2968 or is compared against to find a common basic block to which all conditions
2969 branch if true resp. false. If BACKWARD is false, TEST_BB should
2970 be the only predecessor of BB. */
2972 static bool
2973 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2974 bool backward)
2976 edge_iterator ei, ei2;
2977 edge e, e2;
2978 gimple *stmt;
2979 gphi_iterator gsi;
2980 bool other_edge_seen = false;
2981 bool is_cond;
2983 if (test_bb == bb)
2984 return false;
2985 /* Check last stmt first. */
2986 stmt = last_stmt (bb);
2987 if (stmt == NULL
2988 || (gimple_code (stmt) != GIMPLE_COND
2989 && (backward || !final_range_test_p (stmt)))
2990 || gimple_visited_p (stmt)
2991 || stmt_could_throw_p (stmt)
2992 || *other_bb == bb)
2993 return false;
2994 is_cond = gimple_code (stmt) == GIMPLE_COND;
2995 if (is_cond)
2997 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2998 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2999 to *OTHER_BB (if not set yet, try to find it out). */
3000 if (EDGE_COUNT (bb->succs) != 2)
3001 return false;
3002 FOR_EACH_EDGE (e, ei, bb->succs)
3004 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3005 return false;
3006 if (e->dest == test_bb)
3008 if (backward)
3009 continue;
3010 else
3011 return false;
3013 if (e->dest == bb)
3014 return false;
3015 if (*other_bb == NULL)
3017 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
3018 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3019 return false;
3020 else if (e->dest == e2->dest)
3021 *other_bb = e->dest;
3022 if (*other_bb == NULL)
3023 return false;
3025 if (e->dest == *other_bb)
3026 other_edge_seen = true;
3027 else if (backward)
3028 return false;
3030 if (*other_bb == NULL || !other_edge_seen)
3031 return false;
3033 else if (single_succ (bb) != *other_bb)
3034 return false;
3036 /* Now check all PHIs of *OTHER_BB. */
3037 e = find_edge (bb, *other_bb);
3038 e2 = find_edge (test_bb, *other_bb);
3039 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3041 gphi *phi = gsi.phi ();
3042 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3043 corresponding to BB and TEST_BB predecessor must be the same. */
3044 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
3045 gimple_phi_arg_def (phi, e2->dest_idx), 0))
3047 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3048 one of the PHIs should have the lhs of the last stmt in
3049 that block as PHI arg and that PHI should have 0 or 1
3050 corresponding to it in all other range test basic blocks
3051 considered. */
3052 if (!is_cond)
3054 if (gimple_phi_arg_def (phi, e->dest_idx)
3055 == gimple_assign_lhs (stmt)
3056 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
3057 || integer_onep (gimple_phi_arg_def (phi,
3058 e2->dest_idx))))
3059 continue;
3061 else
3063 gimple *test_last = last_stmt (test_bb);
3064 if (gimple_code (test_last) != GIMPLE_COND
3065 && gimple_phi_arg_def (phi, e2->dest_idx)
3066 == gimple_assign_lhs (test_last)
3067 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
3068 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
3069 continue;
3072 return false;
3075 return true;
3078 /* Return true if BB doesn't have side-effects that would disallow
3079 range test optimization, all SSA_NAMEs set in the bb are consumed
3080 in the bb and there are no PHIs. */
3082 static bool
3083 no_side_effect_bb (basic_block bb)
3085 gimple_stmt_iterator gsi;
3086 gimple *last;
3088 if (!gimple_seq_empty_p (phi_nodes (bb)))
3089 return false;
3090 last = last_stmt (bb);
3091 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3093 gimple *stmt = gsi_stmt (gsi);
3094 tree lhs;
3095 imm_use_iterator imm_iter;
3096 use_operand_p use_p;
3098 if (is_gimple_debug (stmt))
3099 continue;
3100 if (gimple_has_side_effects (stmt))
3101 return false;
3102 if (stmt == last)
3103 return true;
3104 if (!is_gimple_assign (stmt))
3105 return false;
3106 lhs = gimple_assign_lhs (stmt);
3107 if (TREE_CODE (lhs) != SSA_NAME)
3108 return false;
3109 if (gimple_assign_rhs_could_trap_p (stmt))
3110 return false;
3111 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3113 gimple *use_stmt = USE_STMT (use_p);
3114 if (is_gimple_debug (use_stmt))
3115 continue;
3116 if (gimple_bb (use_stmt) != bb)
3117 return false;
3120 return false;
3123 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3124 return true and fill in *OPS recursively. */
3126 static bool
3127 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3128 struct loop *loop)
3130 gimple *stmt = SSA_NAME_DEF_STMT (var);
3131 tree rhs[2];
3132 int i;
3134 if (!is_reassociable_op (stmt, code, loop))
3135 return false;
3137 rhs[0] = gimple_assign_rhs1 (stmt);
3138 rhs[1] = gimple_assign_rhs2 (stmt);
3139 gimple_set_visited (stmt, true);
3140 for (i = 0; i < 2; i++)
3141 if (TREE_CODE (rhs[i]) == SSA_NAME
3142 && !get_ops (rhs[i], code, ops, loop)
3143 && has_single_use (rhs[i]))
3145 operand_entry *oe = operand_entry_pool.allocate ();
3147 oe->op = rhs[i];
3148 oe->rank = code;
3149 oe->id = 0;
3150 oe->count = 1;
3151 ops->safe_push (oe);
3153 return true;
3156 /* Find the ops that were added by get_ops starting from VAR, see if
3157 they were changed during update_range_test and if yes, create new
3158 stmts. */
3160 static tree
3161 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3162 unsigned int *pidx, struct loop *loop)
3164 gimple *stmt = SSA_NAME_DEF_STMT (var);
3165 tree rhs[4];
3166 int i;
3168 if (!is_reassociable_op (stmt, code, loop))
3169 return NULL;
3171 rhs[0] = gimple_assign_rhs1 (stmt);
3172 rhs[1] = gimple_assign_rhs2 (stmt);
3173 rhs[2] = rhs[0];
3174 rhs[3] = rhs[1];
3175 for (i = 0; i < 2; i++)
3176 if (TREE_CODE (rhs[i]) == SSA_NAME)
3178 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3179 if (rhs[2 + i] == NULL_TREE)
3181 if (has_single_use (rhs[i]))
3182 rhs[2 + i] = ops[(*pidx)++]->op;
3183 else
3184 rhs[2 + i] = rhs[i];
3187 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3188 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3190 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3191 var = make_ssa_name (TREE_TYPE (var));
3192 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3193 rhs[2], rhs[3]);
3194 gimple_set_uid (g, gimple_uid (stmt));
3195 gimple_set_visited (g, true);
3196 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3198 return var;
3201 /* Structure to track the initial value passed to get_ops and
3202 the range in the ops vector for each basic block. */
3204 struct inter_bb_range_test_entry
3206 tree op;
3207 unsigned int first_idx, last_idx;
3210 /* Inter-bb range test optimization.
3212 Returns TRUE if a gimple conditional is optimized to a true/false,
3213 otherwise return FALSE.
3215 This indicates to the caller that it should run a CFG cleanup pass
3216 once reassociation is completed. */
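/* The typical shape handled here (illustrative source, not a specific
   testcase) is short-circuit code such as

     if (a == 1 || a == 3 || a == 5)
       foo ();

   where each comparison ends up in its own basic block ending in a
   GIMPLE_COND, all of them branching to the same join block.  The
   conditions are collected into OPS, optimize_range_tests rewrites
   them, and the now-redundant GIMPLE_CONDs are turned into constant
   true/false conditions, hence the need for a later CFG cleanup.  */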
3218 static bool
3219 maybe_optimize_range_tests (gimple *stmt)
3221 basic_block first_bb = gimple_bb (stmt);
3222 basic_block last_bb = first_bb;
3223 basic_block other_bb = NULL;
3224 basic_block bb;
3225 edge_iterator ei;
3226 edge e;
3227 auto_vec<operand_entry *> ops;
3228 auto_vec<inter_bb_range_test_entry> bbinfo;
3229 bool any_changes = false;
3230 bool cfg_cleanup_needed = false;
3232 /* Consider only basic blocks that end with GIMPLE_COND or
3233 a cast statement satisfying final_range_test_p. All
3234 but the last bb in the first_bb .. last_bb range
3235 should end with GIMPLE_COND. */
3236 if (gimple_code (stmt) == GIMPLE_COND)
3238 if (EDGE_COUNT (first_bb->succs) != 2)
3239 return cfg_cleanup_needed;
3241 else if (final_range_test_p (stmt))
3242 other_bb = single_succ (first_bb);
3243 else
3244 return cfg_cleanup_needed;
3246 if (stmt_could_throw_p (stmt))
3247 return cfg_cleanup_needed;
3249 /* As relative ordering of post-dominator sons isn't fixed,
3250 maybe_optimize_range_tests can be called first on any
3251 bb in the range we want to optimize. So, start searching
3252 backwards, if first_bb can be set to a predecessor. */
3253 while (single_pred_p (first_bb))
3255 basic_block pred_bb = single_pred (first_bb);
3256 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3257 break;
3258 if (!no_side_effect_bb (first_bb))
3259 break;
3260 first_bb = pred_bb;
3262 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3263 Before starting forward search in last_bb successors, find
3264 out the other_bb. */
3265 if (first_bb == last_bb)
3267 other_bb = NULL;
3268 /* As non-GIMPLE_COND last stmt always terminates the range,
3269 if forward search didn't discover anything, just give up. */
3270 if (gimple_code (stmt) != GIMPLE_COND)
3271 return cfg_cleanup_needed;
3272 /* Look at both successors. Either it ends with a GIMPLE_COND
3273 and satisfies suitable_cond_bb, or ends with a cast and
3274 other_bb is that cast's successor. */
3275 FOR_EACH_EDGE (e, ei, first_bb->succs)
3276 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3277 || e->dest == first_bb)
3278 return cfg_cleanup_needed;
3279 else if (single_pred_p (e->dest))
3281 stmt = last_stmt (e->dest);
3282 if (stmt
3283 && gimple_code (stmt) == GIMPLE_COND
3284 && EDGE_COUNT (e->dest->succs) == 2)
3286 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3287 break;
3288 else
3289 other_bb = NULL;
3291 else if (stmt
3292 && final_range_test_p (stmt)
3293 && find_edge (first_bb, single_succ (e->dest)))
3295 other_bb = single_succ (e->dest);
3296 if (other_bb == first_bb)
3297 other_bb = NULL;
3300 if (other_bb == NULL)
3301 return cfg_cleanup_needed;
3303 /* Now do the forward search, moving last_bb to successor bbs
3304 that aren't other_bb. */
3305 while (EDGE_COUNT (last_bb->succs) == 2)
3307 FOR_EACH_EDGE (e, ei, last_bb->succs)
3308 if (e->dest != other_bb)
3309 break;
3310 if (e == NULL)
3311 break;
3312 if (!single_pred_p (e->dest))
3313 break;
3314 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3315 break;
3316 if (!no_side_effect_bb (e->dest))
3317 break;
3318 last_bb = e->dest;
3320 if (first_bb == last_bb)
3321 return cfg_cleanup_needed;
3322 /* Here basic blocks first_bb through last_bb's predecessor
3323 end with GIMPLE_COND, all of them have one of the edges to
3324 other_bb and another to another block in the range,
3325 all blocks except first_bb don't have side-effects and
3326 last_bb ends with either GIMPLE_COND, or cast satisfying
3327 final_range_test_p. */
3328 for (bb = last_bb; ; bb = single_pred (bb))
3330 enum tree_code code;
3331 tree lhs, rhs;
3332 inter_bb_range_test_entry bb_ent;
3334 bb_ent.op = NULL_TREE;
3335 bb_ent.first_idx = ops.length ();
3336 bb_ent.last_idx = bb_ent.first_idx;
3337 e = find_edge (bb, other_bb);
3338 stmt = last_stmt (bb);
3339 gimple_set_visited (stmt, true);
3340 if (gimple_code (stmt) != GIMPLE_COND)
3342 use_operand_p use_p;
3343 gimple *phi;
3344 edge e2;
3345 unsigned int d;
3347 lhs = gimple_assign_lhs (stmt);
3348 rhs = gimple_assign_rhs1 (stmt);
3349 gcc_assert (bb == last_bb);
3351 /* stmt is
3352 _123 = (int) _234;
3354 followed by:
3355 <bb M>:
3356 # _345 = PHI <_123(N), 1(...), 1(...)>
3358 or 0 instead of 1. If it is 0, the _234
3359 range test is anded together with all the
3360 other range tests, if it is 1, it is ored with
3361 them. */
3362 single_imm_use (lhs, &use_p, &phi);
3363 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3364 e2 = find_edge (first_bb, other_bb);
3365 d = e2->dest_idx;
3366 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3367 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3368 code = BIT_AND_EXPR;
3369 else
3371 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3372 code = BIT_IOR_EXPR;
3375 /* If _234 SSA_NAME_DEF_STMT is
3376 _234 = _567 | _789;
3377 (or &, corresponding to 1/0 in the phi arguments),
3378 push into ops the individual range test arguments
3379 of the bitwise or resp. and, recursively. */
3380 if (!get_ops (rhs, code, &ops,
3381 loop_containing_stmt (stmt))
3382 && has_single_use (rhs))
3384 /* Otherwise, push the _234 range test itself. */
3385 operand_entry *oe = operand_entry_pool.allocate ();
3387 oe->op = rhs;
3388 oe->rank = code;
3389 oe->id = 0;
3390 oe->count = 1;
3391 ops.safe_push (oe);
3392 bb_ent.last_idx++;
3394 else
3395 bb_ent.last_idx = ops.length ();
3396 bb_ent.op = rhs;
3397 bbinfo.safe_push (bb_ent);
3398 continue;
3400 /* Otherwise stmt is GIMPLE_COND. */
3401 code = gimple_cond_code (stmt);
3402 lhs = gimple_cond_lhs (stmt);
3403 rhs = gimple_cond_rhs (stmt);
3404 if (TREE_CODE (lhs) == SSA_NAME
3405 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3406 && ((code != EQ_EXPR && code != NE_EXPR)
3407 || rhs != boolean_false_node
3408 /* Either push into ops the individual bitwise
3409 or resp. and operands, depending on which
3410 edge is other_bb. */
3411 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3412 ^ (code == EQ_EXPR))
3413 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3414 loop_containing_stmt (stmt))))
3416 /* Or push the GIMPLE_COND stmt itself. */
3417 operand_entry *oe = operand_entry_pool.allocate ();
3419 oe->op = NULL;
3420 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3421 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3422 /* oe->op = NULL signals that there is no SSA_NAME
3423 for the range test, and oe->id instead is the
3424 basic block number at whose end the GIMPLE_COND
3425 is. */
3426 oe->id = bb->index;
3427 oe->count = 1;
3428 ops.safe_push (oe);
3429 bb_ent.op = NULL;
3430 bb_ent.last_idx++;
3432 else if (ops.length () > bb_ent.first_idx)
3434 bb_ent.op = lhs;
3435 bb_ent.last_idx = ops.length ();
3437 bbinfo.safe_push (bb_ent);
3438 if (bb == first_bb)
3439 break;
3441 if (ops.length () > 1)
3442 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3443 if (any_changes)
3445 unsigned int idx, max_idx = 0;
3446 /* update_ops relies on the has_single_use predicates returning the
3447 same values as they did during get_ops earlier. Additionally it
3448 never removes statements, only adds new ones and it should walk
3449 from the single imm use and check the predicate already before
3450 making those changes.
3451 On the other side, the handling of GIMPLE_COND directly can turn
3452 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3453 it needs to be done in a separate loop afterwards. */
3454 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3456 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3457 && bbinfo[idx].op != NULL_TREE)
3459 tree new_op;
3461 max_idx = idx;
3462 stmt = last_stmt (bb);
3463 new_op = update_ops (bbinfo[idx].op,
3464 (enum tree_code)
3465 ops[bbinfo[idx].first_idx]->rank,
3466 ops, &bbinfo[idx].first_idx,
3467 loop_containing_stmt (stmt));
3468 if (new_op == NULL_TREE)
3470 gcc_assert (bb == last_bb);
3471 new_op = ops[bbinfo[idx].first_idx++]->op;
3473 if (bbinfo[idx].op != new_op)
3475 imm_use_iterator iter;
3476 use_operand_p use_p;
3477 gimple *use_stmt, *cast_stmt = NULL;
3479 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3480 if (is_gimple_debug (use_stmt))
3481 continue;
3482 else if (gimple_code (use_stmt) == GIMPLE_COND
3483 || gimple_code (use_stmt) == GIMPLE_PHI)
3484 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3485 SET_USE (use_p, new_op);
3486 else if (gimple_assign_cast_p (use_stmt))
3487 cast_stmt = use_stmt;
3488 else
3489 gcc_unreachable ();
3490 if (cast_stmt)
3492 gcc_assert (bb == last_bb);
3493 tree lhs = gimple_assign_lhs (cast_stmt);
3494 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3495 enum tree_code rhs_code
3496 = gimple_assign_rhs_code (cast_stmt);
3497 gassign *g;
3498 if (is_gimple_min_invariant (new_op))
3500 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3501 g = gimple_build_assign (new_lhs, new_op);
3503 else
3504 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3505 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3506 gimple_set_uid (g, gimple_uid (cast_stmt));
3507 gimple_set_visited (g, true);
3508 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3509 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3510 if (is_gimple_debug (use_stmt))
3511 continue;
3512 else if (gimple_code (use_stmt) == GIMPLE_COND
3513 || gimple_code (use_stmt) == GIMPLE_PHI)
3514 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3515 SET_USE (use_p, new_lhs);
3516 else
3517 gcc_unreachable ();
3521 if (bb == first_bb)
3522 break;
3524 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3526 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3527 && bbinfo[idx].op == NULL_TREE
3528 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3530 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3532 if (idx > max_idx)
3533 max_idx = idx;
3535 /* If we collapse the conditional to a true/false
3536 condition, then bubble that knowledge up to our caller. */
3537 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3539 gimple_cond_make_false (cond_stmt);
3540 cfg_cleanup_needed = true;
3542 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3544 gimple_cond_make_true (cond_stmt);
3545 cfg_cleanup_needed = true;
3547 else
3549 gimple_cond_set_code (cond_stmt, NE_EXPR);
3550 gimple_cond_set_lhs (cond_stmt,
3551 ops[bbinfo[idx].first_idx]->op);
3552 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3554 update_stmt (cond_stmt);
3556 if (bb == first_bb)
3557 break;
3560 /* The above changes could result in basic blocks after the first
3561 modified one, up to and including last_bb, being executed even if
3562 they would not be in the original program. If the value ranges of
3563 assignment lhs' in those bbs were dependent on the conditions
3564 guarding those basic blocks, which now can change, the VRs might
3565 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
3566 are only used within the same bb, it should not be a big deal if
3567 we just reset all the VRs in those bbs. See PR68671. */
3568 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
3569 reset_flow_sensitive_info_in_bb (bb);
3571 return cfg_cleanup_needed;
3574 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3575 of STMT in its operands. This is also known as a "destructive
3576 update" operation. */
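/* The canonical case is a reduction loop (sketched here in C):

     for (i = 0; i < n; i++)
       sum = sum + x[i];

   In SSA form the addition is sum_2 = x_i + sum_1, where sum_1 is
   defined by a PHI that also takes sum_2 from the loop back edge, so
   is_phi_for_stmt (the addition, sum_1) returns true.  */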
3578 static bool
3579 is_phi_for_stmt (gimple *stmt, tree operand)
3581 gimple *def_stmt;
3582 gphi *def_phi;
3583 tree lhs;
3584 use_operand_p arg_p;
3585 ssa_op_iter i;
3587 if (TREE_CODE (operand) != SSA_NAME)
3588 return false;
3590 lhs = gimple_assign_lhs (stmt);
3592 def_stmt = SSA_NAME_DEF_STMT (operand);
3593 def_phi = dyn_cast <gphi *> (def_stmt);
3594 if (!def_phi)
3595 return false;
3597 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3598 if (lhs == USE_FROM_PTR (arg_p))
3599 return true;
3600 return false;
3603 /* Remove def stmt of VAR if VAR has zero uses and recurse
3604 on rhs1 operand if so. */
3606 static void
3607 remove_visited_stmt_chain (tree var)
3609 gimple *stmt;
3610 gimple_stmt_iterator gsi;
3612 while (1)
3614 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3615 return;
3616 stmt = SSA_NAME_DEF_STMT (var);
3617 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3619 var = gimple_assign_rhs1 (stmt);
3620 gsi = gsi_for_stmt (stmt);
3621 reassoc_remove_stmt (&gsi);
3622 release_defs (stmt);
3624 else
3625 return;
3629 /* This function checks three consecutive operands in
3630 passed operands vector OPS starting from OPINDEX and
3631 swaps two operands if it is profitable for the binary operation
3632 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3634 We pair ops with the same rank if possible.
3636 The alternative we try is to see if STMT is a destructive
3637 update style statement, which is like:
3638 b = phi (a, ...)
3639 a = c + b;
3640 In that case, we want to use the destructive update form to
3641 expose the possible vectorizer sum reduction opportunity.
3642 In that case, the third operand will be the phi node. This
3643 check is not performed if STMT is null.
3645 We could, of course, try to be better as noted above, and do a
3646 lot of work to try to find these opportunities in >3 operand
3647 cases, but it is unlikely to be worth it. */
3649 static void
3650 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
3651 unsigned int opindex, gimple *stmt)
3653 operand_entry *oe1, *oe2, *oe3;
3655 oe1 = ops[opindex];
3656 oe2 = ops[opindex + 1];
3657 oe3 = ops[opindex + 2];
3659 if ((oe1->rank == oe2->rank
3660 && oe2->rank != oe3->rank)
3661 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3662 && !is_phi_for_stmt (stmt, oe1->op)
3663 && !is_phi_for_stmt (stmt, oe2->op)))
3665 operand_entry temp = *oe3;
3666 oe3->op = oe1->op;
3667 oe3->rank = oe1->rank;
3668 oe1->op = temp.op;
3669 oe1->rank = temp.rank;
3671 else if ((oe1->rank == oe3->rank
3672 && oe2->rank != oe3->rank)
3673 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3674 && !is_phi_for_stmt (stmt, oe1->op)
3675 && !is_phi_for_stmt (stmt, oe3->op)))
3677 operand_entry temp = *oe2;
3678 oe2->op = oe1->op;
3679 oe2->rank = oe1->rank;
3680 oe1->op = temp.op;
3681 oe1->rank = temp.rank;
3685 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3686 two definitions, otherwise return STMT. */
3688 static inline gimple *
3689 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
3691 if (TREE_CODE (rhs1) == SSA_NAME
3692 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3693 stmt = SSA_NAME_DEF_STMT (rhs1);
3694 if (TREE_CODE (rhs2) == SSA_NAME
3695 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3696 stmt = SSA_NAME_DEF_STMT (rhs2);
3697 return stmt;
3700 /* Recursively rewrite our linearized statements so that the operators
3701 match those in OPS[OPINDEX], putting the computation in rank
3702 order. Return new lhs. */
3704 static tree
3705 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
3706 vec<operand_entry *> ops, bool changed)
3708 tree rhs1 = gimple_assign_rhs1 (stmt);
3709 tree rhs2 = gimple_assign_rhs2 (stmt);
3710 tree lhs = gimple_assign_lhs (stmt);
3711 operand_entry *oe;
3713 /* The final recursion case for this function is that you have
3714 exactly two operations left.
3715 If we had exactly one op in the entire list to start with, we
3716 would have never called this function, and the tail recursion
3717 rewrites them one at a time. */
3718 if (opindex + 2 == ops.length ())
3720 operand_entry *oe1, *oe2;
3722 oe1 = ops[opindex];
3723 oe2 = ops[opindex + 1];
3725 if (rhs1 != oe1->op || rhs2 != oe2->op)
3727 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3728 unsigned int uid = gimple_uid (stmt);
3730 if (dump_file && (dump_flags & TDF_DETAILS))
3732 fprintf (dump_file, "Transforming ");
3733 print_gimple_stmt (dump_file, stmt, 0, 0);
3736 /* Even when changed is false, reassociation could have e.g. removed
3737 some redundant operations, so unless we are just swapping the
3738 arguments or unless there is no change at all (then we just
3739 return lhs), force creation of a new SSA_NAME. */
3740 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
3742 gimple *insert_point
3743 = find_insert_point (stmt, oe1->op, oe2->op);
3744 lhs = make_ssa_name (TREE_TYPE (lhs));
3745 stmt
3746 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3747 oe1->op, oe2->op);
3748 gimple_set_uid (stmt, uid);
3749 gimple_set_visited (stmt, true);
3750 if (insert_point == gsi_stmt (gsi))
3751 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3752 else
3753 insert_stmt_after (stmt, insert_point);
3755 else
3757 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3758 == stmt);
3759 gimple_assign_set_rhs1 (stmt, oe1->op);
3760 gimple_assign_set_rhs2 (stmt, oe2->op);
3761 update_stmt (stmt);
3764 if (rhs1 != oe1->op && rhs1 != oe2->op)
3765 remove_visited_stmt_chain (rhs1);
3767 if (dump_file && (dump_flags & TDF_DETAILS))
3769 fprintf (dump_file, " into ");
3770 print_gimple_stmt (dump_file, stmt, 0, 0);
3773 return lhs;
3776 /* If we hit here, we should have 3 or more ops left. */
3777 gcc_assert (opindex + 2 < ops.length ());
3779 /* Rewrite the next operator. */
3780 oe = ops[opindex];
3782 /* Recurse on the LHS of the binary operator, which is guaranteed to
3783 be the non-leaf side. */
3784 tree new_rhs1
3785 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3786 changed || oe->op != rhs2);
3788 if (oe->op != rhs2 || new_rhs1 != rhs1)
3790 if (dump_file && (dump_flags & TDF_DETAILS))
3792 fprintf (dump_file, "Transforming ");
3793 print_gimple_stmt (dump_file, stmt, 0, 0);
3796 /* If changed is false, this is either opindex == 0
3797 or all outer rhs2's were equal to corresponding oe->op,
3798 and powi_result is NULL.
3799 That means lhs is equivalent before and after reassociation.
3800 Otherwise ensure the old lhs SSA_NAME is not reused and
3801 create a new stmt as well, so that any debug stmts will be
3802 properly adjusted. */
3803 if (changed)
3805 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3806 unsigned int uid = gimple_uid (stmt);
3807 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3809 lhs = make_ssa_name (TREE_TYPE (lhs));
3810 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3811 new_rhs1, oe->op);
3812 gimple_set_uid (stmt, uid);
3813 gimple_set_visited (stmt, true);
3814 if (insert_point == gsi_stmt (gsi))
3815 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3816 else
3817 insert_stmt_after (stmt, insert_point);
3819 else
3821 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3822 == stmt);
3823 gimple_assign_set_rhs1 (stmt, new_rhs1);
3824 gimple_assign_set_rhs2 (stmt, oe->op);
3825 update_stmt (stmt);
3828 if (dump_file && (dump_flags & TDF_DETAILS))
3830 fprintf (dump_file, " into ");
3831 print_gimple_stmt (dump_file, stmt, 0, 0);
3834 return lhs;
3837 /* Find out how many cycles we need to compute a statement chain.
3838 OPS_NUM holds the number of statements in a chain. CPU_WIDTH is the
3839 maximum number of independent statements we may execute per cycle. */
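/* For instance, with ops_num == 16 and cpu_width == 4 this computes
   res = 16 / 8 = 2 cycles for the first phase, leaving
   rest = 16 - 2 * 4 = 8 operands, which take exact_log2 (8) = 3 more
   halving cycles, i.e. 5 cycles in total.  */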
3841 static int
3842 get_required_cycles (int ops_num, int cpu_width)
3844 int res;
3845 int elog;
3846 unsigned int rest;
3848 /* While we have more than 2 * cpu_width operands
3849 we may reduce the number of operands by cpu_width
3850 per cycle. */
3851 res = ops_num / (2 * cpu_width);
3853 /* The remaining operand count may be halved each cycle
3854 until we have only one operand. */
3855 rest = (unsigned)(ops_num - res * cpu_width);
3856 elog = exact_log2 (rest);
3857 if (elog >= 0)
3858 res += elog;
3859 else
3860 res += floor_log2 (rest) + 1;
3862 return res;
3865 /* Returns an optimal number of registers to use for computation of
3866 given statements. */
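/* For instance, with ops_num == 6 and a target-reported width of 4,
   get_required_cycles (6, 4) is 3; the binary search below then finds
   that width 2 still needs only get_required_cycles (6, 2) == 3 cycles,
   so 2 is returned and fewer temporaries are kept live.  */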
3868 static int
3869 get_reassociation_width (int ops_num, enum tree_code opc,
3870 machine_mode mode)
3872 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3873 int width;
3874 int width_min;
3875 int cycles_best;
3877 if (param_width > 0)
3878 width = param_width;
3879 else
3880 width = targetm.sched.reassociation_width (opc, mode);
3882 if (width == 1)
3883 return width;
3885 /* Get the minimal time required for sequence computation. */
3886 cycles_best = get_required_cycles (ops_num, width);
3888 /* Check if we may use less width and still compute sequence for
3889 the same time. It will allow us to reduce registers usage.
3890 get_required_cycles is monotonically increasing with lower width
3891 so we can perform a binary search for the minimal width that still
3892 results in the optimal cycle count. */
3893 width_min = 1;
3894 while (width > width_min)
3896 int width_mid = (width + width_min) / 2;
3898 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3899 width = width_mid;
3900 else if (width_min < width_mid)
3901 width_min = width_mid;
3902 else
3903 break;
3906 return width;
3909 /* Recursively rewrite our linearized statements so that the operators
3910 match those in OPS[OPINDEX], putting the computation in rank
3911 order and trying to allow operations to be executed in
3912 parallel. */
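/* E.g. for a linearized chain computing a + b + c + d with width 2,
   instead of the serial form t1 = a + b; t2 = t1 + c; t3 = t2 + d;
   this emits roughly t1 = a + b; t2 = c + d; t3 = t1 + t2; so that the
   first two additions can issue in the same cycle.  */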
3914 static void
3915 rewrite_expr_tree_parallel (gassign *stmt, int width,
3916 vec<operand_entry *> ops)
3918 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3919 int op_num = ops.length ();
3920 int stmt_num = op_num - 1;
3921 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
3922 int op_index = op_num - 1;
3923 int stmt_index = 0;
3924 int ready_stmts_end = 0;
3925 int i = 0;
3926 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3928 /* We start expression rewriting from the top statements.
3929 So, in this loop we create a full list of statements
3930 we will work with. */
3931 stmts[stmt_num - 1] = stmt;
3932 for (i = stmt_num - 2; i >= 0; i--)
3933 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3935 for (i = 0; i < stmt_num; i++)
3937 tree op1, op2;
3939 /* Determine whether we should use results of
3940 already handled statements or not. */
3941 if (ready_stmts_end == 0
3942 && (i - stmt_index >= width || op_index < 1))
3943 ready_stmts_end = i;
3945 /* Now we choose operands for the next statement. A nonzero
3946 value in ready_stmts_end means that we should use
3947 the results of already generated statements as new operands. */
3948 if (ready_stmts_end > 0)
3950 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3951 if (ready_stmts_end > stmt_index)
3952 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3953 else if (op_index >= 0)
3954 op2 = ops[op_index--]->op;
3955 else
3957 gcc_assert (stmt_index < i);
3958 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3961 if (stmt_index >= ready_stmts_end)
3962 ready_stmts_end = 0;
3964 else
3966 if (op_index > 1)
3967 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3968 op2 = ops[op_index--]->op;
3969 op1 = ops[op_index--]->op;
3972 /* If we emit the last statement then we should put
3973 operands into the last statement. It will also
3974 break the loop. */
3975 if (op_index < 0 && stmt_index == i)
3976 i = stmt_num - 1;
3978 if (dump_file && (dump_flags & TDF_DETAILS))
3980 fprintf (dump_file, "Transforming ");
3981 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3984 /* We keep original statement only for the last one. All
3985 others are recreated. */
3986 if (i == stmt_num - 1)
3988 gimple_assign_set_rhs1 (stmts[i], op1);
3989 gimple_assign_set_rhs2 (stmts[i], op2);
3990 update_stmt (stmts[i]);
3992 else
3993 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3995 if (dump_file && (dump_flags & TDF_DETAILS))
3997 fprintf (dump_file, " into ");
3998 print_gimple_stmt (dump_file, stmts[i], 0, 0);
4002 remove_visited_stmt_chain (last_rhs1);
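/* Sketch of the effect (assuming width 2 and five operands a..e, listed
   lowest rank first, and ignoring any reordering done by
   swap_ops_for_binary_stmt): the four-statement chain is typically
   rewritten as
     t1 = d + e;
     t2 = b + c;
     t3 = t1 + t2;
     lhs = t3 + a;
   so the first two additions no longer depend on each other and can
   issue in the same cycle.  */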
4005 /* Transform STMT, which is really (A + B) + (C + D), into the left
4006 linear form, ((A+B)+C)+D.
4007 Recurse on D if necessary. */
4009 static void
4010 linearize_expr (gimple *stmt)
4012 gimple_stmt_iterator gsi;
4013 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
4014 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4015 gimple *oldbinrhs = binrhs;
4016 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4017 gimple *newbinrhs = NULL;
4018 struct loop *loop = loop_containing_stmt (stmt);
4019 tree lhs = gimple_assign_lhs (stmt);
4021 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
4022 && is_reassociable_op (binrhs, rhscode, loop));
4024 gsi = gsi_for_stmt (stmt);
4026 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
4027 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
4028 gimple_assign_rhs_code (binrhs),
4029 gimple_assign_lhs (binlhs),
4030 gimple_assign_rhs2 (binrhs));
4031 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
4032 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
4033 gimple_set_uid (binrhs, gimple_uid (stmt));
4035 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
4036 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4038 if (dump_file && (dump_flags & TDF_DETAILS))
4040 fprintf (dump_file, "Linearized: ");
4041 print_gimple_stmt (dump_file, stmt, 0, 0);
4044 reassociate_stats.linearized++;
4045 update_stmt (stmt);
4047 gsi = gsi_for_stmt (oldbinrhs);
4048 reassoc_remove_stmt (&gsi);
4049 release_defs (oldbinrhs);
4051 gimple_set_visited (stmt, true);
4052 gimple_set_visited (binlhs, true);
4053 gimple_set_visited (binrhs, true);
4055 /* Tail recurse on the new rhs if it still needs reassociation. */
4056 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
4057 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
4058 want to change the algorithm while converting to tuples. */
4059 linearize_expr (stmt);
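/* Example of a single linearization step (a sketch with invented names):
   given
     x1 = a + b;  x2 = c + d;  y = x1 + x2;
   the code above emits t = x1 + d and rewrites the root to y = t + c,
   i.e. the tree becomes the left-linear ((a + b) + d) + c; the tail
   recursion then handles c if its definition is also reassociable.  */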
4062 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4063 it. Otherwise, return NULL. */
4065 static gimple *
4066 get_single_immediate_use (tree lhs)
4068 use_operand_p immuse;
4069 gimple *immusestmt;
4071 if (TREE_CODE (lhs) == SSA_NAME
4072 && single_imm_use (lhs, &immuse, &immusestmt)
4073 && is_gimple_assign (immusestmt))
4074 return immusestmt;
4076 return NULL;
4079 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
4080 representing the negated value. Insertions of any necessary
4081 instructions go before GSI.
4082 This function is recursive in that, if you hand it "a_5" as the
4083 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
4084 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
4086 static tree
4087 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
4089 gimple *negatedefstmt = NULL;
4090 tree resultofnegate;
4091 gimple_stmt_iterator gsi;
4092 unsigned int uid;
4094 /* If we are trying to negate a name, defined by an add, negate the
4095 add operands instead. */
4096 if (TREE_CODE (tonegate) == SSA_NAME)
4097 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
4098 if (TREE_CODE (tonegate) == SSA_NAME
4099 && is_gimple_assign (negatedefstmt)
4100 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
4101 && has_single_use (gimple_assign_lhs (negatedefstmt))
4102 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
4104 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
4105 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
4106 tree lhs = gimple_assign_lhs (negatedefstmt);
4107 gimple *g;
4109 gsi = gsi_for_stmt (negatedefstmt);
4110 rhs1 = negate_value (rhs1, &gsi);
4112 gsi = gsi_for_stmt (negatedefstmt);
4113 rhs2 = negate_value (rhs2, &gsi);
4115 gsi = gsi_for_stmt (negatedefstmt);
4116 lhs = make_ssa_name (TREE_TYPE (lhs));
4117 gimple_set_visited (negatedefstmt, true);
4118 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
4119 gimple_set_uid (g, gimple_uid (negatedefstmt));
4120 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4121 return lhs;
4124 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
4125 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
4126 NULL_TREE, true, GSI_SAME_STMT);
4127 gsi = *gsip;
4128 uid = gimple_uid (gsi_stmt (gsi));
4129 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
4131 gimple *stmt = gsi_stmt (gsi);
4132 if (gimple_uid (stmt) != 0)
4133 break;
4134 gimple_set_uid (stmt, uid);
4136 return resultofnegate;
4139 /* Return true if we should break up the subtract in STMT into an add
4140 with negate. This is true when the subtract operands are really
4141 adds, or the subtract itself is used in an add expression. In
4142 either case, breaking up the subtract into an add with negate
4143 exposes the adds to reassociation. */
4145 static bool
4146 should_break_up_subtract (gimple *stmt)
4148 tree lhs = gimple_assign_lhs (stmt);
4149 tree binlhs = gimple_assign_rhs1 (stmt);
4150 tree binrhs = gimple_assign_rhs2 (stmt);
4151 gimple *immusestmt;
4152 struct loop *loop = loop_containing_stmt (stmt);
4154 if (TREE_CODE (binlhs) == SSA_NAME
4155 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4156 return true;
4158 if (TREE_CODE (binrhs) == SSA_NAME
4159 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4160 return true;
4162 if (TREE_CODE (lhs) == SSA_NAME
4163 && (immusestmt = get_single_immediate_use (lhs))
4164 && is_gimple_assign (immusestmt)
4165 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4166 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4167 return true;
4168 return false;
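/* For instance, for
     t = a - b;  u = t + c;
   the subtract feeds an addition, so it is broken up into t = a + -b,
   exposing the whole chain to reassociation; a subtract whose operands
   are not adds and whose result is only stored or compared is left
   alone.  */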
4171 /* Transform STMT from A - B into A + -B. */
4173 static void
4174 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4176 tree rhs1 = gimple_assign_rhs1 (stmt);
4177 tree rhs2 = gimple_assign_rhs2 (stmt);
4179 if (dump_file && (dump_flags & TDF_DETAILS))
4181 fprintf (dump_file, "Breaking up subtract ");
4182 print_gimple_stmt (dump_file, stmt, 0, 0);
4185 rhs2 = negate_value (rhs2, gsip);
4186 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4187 update_stmt (stmt);
4190 /* Determine whether STMT is a builtin call that raises an SSA name
4191 to an integer power and has only one use. If so, and this is early
4192 reassociation and unsafe math optimizations are permitted, place
4193 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4194 If any of these conditions does not hold, return FALSE. */
4196 static bool
4197 acceptable_pow_call (gimple *stmt, tree *base, HOST_WIDE_INT *exponent)
4199 tree arg1;
4200 REAL_VALUE_TYPE c, cint;
4202 if (!reassoc_insert_powi_p
4203 || !flag_unsafe_math_optimizations
4204 || !is_gimple_call (stmt)
4205 || !has_single_use (gimple_call_lhs (stmt)))
4206 return false;
4208 switch (gimple_call_combined_fn (stmt))
4210 CASE_CFN_POW:
4211 if (flag_errno_math)
4212 return false;
4214 *base = gimple_call_arg (stmt, 0);
4215 arg1 = gimple_call_arg (stmt, 1);
4217 if (TREE_CODE (arg1) != REAL_CST)
4218 return false;
4220 c = TREE_REAL_CST (arg1);
4222 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4223 return false;
4225 *exponent = real_to_integer (&c);
4226 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4227 if (!real_identical (&c, &cint))
4228 return false;
4230 break;
4232 CASE_CFN_POWI:
4233 *base = gimple_call_arg (stmt, 0);
4234 arg1 = gimple_call_arg (stmt, 1);
4236 if (!tree_fits_shwi_p (arg1))
4237 return false;
4239 *exponent = tree_to_shwi (arg1);
4240 break;
4242 default:
4243 return false;
4246 /* Expanding negative exponents is generally unproductive, so we don't
4247 complicate matters with those. Exponents of zero and one should
4248 have been handled by expression folding. */
4249 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4250 return false;
4252 return true;
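/* For example (assuming early reassociation and
   -funsafe-math-optimizations): a single-use call
   t = __builtin_powi (x, 3) yields *BASE == x and *EXPONENT == 3;
   pow (x, 2.5) is rejected because 2.5 is not exactly an integer, and
   pow (x, 1.0) is rejected here because exponents below 2 are not worth
   expanding.  */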
4255 /* Recursively linearize a binary expression that is the RHS of STMT.
4256 Place the operands of the expression tree in the vector named OPS. */
4258 static void
4259 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
4260 bool is_associative, bool set_visited)
4262 tree binlhs = gimple_assign_rhs1 (stmt);
4263 tree binrhs = gimple_assign_rhs2 (stmt);
4264 gimple *binlhsdef = NULL, *binrhsdef = NULL;
4265 bool binlhsisreassoc = false;
4266 bool binrhsisreassoc = false;
4267 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4268 struct loop *loop = loop_containing_stmt (stmt);
4269 tree base = NULL_TREE;
4270 HOST_WIDE_INT exponent = 0;
4272 if (set_visited)
4273 gimple_set_visited (stmt, true);
4275 if (TREE_CODE (binlhs) == SSA_NAME)
4277 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4278 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4279 && !stmt_could_throw_p (binlhsdef));
4282 if (TREE_CODE (binrhs) == SSA_NAME)
4284 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4285 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4286 && !stmt_could_throw_p (binrhsdef));
4289 /* If the LHS is not reassociable, but the RHS is, we need to swap
4290 them. If neither is reassociable, there is nothing we can do, so
4291 just put them in the ops vector. If the LHS is reassociable,
4292 linearize it. If both are reassociable, then linearize the RHS
4293 and the LHS. */
4295 if (!binlhsisreassoc)
4297 /* If this is not an associative operation, like division, give up. */
4298 if (!is_associative)
4300 add_to_ops_vec (ops, binrhs);
4301 return;
4304 if (!binrhsisreassoc)
4306 if (rhscode == MULT_EXPR
4307 && TREE_CODE (binrhs) == SSA_NAME
4308 && acceptable_pow_call (binrhsdef, &base, &exponent))
4310 add_repeat_to_ops_vec (ops, base, exponent);
4311 gimple_set_visited (binrhsdef, true);
4313 else
4314 add_to_ops_vec (ops, binrhs);
4316 if (rhscode == MULT_EXPR
4317 && TREE_CODE (binlhs) == SSA_NAME
4318 && acceptable_pow_call (binlhsdef, &base, &exponent))
4320 add_repeat_to_ops_vec (ops, base, exponent);
4321 gimple_set_visited (binlhsdef, true);
4323 else
4324 add_to_ops_vec (ops, binlhs);
4326 return;
4329 if (dump_file && (dump_flags & TDF_DETAILS))
4331 fprintf (dump_file, "swapping operands of ");
4332 print_gimple_stmt (dump_file, stmt, 0, 0);
4335 swap_ssa_operands (stmt,
4336 gimple_assign_rhs1_ptr (stmt),
4337 gimple_assign_rhs2_ptr (stmt));
4338 update_stmt (stmt);
4340 if (dump_file && (dump_flags & TDF_DETAILS))
4342 fprintf (dump_file, " is now ");
4343 print_gimple_stmt (dump_file, stmt, 0, 0);
4346 /* We want to make it so the lhs is always the reassociative op,
4347 so swap. */
4348 std::swap (binlhs, binrhs);
4350 else if (binrhsisreassoc)
4352 linearize_expr (stmt);
4353 binlhs = gimple_assign_rhs1 (stmt);
4354 binrhs = gimple_assign_rhs2 (stmt);
4357 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4358 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4359 rhscode, loop));
4360 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4361 is_associative, set_visited);
4363 if (rhscode == MULT_EXPR
4364 && TREE_CODE (binrhs) == SSA_NAME
4365 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
4367 add_repeat_to_ops_vec (ops, base, exponent);
4368 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
4370 else
4371 add_to_ops_vec (ops, binrhs);
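/* Illustrative result (invented names): linearizing the chain
     t1 = a + b;  t2 = t1 + c;  x = t2 + d;
   starting from the statement for x leaves OPS holding the four leaf
   operands a, b, c and d (later sorted by rank), with t1 and t2 marked
   visited so they are not processed again.  */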
4374 /* Repropagate the negates back into subtracts, since no other pass
4375 currently does it. */
4377 static void
4378 repropagate_negates (void)
4380 unsigned int i = 0;
4381 tree negate;
4383 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4385 gimple *user = get_single_immediate_use (negate);
4387 if (!user || !is_gimple_assign (user))
4388 continue;
4390 /* The negate operand can be either operand of a PLUS_EXPR
4391 (it can be the LHS if the RHS is a constant for example).
4393 Force the negate operand to the RHS of the PLUS_EXPR, then
4394 transform the PLUS_EXPR into a MINUS_EXPR. */
4395 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4397 /* If the negated operand appears on the LHS of the
4398 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4399 to force the negated operand to the RHS of the PLUS_EXPR. */
4400 if (gimple_assign_rhs1 (user) == negate)
4402 swap_ssa_operands (user,
4403 gimple_assign_rhs1_ptr (user),
4404 gimple_assign_rhs2_ptr (user));
4407 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4408 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4409 if (gimple_assign_rhs2 (user) == negate)
4411 tree rhs1 = gimple_assign_rhs1 (user);
4412 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
4413 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4414 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4415 update_stmt (user);
4418 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4420 if (gimple_assign_rhs1 (user) == negate)
4422 /* We have
4423 x = -a
4424 y = x - b
4425 which we transform into
4426 x = a + b
4427 y = -x .
4428 This pushes down the negate which we possibly can merge
4429 into some other operation, hence insert it into the
4430 plus_negates vector. */
4431 gimple *feed = SSA_NAME_DEF_STMT (negate);
4432 tree a = gimple_assign_rhs1 (feed);
4433 tree b = gimple_assign_rhs2 (user);
4434 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4435 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4436 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4437 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
4438 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4439 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4440 user = gsi_stmt (gsi2);
4441 update_stmt (user);
4442 reassoc_remove_stmt (&gsi);
4443 release_defs (feed);
4444 plus_negates.safe_push (gimple_assign_lhs (user));
4446 else
4448 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4449 rid of one operation. */
4450 gimple *feed = SSA_NAME_DEF_STMT (negate);
4451 tree a = gimple_assign_rhs1 (feed);
4452 tree rhs1 = gimple_assign_rhs1 (user);
4453 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4454 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4455 update_stmt (gsi_stmt (gsi));
4461 /* Returns true if OP is of a type for which we can do reassociation.
4462 That is, for integral or non-saturating fixed-point types, and for
4463 floating-point types when -fassociative-math is enabled. */
4465 static bool
4466 can_reassociate_p (tree op)
4468 tree type = TREE_TYPE (op);
4469 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4470 || NON_SAT_FIXED_POINT_TYPE_P (type)
4471 || (flag_associative_math && FLOAT_TYPE_P (type)))
4472 return true;
4473 return false;
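/* For example: unsigned arithmetic and signed arithmetic under -fwrapv
   qualify (overflow wraps), plain signed arithmetic with undefined
   overflow does not, and float/double only qualify when
   -fassociative-math is in effect.  */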
4476 /* Break up subtract operations in block BB.
4478 We do this top down because we don't know whether the subtract is
4479 part of a possible chain of reassociation except at the top.
4481 IE given
4482 d = f + g
4483 c = a + e
4484 b = c - d
4485 q = b - r
4486 k = t - q
4488 we want to break up k = t - q, but we won't until we've transformed q
4489 = b - r, which won't be broken up until we transform b = c - d.
4491 En passant, clear the GIMPLE visited flag on every statement
4492 and set UIDs within each basic block. */
4494 static void
4495 break_up_subtract_bb (basic_block bb)
4497 gimple_stmt_iterator gsi;
4498 basic_block son;
4499 unsigned int uid = 1;
4501 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4503 gimple *stmt = gsi_stmt (gsi);
4504 gimple_set_visited (stmt, false);
4505 gimple_set_uid (stmt, uid++);
4507 if (!is_gimple_assign (stmt)
4508 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4509 continue;
4511 /* Look for simple gimple subtract operations. */
4512 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4514 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4515 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4516 continue;
4518 /* Check for a subtract used only in an addition. If this
4519 is the case, transform it into add of a negate for better
4520 reassociation. IE transform C = A-B into C = A + -B if C
4521 is only used in an addition. */
4522 if (should_break_up_subtract (stmt))
4523 break_up_subtract (stmt, &gsi);
4525 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4526 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4527 plus_negates.safe_push (gimple_assign_lhs (stmt));
4529 for (son = first_dom_son (CDI_DOMINATORS, bb);
4530 son;
4531 son = next_dom_son (CDI_DOMINATORS, son))
4532 break_up_subtract_bb (son);
4535 /* Used for repeated factor analysis. */
4536 struct repeat_factor
4538 /* An SSA name that occurs in a multiply chain. */
4539 tree factor;
4541 /* Cached rank of the factor. */
4542 unsigned rank;
4544 /* Number of occurrences of the factor in the chain. */
4545 HOST_WIDE_INT count;
4547 /* An SSA name representing the product of this factor and
4548 all factors appearing later in the repeated factor vector. */
4549 tree repr;
4553 static vec<repeat_factor> repeat_factor_vec;
4555 /* Used for sorting the repeat factor vector. Sort primarily by
4556 ascending occurrence count, secondarily by descending rank. */
4558 static int
4559 compare_repeat_factors (const void *x1, const void *x2)
4561 const repeat_factor *rf1 = (const repeat_factor *) x1;
4562 const repeat_factor *rf2 = (const repeat_factor *) x2;
4564 if (rf1->count != rf2->count)
4565 return rf1->count - rf2->count;
4567 return rf2->rank - rf1->rank;
4570 /* Look for repeated operands in OPS in the multiply tree rooted at
4571 STMT. Replace them with an optimal sequence of multiplies and powi
4572 builtin calls, and remove the used operands from OPS. Return an
4573 SSA name representing the value of the replacement sequence. */
4575 static tree
4576 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
4578 unsigned i, j, vec_len;
4579 int ii;
4580 operand_entry *oe;
4581 repeat_factor *rf1, *rf2;
4582 repeat_factor rfnew;
4583 tree result = NULL_TREE;
4584 tree target_ssa, iter_result;
4585 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4586 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4587 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4588 gimple *mul_stmt, *pow_stmt;
4590 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4591 target. */
4592 if (!powi_fndecl)
4593 return NULL_TREE;
4595 /* Allocate the repeated factor vector. */
4596 repeat_factor_vec.create (10);
4598 /* Scan the OPS vector for all SSA names in the product and build
4599 up a vector of occurrence counts for each factor. */
4600 FOR_EACH_VEC_ELT (*ops, i, oe)
4602 if (TREE_CODE (oe->op) == SSA_NAME)
4604 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4606 if (rf1->factor == oe->op)
4608 rf1->count += oe->count;
4609 break;
4613 if (j >= repeat_factor_vec.length ())
4615 rfnew.factor = oe->op;
4616 rfnew.rank = oe->rank;
4617 rfnew.count = oe->count;
4618 rfnew.repr = NULL_TREE;
4619 repeat_factor_vec.safe_push (rfnew);
4624 /* Sort the repeated factor vector by (a) increasing occurrence count,
4625 and (b) decreasing rank. */
4626 repeat_factor_vec.qsort (compare_repeat_factors);
4628 /* It is generally best to combine as many base factors as possible
4629 into a product before applying __builtin_powi to the result.
4630 However, the sort order chosen for the repeated factor vector
4631 allows us to cache partial results for the product of the base
4632 factors for subsequent use. When we already have a cached partial
4633 result from a previous iteration, it is best to make use of it
4634 before looking for another __builtin_powi opportunity.
4636 As an example, consider x * x * y * y * y * z * z * z * z.
4637 We want to first compose the product x * y * z, raise it to the
4638 second power, then multiply this by y * z, and finally multiply
4639 by z. This can be done in 5 multiplies provided we cache y * z
4640 for use in both expressions:
4642 t1 = y * z
4643 t2 = t1 * x
4644 t3 = t2 * t2
4645 t4 = t1 * t3
4646 result = t4 * z
4648 If we instead ignored the cached y * z and first multiplied by
4649 the __builtin_powi opportunity z * z, we would get the inferior:
4651 t1 = y * z
4652 t2 = t1 * x
4653 t3 = t2 * t2
4654 t4 = z * z
4655 t5 = t3 * t4
4656 result = t5 * y */
4658 vec_len = repeat_factor_vec.length ();
4660 /* Repeatedly look for opportunities to create a builtin_powi call. */
4661 while (true)
4663 HOST_WIDE_INT power;
4665 /* First look for the largest cached product of factors from
4666 preceding iterations. If found, create a builtin_powi for
4667 it if the minimum occurrence count for its factors is at
4668 least 2, or just use this cached product as our next
4669 multiplicand if the minimum occurrence count is 1. */
4670 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4672 if (rf1->repr && rf1->count > 0)
4673 break;
4676 if (j < vec_len)
4678 power = rf1->count;
4680 if (power == 1)
4682 iter_result = rf1->repr;
4684 if (dump_file && (dump_flags & TDF_DETAILS))
4686 unsigned elt;
4687 repeat_factor *rf;
4688 fputs ("Multiplying by cached product ", dump_file);
4689 for (elt = j; elt < vec_len; elt++)
4691 rf = &repeat_factor_vec[elt];
4692 print_generic_expr (dump_file, rf->factor, 0);
4693 if (elt < vec_len - 1)
4694 fputs (" * ", dump_file);
4696 fputs ("\n", dump_file);
4699 else
4701 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4702 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4703 build_int_cst (integer_type_node,
4704 power));
4705 gimple_call_set_lhs (pow_stmt, iter_result);
4706 gimple_set_location (pow_stmt, gimple_location (stmt));
4707 gimple_set_uid (pow_stmt, gimple_uid (stmt));
4708 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4710 if (dump_file && (dump_flags & TDF_DETAILS))
4712 unsigned elt;
4713 repeat_factor *rf;
4714 fputs ("Building __builtin_pow call for cached product (",
4715 dump_file);
4716 for (elt = j; elt < vec_len; elt++)
4718 rf = &repeat_factor_vec[elt];
4719 print_generic_expr (dump_file, rf->factor, 0);
4720 if (elt < vec_len - 1)
4721 fputs (" * ", dump_file);
4723 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
4724 power);
4728 else
4730 /* Otherwise, find the first factor in the repeated factor
4731 vector whose occurrence count is at least 2. If no such
4732 factor exists, there are no builtin_powi opportunities
4733 remaining. */
4734 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4736 if (rf1->count >= 2)
4737 break;
4740 if (j >= vec_len)
4741 break;
4743 power = rf1->count;
4745 if (dump_file && (dump_flags & TDF_DETAILS))
4747 unsigned elt;
4748 repeat_factor *rf;
4749 fputs ("Building __builtin_pow call for (", dump_file);
4750 for (elt = j; elt < vec_len; elt++)
4752 rf = &repeat_factor_vec[elt];
4753 print_generic_expr (dump_file, rf->factor, 0);
4754 if (elt < vec_len - 1)
4755 fputs (" * ", dump_file);
4757 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
4760 reassociate_stats.pows_created++;
4762 /* Visit each element of the vector in reverse order (so that
4763 high-occurrence elements are visited first, and within the
4764 same occurrence count, lower-ranked elements are visited
4765 first). Form a linear product of all elements in this order
4766 whose occurrence count is at least that of element J.
4767 Record the SSA name representing the product of each element
4768 with all subsequent elements in the vector. */
4769 if (j == vec_len - 1)
4770 rf1->repr = rf1->factor;
4771 else
4773 for (ii = vec_len - 2; ii >= (int)j; ii--)
4775 tree op1, op2;
4777 rf1 = &repeat_factor_vec[ii];
4778 rf2 = &repeat_factor_vec[ii + 1];
4780 /* Init the last factor's representative to be itself. */
4781 if (!rf2->repr)
4782 rf2->repr = rf2->factor;
4784 op1 = rf1->factor;
4785 op2 = rf2->repr;
4787 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4788 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
4789 op1, op2);
4790 gimple_set_location (mul_stmt, gimple_location (stmt));
4791 gimple_set_uid (mul_stmt, gimple_uid (stmt));
4792 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4793 rf1->repr = target_ssa;
4795 /* Don't reprocess the multiply we just introduced. */
4796 gimple_set_visited (mul_stmt, true);
4800 /* Form a call to __builtin_powi for the maximum product
4801 just formed, raised to the power obtained earlier. */
4802 rf1 = &repeat_factor_vec[j];
4803 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4804 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4805 build_int_cst (integer_type_node,
4806 power));
4807 gimple_call_set_lhs (pow_stmt, iter_result);
4808 gimple_set_location (pow_stmt, gimple_location (stmt));
4809 gimple_set_uid (pow_stmt, gimple_uid (stmt));
4810 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4813 /* If we previously formed at least one other builtin_powi call,
4814 form the product of this one and those others. */
4815 if (result)
4817 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4818 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
4819 result, iter_result);
4820 gimple_set_location (mul_stmt, gimple_location (stmt));
4821 gimple_set_uid (mul_stmt, gimple_uid (stmt));
4822 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4823 gimple_set_visited (mul_stmt, true);
4824 result = new_result;
4826 else
4827 result = iter_result;
4829 /* Decrement the occurrence count of each element in the product
4830 by the count found above, and remove this many copies of each
4831 factor from OPS. */
4832 for (i = j; i < vec_len; i++)
4834 unsigned k = power;
4835 unsigned n;
4837 rf1 = &repeat_factor_vec[i];
4838 rf1->count -= power;
4840 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4842 if (oe->op == rf1->factor)
4844 if (oe->count <= k)
4846 ops->ordered_remove (n);
4847 k -= oe->count;
4849 if (k == 0)
4850 break;
4852 else
4854 oe->count -= k;
4855 break;
4862 /* At this point all elements in the repeated factor vector have a
4863 remaining occurrence count of 0 or 1, and those with a count of 1
4864 don't have cached representatives. Re-sort the ops vector and
4865 clean up. */
4866 ops->qsort (sort_by_operand_rank);
4867 repeat_factor_vec.release ();
4869 /* Return the final product computed herein. Note that there may
4870 still be some elements with single occurrence count left in OPS;
4871 those will be handled by the normal reassociation logic. */
4872 return result;
4875 /* Attempt to optimize
4876 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
4877 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
4879 static void
4880 attempt_builtin_copysign (vec<operand_entry *> *ops)
4882 operand_entry *oe;
4883 unsigned int i;
4884 unsigned int length = ops->length ();
4885 tree cst = ops->last ()->op;
4887 if (length == 1 || TREE_CODE (cst) != REAL_CST)
4888 return;
4890 FOR_EACH_VEC_ELT (*ops, i, oe)
4892 if (TREE_CODE (oe->op) == SSA_NAME
4893 && has_single_use (oe->op))
4895 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
4896 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
4898 tree arg0, arg1;
4899 switch (gimple_call_combined_fn (old_call))
4901 CASE_CFN_COPYSIGN:
4902 arg0 = gimple_call_arg (old_call, 0);
4903 arg1 = gimple_call_arg (old_call, 1);
4904 /* The first argument of copysign must be a constant,
4905 otherwise there's nothing to do. */
4906 if (TREE_CODE (arg0) == REAL_CST)
4908 tree type = TREE_TYPE (arg0);
4909 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
4910 /* If we couldn't fold to a single constant, skip it.
4911 That happens e.g. for inexact multiplication when
4912 -frounding-math. */
4913 if (mul == NULL_TREE)
4914 break;
4915 /* Instead of adjusting OLD_CALL, let's build a new
4916 call to not leak the LHS and prevent keeping bogus
4917 debug statements. DCE will clean up the old call. */
4918 gcall *new_call;
4919 if (gimple_call_internal_p (old_call))
4920 new_call = gimple_build_call_internal
4921 (IFN_COPYSIGN, 2, mul, arg1);
4922 else
4923 new_call = gimple_build_call
4924 (gimple_call_fndecl (old_call), 2, mul, arg1);
4925 tree lhs = make_ssa_name (type);
4926 gimple_call_set_lhs (new_call, lhs);
4927 gimple_set_location (new_call,
4928 gimple_location (old_call));
4929 insert_stmt_after (new_call, old_call);
4930 /* We've used the constant, get rid of it. */
4931 ops->pop ();
4932 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
4933 /* Handle the CST1 < 0 case by negating the result. */
4934 if (cst1_neg)
4936 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
4937 gimple *negate_stmt
4938 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
4939 insert_stmt_after (negate_stmt, new_call);
4940 oe->op = negrhs;
4942 else
4943 oe->op = lhs;
4944 if (dump_file && (dump_flags & TDF_DETAILS))
4946 fprintf (dump_file, "Optimizing copysign: ");
4947 print_generic_expr (dump_file, cst, 0);
4948 fprintf (dump_file, " * COPYSIGN (");
4949 print_generic_expr (dump_file, arg0, 0);
4950 fprintf (dump_file, ", ");
4951 print_generic_expr (dump_file, arg1, 0);
4952 fprintf (dump_file, ") into %sCOPYSIGN (",
4953 cst1_neg ? "-" : "");
4954 print_generic_expr (dump_file, mul, 0);
4955 fprintf (dump_file, ", ");
4956 print_generic_expr (dump_file, arg1, 0);
4957 fprintf (dump_file, ")\n");
4959 return;
4961 break;
4962 default:
4963 break;
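/* Example of the transform above (illustrative only): in a product whose
   constant operand is 2.0 and which contains t = copysign (3.0, y), the
   call is rebuilt as t2 = copysign (6.0, y) and the 2.0 is dropped from
   OPS; had the constant been negative, the rebuilt call would be negated
   instead.  */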
4970 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4972 static void
4973 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
4975 tree rhs1;
4977 if (dump_file && (dump_flags & TDF_DETAILS))
4979 fprintf (dump_file, "Transforming ");
4980 print_gimple_stmt (dump_file, stmt, 0, 0);
4983 rhs1 = gimple_assign_rhs1 (stmt);
4984 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4985 update_stmt (stmt);
4986 remove_visited_stmt_chain (rhs1);
4988 if (dump_file && (dump_flags & TDF_DETAILS))
4990 fprintf (dump_file, " into ");
4991 print_gimple_stmt (dump_file, stmt, 0, 0);
4995 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4997 static void
4998 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
4999 tree rhs1, tree rhs2)
5001 if (dump_file && (dump_flags & TDF_DETAILS))
5003 fprintf (dump_file, "Transforming ");
5004 print_gimple_stmt (dump_file, stmt, 0, 0);
5007 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
5008 update_stmt (gsi_stmt (*gsi));
5009 remove_visited_stmt_chain (rhs1);
5011 if (dump_file && (dump_flags & TDF_DETAILS))
5013 fprintf (dump_file, " into ");
5014 print_gimple_stmt (dump_file, stmt, 0, 0);
5018 /* Reassociate expressions in basic block BB and its children in the
5019 post-dominator tree.
5021 Bubble up return status from maybe_optimize_range_tests. */
5023 static bool
5024 reassociate_bb (basic_block bb)
5026 gimple_stmt_iterator gsi;
5027 basic_block son;
5028 gimple *stmt = last_stmt (bb);
5029 bool cfg_cleanup_needed = false;
5031 if (stmt && !gimple_visited_p (stmt))
5032 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
5034 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
5036 stmt = gsi_stmt (gsi);
5038 if (is_gimple_assign (stmt)
5039 && !stmt_could_throw_p (stmt))
5041 tree lhs, rhs1, rhs2;
5042 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
5044 /* If this is not a gimple binary expression, there is
5045 nothing for us to do with it. */
5046 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
5047 continue;
5049 /* If this was part of an already processed statement,
5050 we don't need to touch it again. */
5051 if (gimple_visited_p (stmt))
5053 /* This statement might have become dead because of previous
5054 reassociations. */
5055 if (has_zero_uses (gimple_get_lhs (stmt)))
5057 reassoc_remove_stmt (&gsi);
5058 release_defs (stmt);
5059 /* We might have removed the last stmt above, which
5060 leaves the iterator at the end of the sequence.
5061 Reset it to the last stmt of the block in that case;
5062 if that is still the end of the sequence (we removed
5063 the last statement of the block), there is nothing
5064 left to process, so bail out. */
5065 if (gsi_end_p (gsi))
5067 gsi = gsi_last_bb (bb);
5068 if (gsi_end_p (gsi))
5069 break;
5072 continue;
5075 lhs = gimple_assign_lhs (stmt);
5076 rhs1 = gimple_assign_rhs1 (stmt);
5077 rhs2 = gimple_assign_rhs2 (stmt);
5079 /* For non-bit or min/max operations we can't associate
5080 all types. Verify that here. */
5081 if (rhs_code != BIT_IOR_EXPR
5082 && rhs_code != BIT_AND_EXPR
5083 && rhs_code != BIT_XOR_EXPR
5084 && rhs_code != MIN_EXPR
5085 && rhs_code != MAX_EXPR
5086 && (!can_reassociate_p (lhs)
5087 || !can_reassociate_p (rhs1)
5088 || !can_reassociate_p (rhs2)))
5089 continue;
5091 if (associative_tree_code (rhs_code))
5093 auto_vec<operand_entry *> ops;
5094 tree powi_result = NULL_TREE;
5095 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
5097 /* There may be no immediate uses left by the time we
5098 get here because we may have eliminated them all. */
5099 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
5100 continue;
5102 gimple_set_visited (stmt, true);
5103 linearize_expr_tree (&ops, stmt, true, true);
5104 ops.qsort (sort_by_operand_rank);
5105 optimize_ops_list (rhs_code, &ops);
5106 if (undistribute_ops_list (rhs_code, &ops,
5107 loop_containing_stmt (stmt)))
5109 ops.qsort (sort_by_operand_rank);
5110 optimize_ops_list (rhs_code, &ops);
5113 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
5115 if (is_vector)
5116 optimize_vec_cond_expr (rhs_code, &ops);
5117 else
5118 optimize_range_tests (rhs_code, &ops);
5121 if (rhs_code == MULT_EXPR && !is_vector)
5123 attempt_builtin_copysign (&ops);
5125 if (reassoc_insert_powi_p
5126 && flag_unsafe_math_optimizations)
5127 powi_result = attempt_builtin_powi (stmt, &ops);
5130 /* If the operand vector is now empty, all operands were
5131 consumed by the __builtin_powi optimization. */
5132 if (ops.length () == 0)
5133 transform_stmt_to_copy (&gsi, stmt, powi_result);
5134 else if (ops.length () == 1)
5136 tree last_op = ops.last ()->op;
5138 if (powi_result)
5139 transform_stmt_to_multiply (&gsi, stmt, last_op,
5140 powi_result);
5141 else
5142 transform_stmt_to_copy (&gsi, stmt, last_op);
5144 else
5146 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
5147 int ops_num = ops.length ();
5148 int width = get_reassociation_width (ops_num, rhs_code, mode);
5149 tree new_lhs = lhs;
5151 if (dump_file && (dump_flags & TDF_DETAILS))
5152 fprintf (dump_file,
5153 "Width = %d was chosen for reassociation\n", width);
5155 if (width > 1
5156 && ops.length () > 3)
5157 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
5158 width, ops);
5159 else
5161 /* When there are three operands left, we want
5162 to make sure the ones that get the double
5163 binary op are chosen wisely. */
5164 int len = ops.length ();
5165 if (len >= 3)
5166 swap_ops_for_binary_stmt (ops, len - 3, stmt);
5168 new_lhs = rewrite_expr_tree (stmt, 0, ops,
5169 powi_result != NULL);
5172 /* If we combined some repeated factors into a
5173 __builtin_powi call, multiply that result by the
5174 reassociated operands. */
5175 if (powi_result)
5177 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
5178 tree type = TREE_TYPE (lhs);
5179 tree target_ssa = make_temp_ssa_name (type, NULL,
5180 "reassocpow");
5181 gimple_set_lhs (lhs_stmt, target_ssa);
5182 update_stmt (lhs_stmt);
5183 if (lhs != new_lhs)
5184 target_ssa = new_lhs;
5185 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
5186 powi_result, target_ssa);
5187 gimple_set_location (mul_stmt, gimple_location (stmt));
5188 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5189 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
5195 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
5196 son;
5197 son = next_dom_son (CDI_POST_DOMINATORS, son))
5198 cfg_cleanup_needed |= reassociate_bb (son);
5200 return cfg_cleanup_needed;
5203 /* Add jumps around shifts for range tests turned into bit tests.
5204 For each SSA_NAME VAR we have code like:
5205 VAR = ...; // final stmt of range comparison
5206 // bit test here...;
5207 OTHERVAR = ...; // final stmt of the bit test sequence
5208 RES = VAR | OTHERVAR;
5209 Turn the above into:
5210 VAR = ...;
5211 if (VAR != 0)
5212 goto <l3>;
5213 else
5214 goto <l2>;
5215 <l2>:
5216 // bit test here...;
5217 OTHERVAR = ...;
5218 <l3>:
5219 # RES = PHI<1(l1), OTHERVAR(l2)>; */
5221 static void
5222 branch_fixup (void)
5224 tree var;
5225 unsigned int i;
5227 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
5229 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
5230 gimple *use_stmt;
5231 use_operand_p use;
5232 bool ok = single_imm_use (var, &use, &use_stmt);
5233 gcc_assert (ok
5234 && is_gimple_assign (use_stmt)
5235 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
5236 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
5238 basic_block cond_bb = gimple_bb (def_stmt);
5239 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
5240 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
5242 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5243 gimple *g = gimple_build_cond (NE_EXPR, var,
5244 build_zero_cst (TREE_TYPE (var)),
5245 NULL_TREE, NULL_TREE);
5246 location_t loc = gimple_location (use_stmt);
5247 gimple_set_location (g, loc);
5248 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
5250 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
5251 etrue->probability = REG_BR_PROB_BASE / 2;
5252 etrue->count = cond_bb->count / 2;
5253 edge efalse = find_edge (cond_bb, then_bb);
5254 efalse->flags = EDGE_FALSE_VALUE;
5255 efalse->probability -= etrue->probability;
5256 efalse->count -= etrue->count;
5257 then_bb->count -= etrue->count;
5259 tree othervar = NULL_TREE;
5260 if (gimple_assign_rhs1 (use_stmt) == var)
5261 othervar = gimple_assign_rhs2 (use_stmt);
5262 else if (gimple_assign_rhs2 (use_stmt) == var)
5263 othervar = gimple_assign_rhs1 (use_stmt);
5264 else
5265 gcc_unreachable ();
5266 tree lhs = gimple_assign_lhs (use_stmt);
5267 gphi *phi = create_phi_node (lhs, merge_bb);
5268 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
5269 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
5270 gsi = gsi_for_stmt (use_stmt);
5271 gsi_remove (&gsi, true);
5273 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
5274 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
5276 reassoc_branch_fixups.release ();
5279 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
5280 void debug_ops_vector (vec<operand_entry *> ops);
5282 /* Dump the operand entry vector OPS to FILE. */
5284 void
5285 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
5287 operand_entry *oe;
5288 unsigned int i;
5290 FOR_EACH_VEC_ELT (ops, i, oe)
5292 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
5293 print_generic_expr (file, oe->op, 0);
5294 fprintf (file, "\n");
5298 /* Dump the operand entry vector OPS to STDERR. */
5300 DEBUG_FUNCTION void
5301 debug_ops_vector (vec<operand_entry *> ops)
5303 dump_ops_vector (stderr, ops);
5306 /* Bubble up return status from reassociate_bb. */
5308 static bool
5309 do_reassoc (void)
5311 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5312 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
5315 /* Initialize the reassociation pass. */
5317 static void
5318 init_reassoc (void)
5320 int i;
5321 long rank = 2;
5322 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
5324 /* Find the loops, so that we can prevent moving calculations in
5325 them. */
5326 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
5328 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
5330 next_operand_entry_id = 0;
5332 /* Reverse RPO (Reverse Post Order) will give us something where
5333 deeper loops come later. */
5334 pre_and_rev_post_order_compute (NULL, bbs, false);
5335 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
5336 operand_rank = new hash_map<tree, long>;
5338 /* Give each default definition a distinct rank. This includes
5339 parameters and the static chain. Walk backwards over all
5340 SSA names so that we get proper rank ordering according
5341 to tree_swap_operands_p. */
5342 for (i = num_ssa_names - 1; i > 0; --i)
5344 tree name = ssa_name (i);
5345 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5346 insert_operand_rank (name, ++rank);
5349 /* Set up rank for each BB */
5350 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5351 bb_rank[bbs[i]] = ++rank << 16;
5353 free (bbs);
5354 calculate_dominance_info (CDI_POST_DOMINATORS);
5355 plus_negates = vNULL;
5358 /* Cleanup after the reassociation pass, and print stats if
5359 requested. */
5361 static void
5362 fini_reassoc (void)
5364 statistics_counter_event (cfun, "Linearized",
5365 reassociate_stats.linearized);
5366 statistics_counter_event (cfun, "Constants eliminated",
5367 reassociate_stats.constants_eliminated);
5368 statistics_counter_event (cfun, "Ops eliminated",
5369 reassociate_stats.ops_eliminated);
5370 statistics_counter_event (cfun, "Statements rewritten",
5371 reassociate_stats.rewritten);
5372 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5373 reassociate_stats.pows_encountered);
5374 statistics_counter_event (cfun, "Built-in powi calls created",
5375 reassociate_stats.pows_created);
5377 delete operand_rank;
5378 operand_entry_pool.release ();
5379 free (bb_rank);
5380 plus_negates.release ();
5381 free_dominance_info (CDI_POST_DOMINATORS);
5382 loop_optimizer_finalize ();
5385 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
5386 insertion of __builtin_powi calls.
5388 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
5389 optimization of a gimple conditional. Otherwise returns zero. */
5391 static unsigned int
5392 execute_reassoc (bool insert_powi_p)
5394 reassoc_insert_powi_p = insert_powi_p;
5396 init_reassoc ();
5398 bool cfg_cleanup_needed;
5399 cfg_cleanup_needed = do_reassoc ();
5400 repropagate_negates ();
5401 branch_fixup ();
5403 fini_reassoc ();
5404 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
5407 namespace {
5409 const pass_data pass_data_reassoc =
5411 GIMPLE_PASS, /* type */
5412 "reassoc", /* name */
5413 OPTGROUP_NONE, /* optinfo_flags */
5414 TV_TREE_REASSOC, /* tv_id */
5415 ( PROP_cfg | PROP_ssa ), /* properties_required */
5416 0, /* properties_provided */
5417 0, /* properties_destroyed */
5418 0, /* todo_flags_start */
5419 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5422 class pass_reassoc : public gimple_opt_pass
5424 public:
5425 pass_reassoc (gcc::context *ctxt)
5426 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
5429 /* opt_pass methods: */
5430 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5431 void set_pass_param (unsigned int n, bool param)
5433 gcc_assert (n == 0);
5434 insert_powi_p = param;
5436 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5437 virtual unsigned int execute (function *)
5438 { return execute_reassoc (insert_powi_p); }
5440 private:
5441 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
5442 point 3a in the pass header comment. */
5443 bool insert_powi_p;
5444 }; // class pass_reassoc
5446 } // anon namespace
5448 gimple_opt_pass *
5449 make_pass_reassoc (gcc::context *ctxt)
5451 return new pass_reassoc (ctxt);