gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2016 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "tm_p.h"
33 #include "ssa.h"
34 #include "optabs-tree.h"
35 #include "gimple-pretty-print.h"
36 #include "diagnostic-core.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "cfganal.h"
40 #include "gimple-fold.h"
41 #include "tree-eh.h"
42 #include "gimple-iterator.h"
43 #include "gimplify-me.h"
44 #include "tree-cfg.h"
45 #include "tree-ssa-loop.h"
46 #include "flags.h"
47 #include "tree-ssa.h"
48 #include "langhooks.h"
49 #include "cfgloop.h"
50 #include "params.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
55 /* This is a simple global reassociation pass. It is, in part, based
56 on the LLVM pass of the same name (They do some things more/less
57 than we do, in different orders, etc).
59 It consists of five steps:
61 1. Breaking up subtract operations into addition + negate, where
62 it would promote the reassociation of adds.
64 2. Left linearization of the expression trees, so that (A+B)+(C+D)
65 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
66 During linearization, we place the operands of the binary
67 expressions into a vector of operand_entry_*
69 3. Optimization of the operand lists, eliminating things like a +
70 -a, a & a, etc.
72 3a. Combine repeated factors with the same occurrence counts
73 into a __builtin_powi call that will later be optimized into
74 an optimal number of multiplies.
76 4. Rewrite the expression trees we linearized and optimized so
77 they are in proper rank order.
79 5. Repropagate negates, as nothing else will clean it up ATM.
81 A bit of theory on #4, since nobody seems to write anything down
82 about why it makes sense to do it the way they do it:
84 We could do this much nicer theoretically, but don't (for reasons
85 explained after how to do it theoretically nice :P).
87 In order to promote the most redundancy elimination, you want
88 binary expressions whose operands are the same rank (or
89 preferably, the same value) exposed to the redundancy eliminator,
90 for possible elimination.
92 So the way to do this if we really cared, is to build the new op
93 tree from the leaves to the roots, merging as you go, and putting the
94 new op on the end of the worklist, until you are left with one
95 thing on the worklist.
97 IE if you have to rewrite the following set of operands (listed with
98 rank in parentheses), with opcode PLUS_EXPR:
100 a (1), b (1), c (1), d (2), e (2)
103 We start with our merge worklist empty, and the ops list with all of
104 those on it.
106 You want to first merge all leaves of the same rank, as much as
107 possible.
109 So first build a binary op of
111 mergetmp = a + b, and put "mergetmp" on the merge worklist.
113 Because there is no three operand form of PLUS_EXPR, c is not going to
114 be exposed to redundancy elimination as a rank 1 operand.
116 So you might as well throw it on the merge worklist (you could also
117 consider it to now be a rank two operand, and merge it with d and e,
118 but in this case, you then have evicted e from a binary op. So at
119 least in this situation, you can't win.)
121 Then build a binary op of d + e
122 mergetmp2 = d + e
124 and put mergetmp2 on the merge worklist.
126 so merge worklist = {mergetmp, c, mergetmp2}
128 Continue building binary ops of these operations until you have only
129 one operation left on the worklist.
131 So we have
133 build binary op
134 mergetmp3 = mergetmp + c
136 worklist = {mergetmp2, mergetmp3}
138 mergetmp4 = mergetmp2 + mergetmp3
140 worklist = {mergetmp4}
142 because we have one operation left, we can now just set the original
143 statement equal to the result of that operation.
145 This will at least expose a + b and d + e to redundancy elimination
146 as binary operations.
148 For extra points, you can reuse the old statements to build the
149 mergetmps, since you shouldn't run out.
151 So why don't we do this?
153 Because it's expensive, and rarely will help. Most trees we are
154 reassociating have 3 or fewer ops. If they have 2 ops, they already
155 will be written into a nice single binary op. If you have 3 ops, a
156 single simple check suffices to tell you whether the first two are of the
157 same rank. If so, you know to order it
159 mergetmp = op1 + op2
160 newstmt = mergetmp + op3
162 instead of
163 mergetmp = op2 + op3
164 newstmt = mergetmp + op1
166 If all three are of the same rank, you can't expose them all in a
167 single binary operator anyway, so the above is *still* the best you
168 can do.
170 Thus, this is what we do. When we have three ops left, we check to see
171 what order to put them in, and call it a day. As a nod to vector sum
172 reduction, we check if any of the ops are really a phi node that is a
173 destructive update for the associating op, and keep the destructive
174 update together for vector sum reduction recognition. */
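/* As an illustrative sketch of the steps above on a small (hypothetical)
   input, consider

     t1 = a - b
     t2 = t1 + c
     t3 = t2 + b

   Step 1 turns the subtraction into t1 = a + (-b), step 2 linearizes the
   chain feeding t3 into the operand list {a, -b, c, b}, step 3 cancels
   -b against b (leaving a zero constant that is folded away), and step 4
   rewrites the chain so that t3 computes the equivalent of a + c.  */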
176 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
177 point 3a in the pass header comment. */
178 static bool reassoc_insert_powi_p;
180 /* Statistics */
181 static struct
183 int linearized;
184 int constants_eliminated;
185 int ops_eliminated;
186 int rewritten;
187 int pows_encountered;
188 int pows_created;
189 } reassociate_stats;
191 /* Operator, rank pair. */
192 struct operand_entry
194 unsigned int rank;
195 int id;
196 tree op;
197 unsigned int count;
198 gimple *stmt_to_insert;
201 static object_allocator<operand_entry> operand_entry_pool
202 ("operand entry pool");
204 /* This is used to assign a unique ID to each struct operand_entry
205 so that qsort results are identical on different hosts. */
206 static int next_operand_entry_id;
208 /* Starting rank number for a given basic block, so that we can rank
209 operations using unmovable instructions in that BB based on the bb
210 depth. */
211 static long *bb_rank;
213 /* Operand->rank hashtable. */
214 static hash_map<tree, long> *operand_rank;
216 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
217 all basic blocks, the CFG should be adjusted: the basic block is
218 split right after that SSA_NAME's definition statement and before
219 its only use, which must be a bit ior. */
220 static vec<tree> reassoc_branch_fixups;
222 /* Forward decls. */
223 static long get_rank (tree);
224 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
226 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
227 possibly added by gsi_remove. */
229 bool
230 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
232 gimple *stmt = gsi_stmt (*gsi);
234 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
235 return gsi_remove (gsi, true);
237 gimple_stmt_iterator prev = *gsi;
238 gsi_prev (&prev);
239 unsigned uid = gimple_uid (stmt);
240 basic_block bb = gimple_bb (stmt);
241 bool ret = gsi_remove (gsi, true);
242 if (!gsi_end_p (prev))
243 gsi_next (&prev);
244 else
245 prev = gsi_start_bb (bb);
246 gimple *end_stmt = gsi_stmt (*gsi);
247 while ((stmt = gsi_stmt (prev)) != end_stmt)
249 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
250 gimple_set_uid (stmt, uid);
251 gsi_next (&prev);
253 return ret;
256 /* Bias amount for loop-carried phis. We want this to be larger than
257 the depth of any reassociation tree we can see, but not larger than
258 the rank difference between two blocks. */
259 #define PHI_LOOP_BIAS (1 << 15)
261 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
262 an innermost loop, and the phi has only a single use which is inside
263 the loop, then the rank is the block rank of the loop latch plus an
264 extra bias for the loop-carried dependence. This causes expressions
265 calculated into an accumulator variable to be independent for each
266 iteration of the loop. If STMT is some other phi, the rank is the
267 block rank of its containing block. */
268 static long
269 phi_rank (gimple *stmt)
271 basic_block bb = gimple_bb (stmt);
272 struct loop *father = bb->loop_father;
273 tree res;
274 unsigned i;
275 use_operand_p use;
276 gimple *use_stmt;
278 /* We only care about real loops (those with a latch). */
279 if (!father->latch)
280 return bb_rank[bb->index];
282 /* Interesting phis must be in headers of innermost loops. */
283 if (bb != father->header
284 || father->inner)
285 return bb_rank[bb->index];
287 /* Ignore virtual SSA_NAMEs. */
288 res = gimple_phi_result (stmt);
289 if (virtual_operand_p (res))
290 return bb_rank[bb->index];
292 /* The phi definition must have a single use, and that use must be
293 within the loop. Otherwise this isn't an accumulator pattern. */
294 if (!single_imm_use (res, &use, &use_stmt)
295 || gimple_bb (use_stmt)->loop_father != father)
296 return bb_rank[bb->index];
298 /* Look for phi arguments from within the loop. If found, bias this phi. */
299 for (i = 0; i < gimple_phi_num_args (stmt); i++)
301 tree arg = gimple_phi_arg_def (stmt, i);
302 if (TREE_CODE (arg) == SSA_NAME
303 && !SSA_NAME_IS_DEFAULT_DEF (arg))
305 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
306 if (gimple_bb (def_stmt)->loop_father == father)
307 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
311 /* Must be an uninteresting phi. */
312 return bb_rank[bb->index];
315 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
316 loop-carried dependence of an innermost loop, return TRUE; else
317 return FALSE. */
318 static bool
319 loop_carried_phi (tree exp)
321 gimple *phi_stmt;
322 long block_rank;
324 if (TREE_CODE (exp) != SSA_NAME
325 || SSA_NAME_IS_DEFAULT_DEF (exp))
326 return false;
328 phi_stmt = SSA_NAME_DEF_STMT (exp);
330 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
331 return false;
333 /* Non-loop-carried phis have block rank. Loop-carried phis have
334 an additional bias added in. If this phi doesn't have block rank,
335 it's biased and should not be propagated. */
336 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
338 if (phi_rank (phi_stmt) != block_rank)
339 return true;
341 return false;
344 /* Return the maximum of RANK and the rank that should be propagated
345 from expression OP. For most operands, this is just the rank of OP.
346 For loop-carried phis, the value is zero to avoid undoing the bias
347 in favor of the phi. */
348 static long
349 propagate_rank (long rank, tree op)
351 long op_rank;
353 if (loop_carried_phi (op))
354 return rank;
356 op_rank = get_rank (op);
358 return MAX (rank, op_rank);
361 /* Look up the operand rank structure for expression E. */
363 static inline long
364 find_operand_rank (tree e)
366 long *slot = operand_rank->get (e);
367 return slot ? *slot : -1;
370 /* Insert {E,RANK} into the operand rank hashtable. */
372 static inline void
373 insert_operand_rank (tree e, long rank)
375 gcc_assert (rank > 0);
376 gcc_assert (!operand_rank->put (e, rank));
379 /* Given an expression E, return the rank of the expression. */
381 static long
382 get_rank (tree e)
384 /* SSA_NAME's have the rank of the expression they are the result of.
386 For globals and uninitialized values, the rank is 0.
387 For function arguments, use the pre-setup rank.
388 For PHI nodes, stores, asm statements, etc, we use the rank of
389 the BB.
390 For simple operations, the rank is the maximum rank of any of
391 its operands, or the bb_rank, whichever is less.
392 I make no claims that this is optimal; however, it gives good
393 results. */
395 /* We make an exception to the normal ranking system to break
396 dependences of accumulator variables in loops. Suppose we
397 have a simple one-block loop containing:
399 x_1 = phi(x_0, x_2)
400 b = a + x_1
401 c = b + d
402 x_2 = c + e
404 As shown, each iteration of the calculation into x is fully
405 dependent upon the iteration before it. We would prefer to
406 see this in the form:
408 x_1 = phi(x_0, x_2)
409 b = a + d
410 c = b + e
411 x_2 = c + x_1
413 If the loop is unrolled, the calculations of b and c from
414 different iterations can be interleaved.
416 To obtain this result during reassociation, we bias the rank
417 of the phi definition x_1 upward, when it is recognized as an
418 accumulator pattern. The artificial rank causes it to be
419 added last, providing the desired independence. */
421 if (TREE_CODE (e) == SSA_NAME)
423 ssa_op_iter iter;
424 gimple *stmt;
425 long rank;
426 tree op;
428 if (SSA_NAME_IS_DEFAULT_DEF (e))
429 return find_operand_rank (e);
431 stmt = SSA_NAME_DEF_STMT (e);
432 if (gimple_code (stmt) == GIMPLE_PHI)
433 return phi_rank (stmt);
435 if (!is_gimple_assign (stmt))
436 return bb_rank[gimple_bb (stmt)->index];
438 /* If we already have a rank for this expression, use that. */
439 rank = find_operand_rank (e);
440 if (rank != -1)
441 return rank;
443 /* Otherwise, find the maximum rank for the operands. As an
444 exception, remove the bias from loop-carried phis when propagating
445 the rank so that dependent operations are not also biased. */
446 /* Simply walk over all SSA uses - this takes advantage of the
447 fact that non-SSA operands satisfy is_gimple_min_invariant and
448 thus have rank 0. */
449 rank = 0;
450 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
451 rank = propagate_rank (rank, op);
453 if (dump_file && (dump_flags & TDF_DETAILS))
455 fprintf (dump_file, "Rank for ");
456 print_generic_expr (dump_file, e, 0);
457 fprintf (dump_file, " is %ld\n", (rank + 1));
460 /* Note the rank in the hashtable so we don't recompute it. */
461 insert_operand_rank (e, (rank + 1));
462 return (rank + 1);
465 /* Constants, globals, etc., are rank 0 */
466 return 0;
470 /* We want integer ones to end up last no matter what, since they are
471 the ones we can do the most with. */
472 #define INTEGER_CONST_TYPE 1 << 3
473 #define FLOAT_CONST_TYPE 1 << 2
474 #define OTHER_CONST_TYPE 1 << 1
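/* The distinct values only matter for ordering: constant_type () below maps
   each invariant to one of these classes, and sort_by_operand_rank uses the
   class so that rank-0 constants of the same kind end up adjacent and can be
   merged by optimize_ops_list.  */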
476 /* Classify an invariant tree into integer, float, or other, so that
477 we can sort them to be near other constants of the same type. */
478 static inline int
479 constant_type (tree t)
481 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
482 return INTEGER_CONST_TYPE;
483 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
484 return FLOAT_CONST_TYPE;
485 else
486 return OTHER_CONST_TYPE;
489 /* qsort comparison function to sort operand entries PA and PB by rank
490 so that the sorted array is ordered by rank in decreasing order. */
491 static int
492 sort_by_operand_rank (const void *pa, const void *pb)
494 const operand_entry *oea = *(const operand_entry *const *)pa;
495 const operand_entry *oeb = *(const operand_entry *const *)pb;
497 /* It's nicer for optimize_expression if constants that are likely
498 to fold when added/multiplied/whatever are put next to each
499 other. Since all constants have rank 0, order them by type. */
500 if (oeb->rank == 0 && oea->rank == 0)
502 if (constant_type (oeb->op) != constant_type (oea->op))
503 return constant_type (oeb->op) - constant_type (oea->op);
504 else
505 /* To make sorting result stable, we use unique IDs to determine
506 order. */
507 return oeb->id - oea->id;
510 /* Lastly, make sure the versions that are the same go next to each
511 other. */
512 if ((oeb->rank - oea->rank == 0)
513 && TREE_CODE (oea->op) == SSA_NAME
514 && TREE_CODE (oeb->op) == SSA_NAME)
516 /* SSA_NAME_VERSION is assigned pretty randomly because we reuse
517 versions of removed SSA_NAMEs, so if possible prefer to sort
518 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
519 See PR60418. */
520 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
521 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
522 && !oea->stmt_to_insert
523 && !oeb->stmt_to_insert
524 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
526 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
527 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
528 basic_block bba = gimple_bb (stmta);
529 basic_block bbb = gimple_bb (stmtb);
530 if (bbb != bba)
532 if (bb_rank[bbb->index] != bb_rank[bba->index])
533 return bb_rank[bbb->index] - bb_rank[bba->index];
535 else
537 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
538 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
539 if (da != db)
540 return da ? 1 : -1;
544 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
545 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
546 else
547 return oeb->id - oea->id;
550 if (oeb->rank != oea->rank)
551 return oeb->rank - oea->rank;
552 else
553 return oeb->id - oea->id;
556 /* Add an operand entry to *OPS for the tree operand OP. */
558 static void
559 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
561 operand_entry *oe = operand_entry_pool.allocate ();
563 oe->op = op;
564 oe->rank = get_rank (op);
565 oe->id = next_operand_entry_id++;
566 oe->count = 1;
567 oe->stmt_to_insert = stmt_to_insert;
568 ops->safe_push (oe);
571 /* Add an operand entry to *OPS for the tree operand OP with repeat
572 count REPEAT. */
574 static void
575 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
576 HOST_WIDE_INT repeat)
578 operand_entry *oe = operand_entry_pool.allocate ();
580 oe->op = op;
581 oe->rank = get_rank (op);
582 oe->id = next_operand_entry_id++;
583 oe->count = repeat;
584 oe->stmt_to_insert = NULL;
585 ops->safe_push (oe);
587 reassociate_stats.pows_encountered++;
590 /* Return true if STMT is a reassociable operation containing a binary
591 operation with tree code CODE, and is inside LOOP. */
593 static bool
594 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
596 basic_block bb = gimple_bb (stmt);
598 if (gimple_bb (stmt) == NULL)
599 return false;
601 if (!flow_bb_inside_loop_p (loop, bb))
602 return false;
604 if (is_gimple_assign (stmt)
605 && gimple_assign_rhs_code (stmt) == code
606 && has_single_use (gimple_assign_lhs (stmt)))
607 return true;
609 return false;
613 /* Return true if STMT is a nop-conversion. */
615 static bool
616 gimple_nop_conversion_p (gimple *stmt)
618 if (gassign *ass = dyn_cast <gassign *> (stmt))
620 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
621 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
622 TREE_TYPE (gimple_assign_rhs1 (ass))))
623 return true;
625 return false;
628 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
629 operand of that operation. Otherwise, return NULL. */
631 static tree
632 get_unary_op (tree name, enum tree_code opcode)
634 gimple *stmt = SSA_NAME_DEF_STMT (name);
636 /* Look through nop conversions (sign changes). */
637 if (gimple_nop_conversion_p (stmt)
638 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
639 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
641 if (!is_gimple_assign (stmt))
642 return NULL_TREE;
644 if (gimple_assign_rhs_code (stmt) == opcode)
645 return gimple_assign_rhs1 (stmt);
646 return NULL_TREE;
649 /* Return true if OP1 and OP2 have the same value if cast to either type. */
651 static bool
652 ops_equal_values_p (tree op1, tree op2)
654 if (op1 == op2)
655 return true;
657 tree orig_op1 = op1;
658 if (TREE_CODE (op1) == SSA_NAME)
660 gimple *stmt = SSA_NAME_DEF_STMT (op1);
661 if (gimple_nop_conversion_p (stmt))
663 op1 = gimple_assign_rhs1 (stmt);
664 if (op1 == op2)
665 return true;
669 if (TREE_CODE (op2) == SSA_NAME)
671 gimple *stmt = SSA_NAME_DEF_STMT (op2);
672 if (gimple_nop_conversion_p (stmt))
674 op2 = gimple_assign_rhs1 (stmt);
675 if (op1 == op2
676 || orig_op1 == op2)
677 return true;
681 return false;
685 /* If CURR and LAST are a pair of ops that OPCODE allows us to
686 eliminate through equivalences, do so, remove them from OPS, and
687 return true. Otherwise, return false. */
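/* For example (illustrative): for BIT_AND_EXPR the operand pair {a, a}
   collapses to a single a, while for BIT_XOR_EXPR both copies are removed
   and, if nothing else remains, a zero constant is pushed instead.  */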
689 static bool
690 eliminate_duplicate_pair (enum tree_code opcode,
691 vec<operand_entry *> *ops,
692 bool *all_done,
693 unsigned int i,
694 operand_entry *curr,
695 operand_entry *last)
698 /* If we have two of the same op, and the opcode is &, |, min, or max,
699 we can eliminate one of them.
700 If we have two of the same op, and the opcode is ^, we can
701 eliminate both of them. */
703 if (last && last->op == curr->op)
705 switch (opcode)
707 case MAX_EXPR:
708 case MIN_EXPR:
709 case BIT_IOR_EXPR:
710 case BIT_AND_EXPR:
711 if (dump_file && (dump_flags & TDF_DETAILS))
713 fprintf (dump_file, "Equivalence: ");
714 print_generic_expr (dump_file, curr->op, 0);
715 fprintf (dump_file, " [&|minmax] ");
716 print_generic_expr (dump_file, last->op, 0);
717 fprintf (dump_file, " -> ");
718 print_generic_stmt (dump_file, last->op, 0);
721 ops->ordered_remove (i);
722 reassociate_stats.ops_eliminated ++;
724 return true;
726 case BIT_XOR_EXPR:
727 if (dump_file && (dump_flags & TDF_DETAILS))
729 fprintf (dump_file, "Equivalence: ");
730 print_generic_expr (dump_file, curr->op, 0);
731 fprintf (dump_file, " ^ ");
732 print_generic_expr (dump_file, last->op, 0);
733 fprintf (dump_file, " -> nothing\n");
736 reassociate_stats.ops_eliminated += 2;
738 if (ops->length () == 2)
740 ops->truncate (0);
741 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
742 *all_done = true;
744 else
746 ops->ordered_remove (i-1);
747 ops->ordered_remove (i-1);
750 return true;
752 default:
753 break;
756 return false;
759 static vec<tree> plus_negates;
761 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
762 expression, look in OPS for a corresponding positive operation to cancel
763 it out. If we find one, remove the other from OPS, replace
764 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
765 return false. */
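/* For example (illustrative): in the operand list of a + (-b) + b the
   negate cancels against b and contributes a 0, and in a + ~b + b the
   pair contributes a -1, matching the two cases handled below.  */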
767 static bool
768 eliminate_plus_minus_pair (enum tree_code opcode,
769 vec<operand_entry *> *ops,
770 unsigned int currindex,
771 operand_entry *curr)
773 tree negateop;
774 tree notop;
775 unsigned int i;
776 operand_entry *oe;
778 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
779 return false;
781 negateop = get_unary_op (curr->op, NEGATE_EXPR);
782 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
783 if (negateop == NULL_TREE && notop == NULL_TREE)
784 return false;
786 /* Any non-negated version will have a rank that is one less than
787 the current rank. So once we hit those ranks, if we don't find
788 one, we can stop. */
790 for (i = currindex + 1;
791 ops->iterate (i, &oe)
792 && oe->rank >= curr->rank - 1 ;
793 i++)
795 if (negateop
796 && ops_equal_values_p (oe->op, negateop))
798 if (dump_file && (dump_flags & TDF_DETAILS))
800 fprintf (dump_file, "Equivalence: ");
801 print_generic_expr (dump_file, negateop, 0);
802 fprintf (dump_file, " + -");
803 print_generic_expr (dump_file, oe->op, 0);
804 fprintf (dump_file, " -> 0\n");
807 ops->ordered_remove (i);
808 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
809 ops->ordered_remove (currindex);
810 reassociate_stats.ops_eliminated ++;
812 return true;
814 else if (notop
815 && ops_equal_values_p (oe->op, notop))
817 tree op_type = TREE_TYPE (oe->op);
819 if (dump_file && (dump_flags & TDF_DETAILS))
821 fprintf (dump_file, "Equivalence: ");
822 print_generic_expr (dump_file, notop, 0);
823 fprintf (dump_file, " + ~");
824 print_generic_expr (dump_file, oe->op, 0);
825 fprintf (dump_file, " -> -1\n");
828 ops->ordered_remove (i);
829 add_to_ops_vec (ops, build_all_ones_cst (op_type));
830 ops->ordered_remove (currindex);
831 reassociate_stats.ops_eliminated ++;
833 return true;
837 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
838 save it for later inspection in repropagate_negates(). */
839 if (negateop != NULL_TREE
840 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
841 plus_negates.safe_push (curr->op);
843 return false;
846 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
847 bitwise not expression, look in OPS for a corresponding operand to
848 cancel it out. If we find one, remove the other from OPS, replace
849 OPS[CURRINDEX] with 0, and return true. Otherwise, return
850 false. */
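/* For example (illustrative): a & ~a is 0 and a | ~a is all ones, so when
   such a pair is found the whole operand list can be collapsed to that
   single constant, which is what the code below does.  */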
852 static bool
853 eliminate_not_pairs (enum tree_code opcode,
854 vec<operand_entry *> *ops,
855 unsigned int currindex,
856 operand_entry *curr)
858 tree notop;
859 unsigned int i;
860 operand_entry *oe;
862 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
863 || TREE_CODE (curr->op) != SSA_NAME)
864 return false;
866 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
867 if (notop == NULL_TREE)
868 return false;
870 /* Any non-not version will have a rank that is one less than
871 the current rank. So once we hit those ranks, if we don't find
872 one, we can stop. */
874 for (i = currindex + 1;
875 ops->iterate (i, &oe)
876 && oe->rank >= curr->rank - 1;
877 i++)
879 if (oe->op == notop)
881 if (dump_file && (dump_flags & TDF_DETAILS))
883 fprintf (dump_file, "Equivalence: ");
884 print_generic_expr (dump_file, notop, 0);
885 if (opcode == BIT_AND_EXPR)
886 fprintf (dump_file, " & ~");
887 else if (opcode == BIT_IOR_EXPR)
888 fprintf (dump_file, " | ~");
889 print_generic_expr (dump_file, oe->op, 0);
890 if (opcode == BIT_AND_EXPR)
891 fprintf (dump_file, " -> 0\n");
892 else if (opcode == BIT_IOR_EXPR)
893 fprintf (dump_file, " -> -1\n");
896 if (opcode == BIT_AND_EXPR)
897 oe->op = build_zero_cst (TREE_TYPE (oe->op));
898 else if (opcode == BIT_IOR_EXPR)
899 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
901 reassociate_stats.ops_eliminated += ops->length () - 1;
902 ops->truncate (0);
903 ops->quick_push (oe);
904 return true;
908 return false;
911 /* Use constant value that may be present in OPS to try to eliminate
912 operands. Note that this function is only really used when we've
913 eliminated ops for other reasons, or merged constants. Across
914 single statements, fold already does all of this, plus more. There
915 is little point in duplicating logic, so I've only included the
916 identities that I could ever construct testcases to trigger. */
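/* For example (illustrative): x & 0 drops every other operand, x & -1 drops
   the constant, x | -1 drops every other operand, and x | 0, x * 1, x + 0
   and x ^ 0 drop the constant; the floating-point cases are guarded by the
   HONOR_* and fold_real_zero_addition_p checks below.  */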
918 static void
919 eliminate_using_constants (enum tree_code opcode,
920 vec<operand_entry *> *ops)
922 operand_entry *oelast = ops->last ();
923 tree type = TREE_TYPE (oelast->op);
925 if (oelast->rank == 0
926 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
928 switch (opcode)
930 case BIT_AND_EXPR:
931 if (integer_zerop (oelast->op))
933 if (ops->length () != 1)
935 if (dump_file && (dump_flags & TDF_DETAILS))
936 fprintf (dump_file, "Found & 0, removing all other ops\n");
938 reassociate_stats.ops_eliminated += ops->length () - 1;
940 ops->truncate (0);
941 ops->quick_push (oelast);
942 return;
945 else if (integer_all_onesp (oelast->op))
947 if (ops->length () != 1)
949 if (dump_file && (dump_flags & TDF_DETAILS))
950 fprintf (dump_file, "Found & -1, removing\n");
951 ops->pop ();
952 reassociate_stats.ops_eliminated++;
955 break;
956 case BIT_IOR_EXPR:
957 if (integer_all_onesp (oelast->op))
959 if (ops->length () != 1)
961 if (dump_file && (dump_flags & TDF_DETAILS))
962 fprintf (dump_file, "Found | -1, removing all other ops\n");
964 reassociate_stats.ops_eliminated += ops->length () - 1;
966 ops->truncate (0);
967 ops->quick_push (oelast);
968 return;
971 else if (integer_zerop (oelast->op))
973 if (ops->length () != 1)
975 if (dump_file && (dump_flags & TDF_DETAILS))
976 fprintf (dump_file, "Found | 0, removing\n");
977 ops->pop ();
978 reassociate_stats.ops_eliminated++;
981 break;
982 case MULT_EXPR:
983 if (integer_zerop (oelast->op)
984 || (FLOAT_TYPE_P (type)
985 && !HONOR_NANS (type)
986 && !HONOR_SIGNED_ZEROS (type)
987 && real_zerop (oelast->op)))
989 if (ops->length () != 1)
991 if (dump_file && (dump_flags & TDF_DETAILS))
992 fprintf (dump_file, "Found * 0, removing all other ops\n");
994 reassociate_stats.ops_eliminated += ops->length () - 1;
995 ops->truncate (1);
996 ops->quick_push (oelast);
997 return;
1000 else if (integer_onep (oelast->op)
1001 || (FLOAT_TYPE_P (type)
1002 && !HONOR_SNANS (type)
1003 && real_onep (oelast->op)))
1005 if (ops->length () != 1)
1007 if (dump_file && (dump_flags & TDF_DETAILS))
1008 fprintf (dump_file, "Found * 1, removing\n");
1009 ops->pop ();
1010 reassociate_stats.ops_eliminated++;
1011 return;
1014 break;
1015 case BIT_XOR_EXPR:
1016 case PLUS_EXPR:
1017 case MINUS_EXPR:
1018 if (integer_zerop (oelast->op)
1019 || (FLOAT_TYPE_P (type)
1020 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1021 && fold_real_zero_addition_p (type, oelast->op,
1022 opcode == MINUS_EXPR)))
1024 if (ops->length () != 1)
1026 if (dump_file && (dump_flags & TDF_DETAILS))
1027 fprintf (dump_file, "Found [|^+] 0, removing\n");
1028 ops->pop ();
1029 reassociate_stats.ops_eliminated++;
1030 return;
1033 break;
1034 default:
1035 break;
1041 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1042 bool, bool);
1044 /* Structure for tracking and counting operands. */
1045 struct oecount {
1046 int cnt;
1047 int id;
1048 enum tree_code oecode;
1049 tree op;
1053 /* The heap for the oecount hashtable and the sorted list of operands. */
1054 static vec<oecount> cvec;
1057 /* Oecount hashtable helpers. */
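/* The hash table stores ints that are indices into CVEC biased by 42, so
   that the values 0 and 1 stay free for int_hash's empty and deleted
   markers; hence the "- 42" when looking entries up and the
   "cvec.length () + 41" when inserting.  */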
1059 struct oecount_hasher : int_hash <int, 0, 1>
1061 static inline hashval_t hash (int);
1062 static inline bool equal (int, int);
1065 /* Hash function for oecount. */
1067 inline hashval_t
1068 oecount_hasher::hash (int p)
1070 const oecount *c = &cvec[p - 42];
1071 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1074 /* Comparison function for oecount. */
1076 inline bool
1077 oecount_hasher::equal (int p1, int p2)
1079 const oecount *c1 = &cvec[p1 - 42];
1080 const oecount *c2 = &cvec[p2 - 42];
1081 return (c1->oecode == c2->oecode
1082 && c1->op == c2->op);
1085 /* Comparison function for qsort sorting oecount elements by count. */
1087 static int
1088 oecount_cmp (const void *p1, const void *p2)
1090 const oecount *c1 = (const oecount *)p1;
1091 const oecount *c2 = (const oecount *)p2;
1092 if (c1->cnt != c2->cnt)
1093 return c1->cnt - c2->cnt;
1094 else
1095 /* If counts are identical, use unique IDs to stabilize qsort. */
1096 return c1->id - c2->id;
1099 /* Return TRUE iff STMT represents a builtin call that raises OP
1100 to some exponent. */
1102 static bool
1103 stmt_is_power_of_op (gimple *stmt, tree op)
1105 if (!is_gimple_call (stmt))
1106 return false;
1108 switch (gimple_call_combined_fn (stmt))
1110 CASE_CFN_POW:
1111 CASE_CFN_POWI:
1112 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1114 default:
1115 return false;
1119 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1120 in place and return the result. Assumes that stmt_is_power_of_op
1121 was previously called for STMT and returned TRUE. */
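/* For example (illustrative): __builtin_powi (x, 5) is rewritten in place
   to __builtin_powi (x, 4) and 4 is returned; likewise pow (x, 3.0) becomes
   pow (x, 2.0), returning 2.  */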
1123 static HOST_WIDE_INT
1124 decrement_power (gimple *stmt)
1126 REAL_VALUE_TYPE c, cint;
1127 HOST_WIDE_INT power;
1128 tree arg1;
1130 switch (gimple_call_combined_fn (stmt))
1132 CASE_CFN_POW:
1133 arg1 = gimple_call_arg (stmt, 1);
1134 c = TREE_REAL_CST (arg1);
1135 power = real_to_integer (&c) - 1;
1136 real_from_integer (&cint, VOIDmode, power, SIGNED);
1137 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1138 return power;
1140 CASE_CFN_POWI:
1141 arg1 = gimple_call_arg (stmt, 1);
1142 power = TREE_INT_CST_LOW (arg1) - 1;
1143 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1144 return power;
1146 default:
1147 gcc_unreachable ();
1151 /* Find the single immediate use of STMT's LHS, and replace it
1152 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1153 replace *DEF with OP as well. */
1155 static void
1156 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1158 tree lhs;
1159 gimple *use_stmt;
1160 use_operand_p use;
1161 gimple_stmt_iterator gsi;
1163 if (is_gimple_call (stmt))
1164 lhs = gimple_call_lhs (stmt);
1165 else
1166 lhs = gimple_assign_lhs (stmt);
1168 gcc_assert (has_single_use (lhs));
1169 single_imm_use (lhs, &use, &use_stmt);
1170 if (lhs == *def)
1171 *def = op;
1172 SET_USE (use, op);
1173 if (TREE_CODE (op) != SSA_NAME)
1174 update_stmt (use_stmt);
1175 gsi = gsi_for_stmt (stmt);
1176 unlink_stmt_vdef (stmt);
1177 reassoc_remove_stmt (&gsi);
1178 release_defs (stmt);
1181 /* Walk the linear chain with result *DEF searching for an operation
1182 with operand OP and code OPCODE, and remove it from the chain. *DEF
1183 is updated if there is only one operand but no operation left. */
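/* For example (illustrative): with the chain t1 = a * b; *def = t1 * c;
   and OP == b, the multiplication by b is removed and the chain is reduced
   to the equivalent of *def = a * c.  */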
1185 static void
1186 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1188 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1192 tree name;
1194 if (opcode == MULT_EXPR)
1196 if (stmt_is_power_of_op (stmt, op))
1198 if (decrement_power (stmt) == 1)
1199 propagate_op_to_single_use (op, stmt, def);
1200 return;
1202 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1204 if (gimple_assign_rhs1 (stmt) == op)
1206 propagate_op_to_single_use (op, stmt, def);
1207 return;
1209 else if (integer_minus_onep (op)
1210 || real_minus_onep (op))
1212 gimple_assign_set_rhs_code
1213 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1214 return;
1219 name = gimple_assign_rhs1 (stmt);
1221 /* If this is the operation we look for and one of the operands
1222 is ours simply propagate the other operand into the stmts
1223 single use. */
1224 if (gimple_assign_rhs_code (stmt) == opcode
1225 && (name == op
1226 || gimple_assign_rhs2 (stmt) == op))
1228 if (name == op)
1229 name = gimple_assign_rhs2 (stmt);
1230 propagate_op_to_single_use (name, stmt, def);
1231 return;
1234 /* We might have a multiply of two __builtin_pow* calls, and
1235 the operand might be hiding in the rightmost one. Likewise
1236 this can happen for a negate. */
1237 if (opcode == MULT_EXPR
1238 && gimple_assign_rhs_code (stmt) == opcode
1239 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1240 && has_single_use (gimple_assign_rhs2 (stmt)))
1242 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1243 if (stmt_is_power_of_op (stmt2, op))
1245 if (decrement_power (stmt2) == 1)
1246 propagate_op_to_single_use (op, stmt2, def);
1247 return;
1249 else if (is_gimple_assign (stmt2)
1250 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1252 if (gimple_assign_rhs1 (stmt2) == op)
1254 propagate_op_to_single_use (op, stmt2, def);
1255 return;
1257 else if (integer_minus_onep (op)
1258 || real_minus_onep (op))
1260 gimple_assign_set_rhs_code
1261 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1262 return;
1267 /* Continue walking the chain. */
1268 gcc_assert (name != op
1269 && TREE_CODE (name) == SSA_NAME);
1270 stmt = SSA_NAME_DEF_STMT (name);
1272 while (1);
1275 /* Returns true if statement S1 dominates statement S2. Like
1276 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1278 static bool
1279 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1281 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1283 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of a default
1284 definition (D) SSA_NAME. Assume it lives at the beginning of the function and
1285 thus dominates everything. */
1286 if (!bb1 || s1 == s2)
1287 return true;
1289 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1290 if (!bb2)
1291 return false;
1293 if (bb1 == bb2)
1295 /* PHIs in the same basic block are assumed to be
1296 executed all in parallel; if only one stmt is a PHI,
1297 it dominates the other stmt in the same basic block. */
1298 if (gimple_code (s1) == GIMPLE_PHI)
1299 return true;
1301 if (gimple_code (s2) == GIMPLE_PHI)
1302 return false;
1304 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1306 if (gimple_uid (s1) < gimple_uid (s2))
1307 return true;
1309 if (gimple_uid (s1) > gimple_uid (s2))
1310 return false;
1312 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1313 unsigned int uid = gimple_uid (s1);
1314 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1316 gimple *s = gsi_stmt (gsi);
1317 if (gimple_uid (s) != uid)
1318 break;
1319 if (s == s2)
1320 return true;
1323 return false;
1326 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1329 /* Insert STMT after INSERT_POINT. */
1331 static void
1332 insert_stmt_after (gimple *stmt, gimple *insert_point)
1334 gimple_stmt_iterator gsi;
1335 basic_block bb;
1337 if (gimple_code (insert_point) == GIMPLE_PHI)
1338 bb = gimple_bb (insert_point);
1339 else if (!stmt_ends_bb_p (insert_point))
1341 gsi = gsi_for_stmt (insert_point);
1342 gimple_set_uid (stmt, gimple_uid (insert_point));
1343 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1344 return;
1346 else
1347 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1348 thus if it must end a basic block, it should be a call that can
1349 throw, or some assignment that can throw. If it throws, the LHS
1350 of it will not be initialized though, so only valid places using
1351 the SSA_NAME should be dominated by the fallthru edge. */
1352 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1353 gsi = gsi_after_labels (bb);
1354 if (gsi_end_p (gsi))
1356 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1357 gimple_set_uid (stmt,
1358 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1360 else
1361 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1362 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1365 /* Builds one statement performing OP1 OPCODE OP2, using a new SSA
1366 name of type TYPE for the result. Places the statement after the
1367 definition of either OP1 or OP2. Returns the new statement. */
1369 static gimple *
1370 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1372 gimple *op1def = NULL, *op2def = NULL;
1373 gimple_stmt_iterator gsi;
1374 tree op;
1375 gassign *sum;
1377 /* Create the addition statement. */
1378 op = make_ssa_name (type);
1379 sum = gimple_build_assign (op, opcode, op1, op2);
1381 /* Find an insertion place and insert. */
1382 if (TREE_CODE (op1) == SSA_NAME)
1383 op1def = SSA_NAME_DEF_STMT (op1);
1384 if (TREE_CODE (op2) == SSA_NAME)
1385 op2def = SSA_NAME_DEF_STMT (op2);
1386 if ((!op1def || gimple_nop_p (op1def))
1387 && (!op2def || gimple_nop_p (op2def)))
1389 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1390 if (gsi_end_p (gsi))
1392 gimple_stmt_iterator gsi2
1393 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1394 gimple_set_uid (sum,
1395 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1397 else
1398 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1399 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1401 else
1403 gimple *insert_point;
1404 if ((!op1def || gimple_nop_p (op1def))
1405 || (op2def && !gimple_nop_p (op2def)
1406 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1407 insert_point = op2def;
1408 else
1409 insert_point = op1def;
1410 insert_stmt_after (sum, insert_point);
1412 update_stmt (sum);
1414 return sum;
1417 /* Perform un-distribution of divisions and multiplications.
1418 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1419 to (A + B) / X for real X.
1421 The algorithm is organized as follows.
1423 - First we walk the addition chain *OPS looking for summands that
1424 are defined by a multiplication or a real division. This results
1425 in the candidates bitmap with relevant indices into *OPS.
1427 - Second we build the chains of multiplications or divisions for
1428 these candidates, counting the number of occurrences of (operand, code)
1429 pairs in all of the candidates chains.
1431 - Third we sort the (operand, code) pairs by number of occurrence and
1432 process them starting with the pair with the most uses.
1434 * For each such pair we walk the candidates again to build a
1435 second candidate bitmap noting all multiplication/division chains
1436 that have at least one occurrence of (operand, code).
1438 * We build an alternate addition chain only covering these
1439 candidates with one (operand, code) operation removed from their
1440 multiplication/division chain.
1442 * The first candidate gets replaced by the alternate addition chain
1443 multiplied/divided by the operand.
1445 * All candidate chains get disabled for further processing and
1446 processing of (operand, code) pairs continues.
1448 The alternate addition chains built are re-processed by the main
1449 reassociation algorithm which allows optimizing a * x * y + b * y * x
1450 to (a + b) * x * y in one invocation of the reassociation pass. */
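/* For example (illustrative): for the addition chain a * x + b * x + c * x
   the counting table records (x, *) with count 3 (and (a, *), (b, *) and
   (c, *) with count 1 each), so x is processed first, each candidate chain
   has its multiplication by x stripped, and the chain is rebuilt as
   (a + b + c) * x.  */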
1452 static bool
1453 undistribute_ops_list (enum tree_code opcode,
1454 vec<operand_entry *> *ops, struct loop *loop)
1456 unsigned int length = ops->length ();
1457 operand_entry *oe1;
1458 unsigned i, j;
1459 sbitmap candidates, candidates2;
1460 unsigned nr_candidates, nr_candidates2;
1461 sbitmap_iterator sbi0;
1462 vec<operand_entry *> *subops;
1463 bool changed = false;
1464 int next_oecount_id = 0;
1466 if (length <= 1
1467 || opcode != PLUS_EXPR)
1468 return false;
1470 /* Build a list of candidates to process. */
1471 candidates = sbitmap_alloc (length);
1472 bitmap_clear (candidates);
1473 nr_candidates = 0;
1474 FOR_EACH_VEC_ELT (*ops, i, oe1)
1476 enum tree_code dcode;
1477 gimple *oe1def;
1479 if (TREE_CODE (oe1->op) != SSA_NAME)
1480 continue;
1481 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1482 if (!is_gimple_assign (oe1def))
1483 continue;
1484 dcode = gimple_assign_rhs_code (oe1def);
1485 if ((dcode != MULT_EXPR
1486 && dcode != RDIV_EXPR)
1487 || !is_reassociable_op (oe1def, dcode, loop))
1488 continue;
1490 bitmap_set_bit (candidates, i);
1491 nr_candidates++;
1494 if (nr_candidates < 2)
1496 sbitmap_free (candidates);
1497 return false;
1500 if (dump_file && (dump_flags & TDF_DETAILS))
1502 fprintf (dump_file, "searching for un-distribute opportunities ");
1503 print_generic_expr (dump_file,
1504 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1505 fprintf (dump_file, " %d\n", nr_candidates);
1508 /* Build linearized sub-operand lists and the counting table. */
1509 cvec.create (0);
1511 hash_table<oecount_hasher> ctable (15);
1513 /* ??? Macro arguments cannot have multi-argument template types in
1514 them. This typedef is needed to work around that limitation. */
1515 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1516 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1517 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1519 gimple *oedef;
1520 enum tree_code oecode;
1521 unsigned j;
1523 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1524 oecode = gimple_assign_rhs_code (oedef);
1525 linearize_expr_tree (&subops[i], oedef,
1526 associative_tree_code (oecode), false);
1528 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1530 oecount c;
1531 int *slot;
1532 int idx;
1533 c.oecode = oecode;
1534 c.cnt = 1;
1535 c.id = next_oecount_id++;
1536 c.op = oe1->op;
1537 cvec.safe_push (c);
1538 idx = cvec.length () + 41;
1539 slot = ctable.find_slot (idx, INSERT);
1540 if (!*slot)
1542 *slot = idx;
1544 else
1546 cvec.pop ();
1547 cvec[*slot - 42].cnt++;
1552 /* Sort the counting table. */
1553 cvec.qsort (oecount_cmp);
1555 if (dump_file && (dump_flags & TDF_DETAILS))
1557 oecount *c;
1558 fprintf (dump_file, "Candidates:\n");
1559 FOR_EACH_VEC_ELT (cvec, j, c)
1561 fprintf (dump_file, " %u %s: ", c->cnt,
1562 c->oecode == MULT_EXPR
1563 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1564 print_generic_expr (dump_file, c->op, 0);
1565 fprintf (dump_file, "\n");
1569 /* Process the (operand, code) pairs in order of most occurrence. */
1570 candidates2 = sbitmap_alloc (length);
1571 while (!cvec.is_empty ())
1573 oecount *c = &cvec.last ();
1574 if (c->cnt < 2)
1575 break;
1577 /* Now collect the operands in the outer chain that contain
1578 the common operand in their inner chain. */
1579 bitmap_clear (candidates2);
1580 nr_candidates2 = 0;
1581 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1583 gimple *oedef;
1584 enum tree_code oecode;
1585 unsigned j;
1586 tree op = (*ops)[i]->op;
1588 /* If we undistributed in this chain already, this may be
1589 a constant. */
1590 if (TREE_CODE (op) != SSA_NAME)
1591 continue;
1593 oedef = SSA_NAME_DEF_STMT (op);
1594 oecode = gimple_assign_rhs_code (oedef);
1595 if (oecode != c->oecode)
1596 continue;
1598 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1600 if (oe1->op == c->op)
1602 bitmap_set_bit (candidates2, i);
1603 ++nr_candidates2;
1604 break;
1609 if (nr_candidates2 >= 2)
1611 operand_entry *oe1, *oe2;
1612 gimple *prod;
1613 int first = bitmap_first_set_bit (candidates2);
1615 /* Build the new addition chain. */
1616 oe1 = (*ops)[first];
1617 if (dump_file && (dump_flags & TDF_DETAILS))
1619 fprintf (dump_file, "Building (");
1620 print_generic_expr (dump_file, oe1->op, 0);
1622 zero_one_operation (&oe1->op, c->oecode, c->op);
1623 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1625 gimple *sum;
1626 oe2 = (*ops)[i];
1627 if (dump_file && (dump_flags & TDF_DETAILS))
1629 fprintf (dump_file, " + ");
1630 print_generic_expr (dump_file, oe2->op, 0);
1632 zero_one_operation (&oe2->op, c->oecode, c->op);
1633 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1634 oe1->op, oe2->op, opcode);
1635 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1636 oe2->rank = 0;
1637 oe1->op = gimple_get_lhs (sum);
1640 /* Apply the multiplication/division. */
1641 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1642 oe1->op, c->op, c->oecode);
1643 if (dump_file && (dump_flags & TDF_DETAILS))
1645 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1646 print_generic_expr (dump_file, c->op, 0);
1647 fprintf (dump_file, "\n");
1650 /* Record it in the addition chain and disable further
1651 undistribution with this op. */
1652 oe1->op = gimple_assign_lhs (prod);
1653 oe1->rank = get_rank (oe1->op);
1654 subops[first].release ();
1656 changed = true;
1659 cvec.pop ();
1662 for (i = 0; i < ops->length (); ++i)
1663 subops[i].release ();
1664 free (subops);
1665 cvec.release ();
1666 sbitmap_free (candidates);
1667 sbitmap_free (candidates2);
1669 return changed;
1672 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1673 expression, examine the other OPS to see if any of them are comparisons
1674 of the same values, which we may be able to combine or eliminate.
1675 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1677 static bool
1678 eliminate_redundant_comparison (enum tree_code opcode,
1679 vec<operand_entry *> *ops,
1680 unsigned int currindex,
1681 operand_entry *curr)
1683 tree op1, op2;
1684 enum tree_code lcode, rcode;
1685 gimple *def1, *def2;
1686 int i;
1687 operand_entry *oe;
1689 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1690 return false;
1692 /* Check that CURR is a comparison. */
1693 if (TREE_CODE (curr->op) != SSA_NAME)
1694 return false;
1695 def1 = SSA_NAME_DEF_STMT (curr->op);
1696 if (!is_gimple_assign (def1))
1697 return false;
1698 lcode = gimple_assign_rhs_code (def1);
1699 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1700 return false;
1701 op1 = gimple_assign_rhs1 (def1);
1702 op2 = gimple_assign_rhs2 (def1);
1704 /* Now look for a similar comparison in the remaining OPS. */
1705 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1707 tree t;
1709 if (TREE_CODE (oe->op) != SSA_NAME)
1710 continue;
1711 def2 = SSA_NAME_DEF_STMT (oe->op);
1712 if (!is_gimple_assign (def2))
1713 continue;
1714 rcode = gimple_assign_rhs_code (def2);
1715 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1716 continue;
1718 /* If we got here, we have a match. See if we can combine the
1719 two comparisons. */
1720 if (opcode == BIT_IOR_EXPR)
1721 t = maybe_fold_or_comparisons (lcode, op1, op2,
1722 rcode, gimple_assign_rhs1 (def2),
1723 gimple_assign_rhs2 (def2));
1724 else
1725 t = maybe_fold_and_comparisons (lcode, op1, op2,
1726 rcode, gimple_assign_rhs1 (def2),
1727 gimple_assign_rhs2 (def2));
1728 if (!t)
1729 continue;
1731 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1732 always give us a boolean_type_node value back. If the original
1733 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1734 we need to convert. */
1735 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1736 t = fold_convert (TREE_TYPE (curr->op), t);
1738 if (TREE_CODE (t) != INTEGER_CST
1739 && !operand_equal_p (t, curr->op, 0))
1741 enum tree_code subcode;
1742 tree newop1, newop2;
1743 if (!COMPARISON_CLASS_P (t))
1744 continue;
1745 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1746 STRIP_USELESS_TYPE_CONVERSION (newop1);
1747 STRIP_USELESS_TYPE_CONVERSION (newop2);
1748 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1749 continue;
1752 if (dump_file && (dump_flags & TDF_DETAILS))
1754 fprintf (dump_file, "Equivalence: ");
1755 print_generic_expr (dump_file, curr->op, 0);
1756 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1757 print_generic_expr (dump_file, oe->op, 0);
1758 fprintf (dump_file, " -> ");
1759 print_generic_expr (dump_file, t, 0);
1760 fprintf (dump_file, "\n");
1763 /* Now we can delete oe, as it has been subsumed by the new combined
1764 expression t. */
1765 ops->ordered_remove (i);
1766 reassociate_stats.ops_eliminated ++;
1768 /* If t is the same as curr->op, we're done. Otherwise we must
1769 replace curr->op with t. The special case is if we got a constant
1770 back, in which case we add it to the end instead of in place of
1771 the current entry. */
1772 if (TREE_CODE (t) == INTEGER_CST)
1774 ops->ordered_remove (currindex);
1775 add_to_ops_vec (ops, t);
1777 else if (!operand_equal_p (t, curr->op, 0))
1779 gimple *sum;
1780 enum tree_code subcode;
1781 tree newop1;
1782 tree newop2;
1783 gcc_assert (COMPARISON_CLASS_P (t));
1784 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1785 STRIP_USELESS_TYPE_CONVERSION (newop1);
1786 STRIP_USELESS_TYPE_CONVERSION (newop2);
1787 gcc_checking_assert (is_gimple_val (newop1)
1788 && is_gimple_val (newop2));
1789 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1790 curr->op = gimple_get_lhs (sum);
1792 return true;
1795 return false;
1799 /* Transform repeated addition of the same values into a multiply with
1800 a constant. */
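/* For example (illustrative): x + x + x + y becomes x * 3 + y.  For
   floating-point operands this is only done when
   flag_unsafe_math_optimizations is set, as checked below.  */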
1801 static bool
1802 transform_add_to_multiply (vec<operand_entry *> *ops)
1804 operand_entry *oe;
1805 tree op = NULL_TREE;
1806 int j;
1807 int i, start = -1, end = 0, count = 0;
1808 vec<std::pair <int, int> > indxs = vNULL;
1809 bool changed = false;
1811 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1812 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1813 || !flag_unsafe_math_optimizations))
1814 return false;
1816 /* Look for repeated operands. */
1817 FOR_EACH_VEC_ELT (*ops, i, oe)
1819 if (start == -1)
1821 count = 1;
1822 op = oe->op;
1823 start = i;
1825 else if (operand_equal_p (oe->op, op, 0))
1827 count++;
1828 end = i;
1830 else
1832 if (count > 1)
1833 indxs.safe_push (std::make_pair (start, end));
1834 count = 1;
1835 op = oe->op;
1836 start = i;
1840 if (count > 1)
1841 indxs.safe_push (std::make_pair (start, end));
1843 for (j = indxs.length () - 1; j >= 0; --j)
1845 /* Convert repeated operand addition to multiplication. */
1846 start = indxs[j].first;
1847 end = indxs[j].second;
1848 op = (*ops)[start]->op;
1849 count = end - start + 1;
1850 for (i = end; i >= start; --i)
1851 ops->unordered_remove (i);
1852 tree tmp = make_ssa_name (TREE_TYPE (op));
1853 tree cst = build_int_cst (integer_type_node, count);
1854 gassign *mul_stmt
1855 = gimple_build_assign (tmp, MULT_EXPR,
1856 op, fold_convert (TREE_TYPE (op), cst));
1857 gimple_set_visited (mul_stmt, true);
1858 add_to_ops_vec (ops, tmp, mul_stmt);
1859 changed = true;
1862 return changed;
1866 /* Perform various identities and other optimizations on the list of
1867 operand entries, stored in OPS. The tree code for the binary
1868 operation between all the operands is OPCODE. */
1870 static void
1871 optimize_ops_list (enum tree_code opcode,
1872 vec<operand_entry *> *ops)
1874 unsigned int length = ops->length ();
1875 unsigned int i;
1876 operand_entry *oe;
1877 operand_entry *oelast = NULL;
1878 bool iterate = false;
1880 if (length == 1)
1881 return;
1883 oelast = ops->last ();
1885 /* If the last two are constants, pop the constants off, merge them
1886 and try the next two. */
1887 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1889 operand_entry *oelm1 = (*ops)[length - 2];
1891 if (oelm1->rank == 0
1892 && is_gimple_min_invariant (oelm1->op)
1893 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1894 TREE_TYPE (oelast->op)))
1896 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1897 oelm1->op, oelast->op);
1899 if (folded && is_gimple_min_invariant (folded))
1901 if (dump_file && (dump_flags & TDF_DETAILS))
1902 fprintf (dump_file, "Merging constants\n");
1904 ops->pop ();
1905 ops->pop ();
1907 add_to_ops_vec (ops, folded);
1908 reassociate_stats.constants_eliminated++;
1910 optimize_ops_list (opcode, ops);
1911 return;
1916 eliminate_using_constants (opcode, ops);
1917 oelast = NULL;
1919 for (i = 0; ops->iterate (i, &oe);)
1921 bool done = false;
1923 if (eliminate_not_pairs (opcode, ops, i, oe))
1924 return;
1925 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1926 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1927 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1929 if (done)
1930 return;
1931 iterate = true;
1932 oelast = NULL;
1933 continue;
1935 oelast = oe;
1936 i++;
1939 length = ops->length ();
1940 oelast = ops->last ();
1942 if (iterate)
1943 optimize_ops_list (opcode, ops);
1946 /* The following functions are subroutines to optimize_range_tests and allow
1947 it to try to change a logical combination of comparisons into a range
1948 test.
1950 For example, both
1951 X == 2 || X == 5 || X == 3 || X == 4
and
1953 X >= 2 && X <= 5
1954 are converted to
1955 (unsigned) (X - 2) <= 3
1957 For more information see comments above fold_test_range in fold-const.c,
1958 this implementation is for GIMPLE. */
1960 struct range_entry
1962 tree exp;
1963 tree low;
1964 tree high;
1965 bool in_p;
1966 bool strict_overflow_p;
1967 unsigned int idx, next;
1970 /* This is similar to make_range in fold-const.c, but on top of
1971 GIMPLE instead of trees. If EXP is non-NULL, it should be
1972 an SSA_NAME and the STMT argument is ignored; otherwise STMT
1973 argument should be a GIMPLE_COND. */
1975 static void
1976 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
1978 int in_p;
1979 tree low, high;
1980 bool is_bool, strict_overflow_p;
1982 r->exp = NULL_TREE;
1983 r->in_p = false;
1984 r->strict_overflow_p = false;
1985 r->low = NULL_TREE;
1986 r->high = NULL_TREE;
1987 if (exp != NULL_TREE
1988 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1989 return;
1991 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1992 and see if we can refine the range. Some of the cases below may not
1993 happen, but it doesn't seem worth worrying about this. We "continue"
1994 the outer loop when we've changed something; otherwise we "break"
1995 the switch, which will "break" the while. */
1996 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1997 high = low;
1998 in_p = 0;
1999 strict_overflow_p = false;
2000 is_bool = false;
2001 if (exp == NULL_TREE)
2002 is_bool = true;
2003 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2005 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2006 is_bool = true;
2007 else
2008 return;
2010 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2011 is_bool = true;
2013 while (1)
2015 enum tree_code code;
2016 tree arg0, arg1, exp_type;
2017 tree nexp;
2018 location_t loc;
2020 if (exp != NULL_TREE)
2022 if (TREE_CODE (exp) != SSA_NAME
2023 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2024 break;
2026 stmt = SSA_NAME_DEF_STMT (exp);
2027 if (!is_gimple_assign (stmt))
2028 break;
2030 code = gimple_assign_rhs_code (stmt);
2031 arg0 = gimple_assign_rhs1 (stmt);
2032 arg1 = gimple_assign_rhs2 (stmt);
2033 exp_type = TREE_TYPE (exp);
2035 else
2037 code = gimple_cond_code (stmt);
2038 arg0 = gimple_cond_lhs (stmt);
2039 arg1 = gimple_cond_rhs (stmt);
2040 exp_type = boolean_type_node;
2043 if (TREE_CODE (arg0) != SSA_NAME)
2044 break;
2045 loc = gimple_location (stmt);
2046 switch (code)
2048 case BIT_NOT_EXPR:
2049 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2050 /* Ensure the range is either +[-,0], +[0,0],
2051 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2052 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2053 or similar expression of unconditional true or
2054 false, it should not be negated. */
2055 && ((high && integer_zerop (high))
2056 || (low && integer_onep (low))))
2058 in_p = !in_p;
2059 exp = arg0;
2060 continue;
2062 break;
2063 case SSA_NAME:
2064 exp = arg0;
2065 continue;
2066 CASE_CONVERT:
2067 if (is_bool)
2068 goto do_default;
2069 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2071 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2072 is_bool = true;
2073 else
2074 return;
2076 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2077 is_bool = true;
2078 goto do_default;
2079 case EQ_EXPR:
2080 case NE_EXPR:
2081 case LT_EXPR:
2082 case LE_EXPR:
2083 case GE_EXPR:
2084 case GT_EXPR:
2085 is_bool = true;
2086 /* FALLTHRU */
2087 default:
2088 if (!is_bool)
2089 return;
2090 do_default:
2091 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2092 &low, &high, &in_p,
2093 &strict_overflow_p);
2094 if (nexp != NULL_TREE)
2096 exp = nexp;
2097 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2098 continue;
2100 break;
2102 break;
2104 if (is_bool)
2106 r->exp = exp;
2107 r->in_p = in_p;
2108 r->low = low;
2109 r->high = high;
2110 r->strict_overflow_p = strict_overflow_p;
2114 /* Comparison function for qsort. Sort entries
2115 without SSA_NAME exp first, then with SSA_NAMEs sorted
2116 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2117 by increasing ->low and if ->low is the same, by increasing
2118 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2119 maximum. */
2121 static int
2122 range_entry_cmp (const void *a, const void *b)
2124 const struct range_entry *p = (const struct range_entry *) a;
2125 const struct range_entry *q = (const struct range_entry *) b;
2127 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2129 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2131 /* Group range_entries for the same SSA_NAME together. */
2132 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2133 return -1;
2134 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2135 return 1;
2136 /* If ->low is different, NULL low goes first, then by
2137 ascending low. */
2138 if (p->low != NULL_TREE)
2140 if (q->low != NULL_TREE)
2142 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2143 p->low, q->low);
2144 if (tem && integer_onep (tem))
2145 return -1;
2146 tem = fold_binary (GT_EXPR, boolean_type_node,
2147 p->low, q->low);
2148 if (tem && integer_onep (tem))
2149 return 1;
2151 else
2152 return 1;
2154 else if (q->low != NULL_TREE)
2155 return -1;
2156 /* If ->high is different, NULL high goes last, before that by
2157 ascending high. */
2158 if (p->high != NULL_TREE)
2160 if (q->high != NULL_TREE)
2162 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2163 p->high, q->high);
2164 if (tem && integer_onep (tem))
2165 return -1;
2166 tem = fold_binary (GT_EXPR, boolean_type_node,
2167 p->high, q->high);
2168 if (tem && integer_onep (tem))
2169 return 1;
2171 else
2172 return -1;
2174 else if (q->high != NULL_TREE)
2175 return 1;
2176 /* If both ranges are the same, sort below by ascending idx. */
2178 else
2179 return 1;
2181 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2182 return -1;
2184 if (p->idx < q->idx)
2185 return -1;
2186 else
2188 gcc_checking_assert (p->idx > q->idx);
2189 return 1;
2193 /* Helper routine of optimize_range_tests.
2194 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2195 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2196 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2197 is NULL, OTHERRANGEP should not be NULL; it then points to
2198 an array of COUNT pointers to other ranges. Return
2199 true if the range merge has been successful.
2200 If OPCODE is ERROR_MARK, this is called from within
2201 maybe_optimize_range_tests and is performing inter-bb range optimization.
2202 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2203 oe->rank. */
2205 static bool
2206 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2207 struct range_entry **otherrangep,
2208 unsigned int count, enum tree_code opcode,
2209 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2210 bool in_p, tree low, tree high, bool strict_overflow_p)
2212 operand_entry *oe = (*ops)[range->idx];
2213 tree op = oe->op;
2214 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2215 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2216 location_t loc = gimple_location (stmt);
2217 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2218 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2219 in_p, low, high);
2220 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2221 gimple_stmt_iterator gsi;
2222 unsigned int i, uid;
2224 if (tem == NULL_TREE)
2225 return false;
2227 /* If op is default def SSA_NAME, there is no place to insert the
2228 new comparison. Give up, unless we can use OP itself as the
2229 range test. */
2230 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2232 if (op == range->exp
2233 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2234 || TREE_CODE (optype) == BOOLEAN_TYPE)
2235 && (op == tem
2236 || (TREE_CODE (tem) == EQ_EXPR
2237 && TREE_OPERAND (tem, 0) == op
2238 && integer_onep (TREE_OPERAND (tem, 1))))
2239 && opcode != BIT_IOR_EXPR
2240 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2242 stmt = NULL;
2243 tem = op;
2245 else
2246 return false;
2249 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2250 warning_at (loc, OPT_Wstrict_overflow,
2251 "assuming signed overflow does not occur "
2252 "when simplifying range test");
2254 if (dump_file && (dump_flags & TDF_DETAILS))
2256 struct range_entry *r;
2257 fprintf (dump_file, "Optimizing range tests ");
2258 print_generic_expr (dump_file, range->exp, 0);
2259 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2260 print_generic_expr (dump_file, range->low, 0);
2261 fprintf (dump_file, ", ");
2262 print_generic_expr (dump_file, range->high, 0);
2263 fprintf (dump_file, "]");
2264 for (i = 0; i < count; i++)
2266 if (otherrange)
2267 r = otherrange + i;
2268 else
2269 r = otherrangep[i];
2270 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2271 print_generic_expr (dump_file, r->low, 0);
2272 fprintf (dump_file, ", ");
2273 print_generic_expr (dump_file, r->high, 0);
2274 fprintf (dump_file, "]");
2276 fprintf (dump_file, "\n into ");
2277 print_generic_expr (dump_file, tem, 0);
2278 fprintf (dump_file, "\n");
2281 if (opcode == BIT_IOR_EXPR
2282 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2283 tem = invert_truthvalue_loc (loc, tem);
2285 tem = fold_convert_loc (loc, optype, tem);
2286 if (stmt)
2288 gsi = gsi_for_stmt (stmt);
2289 uid = gimple_uid (stmt);
2291 else
2293 gsi = gsi_none ();
2294 uid = 0;
2296 if (stmt == NULL)
2297 gcc_checking_assert (tem == op);
2298 /* In rare cases range->exp can be equal to lhs of stmt.
2299 In that case we have to insert after the stmt rather than before
2300 it. If stmt is a PHI, insert it at the start of the basic block. */
2301 else if (op != range->exp)
2303 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2304 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2305 GSI_SAME_STMT);
2306 gsi_prev (&gsi);
2308 else if (gimple_code (stmt) != GIMPLE_PHI)
2310 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2311 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2312 GSI_CONTINUE_LINKING);
2314 else
2316 gsi = gsi_after_labels (gimple_bb (stmt));
2317 if (!gsi_end_p (gsi))
2318 uid = gimple_uid (gsi_stmt (gsi));
2319 else
2321 gsi = gsi_start_bb (gimple_bb (stmt));
2322 uid = 1;
2323 while (!gsi_end_p (gsi))
2325 uid = gimple_uid (gsi_stmt (gsi));
2326 gsi_next (&gsi);
2329 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2330 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2331 GSI_SAME_STMT);
2332 if (gsi_end_p (gsi))
2333 gsi = gsi_last_bb (gimple_bb (stmt));
2334 else
2335 gsi_prev (&gsi);
2337 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2338 if (gimple_uid (gsi_stmt (gsi)))
2339 break;
2340 else
2341 gimple_set_uid (gsi_stmt (gsi), uid);
2343 oe->op = tem;
2344 range->exp = exp;
2345 range->low = low;
2346 range->high = high;
2347 range->in_p = in_p;
2348 range->strict_overflow_p = false;
2350 for (i = 0; i < count; i++)
2352 if (otherrange)
2353 range = otherrange + i;
2354 else
2355 range = otherrangep[i];
2356 oe = (*ops)[range->idx];
2357 /* Now change all the other range test immediate uses, so that
2358 those tests will be optimized away. */
2359 if (opcode == ERROR_MARK)
2361 if (oe->op)
2362 oe->op = build_int_cst (TREE_TYPE (oe->op),
2363 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2364 else
2365 oe->op = (oe->rank == BIT_IOR_EXPR
2366 ? boolean_false_node : boolean_true_node);
2368 else
2369 oe->op = error_mark_node;
2370 range->exp = NULL_TREE;
2372 return true;
2375 /* Optimize X == CST1 || X == CST2
2376 if popcount (CST1 ^ CST2) == 1 into
2377 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2378 Similarly for ranges. E.g.
2379 X != 2 && X != 3 && X != 10 && X != 11
2380 will be transformed by the previous optimization into
2381 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2382 and this loop can transform that into
2383 !(((X & ~8) - 2U) <= 1U). */
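/* Illustrative sketch, not part of the pass: a concrete instance of the
   transformation above with CST1 == 2 and CST2 == 3, so CST1 ^ CST2 == 1
   and popcount (1) == 1.  The function names are made up; the two bodies
   are assumed to be equivalent.  */
static int
xor_trick_before (unsigned int x)
{
  return x == 2 || x == 3;
}
static int
xor_trick_after (unsigned int x)
{
  return (x & ~1u) == 2;
}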
2385 static bool
2386 optimize_range_tests_xor (enum tree_code opcode, tree type,
2387 tree lowi, tree lowj, tree highi, tree highj,
2388 vec<operand_entry *> *ops,
2389 struct range_entry *rangei,
2390 struct range_entry *rangej)
2392 tree lowxor, highxor, tem, exp;
2393 /* Check lowi ^ lowj == highi ^ highj and
2394 popcount (lowi ^ lowj) == 1. */
2395 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2396 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2397 return false;
2398 if (!integer_pow2p (lowxor))
2399 return false;
2400 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2401 if (!tree_int_cst_equal (lowxor, highxor))
2402 return false;
2404 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2405 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2406 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2407 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2408 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2409 NULL, rangei->in_p, lowj, highj,
2410 rangei->strict_overflow_p
2411 || rangej->strict_overflow_p))
2412 return true;
2413 return false;
2416 /* Optimize X == CST1 || X == CST2
2417 if popcount (CST2 - CST1) == 1 into
2418 ((X - CST1) & ~(CST2 - CST1)) == 0.
2419 Similarly for ranges. E.g.
2420 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2421 || X == 75 || X == 45
2422 will be transformed by the previous optimization into
2423 (X - 43U) <= 3U || (X - 75U) <= 3U
2424 and this loop can transform that into
2425 ((X - 43U) & ~(75U - 43U)) <= 3U. */
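/* Illustrative sketch, not part of the pass: a concrete instance of the
   transformation above with CST1 == 43 and CST2 == 75, so CST2 - CST1 == 32
   and popcount (32) == 1.  The function names are made up; the two bodies
   are assumed to be equivalent.  */
static int
diff_trick_before (unsigned int x)
{
  return x == 43 || x == 75;
}
static int
diff_trick_after (unsigned int x)
{
  return ((x - 43u) & ~32u) == 0;
}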
2426 static bool
2427 optimize_range_tests_diff (enum tree_code opcode, tree type,
2428 tree lowi, tree lowj, tree highi, tree highj,
2429 vec<operand_entry *> *ops,
2430 struct range_entry *rangei,
2431 struct range_entry *rangej)
2433 tree tem1, tem2, mask;
2434 /* Check highi - lowi == highj - lowj. */
2435 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2436 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2437 return false;
2438 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2439 if (!tree_int_cst_equal (tem1, tem2))
2440 return false;
2441 /* Check popcount (lowj - lowi) == 1. */
2442 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2443 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2444 return false;
2445 if (!integer_pow2p (tem1))
2446 return false;
2448 type = unsigned_type_for (type);
2449 tem1 = fold_convert (type, tem1);
2450 tem2 = fold_convert (type, tem2);
2451 lowi = fold_convert (type, lowi);
2452 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2453 tem1 = fold_binary (MINUS_EXPR, type,
2454 fold_convert (type, rangei->exp), lowi);
2455 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2456 lowj = build_int_cst (type, 0);
2457 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2458 NULL, rangei->in_p, lowj, tem2,
2459 rangei->strict_overflow_p
2460 || rangej->strict_overflow_p))
2461 return true;
2462 return false;
2465 /* Perform the checks common to optimize_range_tests_xor and
2466 optimize_range_tests_diff.
2467 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor.
2468 Else call optimize_range_tests_diff. */
2470 static bool
2471 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2472 bool optimize_xor, vec<operand_entry *> *ops,
2473 struct range_entry *ranges)
2475 int i, j;
2476 bool any_changes = false;
2477 for (i = first; i < length; i++)
2479 tree lowi, highi, lowj, highj, type, tem;
2481 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2482 continue;
2483 type = TREE_TYPE (ranges[i].exp);
2484 if (!INTEGRAL_TYPE_P (type))
2485 continue;
2486 lowi = ranges[i].low;
2487 if (lowi == NULL_TREE)
2488 lowi = TYPE_MIN_VALUE (type);
2489 highi = ranges[i].high;
2490 if (highi == NULL_TREE)
2491 continue;
2492 for (j = i + 1; j < length && j < i + 64; j++)
2494 bool changes;
2495 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2496 continue;
2497 lowj = ranges[j].low;
2498 if (lowj == NULL_TREE)
2499 continue;
2500 highj = ranges[j].high;
2501 if (highj == NULL_TREE)
2502 highj = TYPE_MAX_VALUE (type);
2503 /* Check lowj > highi. */
2504 tem = fold_binary (GT_EXPR, boolean_type_node,
2505 lowj, highi);
2506 if (tem == NULL_TREE || !integer_onep (tem))
2507 continue;
2508 if (optimize_xor)
2509 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2510 highi, highj, ops,
2511 ranges + i, ranges + j);
2512 else
2513 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2514 highi, highj, ops,
2515 ranges + i, ranges + j);
2516 if (changes)
2518 any_changes = true;
2519 break;
2523 return any_changes;
2526 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2527 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2528 EXP on success, NULL otherwise. */
2530 static tree
2531 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2532 wide_int *mask, tree *totallowp)
2534 tree tem = int_const_binop (MINUS_EXPR, high, low);
2535 if (tem == NULL_TREE
2536 || TREE_CODE (tem) != INTEGER_CST
2537 || TREE_OVERFLOW (tem)
2538 || tree_int_cst_sgn (tem) == -1
2539 || compare_tree_int (tem, prec) != -1)
2540 return NULL_TREE;
2542 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2543 *mask = wi::shifted_mask (0, max, false, prec);
2544 if (TREE_CODE (exp) == BIT_AND_EXPR
2545 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2547 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2548 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2549 if (wi::popcount (msk) == 1
2550 && wi::ltu_p (msk, prec - max))
2552 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2553 max += msk.to_uhwi ();
2554 exp = TREE_OPERAND (exp, 0);
2555 if (integer_zerop (low)
2556 && TREE_CODE (exp) == PLUS_EXPR
2557 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2559 tree ret = TREE_OPERAND (exp, 0);
2560 STRIP_NOPS (ret);
2561 widest_int bias
2562 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2563 TYPE_PRECISION (TREE_TYPE (low))));
2564 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2565 if (totallowp)
2567 *totallowp = tbias;
2568 return ret;
2570 else if (!tree_int_cst_lt (totallow, tbias))
2571 return NULL_TREE;
2572 bias = wi::to_widest (tbias);
2573 bias -= wi::to_widest (totallow);
2574 if (bias >= 0 && bias < prec - max)
2576 *mask = wi::lshift (*mask, bias);
2577 return ret;
2582 if (totallowp)
2583 return exp;
2584 if (!tree_int_cst_lt (totallow, low))
2585 return exp;
2586 tem = int_const_binop (MINUS_EXPR, low, totallow);
2587 if (tem == NULL_TREE
2588 || TREE_CODE (tem) != INTEGER_CST
2589 || TREE_OVERFLOW (tem)
2590 || compare_tree_int (tem, prec - max) == 1)
2591 return NULL_TREE;
2593 *mask = wi::lshift (*mask, wi::to_widest (tem));
2594 return exp;
2597 /* Attempt to optimize small range tests using bit test.
2598 E.g.
2599 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2600 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2601 has been optimized by the earlier range test optimizations into:
2602 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2603 As the whole 43 through 82 range spans fewer than 64 values,
2604 for 64-bit word targets optimize that into:
2605 (X - 43U) > 40U || ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
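/* Illustrative sketch, not part of the pass: a smaller bit-test instance
   for the forbidden values {43, 44, 46, 49}.  The offsets from 43 are
   {0, 1, 3, 6}, giving the mask 0x4b; the short-circuit || keeps the shift
   count below the word size.  Function names are made up; the two bodies
   are assumed to be equivalent.  */
static int
bit_test_before (unsigned int x)
{
  return x != 43 && x != 44 && x != 46 && x != 49;
}
static int
bit_test_after (unsigned int x)
{
  return x - 43u > 6u || ((1u << (x - 43u)) & 0x4bu) == 0;
}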
2607 static bool
2608 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2609 vec<operand_entry *> *ops,
2610 struct range_entry *ranges)
2612 int i, j;
2613 bool any_changes = false;
2614 int prec = GET_MODE_BITSIZE (word_mode);
2615 auto_vec<struct range_entry *, 64> candidates;
2617 for (i = first; i < length - 2; i++)
2619 tree lowi, highi, lowj, highj, type;
2621 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2622 continue;
2623 type = TREE_TYPE (ranges[i].exp);
2624 if (!INTEGRAL_TYPE_P (type))
2625 continue;
2626 lowi = ranges[i].low;
2627 if (lowi == NULL_TREE)
2628 lowi = TYPE_MIN_VALUE (type);
2629 highi = ranges[i].high;
2630 if (highi == NULL_TREE)
2631 continue;
2632 wide_int mask;
2633 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2634 highi, &mask, &lowi);
2635 if (exp == NULL_TREE)
2636 continue;
2637 bool strict_overflow_p = ranges[i].strict_overflow_p;
2638 candidates.truncate (0);
2639 int end = MIN (i + 64, length);
2640 for (j = i + 1; j < end; j++)
2642 tree exp2;
2643 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2644 continue;
2645 if (ranges[j].exp == exp)
2647 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2649 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2650 if (exp2 == exp)
2652 else if (TREE_CODE (exp2) == PLUS_EXPR)
2654 exp2 = TREE_OPERAND (exp2, 0);
2655 STRIP_NOPS (exp2);
2656 if (exp2 != exp)
2657 continue;
2659 else
2660 continue;
2662 else
2663 continue;
2664 lowj = ranges[j].low;
2665 if (lowj == NULL_TREE)
2666 continue;
2667 highj = ranges[j].high;
2668 if (highj == NULL_TREE)
2669 highj = TYPE_MAX_VALUE (type);
2670 wide_int mask2;
2671 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2672 highj, &mask2, NULL);
2673 if (exp2 != exp)
2674 continue;
2675 mask |= mask2;
2676 strict_overflow_p |= ranges[j].strict_overflow_p;
2677 candidates.safe_push (&ranges[j]);
2680 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2681 if (candidates.length () >= 2)
2683 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2684 wi::to_widest (lowi)
2685 + prec - 1 - wi::clz (mask));
2686 operand_entry *oe = (*ops)[ranges[i].idx];
2687 tree op = oe->op;
2688 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2689 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2690 location_t loc = gimple_location (stmt);
2691 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2693 /* See if it isn't cheaper to pretend the minimum value of the
2694 range is 0, if the maximum value is small enough.
2695 We can then avoid the subtraction of the minimum value, but the
2696 mask constant could perhaps be more expensive. */
2697 if (compare_tree_int (lowi, 0) > 0
2698 && compare_tree_int (high, prec) < 0)
2700 int cost_diff;
2701 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2702 rtx reg = gen_raw_REG (word_mode, 10000);
2703 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2704 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2705 GEN_INT (-m)), speed_p);
2706 rtx r = immed_wide_int_const (mask, word_mode);
2707 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2708 word_mode, speed_p);
2709 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2710 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2711 word_mode, speed_p);
2712 if (cost_diff > 0)
2714 mask = wi::lshift (mask, m);
2715 lowi = build_zero_cst (TREE_TYPE (lowi));
2719 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2720 false, lowi, high);
2721 if (tem == NULL_TREE || is_gimple_val (tem))
2722 continue;
2723 tree etype = unsigned_type_for (TREE_TYPE (exp));
2724 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2725 fold_convert_loc (loc, etype, exp),
2726 fold_convert_loc (loc, etype, lowi));
2727 exp = fold_convert_loc (loc, integer_type_node, exp);
2728 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2729 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2730 build_int_cst (word_type, 1), exp);
2731 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2732 wide_int_to_tree (word_type, mask));
2733 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2734 build_zero_cst (word_type));
2735 if (is_gimple_val (exp))
2736 continue;
2738 /* The shift might have undefined behavior if TEM is true,
2739 but reassociate_bb isn't prepared to have basic blocks
2740 split when it is running. So, temporarily emit code
2741 with BIT_IOR_EXPR instead of &&, and fix it up in
2742 branch_fixup. */
2743 gimple_seq seq;
2744 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2745 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2746 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2747 gimple_seq seq2;
2748 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2749 gimple_seq_add_seq_without_update (&seq, seq2);
2750 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2751 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2752 gimple *g = gimple_build_assign (make_ssa_name (optype),
2753 BIT_IOR_EXPR, tem, exp);
2754 gimple_set_location (g, loc);
2755 gimple_seq_add_stmt_without_update (&seq, g);
2756 exp = gimple_assign_lhs (g);
2757 tree val = build_zero_cst (optype);
2758 if (update_range_test (&ranges[i], NULL, candidates.address (),
2759 candidates.length (), opcode, ops, exp,
2760 seq, false, val, val, strict_overflow_p))
2762 any_changes = true;
2763 reassoc_branch_fixups.safe_push (tem);
2765 else
2766 gimple_seq_discard (seq);
2769 return any_changes;
2772 /* Optimize range tests, similarly to how fold_range_test optimizes
2773 them on trees. The tree code for the binary
2774 operation between all the operands is OPCODE.
2775 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2776 maybe_optimize_range_tests for inter-bb range optimization.
2777 In that case, if oe->op is NULL, oe->id is the index of the bb whose
2778 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2779 the actual opcode. */
2781 static bool
2782 optimize_range_tests (enum tree_code opcode,
2783 vec<operand_entry *> *ops)
2785 unsigned int length = ops->length (), i, j, first;
2786 operand_entry *oe;
2787 struct range_entry *ranges;
2788 bool any_changes = false;
2790 if (length == 1)
2791 return false;
2793 ranges = XNEWVEC (struct range_entry, length);
2794 for (i = 0; i < length; i++)
2796 oe = (*ops)[i];
2797 ranges[i].idx = i;
2798 init_range_entry (ranges + i, oe->op,
2799 oe->op
2800 ? NULL
2801 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2802 /* For | invert it now, we will invert it again before emitting
2803 the optimized expression. */
2804 if (opcode == BIT_IOR_EXPR
2805 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2806 ranges[i].in_p = !ranges[i].in_p;
2809 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2810 for (i = 0; i < length; i++)
2811 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2812 break;
2814 /* Try to merge ranges. */
2815 for (first = i; i < length; i++)
2817 tree low = ranges[i].low;
2818 tree high = ranges[i].high;
2819 int in_p = ranges[i].in_p;
2820 bool strict_overflow_p = ranges[i].strict_overflow_p;
2821 int update_fail_count = 0;
2823 for (j = i + 1; j < length; j++)
2825 if (ranges[i].exp != ranges[j].exp)
2826 break;
2827 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2828 ranges[j].in_p, ranges[j].low, ranges[j].high))
2829 break;
2830 strict_overflow_p |= ranges[j].strict_overflow_p;
2833 if (j == i + 1)
2834 continue;
2836 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2837 opcode, ops, ranges[i].exp, NULL, in_p,
2838 low, high, strict_overflow_p))
2840 i = j - 1;
2841 any_changes = true;
2843 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2844 while update_range_test would fail. */
2845 else if (update_fail_count == 64)
2846 i = j - 1;
2847 else
2848 ++update_fail_count;
2851 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2852 ops, ranges);
2854 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2855 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2856 ops, ranges);
2857 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2858 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2859 ops, ranges);
2861 if (any_changes && opcode != ERROR_MARK)
2863 j = 0;
2864 FOR_EACH_VEC_ELT (*ops, i, oe)
2866 if (oe->op == error_mark_node)
2867 continue;
2868 else if (i != j)
2869 (*ops)[j] = oe;
2870 j++;
2872 ops->truncate (j);
2875 XDELETEVEC (ranges);
2876 return any_changes;
2879 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
2880 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
2881 otherwise the comparison code. */
2883 static tree_code
2884 ovce_extract_ops (tree var, gassign **rets, bool *reti)
2886 if (TREE_CODE (var) != SSA_NAME)
2887 return ERROR_MARK;
2889 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
2890 if (stmt == NULL)
2891 return ERROR_MARK;
2893 /* ??? If we start creating more COND_EXPR, we could perform
2894 this same optimization with them. For now, simplify. */
2895 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
2896 return ERROR_MARK;
2898 tree cond = gimple_assign_rhs1 (stmt);
2899 tree_code cmp = TREE_CODE (cond);
2900 if (TREE_CODE_CLASS (cmp) != tcc_comparison)
2901 return ERROR_MARK;
2903 /* ??? For now, allow only canonical true and false result vectors.
2904 We could expand this to other constants should the need arise,
2905 but at the moment we don't create them. */
2906 tree t = gimple_assign_rhs2 (stmt);
2907 tree f = gimple_assign_rhs3 (stmt);
2908 bool inv;
2909 if (integer_all_onesp (t))
2910 inv = false;
2911 else if (integer_all_onesp (f))
2913 cmp = invert_tree_comparison (cmp, false);
2914 inv = true;
2916 else
2917 return ERROR_MARK;
2918 if (!integer_zerop (f))
2919 return ERROR_MARK;
2921 /* Success! */
2922 if (rets)
2923 *rets = stmt;
2924 if (reti)
2925 *reti = inv;
2926 return cmp;
2929 /* Optimize the condition of VEC_COND_EXPRs which have been combined
2930 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
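/* Illustrative sketch, not part of the pass: the kind of source code the
   optimization above targets, written with GCC's generic vector extension.
   Each comparison becomes a VEC_COND_EXPR with all-ones/zero lanes, and the
   two conditions combined with & are candidates for
   maybe_fold_and_comparisons.  The typedef and function name are made up.  */
typedef int v4si_example __attribute__ ((vector_size (16)));
static v4si_example
vec_cond_and_example (v4si_example a, v4si_example b)
{
  return (a < b) & (a != b);
}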
2932 static bool
2933 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
2935 unsigned int length = ops->length (), i, j;
2936 bool any_changes = false;
2938 if (length == 1)
2939 return false;
2941 for (i = 0; i < length; ++i)
2943 tree elt0 = (*ops)[i]->op;
2945 gassign *stmt0;
2946 bool invert;
2947 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
2948 if (cmp0 == ERROR_MARK)
2949 continue;
2951 for (j = i + 1; j < length; ++j)
2953 tree &elt1 = (*ops)[j]->op;
2955 gassign *stmt1;
2956 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
2957 if (cmp1 == ERROR_MARK)
2958 continue;
2960 tree cond0 = gimple_assign_rhs1 (stmt0);
2961 tree x0 = TREE_OPERAND (cond0, 0);
2962 tree y0 = TREE_OPERAND (cond0, 1);
2964 tree cond1 = gimple_assign_rhs1 (stmt1);
2965 tree x1 = TREE_OPERAND (cond1, 0);
2966 tree y1 = TREE_OPERAND (cond1, 1);
2968 tree comb;
2969 if (opcode == BIT_AND_EXPR)
2970 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
2971 else if (opcode == BIT_IOR_EXPR)
2972 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
2973 else
2974 gcc_unreachable ();
2975 if (comb == NULL)
2976 continue;
2978 /* Success! */
2979 if (dump_file && (dump_flags & TDF_DETAILS))
2981 fprintf (dump_file, "Transforming ");
2982 print_generic_expr (dump_file, cond0, 0);
2983 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
2984 print_generic_expr (dump_file, cond1, 0);
2985 fprintf (dump_file, " into ");
2986 print_generic_expr (dump_file, comb, 0);
2987 fputc ('\n', dump_file);
2990 gimple_assign_set_rhs1 (stmt0, comb);
2991 if (invert)
2992 std::swap (*gimple_assign_rhs2_ptr (stmt0),
2993 *gimple_assign_rhs3_ptr (stmt0));
2994 update_stmt (stmt0);
2996 elt1 = error_mark_node;
2997 any_changes = true;
3001 if (any_changes)
3003 operand_entry *oe;
3004 j = 0;
3005 FOR_EACH_VEC_ELT (*ops, i, oe)
3007 if (oe->op == error_mark_node)
3008 continue;
3009 else if (i != j)
3010 (*ops)[j] = oe;
3011 j++;
3013 ops->truncate (j);
3016 return any_changes;
3019 /* Return true if STMT is a cast like:
3020 <bb N>:
3022 _123 = (int) _234;
3024 <bb M>:
3025 # _345 = PHI <_123(N), 1(...), 1(...)>
3026 where _234 has bool type, _123 has single use and
3027 bb N has a single successor M. This is commonly used in
3028 the last block of a range test. */
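/* Illustrative sketch, not part of the pass: source code whose last
   range-test block typically ends in the bool-to-int cast matched by
   final_range_test_p -- the result of the final comparison is converted to
   int and fed into a PHI together with the constant 1 coming from the
   short-circuited path.  The function name is made up.  */
static int
final_range_test_example (int x, int y)
{
  return x == 3 || y == 5;
}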
3030 static bool
3031 final_range_test_p (gimple *stmt)
3033 basic_block bb, rhs_bb;
3034 edge e;
3035 tree lhs, rhs;
3036 use_operand_p use_p;
3037 gimple *use_stmt;
3039 if (!gimple_assign_cast_p (stmt))
3040 return false;
3041 bb = gimple_bb (stmt);
3042 if (!single_succ_p (bb))
3043 return false;
3044 e = single_succ_edge (bb);
3045 if (e->flags & EDGE_COMPLEX)
3046 return false;
3048 lhs = gimple_assign_lhs (stmt);
3049 rhs = gimple_assign_rhs1 (stmt);
3050 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3051 || TREE_CODE (rhs) != SSA_NAME
3052 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
3053 return false;
3055 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
3056 if (!single_imm_use (lhs, &use_p, &use_stmt))
3057 return false;
3059 if (gimple_code (use_stmt) != GIMPLE_PHI
3060 || gimple_bb (use_stmt) != e->dest)
3061 return false;
3063 /* And that the rhs is defined in the same loop. */
3064 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
3065 if (rhs_bb == NULL
3066 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
3067 return false;
3069 return true;
3072 /* Return true if BB is suitable basic block for inter-bb range test
3073 optimization. If BACKWARD is true, BB should be the only predecessor
3074 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
3075 or compared against to find a common basic block to which all conditions
3076 branch if true resp. false. If BACKWARD is false, TEST_BB should
3077 be the only predecessor of BB. */
3079 static bool
3080 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
3081 bool backward)
3083 edge_iterator ei, ei2;
3084 edge e, e2;
3085 gimple *stmt;
3086 gphi_iterator gsi;
3087 bool other_edge_seen = false;
3088 bool is_cond;
3090 if (test_bb == bb)
3091 return false;
3092 /* Check last stmt first. */
3093 stmt = last_stmt (bb);
3094 if (stmt == NULL
3095 || (gimple_code (stmt) != GIMPLE_COND
3096 && (backward || !final_range_test_p (stmt)))
3097 || gimple_visited_p (stmt)
3098 || stmt_could_throw_p (stmt)
3099 || *other_bb == bb)
3100 return false;
3101 is_cond = gimple_code (stmt) == GIMPLE_COND;
3102 if (is_cond)
3104 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
3105 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
3106 to *OTHER_BB (if not set yet, try to find it out). */
3107 if (EDGE_COUNT (bb->succs) != 2)
3108 return false;
3109 FOR_EACH_EDGE (e, ei, bb->succs)
3111 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3112 return false;
3113 if (e->dest == test_bb)
3115 if (backward)
3116 continue;
3117 else
3118 return false;
3120 if (e->dest == bb)
3121 return false;
3122 if (*other_bb == NULL)
3124 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
3125 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3126 return false;
3127 else if (e->dest == e2->dest)
3128 *other_bb = e->dest;
3129 if (*other_bb == NULL)
3130 return false;
3132 if (e->dest == *other_bb)
3133 other_edge_seen = true;
3134 else if (backward)
3135 return false;
3137 if (*other_bb == NULL || !other_edge_seen)
3138 return false;
3140 else if (single_succ (bb) != *other_bb)
3141 return false;
3143 /* Now check all PHIs of *OTHER_BB. */
3144 e = find_edge (bb, *other_bb);
3145 e2 = find_edge (test_bb, *other_bb);
3146 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3148 gphi *phi = gsi.phi ();
3149 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3150 corresponding to BB and TEST_BB predecessor must be the same. */
3151 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
3152 gimple_phi_arg_def (phi, e2->dest_idx), 0))
3154 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3155 one of the PHIs should have the lhs of the last stmt in
3156 that block as PHI arg and that PHI should have 0 or 1
3157 corresponding to it in all other range test basic blocks
3158 considered. */
3159 if (!is_cond)
3161 if (gimple_phi_arg_def (phi, e->dest_idx)
3162 == gimple_assign_lhs (stmt)
3163 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
3164 || integer_onep (gimple_phi_arg_def (phi,
3165 e2->dest_idx))))
3166 continue;
3168 else
3170 gimple *test_last = last_stmt (test_bb);
3171 if (gimple_code (test_last) != GIMPLE_COND
3172 && gimple_phi_arg_def (phi, e2->dest_idx)
3173 == gimple_assign_lhs (test_last)
3174 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
3175 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
3176 continue;
3179 return false;
3182 return true;
3185 /* Return true if BB doesn't have side-effects that would disallow
3186 range test optimization, all SSA_NAMEs set in the bb are consumed
3187 in the bb and there are no PHIs. */
3189 static bool
3190 no_side_effect_bb (basic_block bb)
3192 gimple_stmt_iterator gsi;
3193 gimple *last;
3195 if (!gimple_seq_empty_p (phi_nodes (bb)))
3196 return false;
3197 last = last_stmt (bb);
3198 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3200 gimple *stmt = gsi_stmt (gsi);
3201 tree lhs;
3202 imm_use_iterator imm_iter;
3203 use_operand_p use_p;
3205 if (is_gimple_debug (stmt))
3206 continue;
3207 if (gimple_has_side_effects (stmt))
3208 return false;
3209 if (stmt == last)
3210 return true;
3211 if (!is_gimple_assign (stmt))
3212 return false;
3213 lhs = gimple_assign_lhs (stmt);
3214 if (TREE_CODE (lhs) != SSA_NAME)
3215 return false;
3216 if (gimple_assign_rhs_could_trap_p (stmt))
3217 return false;
3218 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3220 gimple *use_stmt = USE_STMT (use_p);
3221 if (is_gimple_debug (use_stmt))
3222 continue;
3223 if (gimple_bb (use_stmt) != bb)
3224 return false;
3227 return false;
3230 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3231 return true and fill in *OPS recursively. */
3233 static bool
3234 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3235 struct loop *loop)
3237 gimple *stmt = SSA_NAME_DEF_STMT (var);
3238 tree rhs[2];
3239 int i;
3241 if (!is_reassociable_op (stmt, code, loop))
3242 return false;
3244 rhs[0] = gimple_assign_rhs1 (stmt);
3245 rhs[1] = gimple_assign_rhs2 (stmt);
3246 gimple_set_visited (stmt, true);
3247 for (i = 0; i < 2; i++)
3248 if (TREE_CODE (rhs[i]) == SSA_NAME
3249 && !get_ops (rhs[i], code, ops, loop)
3250 && has_single_use (rhs[i]))
3252 operand_entry *oe = operand_entry_pool.allocate ();
3254 oe->op = rhs[i];
3255 oe->rank = code;
3256 oe->id = 0;
3257 oe->count = 1;
3258 oe->stmt_to_insert = NULL;
3259 ops->safe_push (oe);
3261 return true;
3264 /* Find the ops that were added by get_ops starting from VAR, see if
3265 they were changed during update_range_test and if yes, create new
3266 stmts. */
3268 static tree
3269 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3270 unsigned int *pidx, struct loop *loop)
3272 gimple *stmt = SSA_NAME_DEF_STMT (var);
3273 tree rhs[4];
3274 int i;
3276 if (!is_reassociable_op (stmt, code, loop))
3277 return NULL;
3279 rhs[0] = gimple_assign_rhs1 (stmt);
3280 rhs[1] = gimple_assign_rhs2 (stmt);
3281 rhs[2] = rhs[0];
3282 rhs[3] = rhs[1];
3283 for (i = 0; i < 2; i++)
3284 if (TREE_CODE (rhs[i]) == SSA_NAME)
3286 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3287 if (rhs[2 + i] == NULL_TREE)
3289 if (has_single_use (rhs[i]))
3290 rhs[2 + i] = ops[(*pidx)++]->op;
3291 else
3292 rhs[2 + i] = rhs[i];
3295 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3296 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3298 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3299 var = make_ssa_name (TREE_TYPE (var));
3300 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3301 rhs[2], rhs[3]);
3302 gimple_set_uid (g, gimple_uid (stmt));
3303 gimple_set_visited (g, true);
3304 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3306 return var;
3309 /* Structure to track the initial value passed to get_ops and
3310 the range in the ops vector for each basic block. */
3312 struct inter_bb_range_test_entry
3314 tree op;
3315 unsigned int first_idx, last_idx;
3318 /* Inter-bb range test optimization.
3320 Returns TRUE if a GIMPLE conditional is optimized to a constant true/false,
3321 otherwise returns FALSE.
3323 This indicates to the caller that it should run a CFG cleanup pass
3324 once reassociation is completed. */
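/* Illustrative sketch, not part of the pass: the kind of control flow the
   inter-bb range test optimization targets.  Each comparison below ends up
   in its own basic block terminated by a GIMPLE_COND, yet together they
   form the single range test (unsigned) (x - 2) <= 3.  The function name
   is made up.  */
static int
inter_bb_range_test_example (int x)
{
  if (x == 2)
    return 1;
  if (x == 3)
    return 1;
  if (x == 4)
    return 1;
  if (x == 5)
    return 1;
  return 0;
}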
3326 static bool
3327 maybe_optimize_range_tests (gimple *stmt)
3329 basic_block first_bb = gimple_bb (stmt);
3330 basic_block last_bb = first_bb;
3331 basic_block other_bb = NULL;
3332 basic_block bb;
3333 edge_iterator ei;
3334 edge e;
3335 auto_vec<operand_entry *> ops;
3336 auto_vec<inter_bb_range_test_entry> bbinfo;
3337 bool any_changes = false;
3338 bool cfg_cleanup_needed = false;
3340 /* Consider only basic blocks that end with GIMPLE_COND or
3341 a cast statement satisfying final_range_test_p. All
3342 but the last bb in the first_bb .. last_bb range
3343 should end with GIMPLE_COND. */
3344 if (gimple_code (stmt) == GIMPLE_COND)
3346 if (EDGE_COUNT (first_bb->succs) != 2)
3347 return cfg_cleanup_needed;
3349 else if (final_range_test_p (stmt))
3350 other_bb = single_succ (first_bb);
3351 else
3352 return cfg_cleanup_needed;
3354 if (stmt_could_throw_p (stmt))
3355 return cfg_cleanup_needed;
3357 /* As relative ordering of post-dominator sons isn't fixed,
3358 maybe_optimize_range_tests can be called first on any
3359 bb in the range we want to optimize. So, start searching
3360 backwards, if first_bb can be set to a predecessor. */
3361 while (single_pred_p (first_bb))
3363 basic_block pred_bb = single_pred (first_bb);
3364 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3365 break;
3366 if (!no_side_effect_bb (first_bb))
3367 break;
3368 first_bb = pred_bb;
3370 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3371 Before starting forward search in last_bb successors, find
3372 out the other_bb. */
3373 if (first_bb == last_bb)
3375 other_bb = NULL;
3376 /* As non-GIMPLE_COND last stmt always terminates the range,
3377 if forward search didn't discover anything, just give up. */
3378 if (gimple_code (stmt) != GIMPLE_COND)
3379 return cfg_cleanup_needed;
3380 /* Look at both successors. Either it ends with a GIMPLE_COND
3381 and satisfies suitable_cond_bb, or ends with a cast and
3382 other_bb is that cast's successor. */
3383 FOR_EACH_EDGE (e, ei, first_bb->succs)
3384 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3385 || e->dest == first_bb)
3386 return cfg_cleanup_needed;
3387 else if (single_pred_p (e->dest))
3389 stmt = last_stmt (e->dest);
3390 if (stmt
3391 && gimple_code (stmt) == GIMPLE_COND
3392 && EDGE_COUNT (e->dest->succs) == 2)
3394 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3395 break;
3396 else
3397 other_bb = NULL;
3399 else if (stmt
3400 && final_range_test_p (stmt)
3401 && find_edge (first_bb, single_succ (e->dest)))
3403 other_bb = single_succ (e->dest);
3404 if (other_bb == first_bb)
3405 other_bb = NULL;
3408 if (other_bb == NULL)
3409 return cfg_cleanup_needed;
3411 /* Now do the forward search, moving last_bb to successor bbs
3412 that aren't other_bb. */
3413 while (EDGE_COUNT (last_bb->succs) == 2)
3415 FOR_EACH_EDGE (e, ei, last_bb->succs)
3416 if (e->dest != other_bb)
3417 break;
3418 if (e == NULL)
3419 break;
3420 if (!single_pred_p (e->dest))
3421 break;
3422 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3423 break;
3424 if (!no_side_effect_bb (e->dest))
3425 break;
3426 last_bb = e->dest;
3428 if (first_bb == last_bb)
3429 return cfg_cleanup_needed;
3430 /* Here basic blocks first_bb through last_bb's predecessor
3431 end with GIMPLE_COND, all of them have one of the edges to
3432 other_bb and another to another block in the range,
3433 all blocks except first_bb don't have side-effects and
3434 last_bb ends with either GIMPLE_COND, or cast satisfying
3435 final_range_test_p. */
3436 for (bb = last_bb; ; bb = single_pred (bb))
3438 enum tree_code code;
3439 tree lhs, rhs;
3440 inter_bb_range_test_entry bb_ent;
3442 bb_ent.op = NULL_TREE;
3443 bb_ent.first_idx = ops.length ();
3444 bb_ent.last_idx = bb_ent.first_idx;
3445 e = find_edge (bb, other_bb);
3446 stmt = last_stmt (bb);
3447 gimple_set_visited (stmt, true);
3448 if (gimple_code (stmt) != GIMPLE_COND)
3450 use_operand_p use_p;
3451 gimple *phi;
3452 edge e2;
3453 unsigned int d;
3455 lhs = gimple_assign_lhs (stmt);
3456 rhs = gimple_assign_rhs1 (stmt);
3457 gcc_assert (bb == last_bb);
3459 /* stmt is
3460 _123 = (int) _234;
3462 followed by:
3463 <bb M>:
3464 # _345 = PHI <_123(N), 1(...), 1(...)>
3466 or 0 instead of 1. If it is 0, the _234
3467 range test is anded together with all the
3468 other range tests, if it is 1, it is ored with
3469 them. */
3470 single_imm_use (lhs, &use_p, &phi);
3471 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3472 e2 = find_edge (first_bb, other_bb);
3473 d = e2->dest_idx;
3474 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3475 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3476 code = BIT_AND_EXPR;
3477 else
3479 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3480 code = BIT_IOR_EXPR;
3483 /* If _234 SSA_NAME_DEF_STMT is
3484 _234 = _567 | _789;
3485 (or &, corresponding to 1/0 in the phi arguments,
3486 push into ops the individual range test arguments
3487 of the bitwise or resp. and, recursively. */
3488 if (!get_ops (rhs, code, &ops,
3489 loop_containing_stmt (stmt))
3490 && has_single_use (rhs))
3492 /* Otherwise, push the _234 range test itself. */
3493 operand_entry *oe = operand_entry_pool.allocate ();
3495 oe->op = rhs;
3496 oe->rank = code;
3497 oe->id = 0;
3498 oe->count = 1;
3499 oe->stmt_to_insert = NULL;
3500 ops.safe_push (oe);
3501 bb_ent.last_idx++;
3503 else
3504 bb_ent.last_idx = ops.length ();
3505 bb_ent.op = rhs;
3506 bbinfo.safe_push (bb_ent);
3507 continue;
3509 /* Otherwise stmt is GIMPLE_COND. */
3510 code = gimple_cond_code (stmt);
3511 lhs = gimple_cond_lhs (stmt);
3512 rhs = gimple_cond_rhs (stmt);
3513 if (TREE_CODE (lhs) == SSA_NAME
3514 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3515 && ((code != EQ_EXPR && code != NE_EXPR)
3516 || rhs != boolean_false_node
3517 /* Either push into ops the individual bitwise
3518 or resp. and operands, depending on which
3519 edge is other_bb. */
3520 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3521 ^ (code == EQ_EXPR))
3522 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3523 loop_containing_stmt (stmt))))
3525 /* Or push the GIMPLE_COND stmt itself. */
3526 operand_entry *oe = operand_entry_pool.allocate ();
3528 oe->op = NULL;
3529 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3530 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3531 /* oe->op = NULL signals that there is no SSA_NAME
3532 for the range test, and oe->id instead is the
3533 basic block number at whose end the GIMPLE_COND
3534 is. */
3535 oe->id = bb->index;
3536 oe->count = 1;
3537 oe->stmt_to_insert = NULL;
3538 ops.safe_push (oe);
3539 bb_ent.op = NULL;
3540 bb_ent.last_idx++;
3542 else if (ops.length () > bb_ent.first_idx)
3544 bb_ent.op = lhs;
3545 bb_ent.last_idx = ops.length ();
3547 bbinfo.safe_push (bb_ent);
3548 if (bb == first_bb)
3549 break;
3551 if (ops.length () > 1)
3552 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3553 if (any_changes)
3555 unsigned int idx, max_idx = 0;
3556 /* update_ops relies on has_single_use predicates returning the
3557 same values as they did during get_ops earlier. Additionally it
3558 never removes statements, only adds new ones and it should walk
3559 from the single imm use and check the predicate already before
3560 making those changes.
3561 On the other hand, the handling of GIMPLE_COND directly can turn
3562 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3563 it needs to be done in a separate loop afterwards. */
3564 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3566 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3567 && bbinfo[idx].op != NULL_TREE)
3569 tree new_op;
3571 max_idx = idx;
3572 stmt = last_stmt (bb);
3573 new_op = update_ops (bbinfo[idx].op,
3574 (enum tree_code)
3575 ops[bbinfo[idx].first_idx]->rank,
3576 ops, &bbinfo[idx].first_idx,
3577 loop_containing_stmt (stmt));
3578 if (new_op == NULL_TREE)
3580 gcc_assert (bb == last_bb);
3581 new_op = ops[bbinfo[idx].first_idx++]->op;
3583 if (bbinfo[idx].op != new_op)
3585 imm_use_iterator iter;
3586 use_operand_p use_p;
3587 gimple *use_stmt, *cast_stmt = NULL;
3589 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3590 if (is_gimple_debug (use_stmt))
3591 continue;
3592 else if (gimple_code (use_stmt) == GIMPLE_COND
3593 || gimple_code (use_stmt) == GIMPLE_PHI)
3594 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3595 SET_USE (use_p, new_op);
3596 else if (gimple_assign_cast_p (use_stmt))
3597 cast_stmt = use_stmt;
3598 else
3599 gcc_unreachable ();
3600 if (cast_stmt)
3602 gcc_assert (bb == last_bb);
3603 tree lhs = gimple_assign_lhs (cast_stmt);
3604 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3605 enum tree_code rhs_code
3606 = gimple_assign_rhs_code (cast_stmt);
3607 gassign *g;
3608 if (is_gimple_min_invariant (new_op))
3610 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3611 g = gimple_build_assign (new_lhs, new_op);
3613 else
3614 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3615 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3616 gimple_set_uid (g, gimple_uid (cast_stmt));
3617 gimple_set_visited (g, true);
3618 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3619 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3620 if (is_gimple_debug (use_stmt))
3621 continue;
3622 else if (gimple_code (use_stmt) == GIMPLE_COND
3623 || gimple_code (use_stmt) == GIMPLE_PHI)
3624 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3625 SET_USE (use_p, new_lhs);
3626 else
3627 gcc_unreachable ();
3631 if (bb == first_bb)
3632 break;
3634 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3636 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3637 && bbinfo[idx].op == NULL_TREE
3638 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3640 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3642 if (idx > max_idx)
3643 max_idx = idx;
3645 /* If we collapse the conditional to a true/false
3646 condition, then bubble that knowledge up to our caller. */
3647 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3649 gimple_cond_make_false (cond_stmt);
3650 cfg_cleanup_needed = true;
3652 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3654 gimple_cond_make_true (cond_stmt);
3655 cfg_cleanup_needed = true;
3657 else
3659 gimple_cond_set_code (cond_stmt, NE_EXPR);
3660 gimple_cond_set_lhs (cond_stmt,
3661 ops[bbinfo[idx].first_idx]->op);
3662 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3664 update_stmt (cond_stmt);
3666 if (bb == first_bb)
3667 break;
3670 /* The above changes could result in basic blocks after the first
3671 modified one, up to and including last_bb, being executed even if
3672 they would not be in the original program. If the value ranges of
3673 assignment lhs' in those bbs were dependent on the conditions
3674 guarding those basic blocks which now can change, the VRs might
3675 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
3676 are only used within the same bb, it should not be a big deal if
3677 we just reset all the VRs in those bbs. See PR68671. */
3678 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
3679 reset_flow_sensitive_info_in_bb (bb);
3681 return cfg_cleanup_needed;
3684 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3685 of STMT in its operands. This is also known as a "destructive
3686 update" operation. */
3688 static bool
3689 is_phi_for_stmt (gimple *stmt, tree operand)
3691 gimple *def_stmt;
3692 gphi *def_phi;
3693 tree lhs;
3694 use_operand_p arg_p;
3695 ssa_op_iter i;
3697 if (TREE_CODE (operand) != SSA_NAME)
3698 return false;
3700 lhs = gimple_assign_lhs (stmt);
3702 def_stmt = SSA_NAME_DEF_STMT (operand);
3703 def_phi = dyn_cast <gphi *> (def_stmt);
3704 if (!def_phi)
3705 return false;
3707 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3708 if (lhs == USE_FROM_PTR (arg_p))
3709 return true;
3710 return false;
3713 /* Remove def stmt of VAR if VAR has zero uses and recurse
3714 on rhs1 operand if so. */
3716 static void
3717 remove_visited_stmt_chain (tree var)
3719 gimple *stmt;
3720 gimple_stmt_iterator gsi;
3722 while (1)
3724 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3725 return;
3726 stmt = SSA_NAME_DEF_STMT (var);
3727 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3729 var = gimple_assign_rhs1 (stmt);
3730 gsi = gsi_for_stmt (stmt);
3731 reassoc_remove_stmt (&gsi);
3732 release_defs (stmt);
3734 else
3735 return;
3739 /* This function checks three consecutive operands in
3740 the passed operands vector OPS starting from OPINDEX and
3741 swaps two operands if it is profitable for the binary operation
3742 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3744 We pair ops with the same rank if possible.
3746 The alternative we try is to see if STMT is a destructive
3747 update style statement, which is like:
3748 b = phi (a, ...)
3749 a = c + b;
3750 In that case, we want to use the destructive update form to
3751 expose the possible vectorizer sum reduction opportunity.
3752 In that case, the third operand will be the phi node. This
3753 check is not performed if STMT is null.
3755 We could, of course, try to be better as noted above, and do a
3756 lot of work to try to find these opportunities in >3 operand
3757 cases, but it is unlikely to be worth it. */
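/* Illustrative sketch, not part of the pass: a source-level loop whose
   GIMPLE contains the destructive update pattern described above -- the
   accumulator is defined by a PHI node and redefined from it on every
   iteration.  Keeping the PHI result as the last operand helps expose the
   sum reduction to the vectorizer.  The function name is made up.  */
static int
destructive_update_example (const int *c, int n)
{
  int a = 0;
  int i;
  for (i = 0; i < n; i++)
    /* In SSA form roughly: a_2 = PHI <0, a_1>;  a_1 = c[i] + a_2;  */
    a = c[i] + a;
  return a;
}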
3759 static void
3760 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
3761 unsigned int opindex, gimple *stmt)
3763 operand_entry *oe1, *oe2, *oe3;
3765 oe1 = ops[opindex];
3766 oe2 = ops[opindex + 1];
3767 oe3 = ops[opindex + 2];
3769 if ((oe1->rank == oe2->rank
3770 && oe2->rank != oe3->rank)
3771 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3772 && !is_phi_for_stmt (stmt, oe1->op)
3773 && !is_phi_for_stmt (stmt, oe2->op)))
3774 std::swap (*oe1, *oe3);
3775 else if ((oe1->rank == oe3->rank
3776 && oe2->rank != oe3->rank)
3777 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3778 && !is_phi_for_stmt (stmt, oe1->op)
3779 && !is_phi_for_stmt (stmt, oe3->op)))
3780 std::swap (*oe1, *oe2);
3783 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3784 two definitions, otherwise return STMT. */
3786 static inline gimple *
3787 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
3789 if (TREE_CODE (rhs1) == SSA_NAME
3790 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3791 stmt = SSA_NAME_DEF_STMT (rhs1);
3792 if (TREE_CODE (rhs2) == SSA_NAME
3793 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3794 stmt = SSA_NAME_DEF_STMT (rhs2);
3795 return stmt;
3798 /* If the stmt that defines operand has to be inserted, insert it
3799 before the use. */
3800 static void
3801 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
3803 gcc_assert (is_gimple_assign (stmt_to_insert));
3804 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
3805 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
3806 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
3807 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
3808 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
3810 /* If the insert point is not stmt, then insert_point would be
3811 the point where operand rhs1 or rhs2 is defined. In this case,
3812 stmt_to_insert has to be inserted afterwards. This would
3813 only happen when the stmt insertion point is flexible. */
3814 if (stmt == insert_point)
3815 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
3816 else
3817 insert_stmt_after (stmt_to_insert, insert_point);
3821 /* Recursively rewrite our linearized statements so that the operators
3822 match those in OPS[OPINDEX], putting the computation in rank
3823 order. Return new lhs. */
3825 static tree
3826 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
3827 vec<operand_entry *> ops, bool changed)
3829 tree rhs1 = gimple_assign_rhs1 (stmt);
3830 tree rhs2 = gimple_assign_rhs2 (stmt);
3831 tree lhs = gimple_assign_lhs (stmt);
3832 operand_entry *oe;
3834 /* The final recursion case for this function is that you have
3835 exactly two operations left.
3836 If we had exactly one op in the entire list to start with, we
3837 would have never called this function, and the tail recursion
3838 rewrites them one at a time. */
3839 if (opindex + 2 == ops.length ())
3841 operand_entry *oe1, *oe2;
3843 oe1 = ops[opindex];
3844 oe2 = ops[opindex + 1];
3846 if (rhs1 != oe1->op || rhs2 != oe2->op)
3848 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3849 unsigned int uid = gimple_uid (stmt);
3851 if (dump_file && (dump_flags & TDF_DETAILS))
3853 fprintf (dump_file, "Transforming ");
3854 print_gimple_stmt (dump_file, stmt, 0, 0);
3857 /* If the stmt that defines operand has to be inserted, insert it
3858 before the use. */
3859 if (oe1->stmt_to_insert)
3860 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
3861 if (oe2->stmt_to_insert)
3862 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
3863 /* Even when changed is false, reassociation could have e.g. removed
3864 some redundant operations, so unless we are just swapping the
3865 arguments or unless there is no change at all (then we just
3866 return lhs), force creation of a new SSA_NAME. */
3867 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
3869 gimple *insert_point
3870 = find_insert_point (stmt, oe1->op, oe2->op);
3871 lhs = make_ssa_name (TREE_TYPE (lhs));
3872 stmt
3873 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3874 oe1->op, oe2->op);
3875 gimple_set_uid (stmt, uid);
3876 gimple_set_visited (stmt, true);
3877 if (insert_point == gsi_stmt (gsi))
3878 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3879 else
3880 insert_stmt_after (stmt, insert_point);
3882 else
3884 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3885 == stmt);
3886 gimple_assign_set_rhs1 (stmt, oe1->op);
3887 gimple_assign_set_rhs2 (stmt, oe2->op);
3888 update_stmt (stmt);
3891 if (rhs1 != oe1->op && rhs1 != oe2->op)
3892 remove_visited_stmt_chain (rhs1);
3894 if (dump_file && (dump_flags & TDF_DETAILS))
3896 fprintf (dump_file, " into ");
3897 print_gimple_stmt (dump_file, stmt, 0, 0);
3900 return lhs;
3903 /* If we hit here, we should have 3 or more ops left. */
3904 gcc_assert (opindex + 2 < ops.length ());
3906 /* Rewrite the next operator. */
3907 oe = ops[opindex];
3909 /* If the stmt that defines operand has to be inserted, insert it
3910 before the use. */
3911 if (oe->stmt_to_insert)
3912 insert_stmt_before_use (stmt, oe->stmt_to_insert);
3914 /* Recurse on the LHS of the binary operator, which is guaranteed to
3915 be the non-leaf side. */
3916 tree new_rhs1
3917 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3918 changed || oe->op != rhs2);
3920 if (oe->op != rhs2 || new_rhs1 != rhs1)
3922 if (dump_file && (dump_flags & TDF_DETAILS))
3924 fprintf (dump_file, "Transforming ");
3925 print_gimple_stmt (dump_file, stmt, 0, 0);
3928 /* If changed is false, this is either opindex == 0
3929 or all outer rhs2's were equal to corresponding oe->op,
3930 and powi_result is NULL.
3931 That means lhs is equivalent before and after reassociation.
3932 Otherwise ensure the old lhs SSA_NAME is not reused and
3933 create a new stmt as well, so that any debug stmts will be
3934 properly adjusted. */
3935 if (changed)
3937 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3938 unsigned int uid = gimple_uid (stmt);
3939 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3941 lhs = make_ssa_name (TREE_TYPE (lhs));
3942 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3943 new_rhs1, oe->op);
3944 gimple_set_uid (stmt, uid);
3945 gimple_set_visited (stmt, true);
3946 if (insert_point == gsi_stmt (gsi))
3947 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3948 else
3949 insert_stmt_after (stmt, insert_point);
3951 else
3953 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3954 == stmt);
3955 gimple_assign_set_rhs1 (stmt, new_rhs1);
3956 gimple_assign_set_rhs2 (stmt, oe->op);
3957 update_stmt (stmt);
3960 if (dump_file && (dump_flags & TDF_DETAILS))
3962 fprintf (dump_file, " into ");
3963 print_gimple_stmt (dump_file, stmt, 0, 0);
3966 return lhs;
3969 /* Find out how many cycles we need to compute the statement chain.
3970 OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is the
3971 maximum number of independent statements we may execute per cycle. */
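/* A worked example of the formula below: with ops_num == 8 and
   cpu_width == 2 the first phase takes res == 8 / (2 * 2) == 2 cycles,
   each reducing the operand count by cpu_width; the remaining
   rest == 8 - 2 * 2 == 4 operands are then combined pairwise in
   exact_log2 (4) == 2 further cycles, for 4 cycles in total.  */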
3973 static int
3974 get_required_cycles (int ops_num, int cpu_width)
3976 int res;
3977 int elog;
3978 unsigned int rest;
3980 /* While we have more than 2 * cpu_width operands
3981 we may reduce the number of operands by cpu_width
3982 per cycle. */
3983 res = ops_num / (2 * cpu_width);
3985 /* The remaining operand count can then be halved each cycle
3986 until we have only one operand. */
3987 rest = (unsigned)(ops_num - res * cpu_width);
3988 elog = exact_log2 (rest);
3989 if (elog >= 0)
3990 res += elog;
3991 else
3992 res += floor_log2 (rest) + 1;
3994 return res;
3997 /* Returns an optimal number of registers to use for computation of
3998 given statements. */
4000 static int
4001 get_reassociation_width (int ops_num, enum tree_code opc,
4002 machine_mode mode)
4004 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
4005 int width;
4006 int width_min;
4007 int cycles_best;
4009 if (param_width > 0)
4010 width = param_width;
4011 else
4012 width = targetm.sched.reassociation_width (opc, mode);
4014 if (width == 1)
4015 return width;
4017 /* Get the minimal time required for sequence computation. */
4018 cycles_best = get_required_cycles (ops_num, width);
4020 /* Check whether we can use a smaller width and still compute the
4021 sequence in the same time.  That lets us reduce register usage.
4022 get_required_cycles is monotonically increasing with lower width
4023 so we can perform a binary search for the minimal width that still
4024 results in the optimal cycle count. */
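/* For instance, with ops_num == 6 and a target width of 4,
   get_required_cycles (6, 4) and get_required_cycles (6, 2) both
   return 3 cycles, so the loop below settles on width 2 and frees
   the extra registers.  */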
4025 width_min = 1;
4026 while (width > width_min)
4028 int width_mid = (width + width_min) / 2;
4030 if (get_required_cycles (ops_num, width_mid) == cycles_best)
4031 width = width_mid;
4032 else if (width_min < width_mid)
4033 width_min = width_mid;
4034 else
4035 break;
4038 return width;
4041 /* Recursively rewrite our linearized statements so that the operators
4042 match those in OPS[OPINDEX], putting the computation in rank
4043 order and trying to allow operations to be executed in
4044 parallel. */
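/* For example, a chain a + b + c + d rewritten with width 2 becomes
   roughly t1 = a + b; t2 = c + d; lhs = t1 + t2, i.e. two dependence
   levels instead of the three required by ((a + b) + c) + d.  */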
4046 static void
4047 rewrite_expr_tree_parallel (gassign *stmt, int width,
4048 vec<operand_entry *> ops)
4050 enum tree_code opcode = gimple_assign_rhs_code (stmt);
4051 int op_num = ops.length ();
4052 int stmt_num = op_num - 1;
4053 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
4054 int op_index = op_num - 1;
4055 int stmt_index = 0;
4056 int ready_stmts_end = 0;
4057 int i = 0;
4058 gimple *stmt1 = NULL, *stmt2 = NULL;
4059 tree last_rhs1 = gimple_assign_rhs1 (stmt);
4061 /* We start expression rewriting from the top statements.
4062 So, in this loop we create a full list of statements
4063 we will work with. */
4064 stmts[stmt_num - 1] = stmt;
4065 for (i = stmt_num - 2; i >= 0; i--)
4066 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
4068 for (i = 0; i < stmt_num; i++)
4070 tree op1, op2;
4072 /* Determine whether we should use results of
4073 already handled statements or not. */
4074 if (ready_stmts_end == 0
4075 && (i - stmt_index >= width || op_index < 1))
4076 ready_stmts_end = i;
4078 /* Now we choose operands for the next statement.  A nonzero
4079 value in ready_stmts_end means that we should use the results
4080 of already generated statements as the new operands. */
4081 if (ready_stmts_end > 0)
4083 op1 = gimple_assign_lhs (stmts[stmt_index++]);
4084 if (ready_stmts_end > stmt_index)
4085 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4086 else if (op_index >= 0)
4088 operand_entry *oe = ops[op_index--];
4089 stmt2 = oe->stmt_to_insert;
4090 op2 = oe->op;
4092 else
4094 gcc_assert (stmt_index < i);
4095 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4098 if (stmt_index >= ready_stmts_end)
4099 ready_stmts_end = 0;
4101 else
4103 if (op_index > 1)
4104 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
4105 operand_entry *oe2 = ops[op_index--];
4106 operand_entry *oe1 = ops[op_index--];
4107 op2 = oe2->op;
4108 stmt2 = oe2->stmt_to_insert;
4109 op1 = oe1->op;
4110 stmt1 = oe1->stmt_to_insert;
4113 /* If we are emitting the last statement then put the remaining
4114 operands into it.  Setting I to the last index here also
4115 terminates the loop. */
4116 if (op_index < 0 && stmt_index == i)
4117 i = stmt_num - 1;
4119 if (dump_file && (dump_flags & TDF_DETAILS))
4121 fprintf (dump_file, "Transforming ");
4122 print_gimple_stmt (dump_file, stmts[i], 0, 0);
4125 /* If the stmt that defines operand has to be inserted, insert it
4126 before the use. */
4127 if (stmt1)
4128 insert_stmt_before_use (stmts[i], stmt1);
4129 if (stmt2)
4130 insert_stmt_before_use (stmts[i], stmt2);
4131 stmt1 = stmt2 = NULL;
4133 /* We keep the original statement only for the last one.  All
4134 others are recreated. */
4135 if (i == stmt_num - 1)
4137 gimple_assign_set_rhs1 (stmts[i], op1);
4138 gimple_assign_set_rhs2 (stmts[i], op2);
4139 update_stmt (stmts[i]);
4141 else
4143 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
4145 if (dump_file && (dump_flags & TDF_DETAILS))
4147 fprintf (dump_file, " into ");
4148 print_gimple_stmt (dump_file, stmts[i], 0, 0);
4152 remove_visited_stmt_chain (last_rhs1);
4155 /* Transform STMT, which is really (A + B) + (C + D), into the
4156 left-linear form ((A + B) + C) + D.
4157 Recurse on D if necessary. */
4159 static void
4160 linearize_expr (gimple *stmt)
4162 gimple_stmt_iterator gsi;
4163 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
4164 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4165 gimple *oldbinrhs = binrhs;
4166 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4167 gimple *newbinrhs = NULL;
4168 struct loop *loop = loop_containing_stmt (stmt);
4169 tree lhs = gimple_assign_lhs (stmt);
4171 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
4172 && is_reassociable_op (binrhs, rhscode, loop));
4174 gsi = gsi_for_stmt (stmt);
4176 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
4177 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
4178 gimple_assign_rhs_code (binrhs),
4179 gimple_assign_lhs (binlhs),
4180 gimple_assign_rhs2 (binrhs));
4181 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
4182 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
4183 gimple_set_uid (binrhs, gimple_uid (stmt));
4185 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
4186 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4188 if (dump_file && (dump_flags & TDF_DETAILS))
4190 fprintf (dump_file, "Linearized: ");
4191 print_gimple_stmt (dump_file, stmt, 0, 0);
4194 reassociate_stats.linearized++;
4195 update_stmt (stmt);
4197 gsi = gsi_for_stmt (oldbinrhs);
4198 reassoc_remove_stmt (&gsi);
4199 release_defs (oldbinrhs);
4201 gimple_set_visited (stmt, true);
4202 gimple_set_visited (binlhs, true);
4203 gimple_set_visited (binrhs, true);
4205 /* Tail recurse on the new rhs if it still needs reassociation. */
4206 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
4207 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
4208 want to change the algorithm while converting to tuples. */
4209 linearize_expr (stmt);
4212 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4213 it. Otherwise, return NULL. */
4215 static gimple *
4216 get_single_immediate_use (tree lhs)
4218 use_operand_p immuse;
4219 gimple *immusestmt;
4221 if (TREE_CODE (lhs) == SSA_NAME
4222 && single_imm_use (lhs, &immuse, &immusestmt)
4223 && is_gimple_assign (immusestmt))
4224 return immusestmt;
4226 return NULL;
4229 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
4230 representing the negated value. Insertions of any necessary
4231 instructions go before GSI.
4232 This function is recursive in that, if you hand it "a_5" as the
4233 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
4234 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
4236 static tree
4237 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
4239 gimple *negatedefstmt = NULL;
4240 tree resultofnegate;
4241 gimple_stmt_iterator gsi;
4242 unsigned int uid;
4244 /* If we are trying to negate a name, defined by an add, negate the
4245 add operands instead. */
4246 if (TREE_CODE (tonegate) == SSA_NAME)
4247 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
4248 if (TREE_CODE (tonegate) == SSA_NAME
4249 && is_gimple_assign (negatedefstmt)
4250 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
4251 && has_single_use (gimple_assign_lhs (negatedefstmt))
4252 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
4254 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
4255 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
4256 tree lhs = gimple_assign_lhs (negatedefstmt);
4257 gimple *g;
4259 gsi = gsi_for_stmt (negatedefstmt);
4260 rhs1 = negate_value (rhs1, &gsi);
4262 gsi = gsi_for_stmt (negatedefstmt);
4263 rhs2 = negate_value (rhs2, &gsi);
4265 gsi = gsi_for_stmt (negatedefstmt);
4266 lhs = make_ssa_name (TREE_TYPE (lhs));
4267 gimple_set_visited (negatedefstmt, true);
4268 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
4269 gimple_set_uid (g, gimple_uid (negatedefstmt));
4270 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4271 return lhs;
4274 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
4275 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
4276 NULL_TREE, true, GSI_SAME_STMT);
4277 gsi = *gsip;
4278 uid = gimple_uid (gsi_stmt (gsi));
4279 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
4281 gimple *stmt = gsi_stmt (gsi);
4282 if (gimple_uid (stmt) != 0)
4283 break;
4284 gimple_set_uid (stmt, uid);
4286 return resultofnegate;
4289 /* Return true if we should break up the subtract in STMT into an add
4290 with negate.  This is true when the subtract operands are really
4291 adds, or the subtract itself is used in an add expression. In
4292 either case, breaking up the subtract into an add with negate
4293 exposes the adds to reassociation. */
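/* E.g. given
     a = b + c
     d = a - e
     f = d + g
   the subtract both consumes and feeds an addition, so rewriting it
   as d = a + -e exposes the whole chain to reassociation.  */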
4295 static bool
4296 should_break_up_subtract (gimple *stmt)
4298 tree lhs = gimple_assign_lhs (stmt);
4299 tree binlhs = gimple_assign_rhs1 (stmt);
4300 tree binrhs = gimple_assign_rhs2 (stmt);
4301 gimple *immusestmt;
4302 struct loop *loop = loop_containing_stmt (stmt);
4304 if (TREE_CODE (binlhs) == SSA_NAME
4305 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4306 return true;
4308 if (TREE_CODE (binrhs) == SSA_NAME
4309 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4310 return true;
4312 if (TREE_CODE (lhs) == SSA_NAME
4313 && (immusestmt = get_single_immediate_use (lhs))
4314 && is_gimple_assign (immusestmt)
4315 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4316 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4317 return true;
4318 return false;
4321 /* Transform STMT from A - B into A + -B. */
4323 static void
4324 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4326 tree rhs1 = gimple_assign_rhs1 (stmt);
4327 tree rhs2 = gimple_assign_rhs2 (stmt);
4329 if (dump_file && (dump_flags & TDF_DETAILS))
4331 fprintf (dump_file, "Breaking up subtract ");
4332 print_gimple_stmt (dump_file, stmt, 0, 0);
4335 rhs2 = negate_value (rhs2, gsip);
4336 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4337 update_stmt (stmt);
4340 /* Determine whether STMT is a builtin call that raises an SSA name
4341 to an integer power and has only one use. If so, and this is early
4342 reassociation and unsafe math optimizations are permitted, place
4343 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4344 If any of these conditions does not hold, return FALSE. */
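/* For example, with -fno-math-errno a call x = pow (y, 3.0) yields
   *BASE == y and *EXPONENT == 3, while pow (y, 2.5) is rejected
   because the exponent is not an exact integer.  */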
4346 static bool
4347 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
4349 tree arg1;
4350 REAL_VALUE_TYPE c, cint;
4352 switch (gimple_call_combined_fn (stmt))
4354 CASE_CFN_POW:
4355 if (flag_errno_math)
4356 return false;
4358 *base = gimple_call_arg (stmt, 0);
4359 arg1 = gimple_call_arg (stmt, 1);
4361 if (TREE_CODE (arg1) != REAL_CST)
4362 return false;
4364 c = TREE_REAL_CST (arg1);
4366 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4367 return false;
4369 *exponent = real_to_integer (&c);
4370 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4371 if (!real_identical (&c, &cint))
4372 return false;
4374 break;
4376 CASE_CFN_POWI:
4377 *base = gimple_call_arg (stmt, 0);
4378 arg1 = gimple_call_arg (stmt, 1);
4380 if (!tree_fits_shwi_p (arg1))
4381 return false;
4383 *exponent = tree_to_shwi (arg1);
4384 break;
4386 default:
4387 return false;
4390 /* Expanding negative exponents is generally unproductive, so we don't
4391 complicate matters with those. Exponents of zero and one should
4392 have been handled by expression folding. */
4393 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4394 return false;
4396 return true;
4399 /* Try to derive and add operand entry for OP to *OPS. Return false if
4400 unsuccessful. */
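/* For instance, in a MULT_EXPR chain a single-use operand defined by
   t = -x is entered as the two operands x and -1, and a single-use
   pow/powi call with an integral exponent is recorded as a repeated
   factor, so the later powi and negate handling can see through both.  */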
4402 static bool
4403 try_special_add_to_ops (vec<operand_entry *> *ops,
4404 enum tree_code code,
4405 tree op, gimple* def_stmt)
4407 tree base = NULL_TREE;
4408 HOST_WIDE_INT exponent = 0;
4410 if (TREE_CODE (op) != SSA_NAME
4411 || ! has_single_use (op))
4412 return false;
4414 if (code == MULT_EXPR
4415 && reassoc_insert_powi_p
4416 && flag_unsafe_math_optimizations
4417 && is_gimple_call (def_stmt)
4418 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
4420 add_repeat_to_ops_vec (ops, base, exponent);
4421 gimple_set_visited (def_stmt, true);
4422 return true;
4424 else if (code == MULT_EXPR
4425 && is_gimple_assign (def_stmt)
4426 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
4427 && !HONOR_SNANS (TREE_TYPE (op))
4428 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
4429 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
4431 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4432 tree cst = build_minus_one_cst (TREE_TYPE (op));
4433 add_to_ops_vec (ops, rhs1);
4434 add_to_ops_vec (ops, cst);
4435 gimple_set_visited (def_stmt, true);
4436 return true;
4439 return false;
4442 /* Recursively linearize a binary expression that is the RHS of STMT.
4443 Place the operands of the expression tree in the vector named OPS. */
4445 static void
4446 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
4447 bool is_associative, bool set_visited)
4449 tree binlhs = gimple_assign_rhs1 (stmt);
4450 tree binrhs = gimple_assign_rhs2 (stmt);
4451 gimple *binlhsdef = NULL, *binrhsdef = NULL;
4452 bool binlhsisreassoc = false;
4453 bool binrhsisreassoc = false;
4454 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4455 struct loop *loop = loop_containing_stmt (stmt);
4457 if (set_visited)
4458 gimple_set_visited (stmt, true);
4460 if (TREE_CODE (binlhs) == SSA_NAME)
4462 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4463 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4464 && !stmt_could_throw_p (binlhsdef));
4467 if (TREE_CODE (binrhs) == SSA_NAME)
4469 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4470 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4471 && !stmt_could_throw_p (binrhsdef));
4474 /* If the LHS is not reassociable, but the RHS is, we need to swap
4475 them. If neither is reassociable, there is nothing we can do, so
4476 just put them in the ops vector. If the LHS is reassociable,
4477 linearize it. If both are reassociable, then linearize the RHS
4478 and the LHS. */
4480 if (!binlhsisreassoc)
4482 /* If this is not an associative operation (e.g. division), give up. */
4483 if (!is_associative)
4485 add_to_ops_vec (ops, binrhs);
4486 return;
4489 if (!binrhsisreassoc)
4491 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4492 add_to_ops_vec (ops, binrhs);
4494 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
4495 add_to_ops_vec (ops, binlhs);
4497 return;
4500 if (dump_file && (dump_flags & TDF_DETAILS))
4502 fprintf (dump_file, "swapping operands of ");
4503 print_gimple_stmt (dump_file, stmt, 0, 0);
4506 swap_ssa_operands (stmt,
4507 gimple_assign_rhs1_ptr (stmt),
4508 gimple_assign_rhs2_ptr (stmt));
4509 update_stmt (stmt);
4511 if (dump_file && (dump_flags & TDF_DETAILS))
4513 fprintf (dump_file, " is now ");
4514 print_gimple_stmt (dump_file, stmt, 0, 0);
4517 /* We want to make it so the lhs is always the reassociative op,
4518 so swap. */
4519 std::swap (binlhs, binrhs);
4521 else if (binrhsisreassoc)
4523 linearize_expr (stmt);
4524 binlhs = gimple_assign_rhs1 (stmt);
4525 binrhs = gimple_assign_rhs2 (stmt);
4528 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4529 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4530 rhscode, loop));
4531 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4532 is_associative, set_visited);
4534 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4535 add_to_ops_vec (ops, binrhs);
4538 /* Repropagate the negates back into subtracts, since no other pass
4539 currently does it. */
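/* E.g. given
     t = -b
     c = a + t
   the PLUS_EXPR below is rewritten as c = a - b.  */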
4541 static void
4542 repropagate_negates (void)
4544 unsigned int i = 0;
4545 tree negate;
4547 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4549 gimple *user = get_single_immediate_use (negate);
4551 if (!user || !is_gimple_assign (user))
4552 continue;
4554 /* The negate operand can be either operand of a PLUS_EXPR
4555 (it can be the LHS if the RHS is a constant for example).
4557 Force the negate operand to the RHS of the PLUS_EXPR, then
4558 transform the PLUS_EXPR into a MINUS_EXPR. */
4559 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4561 /* If the negated operand appears on the LHS of the
4562 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4563 to force the negated operand to the RHS of the PLUS_EXPR. */
4564 if (gimple_assign_rhs1 (user) == negate)
4566 swap_ssa_operands (user,
4567 gimple_assign_rhs1_ptr (user),
4568 gimple_assign_rhs2_ptr (user));
4571 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4572 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4573 if (gimple_assign_rhs2 (user) == negate)
4575 tree rhs1 = gimple_assign_rhs1 (user);
4576 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
4577 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4578 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4579 update_stmt (user);
4582 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4584 if (gimple_assign_rhs1 (user) == negate)
4586 /* We have
4587 x = -a
4588 y = x - b
4589 which we transform into
4590 x = a + b
4591 y = -x .
4592 This pushes down the negate which we possibly can merge
4593 into some other operation, hence insert it into the
4594 plus_negates vector. */
4595 gimple *feed = SSA_NAME_DEF_STMT (negate);
4596 tree a = gimple_assign_rhs1 (feed);
4597 tree b = gimple_assign_rhs2 (user);
4598 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4599 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4600 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4601 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
4602 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4603 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4604 user = gsi_stmt (gsi2);
4605 update_stmt (user);
4606 reassoc_remove_stmt (&gsi);
4607 release_defs (feed);
4608 plus_negates.safe_push (gimple_assign_lhs (user));
4610 else
4612 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4613 rid of one operation. */
4614 gimple *feed = SSA_NAME_DEF_STMT (negate);
4615 tree a = gimple_assign_rhs1 (feed);
4616 tree rhs1 = gimple_assign_rhs1 (user);
4617 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4618 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4619 update_stmt (gsi_stmt (gsi));
4625 /* Returns true if OP is of a type for which we can do reassociation.
4626 That is for integral or non-saturating fixed-point types, and for
4627 floating-point types when -fassociative-math is enabled. */
4629 static bool
4630 can_reassociate_p (tree op)
4632 tree type = TREE_TYPE (op);
4633 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4634 || NON_SAT_FIXED_POINT_TYPE_P (type)
4635 || (flag_associative_math && FLOAT_TYPE_P (type)))
4636 return true;
4637 return false;
4640 /* Break up subtract operations in block BB.
4642 We do this top down because we don't know whether the subtract is
4643 part of a possible chain of reassociation except at the top.
4645 IE given
4646 d = f + g
4647 c = a + e
4648 b = c - d
4649 q = b - r
4650 k = t - q
4652 we want to break up k = t - q, but we won't until we've transformed q
4653 = b - r, which won't be broken up until we transform b = c - d.
4655 En passant, clear the GIMPLE visited flag on every statement
4656 and set UIDs within each basic block. */
4658 static void
4659 break_up_subtract_bb (basic_block bb)
4661 gimple_stmt_iterator gsi;
4662 basic_block son;
4663 unsigned int uid = 1;
4665 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4667 gimple *stmt = gsi_stmt (gsi);
4668 gimple_set_visited (stmt, false);
4669 gimple_set_uid (stmt, uid++);
4671 if (!is_gimple_assign (stmt)
4672 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4673 continue;
4675 /* Look for simple gimple subtract operations. */
4676 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4678 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4679 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4680 continue;
4682 /* Check for a subtract used only in an addition. If this
4683 is the case, transform it into add of a negate for better
4684 reassociation. IE transform C = A-B into C = A + -B if C
4685 is only used in an addition. */
4686 if (should_break_up_subtract (stmt))
4687 break_up_subtract (stmt, &gsi);
4689 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4690 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4691 plus_negates.safe_push (gimple_assign_lhs (stmt));
4693 for (son = first_dom_son (CDI_DOMINATORS, bb);
4694 son;
4695 son = next_dom_son (CDI_DOMINATORS, son))
4696 break_up_subtract_bb (son);
4699 /* Used for repeated factor analysis. */
4700 struct repeat_factor
4702 /* An SSA name that occurs in a multiply chain. */
4703 tree factor;
4705 /* Cached rank of the factor. */
4706 unsigned rank;
4708 /* Number of occurrences of the factor in the chain. */
4709 HOST_WIDE_INT count;
4711 /* An SSA name representing the product of this factor and
4712 all factors appearing later in the repeated factor vector. */
4713 tree repr;
4717 static vec<repeat_factor> repeat_factor_vec;
4719 /* Used for sorting the repeat factor vector. Sort primarily by
4720 ascending occurrence count, secondarily by descending rank. */
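/* E.g. factors with counts x:2, y:4, z:4 sort as x first (lowest
   count), followed by y and z with the higher-ranked of the two
   coming first.  */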
4722 static int
4723 compare_repeat_factors (const void *x1, const void *x2)
4725 const repeat_factor *rf1 = (const repeat_factor *) x1;
4726 const repeat_factor *rf2 = (const repeat_factor *) x2;
4728 if (rf1->count != rf2->count)
4729 return rf1->count - rf2->count;
4731 return rf2->rank - rf1->rank;
4734 /* Look for repeated operands in OPS in the multiply tree rooted at
4735 STMT. Replace them with an optimal sequence of multiplies and powi
4736 builtin calls, and remove the used operands from OPS. Return an
4737 SSA name representing the value of the replacement sequence. */
4739 static tree
4740 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
4742 unsigned i, j, vec_len;
4743 int ii;
4744 operand_entry *oe;
4745 repeat_factor *rf1, *rf2;
4746 repeat_factor rfnew;
4747 tree result = NULL_TREE;
4748 tree target_ssa, iter_result;
4749 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4750 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4751 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4752 gimple *mul_stmt, *pow_stmt;
4754 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4755 target. */
4756 if (!powi_fndecl)
4757 return NULL_TREE;
4759 /* Allocate the repeated factor vector. */
4760 repeat_factor_vec.create (10);
4762 /* Scan the OPS vector for all SSA names in the product and build
4763 up a vector of occurrence counts for each factor. */
4764 FOR_EACH_VEC_ELT (*ops, i, oe)
4766 if (TREE_CODE (oe->op) == SSA_NAME)
4768 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4770 if (rf1->factor == oe->op)
4772 rf1->count += oe->count;
4773 break;
4777 if (j >= repeat_factor_vec.length ())
4779 rfnew.factor = oe->op;
4780 rfnew.rank = oe->rank;
4781 rfnew.count = oe->count;
4782 rfnew.repr = NULL_TREE;
4783 repeat_factor_vec.safe_push (rfnew);
4788 /* Sort the repeated factor vector by (a) increasing occurrence count,
4789 and (b) decreasing rank. */
4790 repeat_factor_vec.qsort (compare_repeat_factors);
4792 /* It is generally best to combine as many base factors as possible
4793 into a product before applying __builtin_powi to the result.
4794 However, the sort order chosen for the repeated factor vector
4795 allows us to cache partial results for the product of the base
4796 factors for subsequent use. When we already have a cached partial
4797 result from a previous iteration, it is best to make use of it
4798 before looking for another __builtin_pow opportunity.
4800 As an example, consider x * x * y * y * y * z * z * z * z.
4801 We want to first compose the product x * y * z, raise it to the
4802 second power, then multiply this by y * z, and finally multiply
4803 by z. This can be done in 5 multiplies provided we cache y * z
4804 for use in both expressions:
4806 t1 = y * z
4807 t2 = t1 * x
4808 t3 = t2 * t2
4809 t4 = t1 * t3
4810 result = t4 * z
4812 If we instead ignored the cached y * z and first multiplied by
4813 the __builtin_pow opportunity z * z, we would get the inferior:
4815 t1 = y * z
4816 t2 = t1 * x
4817 t3 = t2 * t2
4818 t4 = z * z
4819 t5 = t3 * t4
4820 result = t5 * y */
4822 vec_len = repeat_factor_vec.length ();
4824 /* Repeatedly look for opportunities to create a builtin_powi call. */
4825 while (true)
4827 HOST_WIDE_INT power;
4829 /* First look for the largest cached product of factors from
4830 preceding iterations. If found, create a builtin_powi for
4831 it if the minimum occurrence count for its factors is at
4832 least 2, or just use this cached product as our next
4833 multiplicand if the minimum occurrence count is 1. */
4834 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4836 if (rf1->repr && rf1->count > 0)
4837 break;
4840 if (j < vec_len)
4842 power = rf1->count;
4844 if (power == 1)
4846 iter_result = rf1->repr;
4848 if (dump_file && (dump_flags & TDF_DETAILS))
4850 unsigned elt;
4851 repeat_factor *rf;
4852 fputs ("Multiplying by cached product ", dump_file);
4853 for (elt = j; elt < vec_len; elt++)
4855 rf = &repeat_factor_vec[elt];
4856 print_generic_expr (dump_file, rf->factor, 0);
4857 if (elt < vec_len - 1)
4858 fputs (" * ", dump_file);
4860 fputs ("\n", dump_file);
4863 else
4865 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4866 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4867 build_int_cst (integer_type_node,
4868 power));
4869 gimple_call_set_lhs (pow_stmt, iter_result);
4870 gimple_set_location (pow_stmt, gimple_location (stmt));
4871 gimple_set_uid (pow_stmt, gimple_uid (stmt));
4872 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4874 if (dump_file && (dump_flags & TDF_DETAILS))
4876 unsigned elt;
4877 repeat_factor *rf;
4878 fputs ("Building __builtin_pow call for cached product (",
4879 dump_file);
4880 for (elt = j; elt < vec_len; elt++)
4882 rf = &repeat_factor_vec[elt];
4883 print_generic_expr (dump_file, rf->factor, 0);
4884 if (elt < vec_len - 1)
4885 fputs (" * ", dump_file);
4887 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
4888 power);
4892 else
4894 /* Otherwise, find the first factor in the repeated factor
4895 vector whose occurrence count is at least 2. If no such
4896 factor exists, there are no builtin_powi opportunities
4897 remaining. */
4898 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4900 if (rf1->count >= 2)
4901 break;
4904 if (j >= vec_len)
4905 break;
4907 power = rf1->count;
4909 if (dump_file && (dump_flags & TDF_DETAILS))
4911 unsigned elt;
4912 repeat_factor *rf;
4913 fputs ("Building __builtin_pow call for (", dump_file);
4914 for (elt = j; elt < vec_len; elt++)
4916 rf = &repeat_factor_vec[elt];
4917 print_generic_expr (dump_file, rf->factor, 0);
4918 if (elt < vec_len - 1)
4919 fputs (" * ", dump_file);
4921 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
4924 reassociate_stats.pows_created++;
4926 /* Visit each element of the vector in reverse order (so that
4927 high-occurrence elements are visited first, and within the
4928 same occurrence count, lower-ranked elements are visited
4929 first). Form a linear product of all elements in this order
4930 whose occurrence count is at least that of element J.
4931 Record the SSA name representing the product of each element
4932 with all subsequent elements in the vector. */
4933 if (j == vec_len - 1)
4934 rf1->repr = rf1->factor;
4935 else
4937 for (ii = vec_len - 2; ii >= (int)j; ii--)
4939 tree op1, op2;
4941 rf1 = &repeat_factor_vec[ii];
4942 rf2 = &repeat_factor_vec[ii + 1];
4944 /* Init the last factor's representative to be itself. */
4945 if (!rf2->repr)
4946 rf2->repr = rf2->factor;
4948 op1 = rf1->factor;
4949 op2 = rf2->repr;
4951 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4952 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
4953 op1, op2);
4954 gimple_set_location (mul_stmt, gimple_location (stmt));
4955 gimple_set_uid (mul_stmt, gimple_uid (stmt));
4956 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4957 rf1->repr = target_ssa;
4959 /* Don't reprocess the multiply we just introduced. */
4960 gimple_set_visited (mul_stmt, true);
4964 /* Form a call to __builtin_powi for the maximum product
4965 just formed, raised to the power obtained earlier. */
4966 rf1 = &repeat_factor_vec[j];
4967 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4968 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4969 build_int_cst (integer_type_node,
4970 power));
4971 gimple_call_set_lhs (pow_stmt, iter_result);
4972 gimple_set_location (pow_stmt, gimple_location (stmt));
4973 gimple_set_uid (pow_stmt, gimple_uid (stmt));
4974 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4977 /* If we previously formed at least one other builtin_powi call,
4978 form the product of this one and those others. */
4979 if (result)
4981 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4982 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
4983 result, iter_result);
4984 gimple_set_location (mul_stmt, gimple_location (stmt));
4985 gimple_set_uid (mul_stmt, gimple_uid (stmt));
4986 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4987 gimple_set_visited (mul_stmt, true);
4988 result = new_result;
4990 else
4991 result = iter_result;
4993 /* Decrement the occurrence count of each element in the product
4994 by the count found above, and remove this many copies of each
4995 factor from OPS. */
4996 for (i = j; i < vec_len; i++)
4998 unsigned k = power;
4999 unsigned n;
5001 rf1 = &repeat_factor_vec[i];
5002 rf1->count -= power;
5004 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
5006 if (oe->op == rf1->factor)
5008 if (oe->count <= k)
5010 ops->ordered_remove (n);
5011 k -= oe->count;
5013 if (k == 0)
5014 break;
5016 else
5018 oe->count -= k;
5019 break;
5026 /* At this point all elements in the repeated factor vector have a
5027 remaining occurrence count of 0 or 1, and those with a count of 1
5028 don't have cached representatives. Re-sort the ops vector and
5029 clean up. */
5030 ops->qsort (sort_by_operand_rank);
5031 repeat_factor_vec.release ();
5033 /* Return the final product computed herein. Note that there may
5034 still be some elements with single occurrence count left in OPS;
5035 those will be handled by the normal reassociation logic. */
5036 return result;
5039 /* Attempt to optimize
5040 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
5041 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
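/* A concrete instance: -2.0 * copysign (3.0, y) is rewritten as a new
   call copysign (-6.0, y) followed by a negation of its result, which
   matches the CST1 < 0 form above.  */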
5043 static void
5044 attempt_builtin_copysign (vec<operand_entry *> *ops)
5046 operand_entry *oe;
5047 unsigned int i;
5048 unsigned int length = ops->length ();
5049 tree cst = ops->last ()->op;
5051 if (length == 1 || TREE_CODE (cst) != REAL_CST)
5052 return;
5054 FOR_EACH_VEC_ELT (*ops, i, oe)
5056 if (TREE_CODE (oe->op) == SSA_NAME
5057 && has_single_use (oe->op))
5059 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5060 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
5062 tree arg0, arg1;
5063 switch (gimple_call_combined_fn (old_call))
5065 CASE_CFN_COPYSIGN:
5066 arg0 = gimple_call_arg (old_call, 0);
5067 arg1 = gimple_call_arg (old_call, 1);
5068 /* The first argument of copysign must be a constant,
5069 otherwise there's nothing to do. */
5070 if (TREE_CODE (arg0) == REAL_CST)
5072 tree type = TREE_TYPE (arg0);
5073 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
5074 /* If we couldn't fold to a single constant, skip it.
5075 That happens e.g. for inexact multiplication when
5076 -frounding-math. */
5077 if (mul == NULL_TREE)
5078 break;
5079 /* Instead of adjusting OLD_CALL, let's build a new
5080 call to not leak the LHS and prevent keeping bogus
5081 debug statements. DCE will clean up the old call. */
5082 gcall *new_call;
5083 if (gimple_call_internal_p (old_call))
5084 new_call = gimple_build_call_internal
5085 (IFN_COPYSIGN, 2, mul, arg1);
5086 else
5087 new_call = gimple_build_call
5088 (gimple_call_fndecl (old_call), 2, mul, arg1);
5089 tree lhs = make_ssa_name (type);
5090 gimple_call_set_lhs (new_call, lhs);
5091 gimple_set_location (new_call,
5092 gimple_location (old_call));
5093 insert_stmt_after (new_call, old_call);
5094 /* We've used the constant, get rid of it. */
5095 ops->pop ();
5096 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
5097 /* Handle the CST1 < 0 case by negating the result. */
5098 if (cst1_neg)
5100 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
5101 gimple *negate_stmt
5102 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
5103 insert_stmt_after (negate_stmt, new_call);
5104 oe->op = negrhs;
5106 else
5107 oe->op = lhs;
5108 if (dump_file && (dump_flags & TDF_DETAILS))
5110 fprintf (dump_file, "Optimizing copysign: ");
5111 print_generic_expr (dump_file, cst, 0);
5112 fprintf (dump_file, " * COPYSIGN (");
5113 print_generic_expr (dump_file, arg0, 0);
5114 fprintf (dump_file, ", ");
5115 print_generic_expr (dump_file, arg1, 0);
5116 fprintf (dump_file, ") into %sCOPYSIGN (",
5117 cst1_neg ? "-" : "");
5118 print_generic_expr (dump_file, mul, 0);
5119 fprintf (dump_file, ", ");
5120 print_generic_expr (dump_file, arg1, 0);
5121 fprintf (dump_file, "\n");
5123 return;
5125 break;
5126 default:
5127 break;
5134 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
5136 static void
5137 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
5139 tree rhs1;
5141 if (dump_file && (dump_flags & TDF_DETAILS))
5143 fprintf (dump_file, "Transforming ");
5144 print_gimple_stmt (dump_file, stmt, 0, 0);
5147 rhs1 = gimple_assign_rhs1 (stmt);
5148 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
5149 update_stmt (stmt);
5150 remove_visited_stmt_chain (rhs1);
5152 if (dump_file && (dump_flags & TDF_DETAILS))
5154 fprintf (dump_file, " into ");
5155 print_gimple_stmt (dump_file, stmt, 0, 0);
5159 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
5161 static void
5162 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
5163 tree rhs1, tree rhs2)
5165 if (dump_file && (dump_flags & TDF_DETAILS))
5167 fprintf (dump_file, "Transforming ");
5168 print_gimple_stmt (dump_file, stmt, 0, 0);
5171 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
5172 update_stmt (gsi_stmt (*gsi));
5173 remove_visited_stmt_chain (rhs1);
5175 if (dump_file && (dump_flags & TDF_DETAILS))
5177 fprintf (dump_file, " into ");
5178 print_gimple_stmt (dump_file, stmt, 0, 0);
5182 /* Reassociate expressions in basic block BB and its children in the
5183 post-dominator tree.
5185 Bubble up return status from maybe_optimize_range_tests. */
5187 static bool
5188 reassociate_bb (basic_block bb)
5190 gimple_stmt_iterator gsi;
5191 basic_block son;
5192 gimple *stmt = last_stmt (bb);
5193 bool cfg_cleanup_needed = false;
5195 if (stmt && !gimple_visited_p (stmt))
5196 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
5198 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
5200 stmt = gsi_stmt (gsi);
5202 if (is_gimple_assign (stmt)
5203 && !stmt_could_throw_p (stmt))
5205 tree lhs, rhs1, rhs2;
5206 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
5208 /* If this is not a gimple binary expression, there is
5209 nothing for us to do with it. */
5210 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
5211 continue;
5213 /* If this was part of an already processed statement,
5214 we don't need to touch it again. */
5215 if (gimple_visited_p (stmt))
5217 /* This statement might have become dead because of previous
5218 reassociations. */
5219 if (has_zero_uses (gimple_get_lhs (stmt)))
5221 reassoc_remove_stmt (&gsi);
5222 release_defs (stmt);
5223 /* We might have removed the last stmt above, which
5224 leaves the iterator at the end of the sequence.
5225 Reset it to the last stmt of the block in that
5226 case; if the block has become empty as well,
5227 we removed the only statement of the sequence,
5228 and we need to bail out. */
5229 if (gsi_end_p (gsi))
5231 gsi = gsi_last_bb (bb);
5232 if (gsi_end_p (gsi))
5233 break;
5236 continue;
5239 lhs = gimple_assign_lhs (stmt);
5240 rhs1 = gimple_assign_rhs1 (stmt);
5241 rhs2 = gimple_assign_rhs2 (stmt);
5243 /* For non-bit or min/max operations we can't associate
5244 all types. Verify that here. */
5245 if (rhs_code != BIT_IOR_EXPR
5246 && rhs_code != BIT_AND_EXPR
5247 && rhs_code != BIT_XOR_EXPR
5248 && rhs_code != MIN_EXPR
5249 && rhs_code != MAX_EXPR
5250 && (!can_reassociate_p (lhs)
5251 || !can_reassociate_p (rhs1)
5252 || !can_reassociate_p (rhs2)))
5253 continue;
5255 if (associative_tree_code (rhs_code))
5257 auto_vec<operand_entry *> ops;
5258 tree powi_result = NULL_TREE;
5259 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
5261 /* There may be no immediate uses left by the time we
5262 get here because we may have eliminated them all. */
5263 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
5264 continue;
5266 gimple_set_visited (stmt, true);
5267 linearize_expr_tree (&ops, stmt, true, true);
5268 ops.qsort (sort_by_operand_rank);
5269 optimize_ops_list (rhs_code, &ops);
5270 if (undistribute_ops_list (rhs_code, &ops,
5271 loop_containing_stmt (stmt)))
5273 ops.qsort (sort_by_operand_rank);
5274 optimize_ops_list (rhs_code, &ops);
5277 if (rhs_code == PLUS_EXPR
5278 && transform_add_to_multiply (&ops))
5279 ops.qsort (sort_by_operand_rank);
5281 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
5283 if (is_vector)
5284 optimize_vec_cond_expr (rhs_code, &ops);
5285 else
5286 optimize_range_tests (rhs_code, &ops);
5289 if (rhs_code == MULT_EXPR && !is_vector)
5291 attempt_builtin_copysign (&ops);
5293 if (reassoc_insert_powi_p
5294 && flag_unsafe_math_optimizations)
5295 powi_result = attempt_builtin_powi (stmt, &ops);
5298 operand_entry *last;
5299 bool negate_result = false;
5300 if (ops.length () > 1
5301 && rhs_code == MULT_EXPR)
5303 last = ops.last ();
5304 if (((TREE_CODE (last->op) == INTEGER_CST
5305 && integer_minus_onep (last->op))
5306 || real_minus_onep (last->op))
5307 && !HONOR_SNANS (TREE_TYPE (lhs))
5308 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
5309 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
5311 ops.pop ();
5312 negate_result = true;
5316 /* If the operand vector is now empty, all operands were
5317 consumed by the __builtin_powi optimization. */
5318 if (ops.length () == 0)
5319 transform_stmt_to_copy (&gsi, stmt, powi_result);
5320 else if (ops.length () == 1)
5322 tree last_op = ops.last ()->op;
5324 /* If the stmt that defines operand has to be inserted, insert it
5325 before the use. */
5326 if (ops.last ()->stmt_to_insert)
5327 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
5328 if (powi_result)
5329 transform_stmt_to_multiply (&gsi, stmt, last_op,
5330 powi_result);
5331 else
5332 transform_stmt_to_copy (&gsi, stmt, last_op);
5334 else
5336 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
5337 int ops_num = ops.length ();
5338 int width = get_reassociation_width (ops_num, rhs_code, mode);
5339 tree new_lhs = lhs;
5341 if (dump_file && (dump_flags & TDF_DETAILS))
5342 fprintf (dump_file,
5343 "Width = %d was chosen for reassociation\n", width);
5345 if (width > 1
5346 && ops.length () > 3)
5347 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
5348 width, ops);
5349 else
5351 /* When there are three operands left, we want
5352 to make sure the ones that get the double
5353 binary op are chosen wisely. */
5354 int len = ops.length ();
5355 if (len >= 3)
5356 swap_ops_for_binary_stmt (ops, len - 3, stmt);
5358 new_lhs = rewrite_expr_tree (stmt, 0, ops,
5359 powi_result != NULL);
5362 /* If we combined some repeated factors into a
5363 __builtin_powi call, multiply that result by the
5364 reassociated operands. */
5365 if (powi_result)
5367 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
5368 tree type = TREE_TYPE (lhs);
5369 tree target_ssa = make_temp_ssa_name (type, NULL,
5370 "reassocpow");
5371 gimple_set_lhs (lhs_stmt, target_ssa);
5372 update_stmt (lhs_stmt);
5373 if (lhs != new_lhs)
5374 target_ssa = new_lhs;
5375 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
5376 powi_result, target_ssa);
5377 gimple_set_location (mul_stmt, gimple_location (stmt));
5378 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5379 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
5383 if (negate_result)
5385 stmt = SSA_NAME_DEF_STMT (lhs);
5386 tree tmp = make_ssa_name (TREE_TYPE (lhs));
5387 gimple_set_lhs (stmt, tmp);
5388 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
5389 tmp);
5390 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5391 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
5392 update_stmt (stmt);
5397 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
5398 son;
5399 son = next_dom_son (CDI_POST_DOMINATORS, son))
5400 cfg_cleanup_needed |= reassociate_bb (son);
5402 return cfg_cleanup_needed;
5405 /* Add jumps around shifts for range tests turned into bit tests.
5406 For each SSA_NAME VAR we have code like:
5407 VAR = ...; // final stmt of range comparison
5408 // bit test here...;
5409 OTHERVAR = ...; // final stmt of the bit test sequence
5410 RES = VAR | OTHERVAR;
5411 Turn the above into:
5412 VAR = ...;
5413 if (VAR != 0)
5414 goto <l3>;
5415 else
5416 goto <l2>;
5417 <l2>:
5418 // bit test here...;
5419 OTHERVAR = ...;
5420 <l3>:
5421 # RES = PHI<1(l1), OTHERVAR(l2)>; */
5423 static void
5424 branch_fixup (void)
5426 tree var;
5427 unsigned int i;
5429 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
5431 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
5432 gimple *use_stmt;
5433 use_operand_p use;
5434 bool ok = single_imm_use (var, &use, &use_stmt);
5435 gcc_assert (ok
5436 && is_gimple_assign (use_stmt)
5437 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
5438 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
5440 basic_block cond_bb = gimple_bb (def_stmt);
5441 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
5442 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
5444 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5445 gimple *g = gimple_build_cond (NE_EXPR, var,
5446 build_zero_cst (TREE_TYPE (var)),
5447 NULL_TREE, NULL_TREE);
5448 location_t loc = gimple_location (use_stmt);
5449 gimple_set_location (g, loc);
5450 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
5452 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
5453 etrue->probability = REG_BR_PROB_BASE / 2;
5454 etrue->count = cond_bb->count / 2;
5455 edge efalse = find_edge (cond_bb, then_bb);
5456 efalse->flags = EDGE_FALSE_VALUE;
5457 efalse->probability -= etrue->probability;
5458 efalse->count -= etrue->count;
5459 then_bb->count -= etrue->count;
5461 tree othervar = NULL_TREE;
5462 if (gimple_assign_rhs1 (use_stmt) == var)
5463 othervar = gimple_assign_rhs2 (use_stmt);
5464 else if (gimple_assign_rhs2 (use_stmt) == var)
5465 othervar = gimple_assign_rhs1 (use_stmt);
5466 else
5467 gcc_unreachable ();
5468 tree lhs = gimple_assign_lhs (use_stmt);
5469 gphi *phi = create_phi_node (lhs, merge_bb);
5470 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
5471 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
5472 gsi = gsi_for_stmt (use_stmt);
5473 gsi_remove (&gsi, true);
5475 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
5476 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
5478 reassoc_branch_fixups.release ();
5481 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
5482 void debug_ops_vector (vec<operand_entry *> ops);
5484 /* Dump the operand entry vector OPS to FILE. */
5486 void
5487 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
5489 operand_entry *oe;
5490 unsigned int i;
5492 FOR_EACH_VEC_ELT (ops, i, oe)
5494 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
5495 print_generic_expr (file, oe->op, 0);
5496 fprintf (file, "\n");
5500 /* Dump the operand entry vector OPS to STDERR. */
5502 DEBUG_FUNCTION void
5503 debug_ops_vector (vec<operand_entry *> ops)
5505 dump_ops_vector (stderr, ops);
5508 /* Bubble up return status from reassociate_bb. */
5510 static bool
5511 do_reassoc (void)
5513 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5514 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
5517 /* Initialize the reassociation pass. */
5519 static void
5520 init_reassoc (void)
5522 int i;
5523 long rank = 2;
5524 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
5526 /* Find the loops, so that we can prevent moving calculations in
5527 them. */
5528 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
5530 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
5532 next_operand_entry_id = 0;
5534 /* Reverse RPO (Reverse Post Order) will give us something where
5535 deeper loops come later. */
5536 pre_and_rev_post_order_compute (NULL, bbs, false);
5537 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
5538 operand_rank = new hash_map<tree, long>;
5540 /* Give each default definition a distinct rank. This includes
5541 parameters and the static chain. Walk backwards over all
5542 SSA names so that we get proper rank ordering according
5543 to tree_swap_operands_p. */
5544 for (i = num_ssa_names - 1; i > 0; --i)
5546 tree name = ssa_name (i);
5547 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5548 insert_operand_rank (name, ++rank);
5551 /* Set up rank for each BB */
5552 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5553 bb_rank[bbs[i]] = ++rank << 16;
5555 free (bbs);
5556 calculate_dominance_info (CDI_POST_DOMINATORS);
5557 plus_negates = vNULL;
5560 /* Cleanup after the reassociation pass, and print stats if
5561 requested. */
5563 static void
5564 fini_reassoc (void)
5566 statistics_counter_event (cfun, "Linearized",
5567 reassociate_stats.linearized);
5568 statistics_counter_event (cfun, "Constants eliminated",
5569 reassociate_stats.constants_eliminated);
5570 statistics_counter_event (cfun, "Ops eliminated",
5571 reassociate_stats.ops_eliminated);
5572 statistics_counter_event (cfun, "Statements rewritten",
5573 reassociate_stats.rewritten);
5574 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5575 reassociate_stats.pows_encountered);
5576 statistics_counter_event (cfun, "Built-in powi calls created",
5577 reassociate_stats.pows_created);
5579 delete operand_rank;
5580 operand_entry_pool.release ();
5581 free (bb_rank);
5582 plus_negates.release ();
5583 free_dominance_info (CDI_POST_DOMINATORS);
5584 loop_optimizer_finalize ();
5587 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
5588 insertion of __builtin_powi calls.
5590 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
5591 optimization of a gimple conditional. Otherwise returns zero. */
5593 static unsigned int
5594 execute_reassoc (bool insert_powi_p)
5596 reassoc_insert_powi_p = insert_powi_p;
5598 init_reassoc ();
5600 bool cfg_cleanup_needed;
5601 cfg_cleanup_needed = do_reassoc ();
5602 repropagate_negates ();
5603 branch_fixup ();
5605 fini_reassoc ();
5606 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
5609 namespace {
5611 const pass_data pass_data_reassoc =
5613 GIMPLE_PASS, /* type */
5614 "reassoc", /* name */
5615 OPTGROUP_NONE, /* optinfo_flags */
5616 TV_TREE_REASSOC, /* tv_id */
5617 ( PROP_cfg | PROP_ssa ), /* properties_required */
5618 0, /* properties_provided */
5619 0, /* properties_destroyed */
5620 0, /* todo_flags_start */
5621 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5624 class pass_reassoc : public gimple_opt_pass
5626 public:
5627 pass_reassoc (gcc::context *ctxt)
5628 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
5631 /* opt_pass methods: */
5632 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5633 void set_pass_param (unsigned int n, bool param)
5635 gcc_assert (n == 0);
5636 insert_powi_p = param;
5638 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5639 virtual unsigned int execute (function *)
5640 { return execute_reassoc (insert_powi_p); }
5642 private:
5643 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
5644 point 3a in the pass header comment. */
5645 bool insert_powi_p;
5646 }; // class pass_reassoc
5648 } // anon namespace
5650 gimple_opt_pass *
5651 make_pass_reassoc (gcc::context *ctxt)
5653 return new pass_reassoc (ctxt);