gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2016 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "tm_p.h"
33 #include "ssa.h"
34 #include "optabs-tree.h"
35 #include "gimple-pretty-print.h"
36 #include "diagnostic-core.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "cfganal.h"
40 #include "gimple-fold.h"
41 #include "tree-eh.h"
42 #include "gimple-iterator.h"
43 #include "gimplify-me.h"
44 #include "tree-cfg.h"
45 #include "tree-ssa-loop.h"
46 #include "flags.h"
47 #include "tree-ssa.h"
48 #include "langhooks.h"
49 #include "cfgloop.h"
50 #include "params.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
55 /* This is a simple global reassociation pass. It is, in part, based
56    on the LLVM pass of the same name (they do some things more/less
57    than we do, in different orders, etc.).
59 It consists of five steps:
61 1. Breaking up subtract operations into addition + negate, where
62 it would promote the reassociation of adds.
64 2. Left linearization of the expression trees, so that (A+B)+(C+D)
65 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
66 During linearization, we place the operands of the binary
67 expressions into a vector of operand_entry_*
69 3. Optimization of the operand lists, eliminating things like a +
70 -a, a & a, etc.
72 3a. Combine repeated factors with the same occurrence counts
73 into a __builtin_powi call that will later be optimized into
74 an optimal number of multiplies.
76 4. Rewrite the expression trees we linearized and optimized so
77 they are in proper rank order.
79    5. Repropagate negates, as nothing else will clean them up ATM.
81 A bit of theory on #4, since nobody seems to write anything down
82 about why it makes sense to do it the way they do it:
84 We could do this much nicer theoretically, but don't (for reasons
85 explained after how to do it theoretically nice :P).
87 In order to promote the most redundancy elimination, you want
88 binary expressions whose operands are the same rank (or
89 preferably, the same value) exposed to the redundancy eliminator,
90 for possible elimination.
92 So the way to do this if we really cared, is to build the new op
93 tree from the leaves to the roots, merging as you go, and putting the
94 new op on the end of the worklist, until you are left with one
95 thing on the worklist.
97 IE if you have to rewrite the following set of operands (listed with
98 rank in parentheses), with opcode PLUS_EXPR:
100 a (1), b (1), c (1), d (2), e (2)
103 We start with our merge worklist empty, and the ops list with all of
104 those on it.
106 You want to first merge all leaves of the same rank, as much as
107 possible.
109 So first build a binary op of
111 mergetmp = a + b, and put "mergetmp" on the merge worklist.
113 Because there is no three operand form of PLUS_EXPR, c is not going to
114 be exposed to redundancy elimination as a rank 1 operand.
116 So you might as well throw it on the merge worklist (you could also
117 consider it to now be a rank two operand, and merge it with d and e,
118 but in this case, you then have evicted e from a binary op. So at
119 least in this situation, you can't win.)
121 Then build a binary op of d + e
122 mergetmp2 = d + e
124 and put mergetmp2 on the merge worklist.
126 so merge worklist = {mergetmp, c, mergetmp2}
128 Continue building binary ops of these operations until you have only
129 one operation left on the worklist.
131 So we have
133 build binary op
134 mergetmp3 = mergetmp + c
136 worklist = {mergetmp2, mergetmp3}
138 mergetmp4 = mergetmp2 + mergetmp3
140 worklist = {mergetmp4}
142 because we have one operation left, we can now just set the original
143 statement equal to the result of that operation.
145 This will at least expose a + b and d + e to redundancy elimination
146 as binary operations.
148 For extra points, you can reuse the old statements to build the
149 mergetmps, since you shouldn't run out.
151 So why don't we do this?
153 Because it's expensive, and rarely will help. Most trees we are
154    reassociating have 3 or fewer ops.  If they have 2 ops, they already
155 will be written into a nice single binary op. If you have 3 ops, a
156 single simple check suffices to tell you whether the first two are of the
157 same rank. If so, you know to order it
159 mergetmp = op1 + op2
160 newstmt = mergetmp + op3
162 instead of
163 mergetmp = op2 + op3
164 newstmt = mergetmp + op1
166 If all three are of the same rank, you can't expose them all in a
167 single binary operator anyway, so the above is *still* the best you
168 can do.
170 Thus, this is what we do. When we have three ops left, we check to see
171 what order to put them in, and call it a day. As a nod to vector sum
172 reduction, we check if any of the ops are really a phi node that is a
173 destructive update for the associating op, and keep the destructive
174 update together for vector sum reduction recognition. */
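/* As a small hand-worked illustration of the steps above (the names and
   statements here are made up, not taken from any testcase), consider:

     t1 = a - b
     t2 = t1 + c
     r  = t2 + b

   Step 1 rewrites the subtraction as an addition of a negate, step 2
   linearizes the chain into the operand list {a, -b, c, b}, step 3
   cancels -b against b, and step 4 rewrites what is left in rank order,
   so the whole chain reduces to r = a + c.  */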
176 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
177 point 3a in the pass header comment. */
178 static bool reassoc_insert_powi_p;
180 /* Statistics */
181 static struct
183 int linearized;
184 int constants_eliminated;
185 int ops_eliminated;
186 int rewritten;
187 int pows_encountered;
188 int pows_created;
189 } reassociate_stats;
191 /* Operator, rank pair. */
192 struct operand_entry
194 unsigned int rank;
195 int id;
196 tree op;
197 unsigned int count;
198 gimple *stmt_to_insert;
201 static object_allocator<operand_entry> operand_entry_pool
202 ("operand entry pool");
204 /* This is used to assign a unique ID to each struct operand_entry
205 so that qsort results are identical on different hosts. */
206 static int next_operand_entry_id;
208 /* Starting rank number for a given basic block, so that we can rank
209 operations using unmovable instructions in that BB based on the bb
210 depth. */
211 static long *bb_rank;
213 /* Operand->rank hashtable. */
214 static hash_map<tree, long> *operand_rank;
216 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
217    all basic blocks, the CFG should be adjusted - the basic block
218    split right after that SSA_NAME's definition statement and before
219    its only use, which must be a bit ior.  */
220 static vec<tree> reassoc_branch_fixups;
222 /* Forward decls. */
223 static long get_rank (tree);
224 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
226 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
227 possibly added by gsi_remove. */
229 bool
230 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
232 gimple *stmt = gsi_stmt (*gsi);
234 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
235 return gsi_remove (gsi, true);
237 gimple_stmt_iterator prev = *gsi;
238 gsi_prev (&prev);
239 unsigned uid = gimple_uid (stmt);
240 basic_block bb = gimple_bb (stmt);
241 bool ret = gsi_remove (gsi, true);
242 if (!gsi_end_p (prev))
243 gsi_next (&prev);
244 else
245 prev = gsi_start_bb (bb);
246 gimple *end_stmt = gsi_stmt (*gsi);
247 while ((stmt = gsi_stmt (prev)) != end_stmt)
249 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
250 gimple_set_uid (stmt, uid);
251 gsi_next (&prev);
253 return ret;
256 /* Bias amount for loop-carried phis. We want this to be larger than
257 the depth of any reassociation tree we can see, but not larger than
258 the rank difference between two blocks. */
259 #define PHI_LOOP_BIAS (1 << 15)
261 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
262 an innermost loop, and the phi has only a single use which is inside
263 the loop, then the rank is the block rank of the loop latch plus an
264 extra bias for the loop-carried dependence. This causes expressions
265 calculated into an accumulator variable to be independent for each
266 iteration of the loop. If STMT is some other phi, the rank is the
267 block rank of its containing block. */
268 static long
269 phi_rank (gimple *stmt)
271 basic_block bb = gimple_bb (stmt);
272 struct loop *father = bb->loop_father;
273 tree res;
274 unsigned i;
275 use_operand_p use;
276 gimple *use_stmt;
278 /* We only care about real loops (those with a latch). */
279 if (!father->latch)
280 return bb_rank[bb->index];
282 /* Interesting phis must be in headers of innermost loops. */
283 if (bb != father->header
284 || father->inner)
285 return bb_rank[bb->index];
287 /* Ignore virtual SSA_NAMEs. */
288 res = gimple_phi_result (stmt);
289 if (virtual_operand_p (res))
290 return bb_rank[bb->index];
292 /* The phi definition must have a single use, and that use must be
293 within the loop. Otherwise this isn't an accumulator pattern. */
294 if (!single_imm_use (res, &use, &use_stmt)
295 || gimple_bb (use_stmt)->loop_father != father)
296 return bb_rank[bb->index];
298 /* Look for phi arguments from within the loop. If found, bias this phi. */
299 for (i = 0; i < gimple_phi_num_args (stmt); i++)
301 tree arg = gimple_phi_arg_def (stmt, i);
302 if (TREE_CODE (arg) == SSA_NAME
303 && !SSA_NAME_IS_DEFAULT_DEF (arg))
305 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
306 if (gimple_bb (def_stmt)->loop_father == father)
307 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
311 /* Must be an uninteresting phi. */
312 return bb_rank[bb->index];
315 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
316 loop-carried dependence of an innermost loop, return TRUE; else
317 return FALSE. */
318 static bool
319 loop_carried_phi (tree exp)
321 gimple *phi_stmt;
322 long block_rank;
324 if (TREE_CODE (exp) != SSA_NAME
325 || SSA_NAME_IS_DEFAULT_DEF (exp))
326 return false;
328 phi_stmt = SSA_NAME_DEF_STMT (exp);
330 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
331 return false;
333 /* Non-loop-carried phis have block rank. Loop-carried phis have
334 an additional bias added in. If this phi doesn't have block rank,
335 it's biased and should not be propagated. */
336 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
338 if (phi_rank (phi_stmt) != block_rank)
339 return true;
341 return false;
344 /* Return the maximum of RANK and the rank that should be propagated
345 from expression OP. For most operands, this is just the rank of OP.
346 For loop-carried phis, the value is zero to avoid undoing the bias
347 in favor of the phi. */
348 static long
349 propagate_rank (long rank, tree op)
351 long op_rank;
353 if (loop_carried_phi (op))
354 return rank;
356 op_rank = get_rank (op);
358 return MAX (rank, op_rank);
361 /* Look up the operand rank structure for expression E. */
363 static inline long
364 find_operand_rank (tree e)
366 long *slot = operand_rank->get (e);
367 return slot ? *slot : -1;
370 /* Insert {E,RANK} into the operand rank hashtable. */
372 static inline void
373 insert_operand_rank (tree e, long rank)
375 gcc_assert (rank > 0);
376 gcc_assert (!operand_rank->put (e, rank));
379 /* Given an expression E, return the rank of the expression. */
381 static long
382 get_rank (tree e)
384   /* SSA_NAME's have the rank of the expression they are the result of.
386 For globals and uninitialized values, the rank is 0.
387 For function arguments, use the pre-setup rank.
388 For PHI nodes, stores, asm statements, etc, we use the rank of
389 the BB.
390 For simple operations, the rank is the maximum rank of any of
391 its operands, or the bb_rank, whichever is less.
392 I make no claims that this is optimal, however, it gives good
393 results. */
395 /* We make an exception to the normal ranking system to break
396 dependences of accumulator variables in loops. Suppose we
397 have a simple one-block loop containing:
399 x_1 = phi(x_0, x_2)
400 b = a + x_1
401 c = b + d
402 x_2 = c + e
404 As shown, each iteration of the calculation into x is fully
405 dependent upon the iteration before it. We would prefer to
406 see this in the form:
408 x_1 = phi(x_0, x_2)
409 b = a + d
410 c = b + e
411 x_2 = c + x_1
413 If the loop is unrolled, the calculations of b and c from
414 different iterations can be interleaved.
416 To obtain this result during reassociation, we bias the rank
417 of the phi definition x_1 upward, when it is recognized as an
418 accumulator pattern. The artificial rank causes it to be
419 added last, providing the desired independence. */
421 if (TREE_CODE (e) == SSA_NAME)
423 ssa_op_iter iter;
424 gimple *stmt;
425 long rank;
426 tree op;
428 if (SSA_NAME_IS_DEFAULT_DEF (e))
429 return find_operand_rank (e);
431 stmt = SSA_NAME_DEF_STMT (e);
432 if (gimple_code (stmt) == GIMPLE_PHI)
433 return phi_rank (stmt);
435 if (!is_gimple_assign (stmt))
436 return bb_rank[gimple_bb (stmt)->index];
438 /* If we already have a rank for this expression, use that. */
439 rank = find_operand_rank (e);
440 if (rank != -1)
441 return rank;
443 /* Otherwise, find the maximum rank for the operands. As an
444 exception, remove the bias from loop-carried phis when propagating
445 the rank so that dependent operations are not also biased. */
446       /* Simply walk over all SSA uses - this takes advantage of the
447          fact that non-SSA operands are is_gimple_min_invariant and
448 thus have rank 0. */
449 rank = 0;
450 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
451 rank = propagate_rank (rank, op);
453 if (dump_file && (dump_flags & TDF_DETAILS))
455 fprintf (dump_file, "Rank for ");
456 print_generic_expr (dump_file, e, 0);
457 fprintf (dump_file, " is %ld\n", (rank + 1));
460 /* Note the rank in the hashtable so we don't recompute it. */
461 insert_operand_rank (e, (rank + 1));
462 return (rank + 1);
465 /* Constants, globals, etc., are rank 0 */
466 return 0;
470 /* We want integer ones to end up last no matter what, since they are
471 the ones we can do the most with. */
472 #define INTEGER_CONST_TYPE 1 << 3
473 #define FLOAT_CONST_TYPE 1 << 2
474 #define OTHER_CONST_TYPE 1 << 1
476 /* Classify an invariant tree into integer, float, or other, so that
477 we can sort them to be near other constants of the same type. */
478 static inline int
479 constant_type (tree t)
481 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
482 return INTEGER_CONST_TYPE;
483 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
484 return FLOAT_CONST_TYPE;
485 else
486 return OTHER_CONST_TYPE;
489 /* qsort comparison function to sort operand entries PA and PB by rank
490 so that the sorted array is ordered by rank in decreasing order. */
491 static int
492 sort_by_operand_rank (const void *pa, const void *pb)
494 const operand_entry *oea = *(const operand_entry *const *)pa;
495 const operand_entry *oeb = *(const operand_entry *const *)pb;
497 /* It's nicer for optimize_expression if constants that are likely
498      to fold when added/multiplied/whatever are put next to each
499 other. Since all constants have rank 0, order them by type. */
500 if (oeb->rank == 0 && oea->rank == 0)
502 if (constant_type (oeb->op) != constant_type (oea->op))
503 return constant_type (oeb->op) - constant_type (oea->op);
504 else
505 /* To make sorting result stable, we use unique IDs to determine
506 order. */
507 return oeb->id - oea->id;
510 /* Lastly, make sure the versions that are the same go next to each
511 other. */
512 if ((oeb->rank - oea->rank == 0)
513 && TREE_CODE (oea->op) == SSA_NAME
514 && TREE_CODE (oeb->op) == SSA_NAME)
516       /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
517          versions of removed SSA_NAMEs, so if possible, prefer to sort
518          based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
519          See PR60418.  */
520 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
521 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
522 && !oea->stmt_to_insert
523 && !oeb->stmt_to_insert
524 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
526 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
527 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
528 basic_block bba = gimple_bb (stmta);
529 basic_block bbb = gimple_bb (stmtb);
530 if (bbb != bba)
532 if (bb_rank[bbb->index] != bb_rank[bba->index])
533 return bb_rank[bbb->index] - bb_rank[bba->index];
535 else
537 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
538 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
539 if (da != db)
540 return da ? 1 : -1;
544 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
545 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
546 else
547 return oeb->id - oea->id;
550 if (oeb->rank != oea->rank)
551 return oeb->rank - oea->rank;
552 else
553 return oeb->id - oea->id;
556 /* Add an operand entry to *OPS for the tree operand OP. */
558 static void
559 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
561 operand_entry *oe = operand_entry_pool.allocate ();
563 oe->op = op;
564 oe->rank = get_rank (op);
565 oe->id = next_operand_entry_id++;
566 oe->count = 1;
567 oe->stmt_to_insert = stmt_to_insert;
568 ops->safe_push (oe);
571 /* Add an operand entry to *OPS for the tree operand OP with repeat
572 count REPEAT. */
574 static void
575 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
576 HOST_WIDE_INT repeat)
578 operand_entry *oe = operand_entry_pool.allocate ();
580 oe->op = op;
581 oe->rank = get_rank (op);
582 oe->id = next_operand_entry_id++;
583 oe->count = repeat;
584 oe->stmt_to_insert = NULL;
585 ops->safe_push (oe);
587 reassociate_stats.pows_encountered++;
590 /* Return true if STMT is a reassociable operation containing a binary
591    operation with tree code CODE, and is inside LOOP.  */
593 static bool
594 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
596 basic_block bb = gimple_bb (stmt);
598 if (gimple_bb (stmt) == NULL)
599 return false;
601 if (!flow_bb_inside_loop_p (loop, bb))
602 return false;
604 if (is_gimple_assign (stmt)
605 && gimple_assign_rhs_code (stmt) == code
606 && has_single_use (gimple_assign_lhs (stmt)))
607 return true;
609 return false;
613 /* Return true if STMT is a nop-conversion. */
615 static bool
616 gimple_nop_conversion_p (gimple *stmt)
618 if (gassign *ass = dyn_cast <gassign *> (stmt))
620 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
621 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
622 TREE_TYPE (gimple_assign_rhs1 (ass))))
623 return true;
625 return false;
628 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
629 operand of the negate operation. Otherwise, return NULL. */
631 static tree
632 get_unary_op (tree name, enum tree_code opcode)
634 gimple *stmt = SSA_NAME_DEF_STMT (name);
636 /* Look through nop conversions (sign changes). */
637 if (gimple_nop_conversion_p (stmt)
638 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
639 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
641 if (!is_gimple_assign (stmt))
642 return NULL_TREE;
644 if (gimple_assign_rhs_code (stmt) == opcode)
645 return gimple_assign_rhs1 (stmt);
646 return NULL_TREE;
649 /* Return true if OP1 and OP2 have the same value when cast to either type.  */
651 static bool
652 ops_equal_values_p (tree op1, tree op2)
654 if (op1 == op2)
655 return true;
657 tree orig_op1 = op1;
658 if (TREE_CODE (op1) == SSA_NAME)
660 gimple *stmt = SSA_NAME_DEF_STMT (op1);
661 if (gimple_nop_conversion_p (stmt))
663 op1 = gimple_assign_rhs1 (stmt);
664 if (op1 == op2)
665 return true;
669 if (TREE_CODE (op2) == SSA_NAME)
671 gimple *stmt = SSA_NAME_DEF_STMT (op2);
672 if (gimple_nop_conversion_p (stmt))
674 op2 = gimple_assign_rhs1 (stmt);
675 if (op1 == op2
676 || orig_op1 == op2)
677 return true;
681 return false;
685 /* If CURR and LAST are a pair of ops that OPCODE allows us to
686 eliminate through equivalences, do so, remove them from OPS, and
687 return true. Otherwise, return false. */
689 static bool
690 eliminate_duplicate_pair (enum tree_code opcode,
691 vec<operand_entry *> *ops,
692 bool *all_done,
693 unsigned int i,
694 operand_entry *curr,
695 operand_entry *last)
698   /* If we have two of the same op, and the opcode is &, |, min, or max,
699 we can eliminate one of them.
700 If we have two of the same op, and the opcode is ^, we can
701 eliminate both of them. */
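  /* E.g. (illustrative only) for OPS {a_1, a_1, b_2} with BIT_AND_EXPR one
     copy of a_1 is dropped, while with BIT_XOR_EXPR both copies are dropped
     since a_1 ^ a_1 == 0.  */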
703 if (last && last->op == curr->op)
705 switch (opcode)
707 case MAX_EXPR:
708 case MIN_EXPR:
709 case BIT_IOR_EXPR:
710 case BIT_AND_EXPR:
711 if (dump_file && (dump_flags & TDF_DETAILS))
713 fprintf (dump_file, "Equivalence: ");
714 print_generic_expr (dump_file, curr->op, 0);
715 fprintf (dump_file, " [&|minmax] ");
716 print_generic_expr (dump_file, last->op, 0);
717 fprintf (dump_file, " -> ");
718 print_generic_stmt (dump_file, last->op, 0);
721 ops->ordered_remove (i);
722 reassociate_stats.ops_eliminated ++;
724 return true;
726 case BIT_XOR_EXPR:
727 if (dump_file && (dump_flags & TDF_DETAILS))
729 fprintf (dump_file, "Equivalence: ");
730 print_generic_expr (dump_file, curr->op, 0);
731 fprintf (dump_file, " ^ ");
732 print_generic_expr (dump_file, last->op, 0);
733 fprintf (dump_file, " -> nothing\n");
736 reassociate_stats.ops_eliminated += 2;
738 if (ops->length () == 2)
740 ops->truncate (0);
741 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
742 *all_done = true;
744 else
746 ops->ordered_remove (i-1);
747 ops->ordered_remove (i-1);
750 return true;
752 default:
753 break;
756 return false;
759 static vec<tree> plus_negates;
761 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
762 expression, look in OPS for a corresponding positive operation to cancel
763 it out. If we find one, remove the other from OPS, replace
764 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
765 return false. */
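/* For instance (an illustrative sketch, SSA names made up): in a PLUS_EXPR
   chain with OPS {t_5, b_2, c_3} where t_5 = -b_2, the pair cancels and the
   list becomes {c_3, 0}; with t_5 = ~b_2 it becomes {c_3, -1} instead.  */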
767 static bool
768 eliminate_plus_minus_pair (enum tree_code opcode,
769 vec<operand_entry *> *ops,
770 unsigned int currindex,
771 operand_entry *curr)
773 tree negateop;
774 tree notop;
775 unsigned int i;
776 operand_entry *oe;
778 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
779 return false;
781 negateop = get_unary_op (curr->op, NEGATE_EXPR);
782 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
783 if (negateop == NULL_TREE && notop == NULL_TREE)
784 return false;
786 /* Any non-negated version will have a rank that is one less than
787 the current rank. So once we hit those ranks, if we don't find
788 one, we can stop. */
790 for (i = currindex + 1;
791 ops->iterate (i, &oe)
792 && oe->rank >= curr->rank - 1 ;
793 i++)
795 if (negateop
796 && ops_equal_values_p (oe->op, negateop))
798 if (dump_file && (dump_flags & TDF_DETAILS))
800 fprintf (dump_file, "Equivalence: ");
801 print_generic_expr (dump_file, negateop, 0);
802 fprintf (dump_file, " + -");
803 print_generic_expr (dump_file, oe->op, 0);
804 fprintf (dump_file, " -> 0\n");
807 ops->ordered_remove (i);
808 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
809 ops->ordered_remove (currindex);
810 reassociate_stats.ops_eliminated ++;
812 return true;
814 else if (notop
815 && ops_equal_values_p (oe->op, notop))
817 tree op_type = TREE_TYPE (oe->op);
819 if (dump_file && (dump_flags & TDF_DETAILS))
821 fprintf (dump_file, "Equivalence: ");
822 print_generic_expr (dump_file, notop, 0);
823 fprintf (dump_file, " + ~");
824 print_generic_expr (dump_file, oe->op, 0);
825 fprintf (dump_file, " -> -1\n");
828 ops->ordered_remove (i);
829 add_to_ops_vec (ops, build_all_ones_cst (op_type));
830 ops->ordered_remove (currindex);
831 reassociate_stats.ops_eliminated ++;
833 return true;
837 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
838 save it for later inspection in repropagate_negates(). */
839 if (negateop != NULL_TREE
840 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
841 plus_negates.safe_push (curr->op);
843 return false;
846 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
847    bitwise not expression, look in OPS for a corresponding operand to
848 cancel it out. If we find one, remove the other from OPS, replace
849 OPS[CURRINDEX] with 0, and return true. Otherwise, return
850 false. */
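/* Concretely, once x_1 and ~x_1 are both found in a BIT_AND_EXPR chain the
   whole operand list collapses to 0, and for a BIT_IOR_EXPR chain it
   collapses to all-ones (-1), as implemented below.  */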
852 static bool
853 eliminate_not_pairs (enum tree_code opcode,
854 vec<operand_entry *> *ops,
855 unsigned int currindex,
856 operand_entry *curr)
858 tree notop;
859 unsigned int i;
860 operand_entry *oe;
862 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
863 || TREE_CODE (curr->op) != SSA_NAME)
864 return false;
866 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
867 if (notop == NULL_TREE)
868 return false;
870 /* Any non-not version will have a rank that is one less than
871 the current rank. So once we hit those ranks, if we don't find
872 one, we can stop. */
874 for (i = currindex + 1;
875 ops->iterate (i, &oe)
876 && oe->rank >= curr->rank - 1;
877 i++)
879 if (oe->op == notop)
881 if (dump_file && (dump_flags & TDF_DETAILS))
883 fprintf (dump_file, "Equivalence: ");
884 print_generic_expr (dump_file, notop, 0);
885 if (opcode == BIT_AND_EXPR)
886 fprintf (dump_file, " & ~");
887 else if (opcode == BIT_IOR_EXPR)
888 fprintf (dump_file, " | ~");
889 print_generic_expr (dump_file, oe->op, 0);
890 if (opcode == BIT_AND_EXPR)
891 fprintf (dump_file, " -> 0\n");
892 else if (opcode == BIT_IOR_EXPR)
893 fprintf (dump_file, " -> -1\n");
896 if (opcode == BIT_AND_EXPR)
897 oe->op = build_zero_cst (TREE_TYPE (oe->op));
898 else if (opcode == BIT_IOR_EXPR)
899 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
901 reassociate_stats.ops_eliminated += ops->length () - 1;
902 ops->truncate (0);
903 ops->quick_push (oe);
904 return true;
908 return false;
911 /* Use constant value that may be present in OPS to try to eliminate
912 operands. Note that this function is only really used when we've
913 eliminated ops for other reasons, or merged constants. Across
914 single statements, fold already does all of this, plus more. There
915 is little point in duplicating logic, so I've only included the
916 identities that I could ever construct testcases to trigger. */
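/* The identities handled below are of the form x & 0 -> 0, x & -1 -> x,
   x | -1 -> -1, x | 0 -> x, x * 0 -> 0, x * 1 -> x and x [+-^] 0 -> x,
   with the floating point variants guarded by the HONOR_* and
   fold_real_zero_addition_p checks.  */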
918 static void
919 eliminate_using_constants (enum tree_code opcode,
920 vec<operand_entry *> *ops)
922 operand_entry *oelast = ops->last ();
923 tree type = TREE_TYPE (oelast->op);
925 if (oelast->rank == 0
926 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
928 switch (opcode)
930 case BIT_AND_EXPR:
931 if (integer_zerop (oelast->op))
933 if (ops->length () != 1)
935 if (dump_file && (dump_flags & TDF_DETAILS))
936 fprintf (dump_file, "Found & 0, removing all other ops\n");
938 reassociate_stats.ops_eliminated += ops->length () - 1;
940 ops->truncate (0);
941 ops->quick_push (oelast);
942 return;
945 else if (integer_all_onesp (oelast->op))
947 if (ops->length () != 1)
949 if (dump_file && (dump_flags & TDF_DETAILS))
950 fprintf (dump_file, "Found & -1, removing\n");
951 ops->pop ();
952 reassociate_stats.ops_eliminated++;
955 break;
956 case BIT_IOR_EXPR:
957 if (integer_all_onesp (oelast->op))
959 if (ops->length () != 1)
961 if (dump_file && (dump_flags & TDF_DETAILS))
962 fprintf (dump_file, "Found | -1, removing all other ops\n");
964 reassociate_stats.ops_eliminated += ops->length () - 1;
966 ops->truncate (0);
967 ops->quick_push (oelast);
968 return;
971 else if (integer_zerop (oelast->op))
973 if (ops->length () != 1)
975 if (dump_file && (dump_flags & TDF_DETAILS))
976 fprintf (dump_file, "Found | 0, removing\n");
977 ops->pop ();
978 reassociate_stats.ops_eliminated++;
981 break;
982 case MULT_EXPR:
983 if (integer_zerop (oelast->op)
984 || (FLOAT_TYPE_P (type)
985 && !HONOR_NANS (type)
986 && !HONOR_SIGNED_ZEROS (type)
987 && real_zerop (oelast->op)))
989 if (ops->length () != 1)
991 if (dump_file && (dump_flags & TDF_DETAILS))
992 fprintf (dump_file, "Found * 0, removing all other ops\n");
994 reassociate_stats.ops_eliminated += ops->length () - 1;
995 ops->truncate (1);
996 ops->quick_push (oelast);
997 return;
1000 else if (integer_onep (oelast->op)
1001 || (FLOAT_TYPE_P (type)
1002 && !HONOR_SNANS (type)
1003 && real_onep (oelast->op)))
1005 if (ops->length () != 1)
1007 if (dump_file && (dump_flags & TDF_DETAILS))
1008 fprintf (dump_file, "Found * 1, removing\n");
1009 ops->pop ();
1010 reassociate_stats.ops_eliminated++;
1011 return;
1014 break;
1015 case BIT_XOR_EXPR:
1016 case PLUS_EXPR:
1017 case MINUS_EXPR:
1018 if (integer_zerop (oelast->op)
1019 || (FLOAT_TYPE_P (type)
1020 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1021 && fold_real_zero_addition_p (type, oelast->op,
1022 opcode == MINUS_EXPR)))
1024 if (ops->length () != 1)
1026 if (dump_file && (dump_flags & TDF_DETAILS))
1027 fprintf (dump_file, "Found [|^+] 0, removing\n");
1028 ops->pop ();
1029 reassociate_stats.ops_eliminated++;
1030 return;
1033 break;
1034 default:
1035 break;
1041 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1042 bool, bool);
1044 /* Structure for tracking and counting operands. */
1045 struct oecount {
1046 int cnt;
1047 int id;
1048 enum tree_code oecode;
1049 tree op;
1053 /* The heap for the oecount hashtable and the sorted list of operands. */
1054 static vec<oecount> cvec;
1057 /* Oecount hashtable helpers. */
1059 struct oecount_hasher : int_hash <int, 0, 1>
1061 static inline hashval_t hash (int);
1062 static inline bool equal (int, int);
1065 /* Hash function for oecount. */
1067 inline hashval_t
1068 oecount_hasher::hash (int p)
1070 const oecount *c = &cvec[p - 42];
1071 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1074 /* Comparison function for oecount. */
1076 inline bool
1077 oecount_hasher::equal (int p1, int p2)
1079 const oecount *c1 = &cvec[p1 - 42];
1080 const oecount *c2 = &cvec[p2 - 42];
1081 return (c1->oecode == c2->oecode
1082 && c1->op == c2->op);
1085 /* Comparison function for qsort sorting oecount elements by count. */
1087 static int
1088 oecount_cmp (const void *p1, const void *p2)
1090 const oecount *c1 = (const oecount *)p1;
1091 const oecount *c2 = (const oecount *)p2;
1092 if (c1->cnt != c2->cnt)
1093 return c1->cnt - c2->cnt;
1094 else
1095 /* If counts are identical, use unique IDs to stabilize qsort. */
1096 return c1->id - c2->id;
1099 /* Return TRUE iff STMT represents a builtin call that raises OP
1100 to some exponent. */
1102 static bool
1103 stmt_is_power_of_op (gimple *stmt, tree op)
1105 if (!is_gimple_call (stmt))
1106 return false;
1108 switch (gimple_call_combined_fn (stmt))
1110 CASE_CFN_POW:
1111 CASE_CFN_POWI:
1112 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1114 default:
1115 return false;
1119 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1120 in place and return the result. Assumes that stmt_is_power_of_op
1121 was previously called for STMT and returned TRUE. */
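/* E.g. a __builtin_powi (x, 5) call is rewritten in place to
   __builtin_powi (x, 4) and 4 is returned.  */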
1123 static HOST_WIDE_INT
1124 decrement_power (gimple *stmt)
1126 REAL_VALUE_TYPE c, cint;
1127 HOST_WIDE_INT power;
1128 tree arg1;
1130 switch (gimple_call_combined_fn (stmt))
1132 CASE_CFN_POW:
1133 arg1 = gimple_call_arg (stmt, 1);
1134 c = TREE_REAL_CST (arg1);
1135 power = real_to_integer (&c) - 1;
1136 real_from_integer (&cint, VOIDmode, power, SIGNED);
1137 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1138 return power;
1140 CASE_CFN_POWI:
1141 arg1 = gimple_call_arg (stmt, 1);
1142 power = TREE_INT_CST_LOW (arg1) - 1;
1143 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1144 return power;
1146 default:
1147 gcc_unreachable ();
1151 /* Find the single immediate use of STMT's LHS, and replace it
1152 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1153 replace *DEF with OP as well. */
1155 static void
1156 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1158 tree lhs;
1159 gimple *use_stmt;
1160 use_operand_p use;
1161 gimple_stmt_iterator gsi;
1163 if (is_gimple_call (stmt))
1164 lhs = gimple_call_lhs (stmt);
1165 else
1166 lhs = gimple_assign_lhs (stmt);
1168 gcc_assert (has_single_use (lhs));
1169 single_imm_use (lhs, &use, &use_stmt);
1170 if (lhs == *def)
1171 *def = op;
1172 SET_USE (use, op);
1173 if (TREE_CODE (op) != SSA_NAME)
1174 update_stmt (use_stmt);
1175 gsi = gsi_for_stmt (stmt);
1176 unlink_stmt_vdef (stmt);
1177 reassoc_remove_stmt (&gsi);
1178 release_defs (stmt);
1181 /* Walks the linear chain with result *DEF searching for an operation
1182    with operand OP and code OPCODE, removing it from the chain.  *DEF
1183    is updated if there is only one operand but no operation left.  */
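/* An illustrative walk (SSA names made up): if *DEF is t_4 from

     t_3 = a_1 * b_2;
     t_4 = t_3 * c_5;

   and OP is b_2 with OPCODE MULT_EXPR, the multiply defining t_3 is removed
   and its other operand a_1 is propagated, leaving t_4 = a_1 * c_5.  */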
1185 static void
1186 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1188 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1192 tree name;
1194 if (opcode == MULT_EXPR)
1196 if (stmt_is_power_of_op (stmt, op))
1198 if (decrement_power (stmt) == 1)
1199 propagate_op_to_single_use (op, stmt, def);
1200 return;
1202 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
1203 && gimple_assign_rhs1 (stmt) == op)
1205 propagate_op_to_single_use (op, stmt, def);
1206 return;
1210 name = gimple_assign_rhs1 (stmt);
1212       /* If this is the operation we look for and one of the operands
1213          is ours, simply propagate the other operand into the stmt's
1214          single use.  */
1215 if (gimple_assign_rhs_code (stmt) == opcode
1216 && (name == op
1217 || gimple_assign_rhs2 (stmt) == op))
1219 if (name == op)
1220 name = gimple_assign_rhs2 (stmt);
1221 propagate_op_to_single_use (name, stmt, def);
1222 return;
1225 /* We might have a multiply of two __builtin_pow* calls, and
1226 the operand might be hiding in the rightmost one. Likewise
1227 this can happen for a negate. */
1228 if (opcode == MULT_EXPR
1229 && gimple_assign_rhs_code (stmt) == opcode
1230 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1231 && has_single_use (gimple_assign_rhs2 (stmt)))
1233 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1234 if (stmt_is_power_of_op (stmt2, op))
1236 if (decrement_power (stmt2) == 1)
1237 propagate_op_to_single_use (op, stmt2, def);
1238 return;
1240 else if (is_gimple_assign (stmt2)
1241 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR
1242 && gimple_assign_rhs1 (stmt2) == op)
1244 propagate_op_to_single_use (op, stmt2, def);
1245 return;
1249 /* Continue walking the chain. */
1250 gcc_assert (name != op
1251 && TREE_CODE (name) == SSA_NAME);
1252 stmt = SSA_NAME_DEF_STMT (name);
1254 while (1);
1257 /* Returns true if statement S1 dominates statement S2. Like
1258 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1260 static bool
1261 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1263 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1265 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1266      SSA_NAME.  Assume it lives at the beginning of the function and
1267 thus dominates everything. */
1268 if (!bb1 || s1 == s2)
1269 return true;
1271 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1272 if (!bb2)
1273 return false;
1275 if (bb1 == bb2)
1277 /* PHIs in the same basic block are assumed to be
1278 executed all in parallel, if only one stmt is a PHI,
1279 it dominates the other stmt in the same basic block. */
1280 if (gimple_code (s1) == GIMPLE_PHI)
1281 return true;
1283 if (gimple_code (s2) == GIMPLE_PHI)
1284 return false;
1286 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1288 if (gimple_uid (s1) < gimple_uid (s2))
1289 return true;
1291 if (gimple_uid (s1) > gimple_uid (s2))
1292 return false;
1294 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1295 unsigned int uid = gimple_uid (s1);
1296 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1298 gimple *s = gsi_stmt (gsi);
1299 if (gimple_uid (s) != uid)
1300 break;
1301 if (s == s2)
1302 return true;
1305 return false;
1308 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1311 /* Insert STMT after INSERT_POINT. */
1313 static void
1314 insert_stmt_after (gimple *stmt, gimple *insert_point)
1316 gimple_stmt_iterator gsi;
1317 basic_block bb;
1319 if (gimple_code (insert_point) == GIMPLE_PHI)
1320 bb = gimple_bb (insert_point);
1321 else if (!stmt_ends_bb_p (insert_point))
1323 gsi = gsi_for_stmt (insert_point);
1324 gimple_set_uid (stmt, gimple_uid (insert_point));
1325 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1326 return;
1328 else
1329 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1330 thus if it must end a basic block, it should be a call that can
1331 throw, or some assignment that can throw. If it throws, the LHS
1332 of it will not be initialized though, so only valid places using
1333 the SSA_NAME should be dominated by the fallthru edge. */
1334 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1335 gsi = gsi_after_labels (bb);
1336 if (gsi_end_p (gsi))
1338 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1339 gimple_set_uid (stmt,
1340 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1342 else
1343 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1344 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1347 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1348 the result. Places the statement after the definition of either
1349 OP1 or OP2. Returns the new statement. */
1351 static gimple *
1352 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1354 gimple *op1def = NULL, *op2def = NULL;
1355 gimple_stmt_iterator gsi;
1356 tree op;
1357 gassign *sum;
1359 /* Create the addition statement. */
1360 op = make_ssa_name (type);
1361 sum = gimple_build_assign (op, opcode, op1, op2);
1363 /* Find an insertion place and insert. */
1364 if (TREE_CODE (op1) == SSA_NAME)
1365 op1def = SSA_NAME_DEF_STMT (op1);
1366 if (TREE_CODE (op2) == SSA_NAME)
1367 op2def = SSA_NAME_DEF_STMT (op2);
1368 if ((!op1def || gimple_nop_p (op1def))
1369 && (!op2def || gimple_nop_p (op2def)))
1371 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1372 if (gsi_end_p (gsi))
1374 gimple_stmt_iterator gsi2
1375 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1376 gimple_set_uid (sum,
1377 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1379 else
1380 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1381 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1383 else
1385 gimple *insert_point;
1386 if ((!op1def || gimple_nop_p (op1def))
1387 || (op2def && !gimple_nop_p (op2def)
1388 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1389 insert_point = op2def;
1390 else
1391 insert_point = op1def;
1392 insert_stmt_after (sum, insert_point);
1394 update_stmt (sum);
1396 return sum;
1399 /* Perform un-distribution of divisions and multiplications.
1400 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1401 to (A + B) / X for real X.
1403 The algorithm is organized as follows.
1405 - First we walk the addition chain *OPS looking for summands that
1406 are defined by a multiplication or a real division. This results
1407 in the candidates bitmap with relevant indices into *OPS.
1409 - Second we build the chains of multiplications or divisions for
1410 these candidates, counting the number of occurrences of (operand, code)
1411 pairs in all of the candidates chains.
1413 - Third we sort the (operand, code) pairs by number of occurrence and
1414 process them starting with the pair with the most uses.
1416 * For each such pair we walk the candidates again to build a
1417 second candidate bitmap noting all multiplication/division chains
1418 that have at least one occurrence of (operand, code).
1420 * We build an alternate addition chain only covering these
1421 candidates with one (operand, code) operation removed from their
1422 multiplication/division chain.
1424 * The first candidate gets replaced by the alternate addition chain
1425 multiplied/divided by the operand.
1427 * All candidate chains get disabled for further processing and
1428 processing of (operand, code) pairs continues.
1430 The alternate addition chains built are re-processed by the main
1431 reassociation algorithm which allows optimizing a * x * y + b * y * x
1432 to (a + b ) * x * y in one invocation of the reassociation pass. */
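/* A short hand-worked sketch of the above: for the addition chain
   a*x + b*x + c the two multiplications become candidates, the counting
   table ends up with (x, MULT_EXPR) at count 2 (a and b at count 1), so x
   is processed first: both candidate chains get their x factor removed,
   the alternate chain a + b is built and multiplied by x, and that result
   replaces the first candidate, leaving (a + b)*x + c for the normal
   reassociation machinery to finish.  */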
1434 static bool
1435 undistribute_ops_list (enum tree_code opcode,
1436 vec<operand_entry *> *ops, struct loop *loop)
1438 unsigned int length = ops->length ();
1439 operand_entry *oe1;
1440 unsigned i, j;
1441 sbitmap candidates, candidates2;
1442 unsigned nr_candidates, nr_candidates2;
1443 sbitmap_iterator sbi0;
1444 vec<operand_entry *> *subops;
1445 bool changed = false;
1446 int next_oecount_id = 0;
1448 if (length <= 1
1449 || opcode != PLUS_EXPR)
1450 return false;
1452 /* Build a list of candidates to process. */
1453 candidates = sbitmap_alloc (length);
1454 bitmap_clear (candidates);
1455 nr_candidates = 0;
1456 FOR_EACH_VEC_ELT (*ops, i, oe1)
1458 enum tree_code dcode;
1459 gimple *oe1def;
1461 if (TREE_CODE (oe1->op) != SSA_NAME)
1462 continue;
1463 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1464 if (!is_gimple_assign (oe1def))
1465 continue;
1466 dcode = gimple_assign_rhs_code (oe1def);
1467 if ((dcode != MULT_EXPR
1468 && dcode != RDIV_EXPR)
1469 || !is_reassociable_op (oe1def, dcode, loop))
1470 continue;
1472 bitmap_set_bit (candidates, i);
1473 nr_candidates++;
1476 if (nr_candidates < 2)
1478 sbitmap_free (candidates);
1479 return false;
1482 if (dump_file && (dump_flags & TDF_DETAILS))
1484 fprintf (dump_file, "searching for un-distribute opportunities ");
1485 print_generic_expr (dump_file,
1486 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1487 fprintf (dump_file, " %d\n", nr_candidates);
1490 /* Build linearized sub-operand lists and the counting table. */
1491 cvec.create (0);
1493 hash_table<oecount_hasher> ctable (15);
1495   /* ??? Macro arguments cannot have multi-argument template types in
1496      them.  This typedef is needed to work around that limitation.  */
1497 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1498 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1499 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1501 gimple *oedef;
1502 enum tree_code oecode;
1503 unsigned j;
1505 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1506 oecode = gimple_assign_rhs_code (oedef);
1507 linearize_expr_tree (&subops[i], oedef,
1508 associative_tree_code (oecode), false);
1510 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1512 oecount c;
1513 int *slot;
1514 int idx;
1515 c.oecode = oecode;
1516 c.cnt = 1;
1517 c.id = next_oecount_id++;
1518 c.op = oe1->op;
1519 cvec.safe_push (c);
1520 idx = cvec.length () + 41;
1521 slot = ctable.find_slot (idx, INSERT);
1522 if (!*slot)
1524 *slot = idx;
1526 else
1528 cvec.pop ();
1529 cvec[*slot - 42].cnt++;
1534 /* Sort the counting table. */
1535 cvec.qsort (oecount_cmp);
1537 if (dump_file && (dump_flags & TDF_DETAILS))
1539 oecount *c;
1540 fprintf (dump_file, "Candidates:\n");
1541 FOR_EACH_VEC_ELT (cvec, j, c)
1543 fprintf (dump_file, " %u %s: ", c->cnt,
1544 c->oecode == MULT_EXPR
1545 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1546 print_generic_expr (dump_file, c->op, 0);
1547 fprintf (dump_file, "\n");
1551 /* Process the (operand, code) pairs in order of most occurrence. */
1552 candidates2 = sbitmap_alloc (length);
1553 while (!cvec.is_empty ())
1555 oecount *c = &cvec.last ();
1556 if (c->cnt < 2)
1557 break;
1559 /* Now collect the operands in the outer chain that contain
1560 the common operand in their inner chain. */
1561 bitmap_clear (candidates2);
1562 nr_candidates2 = 0;
1563 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1565 gimple *oedef;
1566 enum tree_code oecode;
1567 unsigned j;
1568 tree op = (*ops)[i]->op;
1570	  /* If we already undistributed in this chain, this may be
1571	     a constant.  */
1572 if (TREE_CODE (op) != SSA_NAME)
1573 continue;
1575 oedef = SSA_NAME_DEF_STMT (op);
1576 oecode = gimple_assign_rhs_code (oedef);
1577 if (oecode != c->oecode)
1578 continue;
1580 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1582 if (oe1->op == c->op)
1584 bitmap_set_bit (candidates2, i);
1585 ++nr_candidates2;
1586 break;
1591 if (nr_candidates2 >= 2)
1593 operand_entry *oe1, *oe2;
1594 gimple *prod;
1595 int first = bitmap_first_set_bit (candidates2);
1597 /* Build the new addition chain. */
1598 oe1 = (*ops)[first];
1599 if (dump_file && (dump_flags & TDF_DETAILS))
1601 fprintf (dump_file, "Building (");
1602 print_generic_expr (dump_file, oe1->op, 0);
1604 zero_one_operation (&oe1->op, c->oecode, c->op);
1605 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1607 gimple *sum;
1608 oe2 = (*ops)[i];
1609 if (dump_file && (dump_flags & TDF_DETAILS))
1611 fprintf (dump_file, " + ");
1612 print_generic_expr (dump_file, oe2->op, 0);
1614 zero_one_operation (&oe2->op, c->oecode, c->op);
1615 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1616 oe1->op, oe2->op, opcode);
1617 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1618 oe2->rank = 0;
1619 oe1->op = gimple_get_lhs (sum);
1622 /* Apply the multiplication/division. */
1623 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1624 oe1->op, c->op, c->oecode);
1625 if (dump_file && (dump_flags & TDF_DETAILS))
1627 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1628 print_generic_expr (dump_file, c->op, 0);
1629 fprintf (dump_file, "\n");
1632 /* Record it in the addition chain and disable further
1633 undistribution with this op. */
1634 oe1->op = gimple_assign_lhs (prod);
1635 oe1->rank = get_rank (oe1->op);
1636 subops[first].release ();
1638 changed = true;
1641 cvec.pop ();
1644 for (i = 0; i < ops->length (); ++i)
1645 subops[i].release ();
1646 free (subops);
1647 cvec.release ();
1648 sbitmap_free (candidates);
1649 sbitmap_free (candidates2);
1651 return changed;
1654 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1655 expression, examine the other OPS to see if any of them are comparisons
1656 of the same values, which we may be able to combine or eliminate.
1657 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1659 static bool
1660 eliminate_redundant_comparison (enum tree_code opcode,
1661 vec<operand_entry *> *ops,
1662 unsigned int currindex,
1663 operand_entry *curr)
1665 tree op1, op2;
1666 enum tree_code lcode, rcode;
1667 gimple *def1, *def2;
1668 int i;
1669 operand_entry *oe;
1671 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1672 return false;
1674 /* Check that CURR is a comparison. */
1675 if (TREE_CODE (curr->op) != SSA_NAME)
1676 return false;
1677 def1 = SSA_NAME_DEF_STMT (curr->op);
1678 if (!is_gimple_assign (def1))
1679 return false;
1680 lcode = gimple_assign_rhs_code (def1);
1681 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1682 return false;
1683 op1 = gimple_assign_rhs1 (def1);
1684 op2 = gimple_assign_rhs2 (def1);
1686 /* Now look for a similar comparison in the remaining OPS. */
1687 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1689 tree t;
1691 if (TREE_CODE (oe->op) != SSA_NAME)
1692 continue;
1693 def2 = SSA_NAME_DEF_STMT (oe->op);
1694 if (!is_gimple_assign (def2))
1695 continue;
1696 rcode = gimple_assign_rhs_code (def2);
1697 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1698 continue;
1700 /* If we got here, we have a match. See if we can combine the
1701 two comparisons. */
1702 if (opcode == BIT_IOR_EXPR)
1703 t = maybe_fold_or_comparisons (lcode, op1, op2,
1704 rcode, gimple_assign_rhs1 (def2),
1705 gimple_assign_rhs2 (def2));
1706 else
1707 t = maybe_fold_and_comparisons (lcode, op1, op2,
1708 rcode, gimple_assign_rhs1 (def2),
1709 gimple_assign_rhs2 (def2));
1710 if (!t)
1711 continue;
1713 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1714 always give us a boolean_type_node value back. If the original
1715 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1716 we need to convert. */
1717 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1718 t = fold_convert (TREE_TYPE (curr->op), t);
1720 if (TREE_CODE (t) != INTEGER_CST
1721 && !operand_equal_p (t, curr->op, 0))
1723 enum tree_code subcode;
1724 tree newop1, newop2;
1725 if (!COMPARISON_CLASS_P (t))
1726 continue;
1727 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1728 STRIP_USELESS_TYPE_CONVERSION (newop1);
1729 STRIP_USELESS_TYPE_CONVERSION (newop2);
1730 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1731 continue;
1734 if (dump_file && (dump_flags & TDF_DETAILS))
1736 fprintf (dump_file, "Equivalence: ");
1737 print_generic_expr (dump_file, curr->op, 0);
1738 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1739 print_generic_expr (dump_file, oe->op, 0);
1740 fprintf (dump_file, " -> ");
1741 print_generic_expr (dump_file, t, 0);
1742 fprintf (dump_file, "\n");
1745 /* Now we can delete oe, as it has been subsumed by the new combined
1746 expression t. */
1747 ops->ordered_remove (i);
1748 reassociate_stats.ops_eliminated ++;
1750 /* If t is the same as curr->op, we're done. Otherwise we must
1751 replace curr->op with t. Special case is if we got a constant
1752 back, in which case we add it to the end instead of in place of
1753 the current entry. */
1754 if (TREE_CODE (t) == INTEGER_CST)
1756 ops->ordered_remove (currindex);
1757 add_to_ops_vec (ops, t);
1759 else if (!operand_equal_p (t, curr->op, 0))
1761 gimple *sum;
1762 enum tree_code subcode;
1763 tree newop1;
1764 tree newop2;
1765 gcc_assert (COMPARISON_CLASS_P (t));
1766 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1767 STRIP_USELESS_TYPE_CONVERSION (newop1);
1768 STRIP_USELESS_TYPE_CONVERSION (newop2);
1769 gcc_checking_assert (is_gimple_val (newop1)
1770 && is_gimple_val (newop2));
1771 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1772 curr->op = gimple_get_lhs (sum);
1774 return true;
1777 return false;
1780 /* If the stmt that defines an operand has to be inserted, insert it
1781    before the use.  */
1782 static void
1783 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
1785 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1786 gimple_set_uid (stmt_to_insert, gimple_uid (stmt));
1787 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
1791 /* Transform repeated addition of the same value into a multiply by
1792    a constant.  */
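/* E.g. an operand list {x_1, x_1, x_1, y_2} becomes {y_2, t_3} together
   with a new statement t_3 = x_1 * 3 (names illustrative).  */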
1793 static bool
1794 transform_add_to_multiply (vec<operand_entry *> *ops)
1796 operand_entry *oe;
1797 tree op = NULL_TREE;
1798 int j;
1799 int i, start = -1, end = 0, count = 0;
1800 vec<std::pair <int, int> > indxs = vNULL;
1801 bool changed = false;
1803 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1804 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1805 || !flag_unsafe_math_optimizations))
1806 return false;
1808 /* Look for repeated operands. */
1809 FOR_EACH_VEC_ELT (*ops, i, oe)
1811 if (start == -1)
1813 count = 1;
1814 op = oe->op;
1815 start = i;
1817 else if (operand_equal_p (oe->op, op, 0))
1819 count++;
1820 end = i;
1822 else
1824 if (count > 1)
1825 indxs.safe_push (std::make_pair (start, end));
1826 count = 1;
1827 op = oe->op;
1828 start = i;
1832 if (count > 1)
1833 indxs.safe_push (std::make_pair (start, end));
1835 for (j = indxs.length () - 1; j >= 0; --j)
1837 /* Convert repeated operand addition to multiplication. */
1838 start = indxs[j].first;
1839 end = indxs[j].second;
1840 op = (*ops)[start]->op;
1841 count = end - start + 1;
1842 for (i = end; i >= start; --i)
1843 ops->unordered_remove (i);
1844 tree tmp = make_ssa_name (TREE_TYPE (op));
1845 tree cst = build_int_cst (integer_type_node, count);
1846 gassign *mul_stmt
1847 = gimple_build_assign (tmp, MULT_EXPR,
1848 op, fold_convert (TREE_TYPE (op), cst));
1849 gimple_set_visited (mul_stmt, true);
1850 add_to_ops_vec (ops, tmp, mul_stmt);
1851 changed = true;
1854 return changed;
1858 /* Perform various identities and other optimizations on the list of
1859 operand entries, stored in OPS. The tree code for the binary
1860 operation between all the operands is OPCODE. */
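/* For example, with OPCODE PLUS_EXPR and OPS {a_1, 5, 3} (constants sort
   last), the two trailing constants are folded into a single 8 first, and
   then the pairwise eliminations below are retried on the shorter list.  */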
1862 static void
1863 optimize_ops_list (enum tree_code opcode,
1864 vec<operand_entry *> *ops)
1866 unsigned int length = ops->length ();
1867 unsigned int i;
1868 operand_entry *oe;
1869 operand_entry *oelast = NULL;
1870 bool iterate = false;
1872 if (length == 1)
1873 return;
1875 oelast = ops->last ();
1877 /* If the last two are constants, pop the constants off, merge them
1878 and try the next two. */
1879 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1881 operand_entry *oelm1 = (*ops)[length - 2];
1883 if (oelm1->rank == 0
1884 && is_gimple_min_invariant (oelm1->op)
1885 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1886 TREE_TYPE (oelast->op)))
1888 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1889 oelm1->op, oelast->op);
1891 if (folded && is_gimple_min_invariant (folded))
1893 if (dump_file && (dump_flags & TDF_DETAILS))
1894 fprintf (dump_file, "Merging constants\n");
1896 ops->pop ();
1897 ops->pop ();
1899 add_to_ops_vec (ops, folded);
1900 reassociate_stats.constants_eliminated++;
1902 optimize_ops_list (opcode, ops);
1903 return;
1908 eliminate_using_constants (opcode, ops);
1909 oelast = NULL;
1911 for (i = 0; ops->iterate (i, &oe);)
1913 bool done = false;
1915 if (eliminate_not_pairs (opcode, ops, i, oe))
1916 return;
1917 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1918 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1919 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1921 if (done)
1922 return;
1923 iterate = true;
1924 oelast = NULL;
1925 continue;
1927 oelast = oe;
1928 i++;
1931 length = ops->length ();
1932 oelast = ops->last ();
1934 if (iterate)
1935 optimize_ops_list (opcode, ops);
1938 /* The following functions are subroutines to optimize_range_tests and allow
1939 it to try to change a logical combination of comparisons into a range
1940 test.
1942 For example, both
1943    X == 2 || X == 5 || X == 3 || X == 4
1944    and
1945    X >= 2 && X <= 5
1946 are converted to
1947 (unsigned) (X - 2) <= 3
1949 For more information see comments above fold_test_range in fold-const.c,
1950 this implementation is for GIMPLE. */
1952 struct range_entry
1954 tree exp;
1955 tree low;
1956 tree high;
1957 bool in_p;
1958 bool strict_overflow_p;
1959 unsigned int idx, next;
1962 /* This is similar to make_range in fold-const.c, but on top of
1963 GIMPLE instead of trees. If EXP is non-NULL, it should be
1964 an SSA_NAME and STMT argument is ignored, otherwise STMT
1965 argument should be a GIMPLE_COND. */
1967 static void
1968 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
1970 int in_p;
1971 tree low, high;
1972 bool is_bool, strict_overflow_p;
1974 r->exp = NULL_TREE;
1975 r->in_p = false;
1976 r->strict_overflow_p = false;
1977 r->low = NULL_TREE;
1978 r->high = NULL_TREE;
1979 if (exp != NULL_TREE
1980 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1981 return;
1983 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1984 and see if we can refine the range. Some of the cases below may not
1985 happen, but it doesn't seem worth worrying about this. We "continue"
1986 the outer loop when we've changed something; otherwise we "break"
1987 the switch, which will "break" the while. */
1988 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1989 high = low;
1990 in_p = 0;
1991 strict_overflow_p = false;
1992 is_bool = false;
1993 if (exp == NULL_TREE)
1994 is_bool = true;
1995 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1997 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1998 is_bool = true;
1999 else
2000 return;
2002 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2003 is_bool = true;
2005 while (1)
2007 enum tree_code code;
2008 tree arg0, arg1, exp_type;
2009 tree nexp;
2010 location_t loc;
2012 if (exp != NULL_TREE)
2014 if (TREE_CODE (exp) != SSA_NAME
2015 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2016 break;
2018 stmt = SSA_NAME_DEF_STMT (exp);
2019 if (!is_gimple_assign (stmt))
2020 break;
2022 code = gimple_assign_rhs_code (stmt);
2023 arg0 = gimple_assign_rhs1 (stmt);
2024 arg1 = gimple_assign_rhs2 (stmt);
2025 exp_type = TREE_TYPE (exp);
2027 else
2029 code = gimple_cond_code (stmt);
2030 arg0 = gimple_cond_lhs (stmt);
2031 arg1 = gimple_cond_rhs (stmt);
2032 exp_type = boolean_type_node;
2035 if (TREE_CODE (arg0) != SSA_NAME)
2036 break;
2037 loc = gimple_location (stmt);
2038 switch (code)
2040 case BIT_NOT_EXPR:
2041 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2042 /* Ensure the range is either +[-,0], +[0,0],
2043 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2044 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2045 or similar expression of unconditional true or
2046 false, it should not be negated. */
2047 && ((high && integer_zerop (high))
2048 || (low && integer_onep (low))))
2050 in_p = !in_p;
2051 exp = arg0;
2052 continue;
2054 break;
2055 case SSA_NAME:
2056 exp = arg0;
2057 continue;
2058 CASE_CONVERT:
2059 if (is_bool)
2060 goto do_default;
2061 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2063 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2064 is_bool = true;
2065 else
2066 return;
2068 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2069 is_bool = true;
2070 goto do_default;
2071 case EQ_EXPR:
2072 case NE_EXPR:
2073 case LT_EXPR:
2074 case LE_EXPR:
2075 case GE_EXPR:
2076 case GT_EXPR:
2077 is_bool = true;
2078 /* FALLTHRU */
2079 default:
2080 if (!is_bool)
2081 return;
2082 do_default:
2083 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2084 &low, &high, &in_p,
2085 &strict_overflow_p);
2086 if (nexp != NULL_TREE)
2088 exp = nexp;
2089 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2090 continue;
2092 break;
2094 break;
2096 if (is_bool)
2098 r->exp = exp;
2099 r->in_p = in_p;
2100 r->low = low;
2101 r->high = high;
2102 r->strict_overflow_p = strict_overflow_p;
2106 /* Comparison function for qsort. Sort entries
2107 without SSA_NAME exp first, then with SSA_NAMEs sorted
2108 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2109 by increasing ->low and if ->low is the same, by increasing
2110 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2111 maximum. */
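/* For example (hypothetical entries for the same SSA_NAME x_1): the
   ranges [-, 5], [2, 4], [2, 8] and [2, -] would sort in that order --
   NULL low first, then ascending low, then ascending high with NULL high
   last.  Entries whose exp is not an SSA_NAME sort before all of them,
   by ascending idx.  */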
2113 static int
2114 range_entry_cmp (const void *a, const void *b)
2116 const struct range_entry *p = (const struct range_entry *) a;
2117 const struct range_entry *q = (const struct range_entry *) b;
2119 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2121 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2123 /* Group range_entries for the same SSA_NAME together. */
2124 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2125 return -1;
2126 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2127 return 1;
2128 /* If ->low is different, NULL low goes first, then by
2129 ascending low. */
2130 if (p->low != NULL_TREE)
2132 if (q->low != NULL_TREE)
2134 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2135 p->low, q->low);
2136 if (tem && integer_onep (tem))
2137 return -1;
2138 tem = fold_binary (GT_EXPR, boolean_type_node,
2139 p->low, q->low);
2140 if (tem && integer_onep (tem))
2141 return 1;
2143 else
2144 return 1;
2146 else if (q->low != NULL_TREE)
2147 return -1;
2148 /* If ->high is different, NULL high goes last, before that by
2149 ascending high. */
2150 if (p->high != NULL_TREE)
2152 if (q->high != NULL_TREE)
2154 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2155 p->high, q->high);
2156 if (tem && integer_onep (tem))
2157 return -1;
2158 tem = fold_binary (GT_EXPR, boolean_type_node,
2159 p->high, q->high);
2160 if (tem && integer_onep (tem))
2161 return 1;
2163 else
2164 return -1;
2166 else if (q->high != NULL_TREE)
2167 return 1;
2168 /* If both ranges are the same, sort below by ascending idx. */
2170 else
2171 return 1;
2173 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2174 return -1;
2176 if (p->idx < q->idx)
2177 return -1;
2178 else
2180 gcc_checking_assert (p->idx > q->idx);
2181 return 1;
2185 /* Helper routine of optimize_range_test.
2186 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2187 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2188 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2189 is NULL, OTHERRANGEP should not be NULL, and then OTHERRANGEP points to
2190 an array of COUNT pointers to other ranges. Return
2191 true if the range merge has been successful.
2192 If OPCODE is ERROR_MARK, this is called from within
2193 maybe_optimize_range_tests and is performing inter-bb range optimization.
2194 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2195 oe->rank. */
2197 static bool
2198 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2199 struct range_entry **otherrangep,
2200 unsigned int count, enum tree_code opcode,
2201 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2202 bool in_p, tree low, tree high, bool strict_overflow_p)
2204 operand_entry *oe = (*ops)[range->idx];
2205 tree op = oe->op;
2206 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2207 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2208 location_t loc = gimple_location (stmt);
2209 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2210 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2211 in_p, low, high);
2212 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2213 gimple_stmt_iterator gsi;
2214 unsigned int i, uid;
2216 if (tem == NULL_TREE)
2217 return false;
2219 /* If op is default def SSA_NAME, there is no place to insert the
2220 new comparison. Give up, unless we can use OP itself as the
2221 range test. */
2222 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2224 if (op == range->exp
2225 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2226 || TREE_CODE (optype) == BOOLEAN_TYPE)
2227 && (op == tem
2228 || (TREE_CODE (tem) == EQ_EXPR
2229 && TREE_OPERAND (tem, 0) == op
2230 && integer_onep (TREE_OPERAND (tem, 1))))
2231 && opcode != BIT_IOR_EXPR
2232 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2234 stmt = NULL;
2235 tem = op;
2237 else
2238 return false;
2241 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2242 warning_at (loc, OPT_Wstrict_overflow,
2243 "assuming signed overflow does not occur "
2244 "when simplifying range test");
2246 if (dump_file && (dump_flags & TDF_DETAILS))
2248 struct range_entry *r;
2249 fprintf (dump_file, "Optimizing range tests ");
2250 print_generic_expr (dump_file, range->exp, 0);
2251 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2252 print_generic_expr (dump_file, range->low, 0);
2253 fprintf (dump_file, ", ");
2254 print_generic_expr (dump_file, range->high, 0);
2255 fprintf (dump_file, "]");
2256 for (i = 0; i < count; i++)
2258 if (otherrange)
2259 r = otherrange + i;
2260 else
2261 r = otherrangep[i];
2262 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2263 print_generic_expr (dump_file, r->low, 0);
2264 fprintf (dump_file, ", ");
2265 print_generic_expr (dump_file, r->high, 0);
2266 fprintf (dump_file, "]");
2268 fprintf (dump_file, "\n into ");
2269 print_generic_expr (dump_file, tem, 0);
2270 fprintf (dump_file, "\n");
2273 if (opcode == BIT_IOR_EXPR
2274 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2275 tem = invert_truthvalue_loc (loc, tem);
2277 tem = fold_convert_loc (loc, optype, tem);
2278 if (stmt)
2280 gsi = gsi_for_stmt (stmt);
2281 uid = gimple_uid (stmt);
2283 else
2285 gsi = gsi_none ();
2286 uid = 0;
2288 if (stmt == NULL)
2289 gcc_checking_assert (tem == op);
2290 /* In rare cases range->exp can be equal to lhs of stmt.
2291 In that case we have to insert after the stmt rather than before
2292 it. If stmt is a PHI, insert it at the start of the basic block. */
2293 else if (op != range->exp)
2295 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2296 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2297 GSI_SAME_STMT);
2298 gsi_prev (&gsi);
2300 else if (gimple_code (stmt) != GIMPLE_PHI)
2302 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2303 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2304 GSI_CONTINUE_LINKING);
2306 else
2308 gsi = gsi_after_labels (gimple_bb (stmt));
2309 if (!gsi_end_p (gsi))
2310 uid = gimple_uid (gsi_stmt (gsi));
2311 else
2313 gsi = gsi_start_bb (gimple_bb (stmt));
2314 uid = 1;
2315 while (!gsi_end_p (gsi))
2317 uid = gimple_uid (gsi_stmt (gsi));
2318 gsi_next (&gsi);
2321 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2322 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2323 GSI_SAME_STMT);
2324 if (gsi_end_p (gsi))
2325 gsi = gsi_last_bb (gimple_bb (stmt));
2326 else
2327 gsi_prev (&gsi);
2329 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2330 if (gimple_uid (gsi_stmt (gsi)))
2331 break;
2332 else
2333 gimple_set_uid (gsi_stmt (gsi), uid);
2335 oe->op = tem;
2336 range->exp = exp;
2337 range->low = low;
2338 range->high = high;
2339 range->in_p = in_p;
2340 range->strict_overflow_p = false;
2342 for (i = 0; i < count; i++)
2344 if (otherrange)
2345 range = otherrange + i;
2346 else
2347 range = otherrangep[i];
2348 oe = (*ops)[range->idx];
2349 /* Now change all the other range test immediate uses, so that
2350 those tests will be optimized away. */
2351 if (opcode == ERROR_MARK)
2353 if (oe->op)
2354 oe->op = build_int_cst (TREE_TYPE (oe->op),
2355 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2356 else
2357 oe->op = (oe->rank == BIT_IOR_EXPR
2358 ? boolean_false_node : boolean_true_node);
2360 else
2361 oe->op = error_mark_node;
2362 range->exp = NULL_TREE;
2364 return true;
2367 /* Optimize X == CST1 || X == CST2
2368 if popcount (CST1 ^ CST2) == 1 into
2369 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2370 Similarly for ranges. E.g.
2371 X != 2 && X != 3 && X != 10 && X != 11
2372 will be transformed by the previous optimization into
2373 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2374 and this loop can transform that into
2375 !(((X & ~8) - 2U) <= 1U). */
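/* A concrete instance (hypothetical constants): for X == 43 || X == 47
   we have 43 ^ 47 == 4, which has popcount 1, so the two comparisons
   collapse into the single test (X & ~4) == (43 & ~4), i.e.
   (X & ~4) == 43.  */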
2377 static bool
2378 optimize_range_tests_xor (enum tree_code opcode, tree type,
2379 tree lowi, tree lowj, tree highi, tree highj,
2380 vec<operand_entry *> *ops,
2381 struct range_entry *rangei,
2382 struct range_entry *rangej)
2384 tree lowxor, highxor, tem, exp;
2385 /* Check lowi ^ lowj == highi ^ highj and
2386 popcount (lowi ^ lowj) == 1. */
2387 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2388 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2389 return false;
2390 if (!integer_pow2p (lowxor))
2391 return false;
2392 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2393 if (!tree_int_cst_equal (lowxor, highxor))
2394 return false;
2396 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2397 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2398 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2399 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2400 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2401 NULL, rangei->in_p, lowj, highj,
2402 rangei->strict_overflow_p
2403 || rangej->strict_overflow_p))
2404 return true;
2405 return false;
2408 /* Optimize X == CST1 || X == CST2
2409 if popcount (CST2 - CST1) == 1 into
2410 ((X - CST1) & ~(CST2 - CST1)) == 0.
2411 Similarly for ranges. E.g.
2412 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2413 || X == 75 || X == 45
2414 will be transformed by the previous optimization into
2415 (X - 43U) <= 3U || (X - 75U) <= 3U
2416 and this loop can transform that into
2417 ((X - 43U) & ~(75U - 43U)) <= 3U. */
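/* A concrete instance (hypothetical constants): for X == 43 || X == 75
   we have 75 - 43 == 32, which has popcount 1, so the two comparisons
   collapse into ((X - 43U) & ~32U) == 0, matching the shape of the
   larger example above.  */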
2418 static bool
2419 optimize_range_tests_diff (enum tree_code opcode, tree type,
2420 tree lowi, tree lowj, tree highi, tree highj,
2421 vec<operand_entry *> *ops,
2422 struct range_entry *rangei,
2423 struct range_entry *rangej)
2425 tree tem1, tem2, mask;
2426 /* Check highi - lowi == highj - lowj. */
2427 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2428 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2429 return false;
2430 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2431 if (!tree_int_cst_equal (tem1, tem2))
2432 return false;
2433 /* Check popcount (lowj - lowi) == 1. */
2434 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2435 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2436 return false;
2437 if (!integer_pow2p (tem1))
2438 return false;
2440 type = unsigned_type_for (type);
2441 tem1 = fold_convert (type, tem1);
2442 tem2 = fold_convert (type, tem2);
2443 lowi = fold_convert (type, lowi);
2444 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2445 tem1 = fold_binary (MINUS_EXPR, type,
2446 fold_convert (type, rangei->exp), lowi);
2447 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2448 lowj = build_int_cst (type, 0);
2449 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2450 NULL, rangei->in_p, lowj, tem2,
2451 rangei->strict_overflow_p
2452 || rangej->strict_overflow_p))
2453 return true;
2454 return false;
2457 /* This function performs the checks common to optimize_range_tests_xor and
2458 optimize_range_tests_diff.
2459 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2460 Else it calls optimize_range_tests_diff. */
2462 static bool
2463 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2464 bool optimize_xor, vec<operand_entry *> *ops,
2465 struct range_entry *ranges)
2467 int i, j;
2468 bool any_changes = false;
2469 for (i = first; i < length; i++)
2471 tree lowi, highi, lowj, highj, type, tem;
2473 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2474 continue;
2475 type = TREE_TYPE (ranges[i].exp);
2476 if (!INTEGRAL_TYPE_P (type))
2477 continue;
2478 lowi = ranges[i].low;
2479 if (lowi == NULL_TREE)
2480 lowi = TYPE_MIN_VALUE (type);
2481 highi = ranges[i].high;
2482 if (highi == NULL_TREE)
2483 continue;
2484 for (j = i + 1; j < length && j < i + 64; j++)
2486 bool changes;
2487 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2488 continue;
2489 lowj = ranges[j].low;
2490 if (lowj == NULL_TREE)
2491 continue;
2492 highj = ranges[j].high;
2493 if (highj == NULL_TREE)
2494 highj = TYPE_MAX_VALUE (type);
2495 /* Check lowj > highi. */
2496 tem = fold_binary (GT_EXPR, boolean_type_node,
2497 lowj, highi);
2498 if (tem == NULL_TREE || !integer_onep (tem))
2499 continue;
2500 if (optimize_xor)
2501 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2502 highi, highj, ops,
2503 ranges + i, ranges + j);
2504 else
2505 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2506 highi, highj, ops,
2507 ranges + i, ranges + j);
2508 if (changes)
2510 any_changes = true;
2511 break;
2515 return any_changes;
2518 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2519 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2520 EXP on success, NULL otherwise. */
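/* Rough illustration (hypothetical values, 64-bit word): for the range
   [43, 46] with TOTALLOW == LOW == 43, MAX becomes 46 - 43 + 1 == 4 and
   *MASK is the low four bits, 0xf.  A later call for the range [75, 78]
   with TOTALLOW still 43 computes the same 0xf and then shifts it left
   by 75 - 43 == 32 bits, giving 0xf00000000.  */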
2522 static tree
2523 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2524 wide_int *mask, tree *totallowp)
2526 tree tem = int_const_binop (MINUS_EXPR, high, low);
2527 if (tem == NULL_TREE
2528 || TREE_CODE (tem) != INTEGER_CST
2529 || TREE_OVERFLOW (tem)
2530 || tree_int_cst_sgn (tem) == -1
2531 || compare_tree_int (tem, prec) != -1)
2532 return NULL_TREE;
2534 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2535 *mask = wi::shifted_mask (0, max, false, prec);
2536 if (TREE_CODE (exp) == BIT_AND_EXPR
2537 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2539 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2540 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2541 if (wi::popcount (msk) == 1
2542 && wi::ltu_p (msk, prec - max))
2544 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2545 max += msk.to_uhwi ();
2546 exp = TREE_OPERAND (exp, 0);
2547 if (integer_zerop (low)
2548 && TREE_CODE (exp) == PLUS_EXPR
2549 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2551 tree ret = TREE_OPERAND (exp, 0);
2552 STRIP_NOPS (ret);
2553 widest_int bias
2554 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2555 TYPE_PRECISION (TREE_TYPE (low))));
2556 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2557 if (totallowp)
2559 *totallowp = tbias;
2560 return ret;
2562 else if (!tree_int_cst_lt (totallow, tbias))
2563 return NULL_TREE;
2564 bias = wi::to_widest (tbias);
2565 bias -= wi::to_widest (totallow);
2566 if (bias >= 0 && bias < prec - max)
2568 *mask = wi::lshift (*mask, bias);
2569 return ret;
2574 if (totallowp)
2575 return exp;
2576 if (!tree_int_cst_lt (totallow, low))
2577 return exp;
2578 tem = int_const_binop (MINUS_EXPR, low, totallow);
2579 if (tem == NULL_TREE
2580 || TREE_CODE (tem) != INTEGER_CST
2581 || TREE_OVERFLOW (tem)
2582 || compare_tree_int (tem, prec - max) == 1)
2583 return NULL_TREE;
2585 *mask = wi::lshift (*mask, wi::to_widest (tem));
2586 return exp;
2589 /* Attempt to optimize small range tests using bit test.
2590 E.g.
2591 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2592 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2593 has been turned by earlier optimizations into:
2594 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2595 As the whole 43 through 82 range spans fewer than 64 values,
2596 for 64-bit word targets optimize that into:
2597 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
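/* The 0x8F0000004FULL constant above can be read off directly: relative
   to the base 43, the tested values 43 .. 46 set bits 0-3 (0xf), 49 sets
   bit 6 (0x40), 75 .. 78 set bits 32-35 (0xf00000000) and 82 sets bit 39
   (0x8000000000); ORing these together gives 0x8F0000004F.  */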
2599 static bool
2600 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2601 vec<operand_entry *> *ops,
2602 struct range_entry *ranges)
2604 int i, j;
2605 bool any_changes = false;
2606 int prec = GET_MODE_BITSIZE (word_mode);
2607 auto_vec<struct range_entry *, 64> candidates;
2609 for (i = first; i < length - 2; i++)
2611 tree lowi, highi, lowj, highj, type;
2613 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2614 continue;
2615 type = TREE_TYPE (ranges[i].exp);
2616 if (!INTEGRAL_TYPE_P (type))
2617 continue;
2618 lowi = ranges[i].low;
2619 if (lowi == NULL_TREE)
2620 lowi = TYPE_MIN_VALUE (type);
2621 highi = ranges[i].high;
2622 if (highi == NULL_TREE)
2623 continue;
2624 wide_int mask;
2625 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2626 highi, &mask, &lowi);
2627 if (exp == NULL_TREE)
2628 continue;
2629 bool strict_overflow_p = ranges[i].strict_overflow_p;
2630 candidates.truncate (0);
2631 int end = MIN (i + 64, length);
2632 for (j = i + 1; j < end; j++)
2634 tree exp2;
2635 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2636 continue;
2637 if (ranges[j].exp == exp)
2639 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2641 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2642 if (exp2 == exp)
2644 else if (TREE_CODE (exp2) == PLUS_EXPR)
2646 exp2 = TREE_OPERAND (exp2, 0);
2647 STRIP_NOPS (exp2);
2648 if (exp2 != exp)
2649 continue;
2651 else
2652 continue;
2654 else
2655 continue;
2656 lowj = ranges[j].low;
2657 if (lowj == NULL_TREE)
2658 continue;
2659 highj = ranges[j].high;
2660 if (highj == NULL_TREE)
2661 highj = TYPE_MAX_VALUE (type);
2662 wide_int mask2;
2663 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2664 highj, &mask2, NULL);
2665 if (exp2 != exp)
2666 continue;
2667 mask |= mask2;
2668 strict_overflow_p |= ranges[j].strict_overflow_p;
2669 candidates.safe_push (&ranges[j]);
2672 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2673 if (candidates.length () >= 2)
2675 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2676 wi::to_widest (lowi)
2677 + prec - 1 - wi::clz (mask));
2678 operand_entry *oe = (*ops)[ranges[i].idx];
2679 tree op = oe->op;
2680 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2681 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2682 location_t loc = gimple_location (stmt);
2683 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2685 /* See if it isn't cheaper to pretend the minimum value of the
2686 range is 0, if the maximum value is small enough.
2687 We can then avoid the subtraction of the minimum value, but the
2688 mask constant could perhaps be more expensive.
2689 if (compare_tree_int (lowi, 0) > 0
2690 && compare_tree_int (high, prec) < 0)
2692 int cost_diff;
2693 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2694 rtx reg = gen_raw_REG (word_mode, 10000);
2695 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2696 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2697 GEN_INT (-m)), speed_p);
2698 rtx r = immed_wide_int_const (mask, word_mode);
2699 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2700 word_mode, speed_p);
2701 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2702 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2703 word_mode, speed_p);
2704 if (cost_diff > 0)
2706 mask = wi::lshift (mask, m);
2707 lowi = build_zero_cst (TREE_TYPE (lowi));
2711 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2712 false, lowi, high);
2713 if (tem == NULL_TREE || is_gimple_val (tem))
2714 continue;
2715 tree etype = unsigned_type_for (TREE_TYPE (exp));
2716 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2717 fold_convert_loc (loc, etype, exp),
2718 fold_convert_loc (loc, etype, lowi));
2719 exp = fold_convert_loc (loc, integer_type_node, exp);
2720 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2721 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2722 build_int_cst (word_type, 1), exp);
2723 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2724 wide_int_to_tree (word_type, mask));
2725 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2726 build_zero_cst (word_type));
2727 if (is_gimple_val (exp))
2728 continue;
2730 /* The shift might have undefined behavior if TEM is true,
2731 but reassociate_bb isn't prepared to have basic blocks
2732 split when it is running. So, temporarily emit code
2733 with BIT_IOR_EXPR instead of &&, and fix it up in
2734 branch_fixup. */
2735 gimple_seq seq;
2736 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2737 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2738 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2739 gimple_seq seq2;
2740 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2741 gimple_seq_add_seq_without_update (&seq, seq2);
2742 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2743 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2744 gimple *g = gimple_build_assign (make_ssa_name (optype),
2745 BIT_IOR_EXPR, tem, exp);
2746 gimple_set_location (g, loc);
2747 gimple_seq_add_stmt_without_update (&seq, g);
2748 exp = gimple_assign_lhs (g);
2749 tree val = build_zero_cst (optype);
2750 if (update_range_test (&ranges[i], NULL, candidates.address (),
2751 candidates.length (), opcode, ops, exp,
2752 seq, false, val, val, strict_overflow_p))
2754 any_changes = true;
2755 reassoc_branch_fixups.safe_push (tem);
2757 else
2758 gimple_seq_discard (seq);
2761 return any_changes;
2764 /* Optimize range tests, similarly to how fold_range_test optimizes
2765 them on trees. The tree code for the binary
2766 operation between all the operands is OPCODE.
2767 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2768 maybe_optimize_range_tests for inter-bb range optimization.
2769 In that case if oe->op is NULL, oe->id is the index of the bb whose
2770 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2771 the actual opcode. */
2773 static bool
2774 optimize_range_tests (enum tree_code opcode,
2775 vec<operand_entry *> *ops)
2777 unsigned int length = ops->length (), i, j, first;
2778 operand_entry *oe;
2779 struct range_entry *ranges;
2780 bool any_changes = false;
2782 if (length == 1)
2783 return false;
2785 ranges = XNEWVEC (struct range_entry, length);
2786 for (i = 0; i < length; i++)
2788 oe = (*ops)[i];
2789 ranges[i].idx = i;
2790 init_range_entry (ranges + i, oe->op,
2791 oe->op
2792 ? NULL
2793 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2794 /* For | invert it now, we will invert it again before emitting
2795 the optimized expression. */
2796 if (opcode == BIT_IOR_EXPR
2797 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2798 ranges[i].in_p = !ranges[i].in_p;
2801 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2802 for (i = 0; i < length; i++)
2803 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2804 break;
2806 /* Try to merge ranges. */
2807 for (first = i; i < length; i++)
2809 tree low = ranges[i].low;
2810 tree high = ranges[i].high;
2811 int in_p = ranges[i].in_p;
2812 bool strict_overflow_p = ranges[i].strict_overflow_p;
2813 int update_fail_count = 0;
2815 for (j = i + 1; j < length; j++)
2817 if (ranges[i].exp != ranges[j].exp)
2818 break;
2819 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2820 ranges[j].in_p, ranges[j].low, ranges[j].high))
2821 break;
2822 strict_overflow_p |= ranges[j].strict_overflow_p;
2825 if (j == i + 1)
2826 continue;
2828 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2829 opcode, ops, ranges[i].exp, NULL, in_p,
2830 low, high, strict_overflow_p))
2832 i = j - 1;
2833 any_changes = true;
2835 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2836 while update_range_test would fail. */
2837 else if (update_fail_count == 64)
2838 i = j - 1;
2839 else
2840 ++update_fail_count;
2843 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2844 ops, ranges);
2846 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2847 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2848 ops, ranges);
2849 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2850 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2851 ops, ranges);
2853 if (any_changes && opcode != ERROR_MARK)
2855 j = 0;
2856 FOR_EACH_VEC_ELT (*ops, i, oe)
2858 if (oe->op == error_mark_node)
2859 continue;
2860 else if (i != j)
2861 (*ops)[j] = oe;
2862 j++;
2864 ops->truncate (j);
2867 XDELETEVEC (ranges);
2868 return any_changes;
2871 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
2872 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
2873 otherwise the comparison code. */
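/* For illustration (a hypothetical statement): given
     v_1 = VEC_COND_EXPR <a_2 < b_3, { -1, ... }, { 0, ... }>;
   one would expect the routine to return LT_EXPR, store the defining
   statement in *RETS and clear *RETI, as the true/false vectors are
   already in the canonical all-ones/all-zeros form.  */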
2875 static tree_code
2876 ovce_extract_ops (tree var, gassign **rets, bool *reti)
2878 if (TREE_CODE (var) != SSA_NAME)
2879 return ERROR_MARK;
2881 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
2882 if (stmt == NULL)
2883 return ERROR_MARK;
2885 /* ??? If we start creating more COND_EXPR, we could perform
2886 this same optimization with them. For now, simplify. */
2887 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
2888 return ERROR_MARK;
2890 tree cond = gimple_assign_rhs1 (stmt);
2891 tree_code cmp = TREE_CODE (cond);
2892 if (TREE_CODE_CLASS (cmp) != tcc_comparison)
2893 return ERROR_MARK;
2895 /* ??? For now, allow only canonical true and false result vectors.
2896 We could expand this to other constants should the need arise,
2897 but at the moment we don't create them. */
2898 tree t = gimple_assign_rhs2 (stmt);
2899 tree f = gimple_assign_rhs3 (stmt);
2900 bool inv;
2901 if (integer_all_onesp (t))
2902 inv = false;
2903 else if (integer_all_onesp (f))
2905 cmp = invert_tree_comparison (cmp, false);
2906 inv = true;
2908 else
2909 return ERROR_MARK;
2910 if (!integer_zerop (f))
2911 return ERROR_MARK;
2913 /* Success! */
2914 if (rets)
2915 *rets = stmt;
2916 if (reti)
2917 *reti = inv;
2918 return cmp;
2921 /* Optimize the condition of VEC_COND_EXPRs which have been combined
2922 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
2924 static bool
2925 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
2927 unsigned int length = ops->length (), i, j;
2928 bool any_changes = false;
2930 if (length == 1)
2931 return false;
2933 for (i = 0; i < length; ++i)
2935 tree elt0 = (*ops)[i]->op;
2937 gassign *stmt0;
2938 bool invert;
2939 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
2940 if (cmp0 == ERROR_MARK)
2941 continue;
2943 for (j = i + 1; j < length; ++j)
2945 tree &elt1 = (*ops)[j]->op;
2947 gassign *stmt1;
2948 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
2949 if (cmp1 == ERROR_MARK)
2950 continue;
2952 tree cond0 = gimple_assign_rhs1 (stmt0);
2953 tree x0 = TREE_OPERAND (cond0, 0);
2954 tree y0 = TREE_OPERAND (cond0, 1);
2956 tree cond1 = gimple_assign_rhs1 (stmt1);
2957 tree x1 = TREE_OPERAND (cond1, 0);
2958 tree y1 = TREE_OPERAND (cond1, 1);
2960 tree comb;
2961 if (opcode == BIT_AND_EXPR)
2962 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
2963 else if (opcode == BIT_IOR_EXPR)
2964 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
2965 else
2966 gcc_unreachable ();
2967 if (comb == NULL)
2968 continue;
2970 /* Success! */
2971 if (dump_file && (dump_flags & TDF_DETAILS))
2973 fprintf (dump_file, "Transforming ");
2974 print_generic_expr (dump_file, cond0, 0);
2975 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
2976 print_generic_expr (dump_file, cond1, 0);
2977 fprintf (dump_file, " into ");
2978 print_generic_expr (dump_file, comb, 0);
2979 fputc ('\n', dump_file);
2982 gimple_assign_set_rhs1 (stmt0, comb);
2983 if (invert)
2984 std::swap (*gimple_assign_rhs2_ptr (stmt0),
2985 *gimple_assign_rhs3_ptr (stmt0));
2986 update_stmt (stmt0);
2988 elt1 = error_mark_node;
2989 any_changes = true;
2993 if (any_changes)
2995 operand_entry *oe;
2996 j = 0;
2997 FOR_EACH_VEC_ELT (*ops, i, oe)
2999 if (oe->op == error_mark_node)
3000 continue;
3001 else if (i != j)
3002 (*ops)[j] = oe;
3003 j++;
3005 ops->truncate (j);
3008 return any_changes;
3011 /* Return true if STMT is a cast like:
3012 <bb N>:
3014 _123 = (int) _234;
3016 <bb M>:
3017 # _345 = PHI <_123(N), 1(...), 1(...)>
3018 where _234 has bool type, _123 has a single use and
3019 bb N has a single successor M. This is commonly used in
3020 the last block of a range test. */
3022 static bool
3023 final_range_test_p (gimple *stmt)
3025 basic_block bb, rhs_bb;
3026 edge e;
3027 tree lhs, rhs;
3028 use_operand_p use_p;
3029 gimple *use_stmt;
3031 if (!gimple_assign_cast_p (stmt))
3032 return false;
3033 bb = gimple_bb (stmt);
3034 if (!single_succ_p (bb))
3035 return false;
3036 e = single_succ_edge (bb);
3037 if (e->flags & EDGE_COMPLEX)
3038 return false;
3040 lhs = gimple_assign_lhs (stmt);
3041 rhs = gimple_assign_rhs1 (stmt);
3042 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3043 || TREE_CODE (rhs) != SSA_NAME
3044 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
3045 return false;
3047 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
3048 if (!single_imm_use (lhs, &use_p, &use_stmt))
3049 return false;
3051 if (gimple_code (use_stmt) != GIMPLE_PHI
3052 || gimple_bb (use_stmt) != e->dest)
3053 return false;
3055 /* And that the rhs is defined in the same loop. */
3056 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
3057 if (rhs_bb == NULL
3058 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
3059 return false;
3061 return true;
3064 /* Return true if BB is a suitable basic block for inter-bb range test
3065 optimization. If BACKWARD is true, BB should be the only predecessor
3066 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
3067 or is compared against to find a common basic block to which all
3068 conditions branch if true resp. false. If BACKWARD is false,
3069 TEST_BB should be the only predecessor of BB. */
3071 static bool
3072 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
3073 bool backward)
3075 edge_iterator ei, ei2;
3076 edge e, e2;
3077 gimple *stmt;
3078 gphi_iterator gsi;
3079 bool other_edge_seen = false;
3080 bool is_cond;
3082 if (test_bb == bb)
3083 return false;
3084 /* Check last stmt first. */
3085 stmt = last_stmt (bb);
3086 if (stmt == NULL
3087 || (gimple_code (stmt) != GIMPLE_COND
3088 && (backward || !final_range_test_p (stmt)))
3089 || gimple_visited_p (stmt)
3090 || stmt_could_throw_p (stmt)
3091 || *other_bb == bb)
3092 return false;
3093 is_cond = gimple_code (stmt) == GIMPLE_COND;
3094 if (is_cond)
3096 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
3097 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
3098 to *OTHER_BB (if not set yet, try to find it out). */
3099 if (EDGE_COUNT (bb->succs) != 2)
3100 return false;
3101 FOR_EACH_EDGE (e, ei, bb->succs)
3103 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3104 return false;
3105 if (e->dest == test_bb)
3107 if (backward)
3108 continue;
3109 else
3110 return false;
3112 if (e->dest == bb)
3113 return false;
3114 if (*other_bb == NULL)
3116 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
3117 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3118 return false;
3119 else if (e->dest == e2->dest)
3120 *other_bb = e->dest;
3121 if (*other_bb == NULL)
3122 return false;
3124 if (e->dest == *other_bb)
3125 other_edge_seen = true;
3126 else if (backward)
3127 return false;
3129 if (*other_bb == NULL || !other_edge_seen)
3130 return false;
3132 else if (single_succ (bb) != *other_bb)
3133 return false;
3135 /* Now check all PHIs of *OTHER_BB. */
3136 e = find_edge (bb, *other_bb);
3137 e2 = find_edge (test_bb, *other_bb);
3138 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3140 gphi *phi = gsi.phi ();
3141 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3142 corresponding to BB and TEST_BB predecessor must be the same. */
3143 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
3144 gimple_phi_arg_def (phi, e2->dest_idx), 0))
3146 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3147 one of the PHIs should have the lhs of the last stmt in
3148 that block as PHI arg and that PHI should have 0 or 1
3149 corresponding to it in all other range test basic blocks
3150 considered. */
3151 if (!is_cond)
3153 if (gimple_phi_arg_def (phi, e->dest_idx)
3154 == gimple_assign_lhs (stmt)
3155 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
3156 || integer_onep (gimple_phi_arg_def (phi,
3157 e2->dest_idx))))
3158 continue;
3160 else
3162 gimple *test_last = last_stmt (test_bb);
3163 if (gimple_code (test_last) != GIMPLE_COND
3164 && gimple_phi_arg_def (phi, e2->dest_idx)
3165 == gimple_assign_lhs (test_last)
3166 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
3167 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
3168 continue;
3171 return false;
3174 return true;
3177 /* Return true if BB doesn't have side-effects that would disallow
3178 range test optimization, all SSA_NAMEs set in the bb are consumed
3179 in the bb and there are no PHIs. */
3181 static bool
3182 no_side_effect_bb (basic_block bb)
3184 gimple_stmt_iterator gsi;
3185 gimple *last;
3187 if (!gimple_seq_empty_p (phi_nodes (bb)))
3188 return false;
3189 last = last_stmt (bb);
3190 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3192 gimple *stmt = gsi_stmt (gsi);
3193 tree lhs;
3194 imm_use_iterator imm_iter;
3195 use_operand_p use_p;
3197 if (is_gimple_debug (stmt))
3198 continue;
3199 if (gimple_has_side_effects (stmt))
3200 return false;
3201 if (stmt == last)
3202 return true;
3203 if (!is_gimple_assign (stmt))
3204 return false;
3205 lhs = gimple_assign_lhs (stmt);
3206 if (TREE_CODE (lhs) != SSA_NAME)
3207 return false;
3208 if (gimple_assign_rhs_could_trap_p (stmt))
3209 return false;
3210 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3212 gimple *use_stmt = USE_STMT (use_p);
3213 if (is_gimple_debug (use_stmt))
3214 continue;
3215 if (gimple_bb (use_stmt) != bb)
3216 return false;
3219 return false;
3222 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3223 return true and fill in *OPS recursively. */
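/* Sketch of the recursion (hypothetical SSA names): for
     _5 = _1 | _3;
     _9 = _5 | _7;
   calling get_ops (_9, BIT_IOR_EXPR, ...) pushes operand entries for _1,
   _3 and _7, assuming each of them has a single use and is not itself
   defined by a further reassociable BIT_IOR_EXPR.  */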
3225 static bool
3226 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3227 struct loop *loop)
3229 gimple *stmt = SSA_NAME_DEF_STMT (var);
3230 tree rhs[2];
3231 int i;
3233 if (!is_reassociable_op (stmt, code, loop))
3234 return false;
3236 rhs[0] = gimple_assign_rhs1 (stmt);
3237 rhs[1] = gimple_assign_rhs2 (stmt);
3238 gimple_set_visited (stmt, true);
3239 for (i = 0; i < 2; i++)
3240 if (TREE_CODE (rhs[i]) == SSA_NAME
3241 && !get_ops (rhs[i], code, ops, loop)
3242 && has_single_use (rhs[i]))
3244 operand_entry *oe = operand_entry_pool.allocate ();
3246 oe->op = rhs[i];
3247 oe->rank = code;
3248 oe->id = 0;
3249 oe->count = 1;
3250 oe->stmt_to_insert = NULL;
3251 ops->safe_push (oe);
3253 return true;
3256 /* Find the ops that were added by get_ops starting from VAR, see if
3257 they were changed during update_range_test and if yes, create new
3258 stmts. */
3260 static tree
3261 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3262 unsigned int *pidx, struct loop *loop)
3264 gimple *stmt = SSA_NAME_DEF_STMT (var);
3265 tree rhs[4];
3266 int i;
3268 if (!is_reassociable_op (stmt, code, loop))
3269 return NULL;
3271 rhs[0] = gimple_assign_rhs1 (stmt);
3272 rhs[1] = gimple_assign_rhs2 (stmt);
3273 rhs[2] = rhs[0];
3274 rhs[3] = rhs[1];
3275 for (i = 0; i < 2; i++)
3276 if (TREE_CODE (rhs[i]) == SSA_NAME)
3278 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3279 if (rhs[2 + i] == NULL_TREE)
3281 if (has_single_use (rhs[i]))
3282 rhs[2 + i] = ops[(*pidx)++]->op;
3283 else
3284 rhs[2 + i] = rhs[i];
3287 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3288 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3290 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3291 var = make_ssa_name (TREE_TYPE (var));
3292 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3293 rhs[2], rhs[3]);
3294 gimple_set_uid (g, gimple_uid (stmt));
3295 gimple_set_visited (g, true);
3296 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3298 return var;
3301 /* Structure to track the initial value passed to get_ops and
3302 the range in the ops vector for each basic block. */
3304 struct inter_bb_range_test_entry
3306 tree op;
3307 unsigned int first_idx, last_idx;
3310 /* Inter-bb range test optimization.
3312 Returns TRUE if a gimple conditional is optimized to a true/false,
3313 otherwise returns FALSE.
3315 This indicates to the caller that it should run a CFG cleanup pass
3316 once reassociation is completed. */
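/* The typical shape handled here (a schematic example, not taken from a
   particular testcase) is a chain of single-predecessor blocks, each
   ending in a GIMPLE_COND on the same name and all branching to a common
   other_bb:
     <bb 2>: if (x_1 == 2) goto <bb 4>; else goto <bb 3>;
     <bb 3>: if (x_1 == 5) goto <bb 4>; else goto <bb 5>;
   The two conditions are collected as an ORed range test on x_1 and,
   where profitable, rewritten as a single test.  */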
3318 static bool
3319 maybe_optimize_range_tests (gimple *stmt)
3321 basic_block first_bb = gimple_bb (stmt);
3322 basic_block last_bb = first_bb;
3323 basic_block other_bb = NULL;
3324 basic_block bb;
3325 edge_iterator ei;
3326 edge e;
3327 auto_vec<operand_entry *> ops;
3328 auto_vec<inter_bb_range_test_entry> bbinfo;
3329 bool any_changes = false;
3330 bool cfg_cleanup_needed = false;
3332 /* Consider only basic blocks that end with GIMPLE_COND or
3333 a cast statement satisfying final_range_test_p. All
3334 but the last bb in the first_bb .. last_bb range
3335 should end with GIMPLE_COND. */
3336 if (gimple_code (stmt) == GIMPLE_COND)
3338 if (EDGE_COUNT (first_bb->succs) != 2)
3339 return cfg_cleanup_needed;
3341 else if (final_range_test_p (stmt))
3342 other_bb = single_succ (first_bb);
3343 else
3344 return cfg_cleanup_needed;
3346 if (stmt_could_throw_p (stmt))
3347 return cfg_cleanup_needed;
3349 /* As relative ordering of post-dominator sons isn't fixed,
3350 maybe_optimize_range_tests can be called first on any
3351 bb in the range we want to optimize. So, start searching
3352 backwards, if first_bb can be set to a predecessor. */
3353 while (single_pred_p (first_bb))
3355 basic_block pred_bb = single_pred (first_bb);
3356 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3357 break;
3358 if (!no_side_effect_bb (first_bb))
3359 break;
3360 first_bb = pred_bb;
3362 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3363 Before starting forward search in last_bb successors, find
3364 out the other_bb. */
3365 if (first_bb == last_bb)
3367 other_bb = NULL;
3368 /* As non-GIMPLE_COND last stmt always terminates the range,
3369 if forward search didn't discover anything, just give up. */
3370 if (gimple_code (stmt) != GIMPLE_COND)
3371 return cfg_cleanup_needed;
3372 /* Look at both successors. Either it ends with a GIMPLE_COND
3373 and satisfies suitable_cond_bb, or ends with a cast and
3374 other_bb is that cast's successor. */
3375 FOR_EACH_EDGE (e, ei, first_bb->succs)
3376 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3377 || e->dest == first_bb)
3378 return cfg_cleanup_needed;
3379 else if (single_pred_p (e->dest))
3381 stmt = last_stmt (e->dest);
3382 if (stmt
3383 && gimple_code (stmt) == GIMPLE_COND
3384 && EDGE_COUNT (e->dest->succs) == 2)
3386 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3387 break;
3388 else
3389 other_bb = NULL;
3391 else if (stmt
3392 && final_range_test_p (stmt)
3393 && find_edge (first_bb, single_succ (e->dest)))
3395 other_bb = single_succ (e->dest);
3396 if (other_bb == first_bb)
3397 other_bb = NULL;
3400 if (other_bb == NULL)
3401 return cfg_cleanup_needed;
3403 /* Now do the forward search, moving last_bb to successor bbs
3404 that aren't other_bb. */
3405 while (EDGE_COUNT (last_bb->succs) == 2)
3407 FOR_EACH_EDGE (e, ei, last_bb->succs)
3408 if (e->dest != other_bb)
3409 break;
3410 if (e == NULL)
3411 break;
3412 if (!single_pred_p (e->dest))
3413 break;
3414 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3415 break;
3416 if (!no_side_effect_bb (e->dest))
3417 break;
3418 last_bb = e->dest;
3420 if (first_bb == last_bb)
3421 return cfg_cleanup_needed;
3422 /* Here basic blocks first_bb through last_bb's predecessor
3423 end with GIMPLE_COND, all of them have one of the edges to
3424 other_bb and another to another block in the range,
3425 all blocks except first_bb don't have side-effects and
3426 last_bb ends with either GIMPLE_COND, or cast satisfying
3427 final_range_test_p. */
3428 for (bb = last_bb; ; bb = single_pred (bb))
3430 enum tree_code code;
3431 tree lhs, rhs;
3432 inter_bb_range_test_entry bb_ent;
3434 bb_ent.op = NULL_TREE;
3435 bb_ent.first_idx = ops.length ();
3436 bb_ent.last_idx = bb_ent.first_idx;
3437 e = find_edge (bb, other_bb);
3438 stmt = last_stmt (bb);
3439 gimple_set_visited (stmt, true);
3440 if (gimple_code (stmt) != GIMPLE_COND)
3442 use_operand_p use_p;
3443 gimple *phi;
3444 edge e2;
3445 unsigned int d;
3447 lhs = gimple_assign_lhs (stmt);
3448 rhs = gimple_assign_rhs1 (stmt);
3449 gcc_assert (bb == last_bb);
3451 /* stmt is
3452 _123 = (int) _234;
3454 followed by:
3455 <bb M>:
3456 # _345 = PHI <_123(N), 1(...), 1(...)>
3458 or 0 instead of 1. If it is 0, the _234
3459 range test is anded together with all the
3460 other range tests; if it is 1, it is ored with
3461 them. */
3462 single_imm_use (lhs, &use_p, &phi);
3463 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3464 e2 = find_edge (first_bb, other_bb);
3465 d = e2->dest_idx;
3466 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3467 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3468 code = BIT_AND_EXPR;
3469 else
3471 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3472 code = BIT_IOR_EXPR;
3475 /* If _234 SSA_NAME_DEF_STMT is
3476 _234 = _567 | _789;
3477 (or &, corresponding to 1/0 in the phi arguments),
3478 push into ops the individual range test arguments
3479 of the bitwise or resp. and, recursively. */
3480 if (!get_ops (rhs, code, &ops,
3481 loop_containing_stmt (stmt))
3482 && has_single_use (rhs))
3484 /* Otherwise, push the _234 range test itself. */
3485 operand_entry *oe = operand_entry_pool.allocate ();
3487 oe->op = rhs;
3488 oe->rank = code;
3489 oe->id = 0;
3490 oe->count = 1;
3491 oe->stmt_to_insert = NULL;
3492 ops.safe_push (oe);
3493 bb_ent.last_idx++;
3495 else
3496 bb_ent.last_idx = ops.length ();
3497 bb_ent.op = rhs;
3498 bbinfo.safe_push (bb_ent);
3499 continue;
3501 /* Otherwise stmt is GIMPLE_COND. */
3502 code = gimple_cond_code (stmt);
3503 lhs = gimple_cond_lhs (stmt);
3504 rhs = gimple_cond_rhs (stmt);
3505 if (TREE_CODE (lhs) == SSA_NAME
3506 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3507 && ((code != EQ_EXPR && code != NE_EXPR)
3508 || rhs != boolean_false_node
3509 /* Either push into ops the individual bitwise
3510 or resp. and operands, depending on which
3511 edge is other_bb. */
3512 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3513 ^ (code == EQ_EXPR))
3514 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3515 loop_containing_stmt (stmt))))
3517 /* Or push the GIMPLE_COND stmt itself. */
3518 operand_entry *oe = operand_entry_pool.allocate ();
3520 oe->op = NULL;
3521 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3522 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3523 /* oe->op = NULL signals that there is no SSA_NAME
3524 for the range test, and oe->id instead is the
3525 basic block number at whose end the GIMPLE_COND
3526 is. */
3527 oe->id = bb->index;
3528 oe->count = 1;
3529 oe->stmt_to_insert = NULL;
3530 ops.safe_push (oe);
3531 bb_ent.op = NULL;
3532 bb_ent.last_idx++;
3534 else if (ops.length () > bb_ent.first_idx)
3536 bb_ent.op = lhs;
3537 bb_ent.last_idx = ops.length ();
3539 bbinfo.safe_push (bb_ent);
3540 if (bb == first_bb)
3541 break;
3543 if (ops.length () > 1)
3544 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3545 if (any_changes)
3547 unsigned int idx, max_idx = 0;
3548 /* update_ops relies on has_single_use predicates returning the
3549 same values as they did during get_ops earlier. Additionally it
3550 never removes statements, only adds new ones and it should walk
3551 from the single imm use and check the predicate already before
3552 making those changes.
3553 On the other side, the handling of GIMPLE_COND directly can turn
3554 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3555 it needs to be done in a separate loop afterwards. */
3556 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3558 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3559 && bbinfo[idx].op != NULL_TREE)
3561 tree new_op;
3563 max_idx = idx;
3564 stmt = last_stmt (bb);
3565 new_op = update_ops (bbinfo[idx].op,
3566 (enum tree_code)
3567 ops[bbinfo[idx].first_idx]->rank,
3568 ops, &bbinfo[idx].first_idx,
3569 loop_containing_stmt (stmt));
3570 if (new_op == NULL_TREE)
3572 gcc_assert (bb == last_bb);
3573 new_op = ops[bbinfo[idx].first_idx++]->op;
3575 if (bbinfo[idx].op != new_op)
3577 imm_use_iterator iter;
3578 use_operand_p use_p;
3579 gimple *use_stmt, *cast_stmt = NULL;
3581 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3582 if (is_gimple_debug (use_stmt))
3583 continue;
3584 else if (gimple_code (use_stmt) == GIMPLE_COND
3585 || gimple_code (use_stmt) == GIMPLE_PHI)
3586 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3587 SET_USE (use_p, new_op);
3588 else if (gimple_assign_cast_p (use_stmt))
3589 cast_stmt = use_stmt;
3590 else
3591 gcc_unreachable ();
3592 if (cast_stmt)
3594 gcc_assert (bb == last_bb);
3595 tree lhs = gimple_assign_lhs (cast_stmt);
3596 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3597 enum tree_code rhs_code
3598 = gimple_assign_rhs_code (cast_stmt);
3599 gassign *g;
3600 if (is_gimple_min_invariant (new_op))
3602 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3603 g = gimple_build_assign (new_lhs, new_op);
3605 else
3606 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3607 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3608 gimple_set_uid (g, gimple_uid (cast_stmt));
3609 gimple_set_visited (g, true);
3610 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3611 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3612 if (is_gimple_debug (use_stmt))
3613 continue;
3614 else if (gimple_code (use_stmt) == GIMPLE_COND
3615 || gimple_code (use_stmt) == GIMPLE_PHI)
3616 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3617 SET_USE (use_p, new_lhs);
3618 else
3619 gcc_unreachable ();
3623 if (bb == first_bb)
3624 break;
3626 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3628 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3629 && bbinfo[idx].op == NULL_TREE
3630 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3632 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3634 if (idx > max_idx)
3635 max_idx = idx;
3637 /* If we collapse the conditional to a true/false
3638 condition, then bubble that knowledge up to our caller. */
3639 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3641 gimple_cond_make_false (cond_stmt);
3642 cfg_cleanup_needed = true;
3644 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3646 gimple_cond_make_true (cond_stmt);
3647 cfg_cleanup_needed = true;
3649 else
3651 gimple_cond_set_code (cond_stmt, NE_EXPR);
3652 gimple_cond_set_lhs (cond_stmt,
3653 ops[bbinfo[idx].first_idx]->op);
3654 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3656 update_stmt (cond_stmt);
3658 if (bb == first_bb)
3659 break;
3662 /* The above changes could result in basic blocks after the first
3663 modified one, up to and including last_bb, to be executed even if
3664 they would not be in the original program. If the value ranges of
3665 assignment lhs' in those bbs were dependent on the conditions
3666 guarding those basic blocks which now can change, the VRs might
3667 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
3668 are only used within the same bb, it should not be a big deal if
3669 we just reset all the VRs in those bbs. See PR68671. */
3670 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
3671 reset_flow_sensitive_info_in_bb (bb);
3673 return cfg_cleanup_needed;
3676 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3677 of STMT in its operands. This is also known as a "destructive
3678 update" operation. */
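/* Schematically (hypothetical names), the pattern recognized is:
     b_1 = PHI <a_2 (latch), ...>
     a_2 = c_3 + b_1;
   Here is_phi_for_stmt (the addition, b_1) returns true, because the PHI
   defining b_1 uses a_2, the lhs of the addition.  */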
3680 static bool
3681 is_phi_for_stmt (gimple *stmt, tree operand)
3683 gimple *def_stmt;
3684 gphi *def_phi;
3685 tree lhs;
3686 use_operand_p arg_p;
3687 ssa_op_iter i;
3689 if (TREE_CODE (operand) != SSA_NAME)
3690 return false;
3692 lhs = gimple_assign_lhs (stmt);
3694 def_stmt = SSA_NAME_DEF_STMT (operand);
3695 def_phi = dyn_cast <gphi *> (def_stmt);
3696 if (!def_phi)
3697 return false;
3699 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3700 if (lhs == USE_FROM_PTR (arg_p))
3701 return true;
3702 return false;
3705 /* Remove def stmt of VAR if VAR has zero uses and recurse
3706 on rhs1 operand if so. */
3708 static void
3709 remove_visited_stmt_chain (tree var)
3711 gimple *stmt;
3712 gimple_stmt_iterator gsi;
3714 while (1)
3716 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3717 return;
3718 stmt = SSA_NAME_DEF_STMT (var);
3719 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3721 var = gimple_assign_rhs1 (stmt);
3722 gsi = gsi_for_stmt (stmt);
3723 reassoc_remove_stmt (&gsi);
3724 release_defs (stmt);
3726 else
3727 return;
3731 /* This function checks three consecutive operands in
3732 the passed operands vector OPS starting from OPINDEX and
3733 swaps two operands if it is profitable for the binary operation
3734 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3736 We pair ops with the same rank if possible.
3738 The alternative we try is to see if STMT is a destructive
3739 update style statement, which is like:
3740 b = phi (a, ...)
3741 a = c + b;
3742 In that case, we want to use the destructive update form to
3743 expose the possible vectorizer sum reduction opportunity.
3744 In that case, the third operand will be the phi node. This
3745 check is not performed if STMT is null.
3747 We could, of course, try to be better as noted above, and do a
3748 lot of work to try to find these opportunities in >3 operand
3749 cases, but it is unlikely to be worth it. */
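/* A small illustration of the rank-pairing case (hypothetical ranks):
   with ops {a (rank 2), b (rank 2), c (rank 3)} at OPINDEX, the first
   and third entries are swapped so the vector becomes {c, b, a}; the
   binary operation consuming the last two operands then pairs the two
   rank-2 operands, which is more likely to expose a redundancy.  */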
3751 static void
3752 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
3753 unsigned int opindex, gimple *stmt)
3755 operand_entry *oe1, *oe2, *oe3;
3757 oe1 = ops[opindex];
3758 oe2 = ops[opindex + 1];
3759 oe3 = ops[opindex + 2];
3761 if ((oe1->rank == oe2->rank
3762 && oe2->rank != oe3->rank)
3763 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3764 && !is_phi_for_stmt (stmt, oe1->op)
3765 && !is_phi_for_stmt (stmt, oe2->op)))
3767 operand_entry temp = *oe3;
3768 oe3->op = oe1->op;
3769 oe3->rank = oe1->rank;
3770 oe1->op = temp.op;
3771 oe1->rank= temp.rank;
3773 else if ((oe1->rank == oe3->rank
3774 && oe2->rank != oe3->rank)
3775 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3776 && !is_phi_for_stmt (stmt, oe1->op)
3777 && !is_phi_for_stmt (stmt, oe3->op)))
3779 operand_entry temp = *oe2;
3780 oe2->op = oe1->op;
3781 oe2->rank = oe1->rank;
3782 oe1->op = temp.op;
3783 oe1->rank = temp.rank;
3787 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3788 two definitions, otherwise return STMT. */
3790 static inline gimple *
3791 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
3793 if (TREE_CODE (rhs1) == SSA_NAME
3794 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3795 stmt = SSA_NAME_DEF_STMT (rhs1);
3796 if (TREE_CODE (rhs2) == SSA_NAME
3797 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3798 stmt = SSA_NAME_DEF_STMT (rhs2);
3799 return stmt;
3802 /* Recursively rewrite our linearized statements so that the operators
3803 match those in OPS[OPINDEX], putting the computation in rank
3804 order. Return new lhs. */
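/* For example (a hypothetical four-operand chain): if the linearized
   chain computes ((a + b) + c) + d and OPS holds {w, x, y, z}, the chain
   is rewritten so that the innermost statement computes y + z, the next
   statement adds x and the outermost adds w, creating new SSA names
   where the operands of a statement actually change.  */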
3806 static tree
3807 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
3808 vec<operand_entry *> ops, bool changed)
3810 tree rhs1 = gimple_assign_rhs1 (stmt);
3811 tree rhs2 = gimple_assign_rhs2 (stmt);
3812 tree lhs = gimple_assign_lhs (stmt);
3813 operand_entry *oe;
3815 /* The final recursion case for this function is that you have
3816 exactly two operations left.
3817 If we had exactly one op in the entire list to start with, we
3818 would have never called this function, and the tail recursion
3819 rewrites them one at a time. */
3820 if (opindex + 2 == ops.length ())
3822 operand_entry *oe1, *oe2;
3824 oe1 = ops[opindex];
3825 oe2 = ops[opindex + 1];
3827 if (rhs1 != oe1->op || rhs2 != oe2->op)
3829 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3830 unsigned int uid = gimple_uid (stmt);
3832 if (dump_file && (dump_flags & TDF_DETAILS))
3834 fprintf (dump_file, "Transforming ");
3835 print_gimple_stmt (dump_file, stmt, 0, 0);
3838 /* Even when changed is false, reassociation could have e.g. removed
3839 some redundant operations, so unless we are just swapping the
3840 arguments or unless there is no change at all (then we just
3841 return lhs), force creation of a new SSA_NAME. */
3842 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
3844 gimple *insert_point
3845 = find_insert_point (stmt, oe1->op, oe2->op);
3846 /* If the stmt that defines operand has to be inserted, insert it
3847 before the use. */
3848 if (oe1->stmt_to_insert)
3849 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
3850 if (oe2->stmt_to_insert)
3851 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
3852 lhs = make_ssa_name (TREE_TYPE (lhs));
3853 stmt
3854 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3855 oe1->op, oe2->op);
3856 gimple_set_uid (stmt, uid);
3857 gimple_set_visited (stmt, true);
3858 if (insert_point == gsi_stmt (gsi))
3859 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3860 else
3861 insert_stmt_after (stmt, insert_point);
3863 else
3865 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3866 == stmt);
3867 /* If the stmt that defines operand has to be inserted, insert it
3868 before the use. */
3869 if (oe1->stmt_to_insert)
3870 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
3871 if (oe2->stmt_to_insert)
3872 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
3873 gimple_assign_set_rhs1 (stmt, oe1->op);
3874 gimple_assign_set_rhs2 (stmt, oe2->op);
3875 update_stmt (stmt);
3878 if (rhs1 != oe1->op && rhs1 != oe2->op)
3879 remove_visited_stmt_chain (rhs1);
3881 if (dump_file && (dump_flags & TDF_DETAILS))
3883 fprintf (dump_file, " into ");
3884 print_gimple_stmt (dump_file, stmt, 0, 0);
3887 return lhs;
3890 /* If we hit here, we should have 3 or more ops left. */
3891 gcc_assert (opindex + 2 < ops.length ());
3893 /* Rewrite the next operator. */
3894 oe = ops[opindex];
3896 /* If the stmt that defines operand has to be inserted, insert it
3897 before the use. */
3898 if (oe->stmt_to_insert)
3899 insert_stmt_before_use (stmt, oe->stmt_to_insert);
3901 /* Recurse on the LHS of the binary operator, which is guaranteed to
3902 be the non-leaf side. */
3903 tree new_rhs1
3904 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3905 changed || oe->op != rhs2);
3907 if (oe->op != rhs2 || new_rhs1 != rhs1)
3909 if (dump_file && (dump_flags & TDF_DETAILS))
3911 fprintf (dump_file, "Transforming ");
3912 print_gimple_stmt (dump_file, stmt, 0, 0);
3915 /* If changed is false, this is either opindex == 0
3916 or all outer rhs2's were equal to corresponding oe->op,
3917 and powi_result is NULL.
3918 That means lhs is equivalent before and after reassociation.
3919 Otherwise ensure the old lhs SSA_NAME is not reused and
3920 create a new stmt as well, so that any debug stmts will be
3921 properly adjusted. */
3922 if (changed)
3924 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3925 unsigned int uid = gimple_uid (stmt);
3926 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3928 lhs = make_ssa_name (TREE_TYPE (lhs));
3929 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3930 new_rhs1, oe->op);
3931 gimple_set_uid (stmt, uid);
3932 gimple_set_visited (stmt, true);
3933 if (insert_point == gsi_stmt (gsi))
3934 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3935 else
3936 insert_stmt_after (stmt, insert_point);
3938 else
3940 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3941 == stmt);
3942 gimple_assign_set_rhs1 (stmt, new_rhs1);
3943 gimple_assign_set_rhs2 (stmt, oe->op);
3944 update_stmt (stmt);
3947 if (dump_file && (dump_flags & TDF_DETAILS))
3949 fprintf (dump_file, " into ");
3950 print_gimple_stmt (dump_file, stmt, 0, 0);
3953 return lhs;
3956 /* Find out how many cycles we need to compute the statement chain.
3957 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
3958 maximum number of independent statements we may execute per cycle. */
3960 static int
3961 get_required_cycles (int ops_num, int cpu_width)
3963 int res;
3964 int elog;
3965 unsigned int rest;
3967 /* While we have more than 2 * cpu_width operands
3968 we may reduce the number of operands by cpu_width
3969 per cycle. */
3970 res = ops_num / (2 * cpu_width);
3972 /* The remaining operand count can be halved each cycle
3973 until we have only one operand. */
3974 rest = (unsigned)(ops_num - res * cpu_width);
3975 elog = exact_log2 (rest);
3976 if (elog >= 0)
3977 res += elog;
3978 else
3979 res += floor_log2 (rest) + 1;
3981 return res;
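/* A rough worked example of the computation above, assuming
   ops_num = 10 and cpu_width = 2:
     res  = 10 / (2 * 2)        = 2
     rest = 10 - 2 * 2          = 6    (not a power of two)
     res += floor_log2 (6) + 1  = 2 + 3 = 5 cycles.
   With ops_num = 8 instead, rest = 4 is an exact power of two and the
   result is 2 + exact_log2 (4) = 4 cycles.  */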
3984 /* Returns an optimal number of registers to use for computation of
3985 given statements. */
3987 static int
3988 get_reassociation_width (int ops_num, enum tree_code opc,
3989 machine_mode mode)
3991 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3992 int width;
3993 int width_min;
3994 int cycles_best;
3996 if (param_width > 0)
3997 width = param_width;
3998 else
3999 width = targetm.sched.reassociation_width (opc, mode);
4001 if (width == 1)
4002 return width;
4004 /* Get the minimal time required for sequence computation. */
4005 cycles_best = get_required_cycles (ops_num, width);
4007 /* Check whether we can use a smaller width and still compute the sequence
4008 in the same time. That allows us to reduce register usage.
4009 get_required_cycles is monotonically increasing with lower width
4010 so we can perform a binary search for the minimal width that still
4011 results in the optimal cycle count. */
4012 width_min = 1;
4013 while (width > width_min)
4015 int width_mid = (width + width_min) / 2;
4017 if (get_required_cycles (ops_num, width_mid) == cycles_best)
4018 width = width_mid;
4019 else if (width_min < width_mid)
4020 width_min = width_mid;
4021 else
4022 break;
4025 return width;
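/* A small worked example of the binary search above, assuming four
   operands and a target-provided width of 4:
     get_required_cycles (4, 4) = 2 = cycles_best
     width_mid = 2: get_required_cycles (4, 2) = 2  -> width = 2
     width_mid = 1: get_required_cycles (4, 1) = 3  -> stop
   so a width of 2 already reaches the optimal two cycles and is
   returned, using fewer registers than a width of 4 would.  */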
4028 /* Recursively rewrite our linearized statements so that the operators
4029 match those in OPS[OPINDEX], putting the computation in rank
4030 order and trying to allow operations to be executed in
4031 parallel. */
4033 static void
4034 rewrite_expr_tree_parallel (gassign *stmt, int width,
4035 vec<operand_entry *> ops)
4037 enum tree_code opcode = gimple_assign_rhs_code (stmt);
4038 int op_num = ops.length ();
4039 int stmt_num = op_num - 1;
4040 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
4041 int op_index = op_num - 1;
4042 int stmt_index = 0;
4043 int ready_stmts_end = 0;
4044 int i = 0;
4045 gimple *stmt1 = NULL, *stmt2 = NULL;
4046 tree last_rhs1 = gimple_assign_rhs1 (stmt);
4048 /* We start expression rewriting from the top statements.
4049 So, in this loop we create a full list of statements
4050 we will work with. */
4051 stmts[stmt_num - 1] = stmt;
4052 for (i = stmt_num - 2; i >= 0; i--)
4053 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
4055 for (i = 0; i < stmt_num; i++)
4057 tree op1, op2;
4059 /* Determine whether we should use results of
4060 already handled statements or not. */
4061 if (ready_stmts_end == 0
4062 && (i - stmt_index >= width || op_index < 1))
4063 ready_stmts_end = i;
4065 /* Now we choose operands for the next statement. A nonzero
4066 value in ready_stmts_end means that we should use
4067 the results of already generated statements as new operands. */
4068 if (ready_stmts_end > 0)
4070 op1 = gimple_assign_lhs (stmts[stmt_index++]);
4071 if (ready_stmts_end > stmt_index)
4072 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4073 else if (op_index >= 0)
4075 operand_entry *oe = ops[op_index--];
4076 stmt2 = oe->stmt_to_insert;
4077 op2 = oe->op;
4079 else
4081 gcc_assert (stmt_index < i);
4082 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4085 if (stmt_index >= ready_stmts_end)
4086 ready_stmts_end = 0;
4088 else
4090 if (op_index > 1)
4091 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
4092 operand_entry *oe2 = ops[op_index--];
4093 operand_entry *oe1 = ops[op_index--];
4094 op2 = oe2->op;
4095 stmt2 = oe2->stmt_to_insert;
4096 op1 = oe1->op;
4097 stmt1 = oe1->stmt_to_insert;
4100 /* If we are emitting the last statement, we should put the
4101 remaining operands into it. This will also
4102 break the loop. */
4103 if (op_index < 0 && stmt_index == i)
4104 i = stmt_num - 1;
4106 if (dump_file && (dump_flags & TDF_DETAILS))
4108 fprintf (dump_file, "Transforming ");
4109 print_gimple_stmt (dump_file, stmts[i], 0, 0);
4112 /* We keep the original statement only for the last one. All
4113 others are recreated. */
4114 if (i == stmt_num - 1)
4116 /* If the stmt that defines operand has to be inserted, insert it
4117 before the use. */
4118 if (stmt1)
4119 insert_stmt_before_use (stmts[i], stmt1);
4120 if (stmt2)
4121 insert_stmt_before_use (stmts[i], stmt2);
4122 gimple_assign_set_rhs1 (stmts[i], op1);
4123 gimple_assign_set_rhs2 (stmts[i], op2);
4124 update_stmt (stmts[i]);
4126 else
4128 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
4129 /* If the stmt that defines operand has to be inserted, insert it
4130 before the newly created build_and_add_sum stmt. */
4131 if (stmt1)
4132 insert_stmt_before_use (stmts[i], stmt1);
4133 if (stmt2)
4134 insert_stmt_before_use (stmts[i], stmt2);
4136 if (dump_file && (dump_flags & TDF_DETAILS))
4138 fprintf (dump_file, " into ");
4139 print_gimple_stmt (dump_file, stmts[i], 0, 0);
4143 remove_visited_stmt_chain (last_rhs1);
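/* A minimal sketch of the effect, assuming width == 2 and four operands
   a, b, c, d feeding a PLUS_EXPR chain.  Instead of the serial form
     _1 = a + b;  _2 = _1 + c;  x = _2 + d;
   the loop above emits two independent additions whose results are
   combined by the (kept) last statement:
     _1 = a + b;  _2 = c + d;  x = _1 + _2;
   so the first two additions can execute in parallel.  */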
4146 /* Transform STMT, which is really (A + B) + (C + D), into the left
4147 linear form, ((A + B) + C) + D.
4148 Recurse on D if necessary. */
4150 static void
4151 linearize_expr (gimple *stmt)
4153 gimple_stmt_iterator gsi;
4154 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
4155 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4156 gimple *oldbinrhs = binrhs;
4157 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4158 gimple *newbinrhs = NULL;
4159 struct loop *loop = loop_containing_stmt (stmt);
4160 tree lhs = gimple_assign_lhs (stmt);
4162 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
4163 && is_reassociable_op (binrhs, rhscode, loop));
4165 gsi = gsi_for_stmt (stmt);
4167 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
4168 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
4169 gimple_assign_rhs_code (binrhs),
4170 gimple_assign_lhs (binlhs),
4171 gimple_assign_rhs2 (binrhs));
4172 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
4173 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
4174 gimple_set_uid (binrhs, gimple_uid (stmt));
4176 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
4177 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4179 if (dump_file && (dump_flags & TDF_DETAILS))
4181 fprintf (dump_file, "Linearized: ");
4182 print_gimple_stmt (dump_file, stmt, 0, 0);
4185 reassociate_stats.linearized++;
4186 update_stmt (stmt);
4188 gsi = gsi_for_stmt (oldbinrhs);
4189 reassoc_remove_stmt (&gsi);
4190 release_defs (oldbinrhs);
4192 gimple_set_visited (stmt, true);
4193 gimple_set_visited (binlhs, true);
4194 gimple_set_visited (binrhs, true);
4196 /* Tail recurse on the new rhs if it still needs reassociation. */
4197 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
4198 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
4199 want to change the algorithm while converting to tuples. */
4200 linearize_expr (stmt);
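/* A minimal before/after sketch of the rewrite above for a PLUS_EXPR
   chain (SSA names are illustrative):
     before:  _1 = a + b;  _2 = c + d;  x = _1 + _2;
     after:   _1 = a + b;  _3 = _1 + d;  x = _3 + c;
   The chain is now fully left-linear; the exact operand order within it
   does not matter, since the operands are later collected into a flat
   vector and sorted by rank anyway.  */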
4203 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4204 it. Otherwise, return NULL. */
4206 static gimple *
4207 get_single_immediate_use (tree lhs)
4209 use_operand_p immuse;
4210 gimple *immusestmt;
4212 if (TREE_CODE (lhs) == SSA_NAME
4213 && single_imm_use (lhs, &immuse, &immusestmt)
4214 && is_gimple_assign (immusestmt))
4215 return immusestmt;
4217 return NULL;
4220 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
4221 representing the negated value. Insertions of any necessary
4222 instructions go before GSI.
4223 This function is recursive in that, if you hand it "a_5" as the
4224 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
4225 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
4227 static tree
4228 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
4230 gimple *negatedefstmt = NULL;
4231 tree resultofnegate;
4232 gimple_stmt_iterator gsi;
4233 unsigned int uid;
4235 /* If we are trying to negate a name, defined by an add, negate the
4236 add operands instead. */
4237 if (TREE_CODE (tonegate) == SSA_NAME)
4238 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
4239 if (TREE_CODE (tonegate) == SSA_NAME
4240 && is_gimple_assign (negatedefstmt)
4241 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
4242 && has_single_use (gimple_assign_lhs (negatedefstmt))
4243 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
4245 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
4246 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
4247 tree lhs = gimple_assign_lhs (negatedefstmt);
4248 gimple *g;
4250 gsi = gsi_for_stmt (negatedefstmt);
4251 rhs1 = negate_value (rhs1, &gsi);
4253 gsi = gsi_for_stmt (negatedefstmt);
4254 rhs2 = negate_value (rhs2, &gsi);
4256 gsi = gsi_for_stmt (negatedefstmt);
4257 lhs = make_ssa_name (TREE_TYPE (lhs));
4258 gimple_set_visited (negatedefstmt, true);
4259 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
4260 gimple_set_uid (g, gimple_uid (negatedefstmt));
4261 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4262 return lhs;
4265 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
4266 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
4267 NULL_TREE, true, GSI_SAME_STMT);
4268 gsi = *gsip;
4269 uid = gimple_uid (gsi_stmt (gsi));
4270 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
4272 gimple *stmt = gsi_stmt (gsi);
4273 if (gimple_uid (stmt) != 0)
4274 break;
4275 gimple_set_uid (stmt, uid);
4277 return resultofnegate;
4280 /* Return true if we should break up the subtract in STMT into an add
4281 with negate. This is true when the subtract's operands are really
4282 adds, or the subtract itself is used in an add expression. In
4283 either case, breaking up the subtract into an add with negate
4284 exposes the adds to reassociation. */
4286 static bool
4287 should_break_up_subtract (gimple *stmt)
4289 tree lhs = gimple_assign_lhs (stmt);
4290 tree binlhs = gimple_assign_rhs1 (stmt);
4291 tree binrhs = gimple_assign_rhs2 (stmt);
4292 gimple *immusestmt;
4293 struct loop *loop = loop_containing_stmt (stmt);
4295 if (TREE_CODE (binlhs) == SSA_NAME
4296 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4297 return true;
4299 if (TREE_CODE (binrhs) == SSA_NAME
4300 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4301 return true;
4303 if (TREE_CODE (lhs) == SSA_NAME
4304 && (immusestmt = get_single_immediate_use (lhs))
4305 && is_gimple_assign (immusestmt)
4306 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4307 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4308 return true;
4309 return false;
4312 /* Transform STMT from A - B into A + -B. */
4314 static void
4315 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4317 tree rhs1 = gimple_assign_rhs1 (stmt);
4318 tree rhs2 = gimple_assign_rhs2 (stmt);
4320 if (dump_file && (dump_flags & TDF_DETAILS))
4322 fprintf (dump_file, "Breaking up subtract ");
4323 print_gimple_stmt (dump_file, stmt, 0, 0);
4326 rhs2 = negate_value (rhs2, gsip);
4327 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4328 update_stmt (stmt);
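/* A minimal sketch, e.g. for c = a - b where c only feeds an addition:
     before:  c = a - b;
     after:   _1 = -b;  c = a + _1;
   negate_value may push the negation further down when b is itself a
   single-use PLUS_EXPR result.  */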
4331 /* Determine whether STMT is a builtin call that raises an SSA name
4332 to an integer power and has only one use. If so, and this is early
4333 reassociation and unsafe math optimizations are permitted, place
4334 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4335 If any of these conditions does not hold, return FALSE. */
4337 static bool
4338 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
4340 tree arg1;
4341 REAL_VALUE_TYPE c, cint;
4343 switch (gimple_call_combined_fn (stmt))
4345 CASE_CFN_POW:
4346 if (flag_errno_math)
4347 return false;
4349 *base = gimple_call_arg (stmt, 0);
4350 arg1 = gimple_call_arg (stmt, 1);
4352 if (TREE_CODE (arg1) != REAL_CST)
4353 return false;
4355 c = TREE_REAL_CST (arg1);
4357 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4358 return false;
4360 *exponent = real_to_integer (&c);
4361 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4362 if (!real_identical (&c, &cint))
4363 return false;
4365 break;
4367 CASE_CFN_POWI:
4368 *base = gimple_call_arg (stmt, 0);
4369 arg1 = gimple_call_arg (stmt, 1);
4371 if (!tree_fits_shwi_p (arg1))
4372 return false;
4374 *exponent = tree_to_shwi (arg1);
4375 break;
4377 default:
4378 return false;
4381 /* Expanding negative exponents is generally unproductive, so we don't
4382 complicate matters with those. Exponents of zero and one should
4383 have been handled by expression folding. */
4384 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4385 return false;
4387 return true;
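/* For illustration: with unsafe math optimizations enabled, a call such
   as t = pow (x, 3.0) is accepted with *BASE = x and *EXPONENT = 3, and
   so is t = __builtin_powi (x, 3); whereas pow (x, 2.5) (non-integral
   exponent), pow (x, -2.0) (negative exponent) or any pow call when
   flag_errno_math is set are all rejected.  */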
4390 /* Try to derive and add operand entry for OP to *OPS. Return false if
4391 unsuccessful. */
4393 static bool
4394 try_special_add_to_ops (vec<operand_entry *> *ops,
4395 enum tree_code code,
4396 tree op, gimple* def_stmt)
4398 tree base = NULL_TREE;
4399 HOST_WIDE_INT exponent = 0;
4401 if (TREE_CODE (op) != SSA_NAME
4402 || ! has_single_use (op))
4403 return false;
4405 if (code == MULT_EXPR
4406 && reassoc_insert_powi_p
4407 && flag_unsafe_math_optimizations
4408 && is_gimple_call (def_stmt)
4409 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
4411 add_repeat_to_ops_vec (ops, base, exponent);
4412 gimple_set_visited (def_stmt, true);
4413 return true;
4415 else if (code == MULT_EXPR
4416 && is_gimple_assign (def_stmt)
4417 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
4418 && !HONOR_SNANS (TREE_TYPE (op))
4419 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
4420 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
4422 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4423 tree cst = build_minus_one_cst (TREE_TYPE (op));
4424 add_to_ops_vec (ops, rhs1);
4425 add_to_ops_vec (ops, cst);
4426 gimple_set_visited (def_stmt, true);
4427 return true;
4430 return false;
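/* Two illustrative cases for a MULT_EXPR chain (OP single-use):
     t = __builtin_powi (x, 3); ... * t   -> a repeat factor (x, 3) is
       recorded via add_repeat_to_ops_vec instead of a plain operand;
     t = -y; ... * t                      -> y and -1 are pushed onto
       OPS, exposing the negation to the trailing -1 handling done
       later in reassociate_bb.  */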
4433 /* Recursively linearize a binary expression that is the RHS of STMT.
4434 Place the operands of the expression tree in the vector named OPS. */
4436 static void
4437 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
4438 bool is_associative, bool set_visited)
4440 tree binlhs = gimple_assign_rhs1 (stmt);
4441 tree binrhs = gimple_assign_rhs2 (stmt);
4442 gimple *binlhsdef = NULL, *binrhsdef = NULL;
4443 bool binlhsisreassoc = false;
4444 bool binrhsisreassoc = false;
4445 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4446 struct loop *loop = loop_containing_stmt (stmt);
4448 if (set_visited)
4449 gimple_set_visited (stmt, true);
4451 if (TREE_CODE (binlhs) == SSA_NAME)
4453 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4454 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4455 && !stmt_could_throw_p (binlhsdef));
4458 if (TREE_CODE (binrhs) == SSA_NAME)
4460 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4461 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4462 && !stmt_could_throw_p (binrhsdef));
4465 /* If the LHS is not reassociable, but the RHS is, we need to swap
4466 them. If neither is reassociable, there is nothing we can do, so
4467 just put them in the ops vector. If the LHS is reassociable,
4468 linearize it. If both are reassociable, then linearize the RHS
4469 and the LHS. */
4471 if (!binlhsisreassoc)
4473 /* If this is not an associative operation like division, give up. */
4474 if (!is_associative)
4476 add_to_ops_vec (ops, binrhs);
4477 return;
4480 if (!binrhsisreassoc)
4482 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4483 add_to_ops_vec (ops, binrhs);
4485 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
4486 add_to_ops_vec (ops, binlhs);
4488 return;
4491 if (dump_file && (dump_flags & TDF_DETAILS))
4493 fprintf (dump_file, "swapping operands of ");
4494 print_gimple_stmt (dump_file, stmt, 0, 0);
4497 swap_ssa_operands (stmt,
4498 gimple_assign_rhs1_ptr (stmt),
4499 gimple_assign_rhs2_ptr (stmt));
4500 update_stmt (stmt);
4502 if (dump_file && (dump_flags & TDF_DETAILS))
4504 fprintf (dump_file, " is now ");
4505 print_gimple_stmt (dump_file, stmt, 0, 0);
4508 /* We want to make it so the lhs is always the reassociative op,
4509 so swap. */
4510 std::swap (binlhs, binrhs);
4512 else if (binrhsisreassoc)
4514 linearize_expr (stmt);
4515 binlhs = gimple_assign_rhs1 (stmt);
4516 binrhs = gimple_assign_rhs2 (stmt);
4519 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4520 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4521 rhscode, loop));
4522 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4523 is_associative, set_visited);
4525 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4526 add_to_ops_vec (ops, binrhs);
4529 /* Repropagate the negates back into subtracts, since no other pass
4530 currently does it. */
4532 static void
4533 repropagate_negates (void)
4535 unsigned int i = 0;
4536 tree negate;
4538 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4540 gimple *user = get_single_immediate_use (negate);
4542 if (!user || !is_gimple_assign (user))
4543 continue;
4545 /* The negate operand can be either operand of a PLUS_EXPR
4546 (it can be the LHS if the RHS is a constant for example).
4548 Force the negate operand to the RHS of the PLUS_EXPR, then
4549 transform the PLUS_EXPR into a MINUS_EXPR. */
4550 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4552 /* If the negated operand appears on the LHS of the
4553 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4554 to force the negated operand to the RHS of the PLUS_EXPR. */
4555 if (gimple_assign_rhs1 (user) == negate)
4557 swap_ssa_operands (user,
4558 gimple_assign_rhs1_ptr (user),
4559 gimple_assign_rhs2_ptr (user));
4562 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4563 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4564 if (gimple_assign_rhs2 (user) == negate)
4566 tree rhs1 = gimple_assign_rhs1 (user);
4567 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
4568 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4569 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4570 update_stmt (user);
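/* For example:  x = -a;  y = b + x;  is rewritten into  y = b - a;
   (after first swapping the operands if x appeared as rhs1).  */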
4573 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4575 if (gimple_assign_rhs1 (user) == negate)
4577 /* We have
4578 x = -a
4579 y = x - b
4580 which we transform into
4581 x = a + b
4582 y = -x .
4583 This pushes down the negate which we possibly can merge
4584 into some other operation, hence insert it into the
4585 plus_negates vector. */
4586 gimple *feed = SSA_NAME_DEF_STMT (negate);
4587 tree a = gimple_assign_rhs1 (feed);
4588 tree b = gimple_assign_rhs2 (user);
4589 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4590 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4591 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4592 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
4593 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4594 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4595 user = gsi_stmt (gsi2);
4596 update_stmt (user);
4597 reassoc_remove_stmt (&gsi);
4598 release_defs (feed);
4599 plus_negates.safe_push (gimple_assign_lhs (user));
4601 else
4603 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4604 rid of one operation. */
4605 gimple *feed = SSA_NAME_DEF_STMT (negate);
4606 tree a = gimple_assign_rhs1 (feed);
4607 tree rhs1 = gimple_assign_rhs1 (user);
4608 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4609 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4610 update_stmt (gsi_stmt (gsi));
4616 /* Returns true if OP is of a type for which we can do reassociation.
4617 That is for integral or non-saturating fixed-point types, and for
4618 floating point types when associative-math is enabled. */
4620 static bool
4621 can_reassociate_p (tree op)
4623 tree type = TREE_TYPE (op);
4624 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4625 || NON_SAT_FIXED_POINT_TYPE_P (type)
4626 || (flag_associative_math && FLOAT_TYPE_P (type)))
4627 return true;
4628 return false;
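/* E.g. unsigned arithmetic always qualifies (overflow wraps), plain
   signed int only does with -fwrapv, and float/double only with
   -fassociative-math.  */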
4631 /* Break up subtract operations in block BB.
4633 We do this top down because we don't know whether the subtract is
4634 part of a possible chain of reassociation except at the top.
4636 IE given
4637 d = f + g
4638 c = a + e
4639 b = c - d
4640 q = b - r
4641 k = t - q
4643 we want to break up k = t - q, but we won't until we've transformed q
4644 = b - r, which won't be broken up until we transform b = c - d.
4646 En passant, clear the GIMPLE visited flag on every statement
4647 and set UIDs within each basic block. */
4649 static void
4650 break_up_subtract_bb (basic_block bb)
4652 gimple_stmt_iterator gsi;
4653 basic_block son;
4654 unsigned int uid = 1;
4656 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4658 gimple *stmt = gsi_stmt (gsi);
4659 gimple_set_visited (stmt, false);
4660 gimple_set_uid (stmt, uid++);
4662 if (!is_gimple_assign (stmt)
4663 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4664 continue;
4666 /* Look for simple gimple subtract operations. */
4667 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4669 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4670 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4671 continue;
4673 /* Check for a subtract used only in an addition. If this
4674 is the case, transform it into add of a negate for better
4675 reassociation. IE transform C = A-B into C = A + -B if C
4676 is only used in an addition. */
4677 if (should_break_up_subtract (stmt))
4678 break_up_subtract (stmt, &gsi);
4680 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4681 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4682 plus_negates.safe_push (gimple_assign_lhs (stmt));
4684 for (son = first_dom_son (CDI_DOMINATORS, bb);
4685 son;
4686 son = next_dom_son (CDI_DOMINATORS, son))
4687 break_up_subtract_bb (son);
4690 /* Used for repeated factor analysis. */
4691 struct repeat_factor
4693 /* An SSA name that occurs in a multiply chain. */
4694 tree factor;
4696 /* Cached rank of the factor. */
4697 unsigned rank;
4699 /* Number of occurrences of the factor in the chain. */
4700 HOST_WIDE_INT count;
4702 /* An SSA name representing the product of this factor and
4703 all factors appearing later in the repeated factor vector. */
4704 tree repr;
4708 static vec<repeat_factor> repeat_factor_vec;
4710 /* Used for sorting the repeat factor vector. Sort primarily by
4711 ascending occurrence count, secondarily by descending rank. */
4713 static int
4714 compare_repeat_factors (const void *x1, const void *x2)
4716 const repeat_factor *rf1 = (const repeat_factor *) x1;
4717 const repeat_factor *rf2 = (const repeat_factor *) x2;
4719 if (rf1->count != rf2->count)
4720 return rf1->count - rf2->count;
4722 return rf2->rank - rf1->rank;
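/* E.g. the factors {x: count 2, rank 7}, {y: count 3, rank 1} and
   {z: count 3, rank 4} sort into the order x, z, y: x first because of
   its lower count, then z before y because of its higher rank.  */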
4725 /* Look for repeated operands in OPS in the multiply tree rooted at
4726 STMT. Replace them with an optimal sequence of multiplies and powi
4727 builtin calls, and remove the used operands from OPS. Return an
4728 SSA name representing the value of the replacement sequence. */
4730 static tree
4731 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
4733 unsigned i, j, vec_len;
4734 int ii;
4735 operand_entry *oe;
4736 repeat_factor *rf1, *rf2;
4737 repeat_factor rfnew;
4738 tree result = NULL_TREE;
4739 tree target_ssa, iter_result;
4740 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4741 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4742 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4743 gimple *mul_stmt, *pow_stmt;
4745 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4746 target. */
4747 if (!powi_fndecl)
4748 return NULL_TREE;
4750 /* Allocate the repeated factor vector. */
4751 repeat_factor_vec.create (10);
4753 /* Scan the OPS vector for all SSA names in the product and build
4754 up a vector of occurrence counts for each factor. */
4755 FOR_EACH_VEC_ELT (*ops, i, oe)
4757 if (TREE_CODE (oe->op) == SSA_NAME)
4759 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4761 if (rf1->factor == oe->op)
4763 rf1->count += oe->count;
4764 break;
4768 if (j >= repeat_factor_vec.length ())
4770 rfnew.factor = oe->op;
4771 rfnew.rank = oe->rank;
4772 rfnew.count = oe->count;
4773 rfnew.repr = NULL_TREE;
4774 repeat_factor_vec.safe_push (rfnew);
4779 /* Sort the repeated factor vector by (a) increasing occurrence count,
4780 and (b) decreasing rank. */
4781 repeat_factor_vec.qsort (compare_repeat_factors);
4783 /* It is generally best to combine as many base factors as possible
4784 into a product before applying __builtin_powi to the result.
4785 However, the sort order chosen for the repeated factor vector
4786 allows us to cache partial results for the product of the base
4787 factors for subsequent use. When we already have a cached partial
4788 result from a previous iteration, it is best to make use of it
4789 before looking for another __builtin_pow opportunity.
4791 As an example, consider x * x * y * y * y * z * z * z * z.
4792 We want to first compose the product x * y * z, raise it to the
4793 second power, then multiply this by y * z, and finally multiply
4794 by z. This can be done in 5 multiplies provided we cache y * z
4795 for use in both expressions:
4797 t1 = y * z
4798 t2 = t1 * x
4799 t3 = t2 * t2
4800 t4 = t1 * t3
4801 result = t4 * z
4803 If we instead ignored the cached y * z and first multiplied by
4804 the __builtin_pow opportunity z * z, we would get the inferior:
4806 t1 = y * z
4807 t2 = t1 * x
4808 t3 = t2 * t2
4809 t4 = z * z
4810 t5 = t3 * t4
4811 result = t5 * y */
4813 vec_len = repeat_factor_vec.length ();
4815 /* Repeatedly look for opportunities to create a builtin_powi call. */
4816 while (true)
4818 HOST_WIDE_INT power;
4820 /* First look for the largest cached product of factors from
4821 preceding iterations. If found, create a builtin_powi for
4822 it if the minimum occurrence count for its factors is at
4823 least 2, or just use this cached product as our next
4824 multiplicand if the minimum occurrence count is 1. */
4825 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4827 if (rf1->repr && rf1->count > 0)
4828 break;
4831 if (j < vec_len)
4833 power = rf1->count;
4835 if (power == 1)
4837 iter_result = rf1->repr;
4839 if (dump_file && (dump_flags & TDF_DETAILS))
4841 unsigned elt;
4842 repeat_factor *rf;
4843 fputs ("Multiplying by cached product ", dump_file);
4844 for (elt = j; elt < vec_len; elt++)
4846 rf = &repeat_factor_vec[elt];
4847 print_generic_expr (dump_file, rf->factor, 0);
4848 if (elt < vec_len - 1)
4849 fputs (" * ", dump_file);
4851 fputs ("\n", dump_file);
4854 else
4856 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4857 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4858 build_int_cst (integer_type_node,
4859 power));
4860 gimple_call_set_lhs (pow_stmt, iter_result);
4861 gimple_set_location (pow_stmt, gimple_location (stmt));
4862 gimple_set_uid (pow_stmt, gimple_uid (stmt));
4863 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4865 if (dump_file && (dump_flags & TDF_DETAILS))
4867 unsigned elt;
4868 repeat_factor *rf;
4869 fputs ("Building __builtin_pow call for cached product (",
4870 dump_file);
4871 for (elt = j; elt < vec_len; elt++)
4873 rf = &repeat_factor_vec[elt];
4874 print_generic_expr (dump_file, rf->factor, 0);
4875 if (elt < vec_len - 1)
4876 fputs (" * ", dump_file);
4878 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
4879 power);
4883 else
4885 /* Otherwise, find the first factor in the repeated factor
4886 vector whose occurrence count is at least 2. If no such
4887 factor exists, there are no builtin_powi opportunities
4888 remaining. */
4889 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4891 if (rf1->count >= 2)
4892 break;
4895 if (j >= vec_len)
4896 break;
4898 power = rf1->count;
4900 if (dump_file && (dump_flags & TDF_DETAILS))
4902 unsigned elt;
4903 repeat_factor *rf;
4904 fputs ("Building __builtin_pow call for (", dump_file);
4905 for (elt = j; elt < vec_len; elt++)
4907 rf = &repeat_factor_vec[elt];
4908 print_generic_expr (dump_file, rf->factor, 0);
4909 if (elt < vec_len - 1)
4910 fputs (" * ", dump_file);
4912 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
4915 reassociate_stats.pows_created++;
4917 /* Visit each element of the vector in reverse order (so that
4918 high-occurrence elements are visited first, and within the
4919 same occurrence count, lower-ranked elements are visited
4920 first). Form a linear product of all elements in this order
4921 whose occurrence count is at least that of element J.
4922 Record the SSA name representing the product of each element
4923 with all subsequent elements in the vector. */
4924 if (j == vec_len - 1)
4925 rf1->repr = rf1->factor;
4926 else
4928 for (ii = vec_len - 2; ii >= (int)j; ii--)
4930 tree op1, op2;
4932 rf1 = &repeat_factor_vec[ii];
4933 rf2 = &repeat_factor_vec[ii + 1];
4935 /* Init the last factor's representative to be itself. */
4936 if (!rf2->repr)
4937 rf2->repr = rf2->factor;
4939 op1 = rf1->factor;
4940 op2 = rf2->repr;
4942 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4943 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
4944 op1, op2);
4945 gimple_set_location (mul_stmt, gimple_location (stmt));
4946 gimple_set_uid (mul_stmt, gimple_uid (stmt));
4947 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4948 rf1->repr = target_ssa;
4950 /* Don't reprocess the multiply we just introduced. */
4951 gimple_set_visited (mul_stmt, true);
4955 /* Form a call to __builtin_powi for the maximum product
4956 just formed, raised to the power obtained earlier. */
4957 rf1 = &repeat_factor_vec[j];
4958 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4959 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4960 build_int_cst (integer_type_node,
4961 power));
4962 gimple_call_set_lhs (pow_stmt, iter_result);
4963 gimple_set_location (pow_stmt, gimple_location (stmt));
4964 gimple_set_uid (pow_stmt, gimple_uid (stmt));
4965 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4968 /* If we previously formed at least one other builtin_powi call,
4969 form the product of this one and those others. */
4970 if (result)
4972 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4973 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
4974 result, iter_result);
4975 gimple_set_location (mul_stmt, gimple_location (stmt));
4976 gimple_set_uid (mul_stmt, gimple_uid (stmt));
4977 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4978 gimple_set_visited (mul_stmt, true);
4979 result = new_result;
4981 else
4982 result = iter_result;
4984 /* Decrement the occurrence count of each element in the product
4985 by the count found above, and remove this many copies of each
4986 factor from OPS. */
4987 for (i = j; i < vec_len; i++)
4989 unsigned k = power;
4990 unsigned n;
4992 rf1 = &repeat_factor_vec[i];
4993 rf1->count -= power;
4995 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4997 if (oe->op == rf1->factor)
4999 if (oe->count <= k)
5001 ops->ordered_remove (n);
5002 k -= oe->count;
5004 if (k == 0)
5005 break;
5007 else
5009 oe->count -= k;
5010 break;
5017 /* At this point all elements in the repeated factor vector have a
5018 remaining occurrence count of 0 or 1, and those with a count of 1
5019 don't have cached representatives. Re-sort the ops vector and
5020 clean up. */
5021 ops->qsort (sort_by_operand_rank);
5022 repeat_factor_vec.release ();
5024 /* Return the final product computed herein. Note that there may
5025 still be some elements with single occurrence count left in OPS;
5026 those will be handled by the normal reassociation logic. */
5027 return result;
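/* A small end-to-end sketch, assuming OPS holds {x, x, x, y, y} for a
   MULT_EXPR chain.  The repeat factors are (y, 2) and (x, 3), and the
   loop above produces
     t1 = y * x;
     t2 = __builtin_powi (t1, 2);
     t3 = t2 * x;        <-- cached factor x used once more
   t3 is returned, OPS ends up empty, and the original statement is
   later turned into a copy of t3 by reassociate_bb.  */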
5030 /* Attempt to optimize
5031 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
5032 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
5034 static void
5035 attempt_builtin_copysign (vec<operand_entry *> *ops)
5037 operand_entry *oe;
5038 unsigned int i;
5039 unsigned int length = ops->length ();
5040 tree cst = ops->last ()->op;
5042 if (length == 1 || TREE_CODE (cst) != REAL_CST)
5043 return;
5045 FOR_EACH_VEC_ELT (*ops, i, oe)
5047 if (TREE_CODE (oe->op) == SSA_NAME
5048 && has_single_use (oe->op))
5050 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5051 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
5053 tree arg0, arg1;
5054 switch (gimple_call_combined_fn (old_call))
5056 CASE_CFN_COPYSIGN:
5057 arg0 = gimple_call_arg (old_call, 0);
5058 arg1 = gimple_call_arg (old_call, 1);
5059 /* The first argument of copysign must be a constant,
5060 otherwise there's nothing to do. */
5061 if (TREE_CODE (arg0) == REAL_CST)
5063 tree type = TREE_TYPE (arg0);
5064 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
5065 /* If we couldn't fold to a single constant, skip it.
5066 That happens e.g. for inexact multiplication when
5067 -frounding-math. */
5068 if (mul == NULL_TREE)
5069 break;
5070 /* Instead of adjusting OLD_CALL, let's build a new
5071 call to not leak the LHS and prevent keeping bogus
5072 debug statements. DCE will clean up the old call. */
5073 gcall *new_call;
5074 if (gimple_call_internal_p (old_call))
5075 new_call = gimple_build_call_internal
5076 (IFN_COPYSIGN, 2, mul, arg1);
5077 else
5078 new_call = gimple_build_call
5079 (gimple_call_fndecl (old_call), 2, mul, arg1);
5080 tree lhs = make_ssa_name (type);
5081 gimple_call_set_lhs (new_call, lhs);
5082 gimple_set_location (new_call,
5083 gimple_location (old_call));
5084 insert_stmt_after (new_call, old_call);
5085 /* We've used the constant, get rid of it. */
5086 ops->pop ();
5087 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
5088 /* Handle the CST1 < 0 case by negating the result. */
5089 if (cst1_neg)
5091 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
5092 gimple *negate_stmt
5093 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
5094 insert_stmt_after (negate_stmt, new_call);
5095 oe->op = negrhs;
5097 else
5098 oe->op = lhs;
5099 if (dump_file && (dump_flags & TDF_DETAILS))
5101 fprintf (dump_file, "Optimizing copysign: ");
5102 print_generic_expr (dump_file, cst, 0);
5103 fprintf (dump_file, " * COPYSIGN (");
5104 print_generic_expr (dump_file, arg0, 0);
5105 fprintf (dump_file, ", ");
5106 print_generic_expr (dump_file, arg1, 0);
5107 fprintf (dump_file, ") into %sCOPYSIGN (",
5108 cst1_neg ? "-" : "");
5109 print_generic_expr (dump_file, mul, 0);
5110 fprintf (dump_file, ", ");
5111 print_generic_expr (dump_file, arg1, 0);
5112 fprintf (dump_file, ")\n");
5114 return;
5116 break;
5117 default:
5118 break;
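/* For instance, with OPS = {t, -2.0} for a MULT_EXPR chain, where
   t = copysign (3.0, y), the code above emits
     t2 = copysign (-6.0, y);
     t3 = -t2;
   pops the -2.0 constant, and replaces t by t3 in OPS, so only the
   negated copysign remains to be multiplied.  */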
5125 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
5127 static void
5128 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
5130 tree rhs1;
5132 if (dump_file && (dump_flags & TDF_DETAILS))
5134 fprintf (dump_file, "Transforming ");
5135 print_gimple_stmt (dump_file, stmt, 0, 0);
5138 rhs1 = gimple_assign_rhs1 (stmt);
5139 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
5140 update_stmt (stmt);
5141 remove_visited_stmt_chain (rhs1);
5143 if (dump_file && (dump_flags & TDF_DETAILS))
5145 fprintf (dump_file, " into ");
5146 print_gimple_stmt (dump_file, stmt, 0, 0);
5150 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
5152 static void
5153 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
5154 tree rhs1, tree rhs2)
5156 if (dump_file && (dump_flags & TDF_DETAILS))
5158 fprintf (dump_file, "Transforming ");
5159 print_gimple_stmt (dump_file, stmt, 0, 0);
5162 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
5163 update_stmt (gsi_stmt (*gsi));
5164 remove_visited_stmt_chain (rhs1);
5166 if (dump_file && (dump_flags & TDF_DETAILS))
5168 fprintf (dump_file, " into ");
5169 print_gimple_stmt (dump_file, stmt, 0, 0);
5173 /* Reassociate expressions in basic block BB and its children in the
5174 post-dominator tree.
5176 Bubble up return status from maybe_optimize_range_tests. */
5178 static bool
5179 reassociate_bb (basic_block bb)
5181 gimple_stmt_iterator gsi;
5182 basic_block son;
5183 gimple *stmt = last_stmt (bb);
5184 bool cfg_cleanup_needed = false;
5186 if (stmt && !gimple_visited_p (stmt))
5187 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
5189 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
5191 stmt = gsi_stmt (gsi);
5193 if (is_gimple_assign (stmt)
5194 && !stmt_could_throw_p (stmt))
5196 tree lhs, rhs1, rhs2;
5197 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
5199 /* If this is not a gimple binary expression, there is
5200 nothing for us to do with it. */
5201 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
5202 continue;
5204 /* If this was part of an already processed statement,
5205 we don't need to touch it again. */
5206 if (gimple_visited_p (stmt))
5208 /* This statement might have become dead because of previous
5209 reassociations. */
5210 if (has_zero_uses (gimple_get_lhs (stmt)))
5212 reassoc_remove_stmt (&gsi);
5213 release_defs (stmt);
5214 /* Removing the stmt above may have left the iterator
5215 at the end of the sequence. In that case reset it
5216 to the last stmt of the block. If the block has
5217 become empty because we removed its only statement,
5218 that iterator is also at the end of the sequence,
5219 and we need to bail out. */
5220 if (gsi_end_p (gsi))
5222 gsi = gsi_last_bb (bb);
5223 if (gsi_end_p (gsi))
5224 break;
5227 continue;
5230 lhs = gimple_assign_lhs (stmt);
5231 rhs1 = gimple_assign_rhs1 (stmt);
5232 rhs2 = gimple_assign_rhs2 (stmt);
5234 /* For operations other than bitwise ops and min/max we can't
5235 associate all types. Verify that here. */
5236 if (rhs_code != BIT_IOR_EXPR
5237 && rhs_code != BIT_AND_EXPR
5238 && rhs_code != BIT_XOR_EXPR
5239 && rhs_code != MIN_EXPR
5240 && rhs_code != MAX_EXPR
5241 && (!can_reassociate_p (lhs)
5242 || !can_reassociate_p (rhs1)
5243 || !can_reassociate_p (rhs2)))
5244 continue;
5246 if (associative_tree_code (rhs_code))
5248 auto_vec<operand_entry *> ops;
5249 tree powi_result = NULL_TREE;
5250 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
5252 /* There may be no immediate uses left by the time we
5253 get here because we may have eliminated them all. */
5254 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
5255 continue;
5257 gimple_set_visited (stmt, true);
5258 linearize_expr_tree (&ops, stmt, true, true);
5259 ops.qsort (sort_by_operand_rank);
5260 optimize_ops_list (rhs_code, &ops);
5261 if (undistribute_ops_list (rhs_code, &ops,
5262 loop_containing_stmt (stmt)))
5264 ops.qsort (sort_by_operand_rank);
5265 optimize_ops_list (rhs_code, &ops);
5268 if (rhs_code == PLUS_EXPR
5269 && transform_add_to_multiply (&ops))
5270 ops.qsort (sort_by_operand_rank);
5272 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
5274 if (is_vector)
5275 optimize_vec_cond_expr (rhs_code, &ops);
5276 else
5277 optimize_range_tests (rhs_code, &ops);
5280 if (rhs_code == MULT_EXPR && !is_vector)
5282 attempt_builtin_copysign (&ops);
5284 if (reassoc_insert_powi_p
5285 && flag_unsafe_math_optimizations)
5286 powi_result = attempt_builtin_powi (stmt, &ops);
5289 operand_entry *last;
5290 bool negate_result = false;
5291 if (ops.length () > 1
5292 && rhs_code == MULT_EXPR)
5294 last = ops.last ();
5295 if (((TREE_CODE (last->op) == INTEGER_CST
5296 && integer_minus_onep (last->op))
5297 || real_minus_onep (last->op))
5298 && !HONOR_SNANS (TREE_TYPE (lhs))
5299 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
5300 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
5302 ops.pop ();
5303 negate_result = true;
5307 /* If the operand vector is now empty, all operands were
5308 consumed by the __builtin_powi optimization. */
5309 if (ops.length () == 0)
5310 transform_stmt_to_copy (&gsi, stmt, powi_result);
5311 else if (ops.length () == 1)
5313 tree last_op = ops.last ()->op;
5315 /* If the stmt that defines operand has to be inserted, insert it
5316 before the use. */
5317 if (ops.last ()->stmt_to_insert)
5318 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
5319 if (powi_result)
5320 transform_stmt_to_multiply (&gsi, stmt, last_op,
5321 powi_result);
5322 else
5323 transform_stmt_to_copy (&gsi, stmt, last_op);
5325 else
5327 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
5328 int ops_num = ops.length ();
5329 int width = get_reassociation_width (ops_num, rhs_code, mode);
5330 tree new_lhs = lhs;
5332 if (dump_file && (dump_flags & TDF_DETAILS))
5333 fprintf (dump_file,
5334 "Width = %d was chosen for reassociation\n", width);
5336 if (width > 1
5337 && ops.length () > 3)
5338 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
5339 width, ops);
5340 else
5342 /* When there are three operands left, we want
5343 to make sure the ones that get the double
5344 binary op are chosen wisely. */
5345 int len = ops.length ();
5346 if (len >= 3)
5347 swap_ops_for_binary_stmt (ops, len - 3, stmt);
5349 new_lhs = rewrite_expr_tree (stmt, 0, ops,
5350 powi_result != NULL);
5353 /* If we combined some repeated factors into a
5354 __builtin_powi call, multiply that result by the
5355 reassociated operands. */
5356 if (powi_result)
5358 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
5359 tree type = TREE_TYPE (lhs);
5360 tree target_ssa = make_temp_ssa_name (type, NULL,
5361 "reassocpow");
5362 gimple_set_lhs (lhs_stmt, target_ssa);
5363 update_stmt (lhs_stmt);
5364 if (lhs != new_lhs)
5365 target_ssa = new_lhs;
5366 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
5367 powi_result, target_ssa);
5368 gimple_set_location (mul_stmt, gimple_location (stmt));
5369 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5370 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
5374 if (negate_result)
5376 stmt = SSA_NAME_DEF_STMT (lhs);
5377 tree tmp = make_ssa_name (TREE_TYPE (lhs));
5378 gimple_set_lhs (stmt, tmp);
5379 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
5380 tmp);
5381 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5382 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
5383 update_stmt (stmt);
5388 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
5389 son;
5390 son = next_dom_son (CDI_POST_DOMINATORS, son))
5391 cfg_cleanup_needed |= reassociate_bb (son);
5393 return cfg_cleanup_needed;
5396 /* Add jumps around shifts for range tests turned into bit tests.
5397 For each SSA_NAME VAR we have code like:
5398 VAR = ...; // final stmt of range comparison
5399 // bit test here...;
5400 OTHERVAR = ...; // final stmt of the bit test sequence
5401 RES = VAR | OTHERVAR;
5402 Turn the above into:
5403 VAR = ...;
5404 if (VAR != 0)
5405 goto <l3>;
5406 else
5407 goto <l2>;
5408 <l2>:
5409 // bit test here...;
5410 OTHERVAR = ...;
5411 <l3>:
5412 # RES = PHI<1(l1), OTHERVAR(l2)>; */
5414 static void
5415 branch_fixup (void)
5417 tree var;
5418 unsigned int i;
5420 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
5422 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
5423 gimple *use_stmt;
5424 use_operand_p use;
5425 bool ok = single_imm_use (var, &use, &use_stmt);
5426 gcc_assert (ok
5427 && is_gimple_assign (use_stmt)
5428 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
5429 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
5431 basic_block cond_bb = gimple_bb (def_stmt);
5432 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
5433 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
5435 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5436 gimple *g = gimple_build_cond (NE_EXPR, var,
5437 build_zero_cst (TREE_TYPE (var)),
5438 NULL_TREE, NULL_TREE);
5439 location_t loc = gimple_location (use_stmt);
5440 gimple_set_location (g, loc);
5441 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
5443 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
5444 etrue->probability = REG_BR_PROB_BASE / 2;
5445 etrue->count = cond_bb->count / 2;
5446 edge efalse = find_edge (cond_bb, then_bb);
5447 efalse->flags = EDGE_FALSE_VALUE;
5448 efalse->probability -= etrue->probability;
5449 efalse->count -= etrue->count;
5450 then_bb->count -= etrue->count;
5452 tree othervar = NULL_TREE;
5453 if (gimple_assign_rhs1 (use_stmt) == var)
5454 othervar = gimple_assign_rhs2 (use_stmt);
5455 else if (gimple_assign_rhs2 (use_stmt) == var)
5456 othervar = gimple_assign_rhs1 (use_stmt);
5457 else
5458 gcc_unreachable ();
5459 tree lhs = gimple_assign_lhs (use_stmt);
5460 gphi *phi = create_phi_node (lhs, merge_bb);
5461 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
5462 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
5463 gsi = gsi_for_stmt (use_stmt);
5464 gsi_remove (&gsi, true);
5466 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
5467 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
5469 reassoc_branch_fixups.release ();
5472 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
5473 void debug_ops_vector (vec<operand_entry *> ops);
5475 /* Dump the operand entry vector OPS to FILE. */
5477 void
5478 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
5480 operand_entry *oe;
5481 unsigned int i;
5483 FOR_EACH_VEC_ELT (ops, i, oe)
5485 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
5486 print_generic_expr (file, oe->op, 0);
5487 fprintf (file, "\n");
5491 /* Dump the operand entry vector OPS to STDERR. */
5493 DEBUG_FUNCTION void
5494 debug_ops_vector (vec<operand_entry *> ops)
5496 dump_ops_vector (stderr, ops);
5499 /* Bubble up return status from reassociate_bb. */
5501 static bool
5502 do_reassoc (void)
5504 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5505 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
5508 /* Initialize the reassociation pass. */
5510 static void
5511 init_reassoc (void)
5513 int i;
5514 long rank = 2;
5515 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
5517 /* Find the loops, so that we can prevent moving calculations in
5518 them. */
5519 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
5521 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
5523 next_operand_entry_id = 0;
5525 /* Reverse RPO (Reverse Post Order) will give us something where
5526 deeper loops come later. */
5527 pre_and_rev_post_order_compute (NULL, bbs, false);
5528 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
5529 operand_rank = new hash_map<tree, long>;
5531 /* Give each default definition a distinct rank. This includes
5532 parameters and the static chain. Walk backwards over all
5533 SSA names so that we get proper rank ordering according
5534 to tree_swap_operands_p. */
5535 for (i = num_ssa_names - 1; i > 0; --i)
5537 tree name = ssa_name (i);
5538 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5539 insert_operand_rank (name, ++rank);
5542 /* Set up rank for each BB */
5543 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5544 bb_rank[bbs[i]] = ++rank << 16;
5546 free (bbs);
5547 calculate_dominance_info (CDI_POST_DOMINATORS);
5548 plus_negates = vNULL;
5551 /* Cleanup after the reassociation pass, and print stats if
5552 requested. */
5554 static void
5555 fini_reassoc (void)
5557 statistics_counter_event (cfun, "Linearized",
5558 reassociate_stats.linearized);
5559 statistics_counter_event (cfun, "Constants eliminated",
5560 reassociate_stats.constants_eliminated);
5561 statistics_counter_event (cfun, "Ops eliminated",
5562 reassociate_stats.ops_eliminated);
5563 statistics_counter_event (cfun, "Statements rewritten",
5564 reassociate_stats.rewritten);
5565 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5566 reassociate_stats.pows_encountered);
5567 statistics_counter_event (cfun, "Built-in powi calls created",
5568 reassociate_stats.pows_created);
5570 delete operand_rank;
5571 operand_entry_pool.release ();
5572 free (bb_rank);
5573 plus_negates.release ();
5574 free_dominance_info (CDI_POST_DOMINATORS);
5575 loop_optimizer_finalize ();
5578 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
5579 insertion of __builtin_powi calls.
5581 Returns TODO_cleanup_cfg if a CFG cleanup pass is desired due to
5582 optimization of a gimple conditional. Otherwise returns zero. */
5584 static unsigned int
5585 execute_reassoc (bool insert_powi_p)
5587 reassoc_insert_powi_p = insert_powi_p;
5589 init_reassoc ();
5591 bool cfg_cleanup_needed;
5592 cfg_cleanup_needed = do_reassoc ();
5593 repropagate_negates ();
5594 branch_fixup ();
5596 fini_reassoc ();
5597 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
5600 namespace {
5602 const pass_data pass_data_reassoc =
5604 GIMPLE_PASS, /* type */
5605 "reassoc", /* name */
5606 OPTGROUP_NONE, /* optinfo_flags */
5607 TV_TREE_REASSOC, /* tv_id */
5608 ( PROP_cfg | PROP_ssa ), /* properties_required */
5609 0, /* properties_provided */
5610 0, /* properties_destroyed */
5611 0, /* todo_flags_start */
5612 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5615 class pass_reassoc : public gimple_opt_pass
5617 public:
5618 pass_reassoc (gcc::context *ctxt)
5619 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
5622 /* opt_pass methods: */
5623 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5624 void set_pass_param (unsigned int n, bool param)
5626 gcc_assert (n == 0);
5627 insert_powi_p = param;
5629 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5630 virtual unsigned int execute (function *)
5631 { return execute_reassoc (insert_powi_p); }
5633 private:
5634 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
5635 point 3a in the pass header comment. */
5636 bool insert_powi_p;
5637 }; // class pass_reassoc
5639 } // anon namespace
5641 gimple_opt_pass *
5642 make_pass_reassoc (gcc::context *ctxt)
5644 return new pass_reassoc (ctxt);