1 /* Reassociation for trees.
2 Copyright (C) 2005-2021 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
54 #include "tree-ssa-reassoc.h"
55 #include "tree-ssa-math-opts.h"
56 #include "gimple-range.h"
58 /* This is a simple global reassociation pass. It is, in part, based
59 on the LLVM pass of the same name (They do some things more/less
60 than we do, in different orders, etc).
62 It consists of five steps:
64 1. Breaking up subtract operations into addition + negate, where
65 it would promote the reassociation of adds.
67 2. Left linearization of the expression trees, so that (A+B)+(C+D)
68 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
69 During linearization, we place the operands of the binary
70 expressions into a vector of operand_entry_*
72 3. Optimization of the operand lists, eliminating things like a +
73 -a, a & a, etc.
75 3a. Combine repeated factors with the same occurrence counts
76 into a __builtin_powi call that will later be optimized into
77 an optimal number of multiplies.
79 4. Rewrite the expression trees we linearized and optimized so
80 they are in proper rank order.
82 5. Repropagate negates, as nothing else will clean it up ATM.
84 A bit of theory on #4, since nobody seems to write anything down
85 about why it makes sense to do it the way they do it:
87 We could do this much nicer theoretically, but don't (for reasons
88 explained after how to do it theoretically nice :P).
90 In order to promote the most redundancy elimination, you want
91 binary expressions whose operands are the same rank (or
92 preferably, the same value) exposed to the redundancy eliminator,
93 for possible elimination.
95 So the way to do this if we really cared, is to build the new op
96 tree from the leaves to the roots, merging as you go, and putting the
97 new op on the end of the worklist, until you are left with one
98 thing on the worklist.
100 IE if you have to rewrite the following set of operands (listed with
101 rank in parentheses), with opcode PLUS_EXPR:
103 a (1), b (1), c (1), d (2), e (2)
106 We start with our merge worklist empty, and the ops list with all of
107 those on it.
109 You want to first merge all leaves of the same rank, as much as
110 possible.
112 So first build a binary op of
114 mergetmp = a + b, and put "mergetmp" on the merge worklist.
116 Because there is no three operand form of PLUS_EXPR, c is not going to
117 be exposed to redundancy elimination as a rank 1 operand.
119 So you might as well throw it on the merge worklist (you could also
120 consider it to now be a rank two operand, and merge it with d and e,
121 but in this case, you then have evicted e from a binary op. So at
122 least in this situation, you can't win.)
124 Then build a binary op of d + e
125 mergetmp2 = d + e
127 and put mergetmp2 on the merge worklist.
129 so merge worklist = {mergetmp, c, mergetmp2}
131 Continue building binary ops of these operations until you have only
132 one operation left on the worklist.
134 So we have
136 build binary op
137 mergetmp3 = mergetmp + c
139 worklist = {mergetmp2, mergetmp3}
141 mergetmp4 = mergetmp2 + mergetmp3
143 worklist = {mergetmp4}
145 because we have one operation left, we can now just set the original
146 statement equal to the result of that operation.
148 This will at least expose a + b and d + e to redundancy elimination
149 as binary operations.
151 For extra points, you can reuse the old statements to build the
152 mergetmps, since you shouldn't run out.
154 So why don't we do this?
156 Because it's expensive, and will rarely help. Most trees we are
157 reassociating have 3 or fewer ops. If they have 2 ops, they already
158 will be written into a nice single binary op. If you have 3 ops, a
159 single simple check suffices to tell you whether the first two are of the
160 same rank. If so, you know to order it
162 mergetmp = op1 + op2
163 newstmt = mergetmp + op3
165 instead of
166 mergetmp = op2 + op3
167 newstmt = mergetmp + op1
169 If all three are of the same rank, you can't expose them all in a
170 single binary operator anyway, so the above is *still* the best you
171 can do.
173 Thus, this is what we do. When we have three ops left, we check to see
174 what order to put them in, and call it a day. As a nod to vector sum
175 reduction, we check if any of the ops are really a phi node that is a
176 destructive update for the associating op, and keep the destructive
177 update together for vector sum reduction recognition. */
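/* Editorial illustration, not part of the pass: a minimal C-level sketch of
   how steps 1, 2, 4 and 5 above reshape a toy expression.  The function
   name and the plain-C setting are assumptions made purely for
   illustration; the pass itself works on GIMPLE statements.  Guarded with
   #if 0 so it does not affect the build.  */
#if 0
static int
reassoc_steps_sketch (int a, int b, int c, int d)
{
  int t0 = (a - b) + (c + d);   /* original expression                    */
  int t1 = ((a + -b) + c) + d;  /* after steps 1 (negate introduction)
                                   and 2 (left linearization)             */
  int t2 = ((a + c) + d) - b;   /* after steps 4 (rank ordering) and
                                   5 (negate repropagation)               */
  return t0 + t1 + t2;          /* all three compute the same value       */
}
#endif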
179 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
180 point 3a in the pass header comment. */
181 static bool reassoc_insert_powi_p;
183 /* Enable biasing ranks of loop accumulators. We don't want this before
184 vectorization, since it interferes with reduction chains. */
185 static bool reassoc_bias_loop_carried_phi_ranks_p;
187 /* Statistics */
188 static struct
190 int linearized;
191 int constants_eliminated;
192 int ops_eliminated;
193 int rewritten;
194 int pows_encountered;
195 int pows_created;
196 } reassociate_stats;
199 static object_allocator<operand_entry> operand_entry_pool
200 ("operand entry pool");
202 /* This is used to assign a unique ID to each struct operand_entry
203 so that qsort results are identical on different hosts. */
204 static unsigned int next_operand_entry_id;
206 /* Starting rank number for a given basic block, so that we can rank
207 operations using unmovable instructions in that BB based on the bb
208 depth. */
209 static int64_t *bb_rank;
211 /* Operand->rank hashtable. */
212 static hash_map<tree, int64_t> *operand_rank;
214 /* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be
215 biased. */
216 static auto_bitmap biased_names;
218 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
219 all basic blocks, the CFG should be adjusted - the basic block is
220 split right after that SSA_NAME's definition statement and before
221 its only use, which must be a bit ior. */
222 static vec<tree> reassoc_branch_fixups;
224 /* Forward decls. */
225 static int64_t get_rank (tree);
226 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
228 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
229 possibly added by gsi_remove. */
231 static bool
232 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
234 gimple *stmt = gsi_stmt (*gsi);
236 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
237 return gsi_remove (gsi, true);
239 gimple_stmt_iterator prev = *gsi;
240 gsi_prev (&prev);
241 unsigned uid = gimple_uid (stmt);
242 basic_block bb = gimple_bb (stmt);
243 bool ret = gsi_remove (gsi, true);
244 if (!gsi_end_p (prev))
245 gsi_next (&prev);
246 else
247 prev = gsi_start_bb (bb);
248 gimple *end_stmt = gsi_stmt (*gsi);
249 while ((stmt = gsi_stmt (prev)) != end_stmt)
251 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
252 gimple_set_uid (stmt, uid);
253 gsi_next (&prev);
255 return ret;
258 /* Bias amount for loop-carried phis. We want this to be larger than
259 the depth of any reassociation tree we can see, but not larger than
260 the rank difference between two blocks. */
261 #define PHI_LOOP_BIAS (1 << 15)
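/* Editorial illustration, not part of the pass: what the bias buys us on
   the accumulator pattern that get_rank describes below.  Assuming the
   loop-carried phi result receives the latch rank plus PHI_LOOP_BIAS, it
   becomes the highest-ranked operand of the accumulation and is therefore
   added last, which shortens the loop-carried dependence chain.  The
   function is a hypothetical example, guarded with #if 0 so it does not
   affect the build.  */
#if 0
static int
phi_bias_sketch (const int *a, const int *d, const int *e, int n)
{
  int acc = 0;
  for (int i = 0; i < n; i++)
    /* Unbiased order:  acc = ((acc + a[i]) + d[i]) + e[i];
       Biased order:    acc = ((a[i] + d[i]) + e[i]) + acc;
       The biased form lets a[i] + d[i] + e[i] be computed without waiting
       for the previous iteration's acc.  */
    acc = ((a[i] + d[i]) + e[i]) + acc;
  return acc;
}
#endif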
263 /* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's
264 operands to the STMT's left-hand side. The goal is to preserve bias in code
265 like this:
267 x_1 = phi(x_0, x_2)
268 a = x_1 | 1
269 b = a ^ 2
270 .MEM = b
271 c = b + d
272 x_2 = c + e
274 That is, we need to preserve bias along single-use chains originating from
275 loop-carried phis. Only GIMPLE_ASSIGNs to SSA_NAMEs are considered to be
276 uses, because only they participate in rank propagation. */
277 static bool
278 propagate_bias_p (gimple *stmt)
280 use_operand_p use;
281 imm_use_iterator use_iter;
282 gimple *single_use_stmt = NULL;
284 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_reference)
285 return false;
287 FOR_EACH_IMM_USE_FAST (use, use_iter, gimple_assign_lhs (stmt))
289 gimple *current_use_stmt = USE_STMT (use);
291 if (is_gimple_assign (current_use_stmt)
292 && TREE_CODE (gimple_assign_lhs (current_use_stmt)) == SSA_NAME)
294 if (single_use_stmt != NULL && single_use_stmt != current_use_stmt)
295 return false;
296 single_use_stmt = current_use_stmt;
300 if (single_use_stmt == NULL)
301 return false;
303 if (gimple_bb (stmt)->loop_father
304 != gimple_bb (single_use_stmt)->loop_father)
305 return false;
307 return true;
310 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
311 an innermost loop, and the phi has only a single use which is inside
312 the loop, then the rank is the block rank of the loop latch plus an
313 extra bias for the loop-carried dependence. This causes expressions
314 calculated into an accumulator variable to be independent for each
315 iteration of the loop. If STMT is some other phi, the rank is the
316 block rank of its containing block. */
317 static int64_t
318 phi_rank (gimple *stmt)
320 basic_block bb = gimple_bb (stmt);
321 class loop *father = bb->loop_father;
322 tree res;
323 unsigned i;
324 use_operand_p use;
325 gimple *use_stmt;
327 if (!reassoc_bias_loop_carried_phi_ranks_p)
328 return bb_rank[bb->index];
330 /* We only care about real loops (those with a latch). */
331 if (!father->latch)
332 return bb_rank[bb->index];
334 /* Interesting phis must be in headers of innermost loops. */
335 if (bb != father->header
336 || father->inner)
337 return bb_rank[bb->index];
339 /* Ignore virtual SSA_NAMEs. */
340 res = gimple_phi_result (stmt);
341 if (virtual_operand_p (res))
342 return bb_rank[bb->index];
344 /* The phi definition must have a single use, and that use must be
345 within the loop. Otherwise this isn't an accumulator pattern. */
346 if (!single_imm_use (res, &use, &use_stmt)
347 || gimple_bb (use_stmt)->loop_father != father)
348 return bb_rank[bb->index];
350 /* Look for phi arguments from within the loop. If found, bias this phi. */
351 for (i = 0; i < gimple_phi_num_args (stmt); i++)
353 tree arg = gimple_phi_arg_def (stmt, i);
354 if (TREE_CODE (arg) == SSA_NAME
355 && !SSA_NAME_IS_DEFAULT_DEF (arg))
357 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
358 if (gimple_bb (def_stmt)->loop_father == father)
359 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
363 /* Must be an uninteresting phi. */
364 return bb_rank[bb->index];
367 /* Return the maximum of RANK and the rank that should be propagated
368 from expression OP. For most operands, this is just the rank of OP.
369 For loop-carried phis, the value is zero to avoid undoing the bias
370 in favor of the phi. */
371 static int64_t
372 propagate_rank (int64_t rank, tree op, bool *maybe_biased_p)
374 int64_t op_rank;
376 op_rank = get_rank (op);
378 /* Check whether op is biased after the get_rank () call, since it might have
379 updated biased_names. */
380 if (TREE_CODE (op) == SSA_NAME
381 && bitmap_bit_p (biased_names, SSA_NAME_VERSION (op)))
383 if (maybe_biased_p == NULL)
384 return rank;
385 *maybe_biased_p = true;
388 return MAX (rank, op_rank);
391 /* Look up the operand rank structure for expression E. */
393 static inline int64_t
394 find_operand_rank (tree e)
396 int64_t *slot = operand_rank->get (e);
397 return slot ? *slot : -1;
400 /* Insert {E,RANK} into the operand rank hashtable. */
402 static inline void
403 insert_operand_rank (tree e, int64_t rank)
405 gcc_assert (rank > 0);
406 gcc_assert (!operand_rank->put (e, rank));
409 /* Given an expression E, return the rank of the expression. */
411 static int64_t
412 get_rank (tree e)
414 /* SSA_NAME's have the rank of the expression they are the result of.
416 For globals and uninitialized values, the rank is 0.
417 For function arguments, use the pre-setup rank.
418 For PHI nodes, stores, asm statements, etc, we use the rank of
419 the BB.
420 For simple operations, the rank is the maximum rank of any of
421 its operands, or the bb_rank, whichever is less.
422 I make no claims that this is optimal, however, it gives good
423 results. */
425 /* We make an exception to the normal ranking system to break
426 dependences of accumulator variables in loops. Suppose we
427 have a simple one-block loop containing:
429 x_1 = phi(x_0, x_2)
430 b = a + x_1
431 c = b + d
432 x_2 = c + e
434 As shown, each iteration of the calculation into x is fully
435 dependent upon the iteration before it. We would prefer to
436 see this in the form:
438 x_1 = phi(x_0, x_2)
439 b = a + d
440 c = b + e
441 x_2 = c + x_1
443 If the loop is unrolled, the calculations of b and c from
444 different iterations can be interleaved.
446 To obtain this result during reassociation, we bias the rank
447 of the phi definition x_1 upward, when it is recognized as an
448 accumulator pattern. The artificial rank causes it to be
449 added last, providing the desired independence. */
451 if (TREE_CODE (e) == SSA_NAME)
453 ssa_op_iter iter;
454 gimple *stmt;
455 int64_t rank;
456 tree op;
458 /* If we already have a rank for this expression, use that. */
459 rank = find_operand_rank (e);
460 if (rank != -1)
461 return rank;
463 stmt = SSA_NAME_DEF_STMT (e);
464 if (gimple_code (stmt) == GIMPLE_PHI)
466 rank = phi_rank (stmt);
467 if (rank != bb_rank[gimple_bb (stmt)->index])
468 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
471 else if (!is_gimple_assign (stmt))
472 rank = bb_rank[gimple_bb (stmt)->index];
474 else
476 bool biased_p = false;
477 bool *maybe_biased_p = propagate_bias_p (stmt) ? &biased_p : NULL;
479 /* Otherwise, find the maximum rank for the operands. As an
480 exception, remove the bias from loop-carried phis when propagating
481 the rank so that dependent operations are not also biased. */
482 /* Simply walk over all SSA uses - this takes advantage of the
483 fact that non-SSA operands are is_gimple_min_invariant and
484 thus have rank 0. */
485 rank = 0;
486 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
487 rank = propagate_rank (rank, op, maybe_biased_p);
489 rank += 1;
490 if (biased_p)
491 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
494 if (dump_file && (dump_flags & TDF_DETAILS))
496 fprintf (dump_file, "Rank for ");
497 print_generic_expr (dump_file, e);
498 fprintf (dump_file, " is %" PRId64 "\n", rank);
501 /* Note the rank in the hashtable so we don't recompute it. */
502 insert_operand_rank (e, rank);
503 return rank;
506 /* Constants, globals, etc., are rank 0 */
507 return 0;
511 /* We want integer ones to end up last no matter what, since they are
512 the ones we can do the most with. */
513 #define INTEGER_CONST_TYPE 1 << 4
514 #define FLOAT_ONE_CONST_TYPE 1 << 3
515 #define FLOAT_CONST_TYPE 1 << 2
516 #define OTHER_CONST_TYPE 1 << 1
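/* Editorial illustration, not part of the pass: the point of sorting
   constants to the end of the operand list.  Once the integer constants of
   a linearized chain are adjacent, the operand-list optimization can fold
   them into a single constant.  A hypothetical example, guarded with #if 0
   so it does not affect the build.  */
#if 0
static int
constant_sort_sketch (int x, int y)
{
  /* The linearized operand list of x + 3 + y + 5 sorts to {x, y, 3, 5};
     with 3 and 5 adjacent they fold to 8.  */
  return (x + y) + 8;
}
#endif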
518 /* Classify an invariant tree into integer, float, or other, so that
519 we can sort them to be near other constants of the same type. */
520 static inline int
521 constant_type (tree t)
523 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
524 return INTEGER_CONST_TYPE;
525 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
527 /* Sort -1.0 and 1.0 constants last: while in some cases
528 const_binop can't optimize some inexact operations, multiplication
529 by -1.0 or 1.0 can always be merged with other operands. */
530 if (real_onep (t) || real_minus_onep (t))
531 return FLOAT_ONE_CONST_TYPE;
532 return FLOAT_CONST_TYPE;
534 else
535 return OTHER_CONST_TYPE;
538 /* qsort comparison function to sort operand entries PA and PB by rank
539 so that the sorted array is ordered by rank in decreasing order. */
540 static int
541 sort_by_operand_rank (const void *pa, const void *pb)
543 const operand_entry *oea = *(const operand_entry *const *)pa;
544 const operand_entry *oeb = *(const operand_entry *const *)pb;
546 if (oeb->rank != oea->rank)
547 return oeb->rank > oea->rank ? 1 : -1;
549 /* It's nicer for optimize_expression if constants that are likely
550 to fold when added/multiplied/whatever are put next to each
551 other. Since all constants have rank 0, order them by type. */
552 if (oea->rank == 0)
554 if (constant_type (oeb->op) != constant_type (oea->op))
555 return constant_type (oea->op) - constant_type (oeb->op);
556 else
557 /* To make sorting result stable, we use unique IDs to determine
558 order. */
559 return oeb->id > oea->id ? 1 : -1;
562 if (TREE_CODE (oea->op) != SSA_NAME)
564 if (TREE_CODE (oeb->op) != SSA_NAME)
565 return oeb->id > oea->id ? 1 : -1;
566 else
567 return 1;
569 else if (TREE_CODE (oeb->op) != SSA_NAME)
570 return -1;
572 /* Lastly, make sure the versions that are the same go next to each
573 other. */
574 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
576 /* SSA_NAME_VERSION is assigned pretty randomly because we reuse
577 versions of removed SSA_NAMEs, so if possible, prefer to sort
578 based on the basic block and gimple_uid of the SSA_NAME_DEF_STMT.
579 See PR60418. */
580 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
581 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
582 basic_block bba = gimple_bb (stmta);
583 basic_block bbb = gimple_bb (stmtb);
584 if (bbb != bba)
586 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
587 but the other might not be. */
588 if (!bba)
589 return 1;
590 if (!bbb)
591 return -1;
592 /* If neither is, compare bb_rank. */
593 if (bb_rank[bbb->index] != bb_rank[bba->index])
594 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
597 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
598 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
599 if (da != db)
600 return da ? 1 : -1;
602 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
605 return oeb->id > oea->id ? 1 : -1;
608 /* Add an operand entry to *OPS for the tree operand OP. */
610 static void
611 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
613 operand_entry *oe = operand_entry_pool.allocate ();
615 oe->op = op;
616 oe->rank = get_rank (op);
617 oe->id = next_operand_entry_id++;
618 oe->count = 1;
619 oe->stmt_to_insert = stmt_to_insert;
620 ops->safe_push (oe);
623 /* Add an operand entry to *OPS for the tree operand OP with repeat
624 count REPEAT. */
626 static void
627 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
628 HOST_WIDE_INT repeat)
630 operand_entry *oe = operand_entry_pool.allocate ();
632 oe->op = op;
633 oe->rank = get_rank (op);
634 oe->id = next_operand_entry_id++;
635 oe->count = repeat;
636 oe->stmt_to_insert = NULL;
637 ops->safe_push (oe);
639 reassociate_stats.pows_encountered++;
642 /* Returns true if we can associate the SSA def OP. */
644 static bool
645 can_reassociate_op_p (tree op)
647 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
648 return false;
649 /* Make sure asm goto outputs do not participate in reassociation since
650 we have no way to find an insertion place after asm goto. */
651 if (TREE_CODE (op) == SSA_NAME
652 && gimple_code (SSA_NAME_DEF_STMT (op)) == GIMPLE_ASM
653 && gimple_asm_nlabels (as_a <gasm *> (SSA_NAME_DEF_STMT (op))) != 0)
654 return false;
655 return true;
658 /* Returns true if we can reassociate operations of TYPE.
659 That is for integral or non-saturating fixed-point types, and for
660 floating-point types when -fassociative-math is enabled. */
662 static bool
663 can_reassociate_type_p (tree type)
665 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
666 || NON_SAT_FIXED_POINT_TYPE_P (type)
667 || (flag_associative_math && FLOAT_TYPE_P (type)))
668 return true;
669 return false;
672 /* Return true if STMT is a reassociable operation containing a binary
673 operation with tree code CODE, and is inside LOOP. */
675 static bool
676 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
678 basic_block bb = gimple_bb (stmt);
680 if (gimple_bb (stmt) == NULL)
681 return false;
683 if (!flow_bb_inside_loop_p (loop, bb))
684 return false;
686 if (is_gimple_assign (stmt)
687 && gimple_assign_rhs_code (stmt) == code
688 && has_single_use (gimple_assign_lhs (stmt)))
690 tree rhs1 = gimple_assign_rhs1 (stmt);
691 tree rhs2 = gimple_assign_rhs2 (stmt);
692 if (!can_reassociate_op_p (rhs1)
693 || (rhs2 && !can_reassociate_op_p (rhs2)))
694 return false;
695 return true;
698 return false;
702 /* Return true if STMT is a nop-conversion. */
704 static bool
705 gimple_nop_conversion_p (gimple *stmt)
707 if (gassign *ass = dyn_cast <gassign *> (stmt))
709 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
710 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
711 TREE_TYPE (gimple_assign_rhs1 (ass))))
712 return true;
714 return false;
717 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
718 operand of that operation. Otherwise, return NULL_TREE. */
720 static tree
721 get_unary_op (tree name, enum tree_code opcode)
723 gimple *stmt = SSA_NAME_DEF_STMT (name);
725 /* Look through nop conversions (sign changes). */
726 if (gimple_nop_conversion_p (stmt)
727 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
728 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
730 if (!is_gimple_assign (stmt))
731 return NULL_TREE;
733 if (gimple_assign_rhs_code (stmt) == opcode)
734 return gimple_assign_rhs1 (stmt);
735 return NULL_TREE;
738 /* Return true if OP1 and OP2 have the same value if cast to either type. */
740 static bool
741 ops_equal_values_p (tree op1, tree op2)
743 if (op1 == op2)
744 return true;
746 tree orig_op1 = op1;
747 if (TREE_CODE (op1) == SSA_NAME)
749 gimple *stmt = SSA_NAME_DEF_STMT (op1);
750 if (gimple_nop_conversion_p (stmt))
752 op1 = gimple_assign_rhs1 (stmt);
753 if (op1 == op2)
754 return true;
758 if (TREE_CODE (op2) == SSA_NAME)
760 gimple *stmt = SSA_NAME_DEF_STMT (op2);
761 if (gimple_nop_conversion_p (stmt))
763 op2 = gimple_assign_rhs1 (stmt);
764 if (op1 == op2
765 || orig_op1 == op2)
766 return true;
770 return false;
774 /* If CURR and LAST are a pair of ops that OPCODE allows us to
775 eliminate through equivalences, do so, remove them from OPS, and
776 return true. Otherwise, return false. */
778 static bool
779 eliminate_duplicate_pair (enum tree_code opcode,
780 vec<operand_entry *> *ops,
781 bool *all_done,
782 unsigned int i,
783 operand_entry *curr,
784 operand_entry *last)
787 /* If we have two of the same op, and the opcode is &, |, min, or max,
788 we can eliminate one of them.
789 If we have two of the same op, and the opcode is ^, we can
790 eliminate both of them. */
792 if (last && last->op == curr->op)
794 switch (opcode)
796 case MAX_EXPR:
797 case MIN_EXPR:
798 case BIT_IOR_EXPR:
799 case BIT_AND_EXPR:
800 if (dump_file && (dump_flags & TDF_DETAILS))
802 fprintf (dump_file, "Equivalence: ");
803 print_generic_expr (dump_file, curr->op);
804 fprintf (dump_file, " [&|minmax] ");
805 print_generic_expr (dump_file, last->op);
806 fprintf (dump_file, " -> ");
807 print_generic_stmt (dump_file, last->op);
810 ops->ordered_remove (i);
811 reassociate_stats.ops_eliminated ++;
813 return true;
815 case BIT_XOR_EXPR:
816 if (dump_file && (dump_flags & TDF_DETAILS))
818 fprintf (dump_file, "Equivalence: ");
819 print_generic_expr (dump_file, curr->op);
820 fprintf (dump_file, " ^ ");
821 print_generic_expr (dump_file, last->op);
822 fprintf (dump_file, " -> nothing\n");
825 reassociate_stats.ops_eliminated += 2;
827 if (ops->length () == 2)
829 ops->truncate (0);
830 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
831 *all_done = true;
833 else
835 ops->ordered_remove (i-1);
836 ops->ordered_remove (i-1);
839 return true;
841 default:
842 break;
845 return false;
848 static vec<tree> plus_negates;
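/* Editorial illustration, not part of the pass: the algebraic identities
   behind eliminate_duplicate_pair, eliminate_plus_minus_pair and
   eliminate_not_pairs, written as C expressions on unsigned (wrapping)
   integers so the identities hold without overflow caveats.  A
   hypothetical example, guarded with #if 0 so it does not affect the
   build.  */
#if 0
static int
elimination_identities_sketch (unsigned a, unsigned b)
{
  /* Every comparison below evaluates to 1 for any A and B.  */
  return ((a & a) == a)           /* duplicate pair under &, |, min, max  */
         && ((a ^ a) == 0)        /* duplicate pair under ^ cancels       */
         && (a + (0u - a) == 0)   /* a + -a -> 0                          */
         && (a + ~a == ~0u)       /* a + ~a -> -1                         */
         && ((b & ~b) == 0)       /* b & ~b -> 0                          */
         && ((b | ~b) == ~0u);    /* b | ~b -> -1                         */
}
#endif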
850 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
851 expression, look in OPS for a corresponding positive operation to cancel
852 it out. If we find one, remove the other from OPS, replace
853 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
854 return false. */
856 static bool
857 eliminate_plus_minus_pair (enum tree_code opcode,
858 vec<operand_entry *> *ops,
859 unsigned int currindex,
860 operand_entry *curr)
862 tree negateop;
863 tree notop;
864 unsigned int i;
865 operand_entry *oe;
867 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
868 return false;
870 negateop = get_unary_op (curr->op, NEGATE_EXPR);
871 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
872 if (negateop == NULL_TREE && notop == NULL_TREE)
873 return false;
875 /* Any non-negated version will have a rank that is one less than
876 the current rank. So once we hit those ranks, if we don't find
877 one, we can stop. */
879 for (i = currindex + 1;
880 ops->iterate (i, &oe)
881 && oe->rank >= curr->rank - 1 ;
882 i++)
884 if (negateop
885 && ops_equal_values_p (oe->op, negateop))
887 if (dump_file && (dump_flags & TDF_DETAILS))
889 fprintf (dump_file, "Equivalence: ");
890 print_generic_expr (dump_file, negateop);
891 fprintf (dump_file, " + -");
892 print_generic_expr (dump_file, oe->op);
893 fprintf (dump_file, " -> 0\n");
896 ops->ordered_remove (i);
897 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
898 ops->ordered_remove (currindex);
899 reassociate_stats.ops_eliminated ++;
901 return true;
903 else if (notop
904 && ops_equal_values_p (oe->op, notop))
906 tree op_type = TREE_TYPE (oe->op);
908 if (dump_file && (dump_flags & TDF_DETAILS))
910 fprintf (dump_file, "Equivalence: ");
911 print_generic_expr (dump_file, notop);
912 fprintf (dump_file, " + ~");
913 print_generic_expr (dump_file, oe->op);
914 fprintf (dump_file, " -> -1\n");
917 ops->ordered_remove (i);
918 add_to_ops_vec (ops, build_all_ones_cst (op_type));
919 ops->ordered_remove (currindex);
920 reassociate_stats.ops_eliminated ++;
922 return true;
926 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
927 save it for later inspection in repropagate_negates(). */
928 if (negateop != NULL_TREE
929 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
930 plus_negates.safe_push (curr->op);
932 return false;
935 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
936 bitwise not expression, look in OPS for a corresponding operand to
937 cancel it out. If we find one, remove the other from OPS, replace
938 OPS[CURRINDEX] with 0 (for AND) or -1 (for IOR), and return true.
939 Otherwise, return false. */
941 static bool
942 eliminate_not_pairs (enum tree_code opcode,
943 vec<operand_entry *> *ops,
944 unsigned int currindex,
945 operand_entry *curr)
947 tree notop;
948 unsigned int i;
949 operand_entry *oe;
951 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
952 || TREE_CODE (curr->op) != SSA_NAME)
953 return false;
955 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
956 if (notop == NULL_TREE)
957 return false;
959 /* Any non-not version will have a rank that is one less than
960 the current rank. So once we hit those ranks, if we don't find
961 one, we can stop. */
963 for (i = currindex + 1;
964 ops->iterate (i, &oe)
965 && oe->rank >= curr->rank - 1;
966 i++)
968 if (oe->op == notop)
970 if (dump_file && (dump_flags & TDF_DETAILS))
972 fprintf (dump_file, "Equivalence: ");
973 print_generic_expr (dump_file, notop);
974 if (opcode == BIT_AND_EXPR)
975 fprintf (dump_file, " & ~");
976 else if (opcode == BIT_IOR_EXPR)
977 fprintf (dump_file, " | ~");
978 print_generic_expr (dump_file, oe->op);
979 if (opcode == BIT_AND_EXPR)
980 fprintf (dump_file, " -> 0\n");
981 else if (opcode == BIT_IOR_EXPR)
982 fprintf (dump_file, " -> -1\n");
985 if (opcode == BIT_AND_EXPR)
986 oe->op = build_zero_cst (TREE_TYPE (oe->op));
987 else if (opcode == BIT_IOR_EXPR)
988 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
990 reassociate_stats.ops_eliminated += ops->length () - 1;
991 ops->truncate (0);
992 ops->quick_push (oe);
993 return true;
997 return false;
1000 /* Use constant value that may be present in OPS to try to eliminate
1001 operands. Note that this function is only really used when we've
1002 eliminated ops for other reasons, or merged constants. Across
1003 single statements, fold already does all of this, plus more. There
1004 is little point in duplicating logic, so I've only included the
1005 identities that I could ever construct testcases to trigger. */
1007 static void
1008 eliminate_using_constants (enum tree_code opcode,
1009 vec<operand_entry *> *ops)
1011 operand_entry *oelast = ops->last ();
1012 tree type = TREE_TYPE (oelast->op);
1014 if (oelast->rank == 0
1015 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
1017 switch (opcode)
1019 case BIT_AND_EXPR:
1020 if (integer_zerop (oelast->op))
1022 if (ops->length () != 1)
1024 if (dump_file && (dump_flags & TDF_DETAILS))
1025 fprintf (dump_file, "Found & 0, removing all other ops\n");
1027 reassociate_stats.ops_eliminated += ops->length () - 1;
1029 ops->truncate (0);
1030 ops->quick_push (oelast);
1031 return;
1034 else if (integer_all_onesp (oelast->op))
1036 if (ops->length () != 1)
1038 if (dump_file && (dump_flags & TDF_DETAILS))
1039 fprintf (dump_file, "Found & -1, removing\n");
1040 ops->pop ();
1041 reassociate_stats.ops_eliminated++;
1044 break;
1045 case BIT_IOR_EXPR:
1046 if (integer_all_onesp (oelast->op))
1048 if (ops->length () != 1)
1050 if (dump_file && (dump_flags & TDF_DETAILS))
1051 fprintf (dump_file, "Found | -1, removing all other ops\n");
1053 reassociate_stats.ops_eliminated += ops->length () - 1;
1055 ops->truncate (0);
1056 ops->quick_push (oelast);
1057 return;
1060 else if (integer_zerop (oelast->op))
1062 if (ops->length () != 1)
1064 if (dump_file && (dump_flags & TDF_DETAILS))
1065 fprintf (dump_file, "Found | 0, removing\n");
1066 ops->pop ();
1067 reassociate_stats.ops_eliminated++;
1070 break;
1071 case MULT_EXPR:
1072 if (integer_zerop (oelast->op)
1073 || (FLOAT_TYPE_P (type)
1074 && !HONOR_NANS (type)
1075 && !HONOR_SIGNED_ZEROS (type)
1076 && real_zerop (oelast->op)))
1078 if (ops->length () != 1)
1080 if (dump_file && (dump_flags & TDF_DETAILS))
1081 fprintf (dump_file, "Found * 0, removing all other ops\n");
1083 reassociate_stats.ops_eliminated += ops->length () - 1;
1084 ops->truncate (0);
1085 ops->quick_push (oelast);
1086 return;
1089 else if (integer_onep (oelast->op)
1090 || (FLOAT_TYPE_P (type)
1091 && !HONOR_SNANS (type)
1092 && real_onep (oelast->op)))
1094 if (ops->length () != 1)
1096 if (dump_file && (dump_flags & TDF_DETAILS))
1097 fprintf (dump_file, "Found * 1, removing\n");
1098 ops->pop ();
1099 reassociate_stats.ops_eliminated++;
1100 return;
1103 break;
1104 case BIT_XOR_EXPR:
1105 case PLUS_EXPR:
1106 case MINUS_EXPR:
1107 if (integer_zerop (oelast->op)
1108 || (FLOAT_TYPE_P (type)
1109 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1110 && fold_real_zero_addition_p (type, 0, oelast->op,
1111 opcode == MINUS_EXPR)))
1113 if (ops->length () != 1)
1115 if (dump_file && (dump_flags & TDF_DETAILS))
1116 fprintf (dump_file, "Found [|^+] 0, removing\n");
1117 ops->pop ();
1118 reassociate_stats.ops_eliminated++;
1119 return;
1122 break;
1123 default:
1124 break;
1130 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1131 bool, bool);
1133 /* Structure for tracking and counting operands. */
1134 struct oecount {
1135 unsigned int cnt;
1136 unsigned int id;
1137 enum tree_code oecode;
1138 tree op;
1142 /* The heap for the oecount hashtable and the sorted list of operands. */
1143 static vec<oecount> cvec;
1146 /* Oecount hashtable helpers. */
1148 struct oecount_hasher : int_hash <int, 0, 1>
1150 static inline hashval_t hash (int);
1151 static inline bool equal (int, int);
1154 /* Hash function for oecount. */
1156 inline hashval_t
1157 oecount_hasher::hash (int p)
1159 const oecount *c = &cvec[p - 42];
1160 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1163 /* Comparison function for oecount. */
1165 inline bool
1166 oecount_hasher::equal (int p1, int p2)
1168 const oecount *c1 = &cvec[p1 - 42];
1169 const oecount *c2 = &cvec[p2 - 42];
1170 return c1->oecode == c2->oecode && c1->op == c2->op;
1173 /* Comparison function for qsort sorting oecount elements by count. */
1175 static int
1176 oecount_cmp (const void *p1, const void *p2)
1178 const oecount *c1 = (const oecount *)p1;
1179 const oecount *c2 = (const oecount *)p2;
1180 if (c1->cnt != c2->cnt)
1181 return c1->cnt > c2->cnt ? 1 : -1;
1182 else
1183 /* If counts are identical, use unique IDs to stabilize qsort. */
1184 return c1->id > c2->id ? 1 : -1;
1187 /* Return TRUE iff STMT represents a builtin call that raises OP
1188 to some exponent. */
1190 static bool
1191 stmt_is_power_of_op (gimple *stmt, tree op)
1193 if (!is_gimple_call (stmt))
1194 return false;
1196 switch (gimple_call_combined_fn (stmt))
1198 CASE_CFN_POW:
1199 CASE_CFN_POWI:
1200 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1202 default:
1203 return false;
1207 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1208 in place and return the result. Assumes that stmt_is_power_of_op
1209 was previously called for STMT and returned TRUE. */
1211 static HOST_WIDE_INT
1212 decrement_power (gimple *stmt)
1214 REAL_VALUE_TYPE c, cint;
1215 HOST_WIDE_INT power;
1216 tree arg1;
1218 switch (gimple_call_combined_fn (stmt))
1220 CASE_CFN_POW:
1221 arg1 = gimple_call_arg (stmt, 1);
1222 c = TREE_REAL_CST (arg1);
1223 power = real_to_integer (&c) - 1;
1224 real_from_integer (&cint, VOIDmode, power, SIGNED);
1225 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1226 return power;
1228 CASE_CFN_POWI:
1229 arg1 = gimple_call_arg (stmt, 1);
1230 power = TREE_INT_CST_LOW (arg1) - 1;
1231 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1232 return power;
1234 default:
1235 gcc_unreachable ();
1239 /* Replace the SSA name defined by STMT with a new SSA name, replace
1240 all uses of the old name with the new one, and return the new SSA name. */
1242 static tree
1243 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1245 gimple *use_stmt;
1246 use_operand_p use;
1247 imm_use_iterator iter;
1248 tree new_lhs, new_debug_lhs = NULL_TREE;
1249 tree lhs = gimple_get_lhs (stmt);
1251 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1252 gimple_set_lhs (stmt, new_lhs);
1254 /* Also need to update GIMPLE_DEBUGs. */
1255 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1257 tree repl = new_lhs;
1258 if (is_gimple_debug (use_stmt))
1260 if (new_debug_lhs == NULL_TREE)
1262 new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1263 gdebug *def_temp
1264 = gimple_build_debug_bind (new_debug_lhs,
1265 build2 (opcode, TREE_TYPE (lhs),
1266 new_lhs, op),
1267 stmt);
1268 DECL_ARTIFICIAL (new_debug_lhs) = 1;
1269 TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1270 SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
1271 gimple_set_uid (def_temp, gimple_uid (stmt));
1272 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1273 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1275 repl = new_debug_lhs;
1277 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1278 SET_USE (use, repl);
1279 update_stmt (use_stmt);
1281 return new_lhs;
1284 /* Replace all SSA names defined in STMTS_TO_FIX, and their uses,
1285 with new SSA names. Also do this for the stmt that defines *DEF
1286 if *DEF is not OP. */
1288 static void
1289 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1290 vec<gimple *> &stmts_to_fix)
1292 unsigned i;
1293 gimple *stmt;
1295 if (*def != op
1296 && TREE_CODE (*def) == SSA_NAME
1297 && (stmt = SSA_NAME_DEF_STMT (*def))
1298 && gimple_code (stmt) != GIMPLE_NOP)
1299 *def = make_new_ssa_for_def (stmt, opcode, op);
1301 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1302 make_new_ssa_for_def (stmt, opcode, op);
1305 /* Find the single immediate use of STMT's LHS, and replace it
1306 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1307 replace *DEF with OP as well. */
1309 static void
1310 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1312 tree lhs;
1313 gimple *use_stmt;
1314 use_operand_p use;
1315 gimple_stmt_iterator gsi;
1317 if (is_gimple_call (stmt))
1318 lhs = gimple_call_lhs (stmt);
1319 else
1320 lhs = gimple_assign_lhs (stmt);
1322 gcc_assert (has_single_use (lhs));
1323 single_imm_use (lhs, &use, &use_stmt);
1324 if (lhs == *def)
1325 *def = op;
1326 SET_USE (use, op);
1327 if (TREE_CODE (op) != SSA_NAME)
1328 update_stmt (use_stmt);
1329 gsi = gsi_for_stmt (stmt);
1330 unlink_stmt_vdef (stmt);
1331 reassoc_remove_stmt (&gsi);
1332 release_defs (stmt);
1335 /* Walks the linear chain with result *DEF searching for an operation
1336 with operand OP and code OPCODE removing that from the chain. *DEF
1337 is updated if there is only one operand but no operation left. */
1339 static void
1340 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1342 tree orig_def = *def;
1343 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1344 /* PR72835 - Record the stmt chain that has to be updated such that
1345 we don't use the same LHS when the values computed are different. */
1346 auto_vec<gimple *, 64> stmts_to_fix;
1350 tree name;
1352 if (opcode == MULT_EXPR)
1354 if (stmt_is_power_of_op (stmt, op))
1356 if (decrement_power (stmt) == 1)
1358 if (stmts_to_fix.length () > 0)
1359 stmts_to_fix.pop ();
1360 propagate_op_to_single_use (op, stmt, def);
1362 break;
1364 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1366 if (gimple_assign_rhs1 (stmt) == op)
1368 tree cst = build_minus_one_cst (TREE_TYPE (op));
1369 if (stmts_to_fix.length () > 0)
1370 stmts_to_fix.pop ();
1371 propagate_op_to_single_use (cst, stmt, def);
1372 break;
1374 else if (integer_minus_onep (op)
1375 || real_minus_onep (op))
1377 gimple_assign_set_rhs_code
1378 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1379 break;
1384 name = gimple_assign_rhs1 (stmt);
1386 /* If this is the operation we are looking for and one of the operands
1387 is ours, simply propagate the other operand into the stmt's
1388 single use. */
1389 if (gimple_assign_rhs_code (stmt) == opcode
1390 && (name == op
1391 || gimple_assign_rhs2 (stmt) == op))
1393 if (name == op)
1394 name = gimple_assign_rhs2 (stmt);
1395 if (stmts_to_fix.length () > 0)
1396 stmts_to_fix.pop ();
1397 propagate_op_to_single_use (name, stmt, def);
1398 break;
1401 /* We might have a multiply of two __builtin_pow* calls, and
1402 the operand might be hiding in the rightmost one. Likewise
1403 this can happen for a negate. */
1404 if (opcode == MULT_EXPR
1405 && gimple_assign_rhs_code (stmt) == opcode
1406 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1407 && has_single_use (gimple_assign_rhs2 (stmt)))
1409 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1410 if (stmt_is_power_of_op (stmt2, op))
1412 if (decrement_power (stmt2) == 1)
1413 propagate_op_to_single_use (op, stmt2, def);
1414 else
1415 stmts_to_fix.safe_push (stmt2);
1416 break;
1418 else if (is_gimple_assign (stmt2)
1419 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1421 if (gimple_assign_rhs1 (stmt2) == op)
1423 tree cst = build_minus_one_cst (TREE_TYPE (op));
1424 propagate_op_to_single_use (cst, stmt2, def);
1425 break;
1427 else if (integer_minus_onep (op)
1428 || real_minus_onep (op))
1430 stmts_to_fix.safe_push (stmt2);
1431 gimple_assign_set_rhs_code
1432 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1433 break;
1438 /* Continue walking the chain. */
1439 gcc_assert (name != op
1440 && TREE_CODE (name) == SSA_NAME);
1441 stmt = SSA_NAME_DEF_STMT (name);
1442 stmts_to_fix.safe_push (stmt);
1444 while (1);
1446 if (stmts_to_fix.length () > 0 || *def == orig_def)
1447 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1450 /* Returns true if statement S1 dominates statement S2. Like
1451 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1453 static bool
1454 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1456 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1458 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1459 SSA_NAME. Assume it lives at the beginning of function and
1460 thus dominates everything. */
1461 if (!bb1 || s1 == s2)
1462 return true;
1464 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1465 if (!bb2)
1466 return false;
1468 if (bb1 == bb2)
1470 /* PHIs in the same basic block are assumed to be
1471 executed all in parallel; if only one stmt is a PHI,
1472 it dominates the other stmt in the same basic block. */
1473 if (gimple_code (s1) == GIMPLE_PHI)
1474 return true;
1476 if (gimple_code (s2) == GIMPLE_PHI)
1477 return false;
1479 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1481 if (gimple_uid (s1) < gimple_uid (s2))
1482 return true;
1484 if (gimple_uid (s1) > gimple_uid (s2))
1485 return false;
1487 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1488 unsigned int uid = gimple_uid (s1);
1489 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1491 gimple *s = gsi_stmt (gsi);
1492 if (gimple_uid (s) != uid)
1493 break;
1494 if (s == s2)
1495 return true;
1498 return false;
1501 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1504 /* Insert STMT after INSERT_POINT. */
1506 static void
1507 insert_stmt_after (gimple *stmt, gimple *insert_point)
1509 gimple_stmt_iterator gsi;
1510 basic_block bb;
1512 if (gimple_code (insert_point) == GIMPLE_PHI)
1513 bb = gimple_bb (insert_point);
1514 else if (!stmt_ends_bb_p (insert_point))
1516 gsi = gsi_for_stmt (insert_point);
1517 gimple_set_uid (stmt, gimple_uid (insert_point));
1518 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1519 return;
1521 else if (gimple_code (insert_point) == GIMPLE_ASM)
1522 /* We have no idea where to insert - it depends on where the
1523 uses will be placed. */
1524 gcc_unreachable ();
1525 else
1526 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1527 thus if it must end a basic block, it should be a call that can
1528 throw, or some assignment that can throw. If it throws, the LHS
1529 of it will not be initialized though, so only valid places using
1530 the SSA_NAME should be dominated by the fallthru edge. */
1531 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1532 gsi = gsi_after_labels (bb);
1533 if (gsi_end_p (gsi))
1535 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1536 gimple_set_uid (stmt,
1537 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1539 else
1540 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1541 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1544 /* Builds one statement performing OP1 OPCODE OP2, creating a new SSA
1545 name of type TYPE for the result. Places the statement after the
1546 definition of either OP1 or OP2. Returns the new statement. */
1548 static gimple *
1549 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1551 gimple *op1def = NULL, *op2def = NULL;
1552 gimple_stmt_iterator gsi;
1553 tree op;
1554 gassign *sum;
1556 /* Create the addition statement. */
1557 op = make_ssa_name (type);
1558 sum = gimple_build_assign (op, opcode, op1, op2);
1560 /* Find an insertion place and insert. */
1561 if (TREE_CODE (op1) == SSA_NAME)
1562 op1def = SSA_NAME_DEF_STMT (op1);
1563 if (TREE_CODE (op2) == SSA_NAME)
1564 op2def = SSA_NAME_DEF_STMT (op2);
1565 if ((!op1def || gimple_nop_p (op1def))
1566 && (!op2def || gimple_nop_p (op2def)))
1568 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1569 if (gsi_end_p (gsi))
1571 gimple_stmt_iterator gsi2
1572 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1573 gimple_set_uid (sum,
1574 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1576 else
1577 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1578 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1580 else
1582 gimple *insert_point;
1583 if ((!op1def || gimple_nop_p (op1def))
1584 || (op2def && !gimple_nop_p (op2def)
1585 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1586 insert_point = op2def;
1587 else
1588 insert_point = op1def;
1589 insert_stmt_after (sum, insert_point);
1591 update_stmt (sum);
1593 return sum;
1596 /* Perform un-distribution of divisions and multiplications.
1597 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1598 to (A + B) / X for real X.
1600 The algorithm is organized as follows.
1602 - First we walk the addition chain *OPS looking for summands that
1603 are defined by a multiplication or a real division. This results
1604 in the candidates bitmap with relevant indices into *OPS.
1606 - Second we build the chains of multiplications or divisions for
1607 these candidates, counting the number of occurrences of (operand, code)
1608 pairs in all of the candidates chains.
1610 - Third we sort the (operand, code) pairs by number of occurrence and
1611 process them starting with the pair with the most uses.
1613 * For each such pair we walk the candidates again to build a
1614 second candidate bitmap noting all multiplication/division chains
1615 that have at least one occurrence of (operand, code).
1617 * We build an alternate addition chain only covering these
1618 candidates with one (operand, code) operation removed from their
1619 multiplication/division chain.
1621 * The first candidate gets replaced by the alternate addition chain
1622 multiplied/divided by the operand.
1624 * All candidate chains get disabled for further processing and
1625 processing of (operand, code) pairs continues.
1627 The alternate addition chains built are re-processed by the main
1628 reassociation algorithm which allows optimizing a * x * y + b * y * x
1629 to (a + b) * x * y in one invocation of the reassociation pass. */
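/* Editorial illustration, not part of the pass: a worked instance of the
   un-distribution described above.  In a * x + b * x + c * x the
   (operand, code) pair (x, MULT_EXPR) occurs three times, so the three
   summands are rewritten into one alternate chain a + b + c which is then
   multiplied by x once; the division case works the same way.  The
   function is a hypothetical example and relies on reassociation being
   allowed for the type (e.g. -fassociative-math for floats); guarded with
   #if 0 so it does not affect the build.  */
#if 0
static double
undistribute_sketch (double a, double b, double c, double x)
{
  double m = (a + b + c) * x;   /* from a * x + b * x + c * x  */
  double q = (a + b) / x;       /* from a / x + b / x          */
  return m + q;
}
#endif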
1631 static bool
1632 undistribute_ops_list (enum tree_code opcode,
1633 vec<operand_entry *> *ops, class loop *loop)
1635 unsigned int length = ops->length ();
1636 operand_entry *oe1;
1637 unsigned i, j;
1638 unsigned nr_candidates, nr_candidates2;
1639 sbitmap_iterator sbi0;
1640 vec<operand_entry *> *subops;
1641 bool changed = false;
1642 unsigned int next_oecount_id = 0;
1644 if (length <= 1
1645 || opcode != PLUS_EXPR)
1646 return false;
1648 /* Build a list of candidates to process. */
1649 auto_sbitmap candidates (length);
1650 bitmap_clear (candidates);
1651 nr_candidates = 0;
1652 FOR_EACH_VEC_ELT (*ops, i, oe1)
1654 enum tree_code dcode;
1655 gimple *oe1def;
1657 if (TREE_CODE (oe1->op) != SSA_NAME)
1658 continue;
1659 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1660 if (!is_gimple_assign (oe1def))
1661 continue;
1662 dcode = gimple_assign_rhs_code (oe1def);
1663 if ((dcode != MULT_EXPR
1664 && dcode != RDIV_EXPR)
1665 || !is_reassociable_op (oe1def, dcode, loop))
1666 continue;
1668 bitmap_set_bit (candidates, i);
1669 nr_candidates++;
1672 if (nr_candidates < 2)
1673 return false;
1675 if (dump_file && (dump_flags & TDF_DETAILS))
1677 fprintf (dump_file, "searching for un-distribute opportunities ");
1678 print_generic_expr (dump_file,
1679 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1680 fprintf (dump_file, " %d\n", nr_candidates);
1683 /* Build linearized sub-operand lists and the counting table. */
1684 cvec.create (0);
1686 hash_table<oecount_hasher> ctable (15);
1688 /* ??? Macro arguments cannot have multi-argument template types in
1689 them. This typedef is needed to work around that limitation. */
1690 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1691 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1692 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1694 gimple *oedef;
1695 enum tree_code oecode;
1696 unsigned j;
1698 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1699 oecode = gimple_assign_rhs_code (oedef);
1700 linearize_expr_tree (&subops[i], oedef,
1701 associative_tree_code (oecode), false);
1703 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1705 oecount c;
1706 int *slot;
1707 int idx;
1708 c.oecode = oecode;
1709 c.cnt = 1;
1710 c.id = next_oecount_id++;
1711 c.op = oe1->op;
1712 cvec.safe_push (c);
1713 idx = cvec.length () + 41;
1714 slot = ctable.find_slot (idx, INSERT);
1715 if (!*slot)
1717 *slot = idx;
1719 else
1721 cvec.pop ();
1722 cvec[*slot - 42].cnt++;
1727 /* Sort the counting table. */
1728 cvec.qsort (oecount_cmp);
1730 if (dump_file && (dump_flags & TDF_DETAILS))
1732 oecount *c;
1733 fprintf (dump_file, "Candidates:\n");
1734 FOR_EACH_VEC_ELT (cvec, j, c)
1736 fprintf (dump_file, " %u %s: ", c->cnt,
1737 c->oecode == MULT_EXPR
1738 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1739 print_generic_expr (dump_file, c->op);
1740 fprintf (dump_file, "\n");
1744 /* Process the (operand, code) pairs in order of most occurrence. */
1745 auto_sbitmap candidates2 (length);
1746 while (!cvec.is_empty ())
1748 oecount *c = &cvec.last ();
1749 if (c->cnt < 2)
1750 break;
1752 /* Now collect the operands in the outer chain that contain
1753 the common operand in their inner chain. */
1754 bitmap_clear (candidates2);
1755 nr_candidates2 = 0;
1756 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1758 gimple *oedef;
1759 enum tree_code oecode;
1760 unsigned j;
1761 tree op = (*ops)[i]->op;
1763 /* If we undistributed in this chain already this may be
1764 a constant. */
1765 if (TREE_CODE (op) != SSA_NAME)
1766 continue;
1768 oedef = SSA_NAME_DEF_STMT (op);
1769 oecode = gimple_assign_rhs_code (oedef);
1770 if (oecode != c->oecode)
1771 continue;
1773 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1775 if (oe1->op == c->op)
1777 bitmap_set_bit (candidates2, i);
1778 ++nr_candidates2;
1779 break;
1784 if (nr_candidates2 >= 2)
1786 operand_entry *oe1, *oe2;
1787 gimple *prod;
1788 int first = bitmap_first_set_bit (candidates2);
1790 /* Build the new addition chain. */
1791 oe1 = (*ops)[first];
1792 if (dump_file && (dump_flags & TDF_DETAILS))
1794 fprintf (dump_file, "Building (");
1795 print_generic_expr (dump_file, oe1->op);
1797 zero_one_operation (&oe1->op, c->oecode, c->op);
1798 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1800 gimple *sum;
1801 oe2 = (*ops)[i];
1802 if (dump_file && (dump_flags & TDF_DETAILS))
1804 fprintf (dump_file, " + ");
1805 print_generic_expr (dump_file, oe2->op);
1807 zero_one_operation (&oe2->op, c->oecode, c->op);
1808 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1809 oe1->op, oe2->op, opcode);
1810 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1811 oe2->rank = 0;
1812 oe1->op = gimple_get_lhs (sum);
1815 /* Apply the multiplication/division. */
1816 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1817 oe1->op, c->op, c->oecode);
1818 if (dump_file && (dump_flags & TDF_DETAILS))
1820 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1821 print_generic_expr (dump_file, c->op);
1822 fprintf (dump_file, "\n");
1825 /* Record it in the addition chain and disable further
1826 undistribution with this op. */
1827 oe1->op = gimple_assign_lhs (prod);
1828 oe1->rank = get_rank (oe1->op);
1829 subops[first].release ();
1831 changed = true;
1834 cvec.pop ();
1837 for (i = 0; i < ops->length (); ++i)
1838 subops[i].release ();
1839 free (subops);
1840 cvec.release ();
1842 return changed;
1845 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1846 first: element index for each relevant BIT_FIELD_REF.
1847 second: the index into the operand vector (*OPS) for each relevant BIT_FIELD_REF. */
1848 typedef std::pair<unsigned, unsigned> v_info_elem;
1849 struct v_info {
1850 tree vec_type;
1851 auto_vec<v_info_elem, 32> vec;
1853 typedef v_info *v_info_ptr;
1855 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1856 static int
1857 sort_by_mach_mode (const void *p_i, const void *p_j)
1859 const tree tr1 = *((const tree *) p_i);
1860 const tree tr2 = *((const tree *) p_j);
1861 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1862 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1863 if (mode1 > mode2)
1864 return 1;
1865 else if (mode1 < mode2)
1866 return -1;
1867 if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1868 return -1;
1869 else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1870 return 1;
1871 return 0;
1875 /* Clean up the hash map holding VECTOR information. */
1875 static void
1876 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1878 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1879 it != info_map.end (); ++it)
1881 v_info_ptr info = (*it).second;
1882 delete info;
1883 (*it).second = NULL;
1887 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1888 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1889 is transformed to
1890 Vs = (V1 + V2 + ... + Vn)
1891 Vs[0] + Vs[1] + ... + Vs[k]
1893 The basic steps are listed below:
1895 1) Check the addition chain *OPS, looking for summands that come from a
1896 BIT_FIELD_REF on a VECTOR type. Put the information into
1897 v_info_map for each qualifying summand, using the VECTOR SSA_NAME as key.
1899 2) For each key (VECTOR SSA_NAME), validate that all its BIT_FIELD_REFs are
1900 contiguous and cover the whole VECTOR perfectly, without any holes.
1901 Obtain one VECTOR list which contains the candidates to be transformed.
1903 3) Sort the VECTOR list by the machine mode of the VECTOR type; for each
1904 group of candidates with the same mode, build the addition statements
1905 for them and generate BIT_FIELD_REFs accordingly.
1907 TODO:
1908 The current implementation requires that whole VECTORs be fully
1909 covered; it could be extended to support partial coverage (adjacent
1910 elements that do not fill the whole vector), which may need some cost
1911 model to decide when it is worthwhile. */
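/* Editorial illustration, not part of the pass: the shape of the
   BIT_FIELD_REF un-distribution described above, written with a
   hypothetical 4-element vector type (the GCC vector_size extension and
   vector subscripting are assumed purely for illustration).  Summing every
   lane of V1 and V2 separately becomes one vector addition followed by a
   sum over the lanes of the result.  Guarded with #if 0 so it does not
   affect the build.  */
#if 0
typedef int v4si_sketch __attribute__ ((vector_size (16)));

static int
vector_undistribute_sketch (v4si_sketch v1, v4si_sketch v2)
{
  /* Before: v1[0] + v1[1] + v1[2] + v1[3] + v2[0] + v2[1] + v2[2] + v2[3]
     After:  vs = v1 + v2;  vs[0] + vs[1] + vs[2] + vs[3]  */
  v4si_sketch vs = v1 + v2;
  return vs[0] + vs[1] + vs[2] + vs[3];
}
#endif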
1913 static bool
1914 undistribute_bitref_for_vector (enum tree_code opcode,
1915 vec<operand_entry *> *ops, struct loop *loop)
1917 if (ops->length () <= 1)
1918 return false;
1920 if (opcode != PLUS_EXPR
1921 && opcode != MULT_EXPR
1922 && opcode != BIT_XOR_EXPR
1923 && opcode != BIT_IOR_EXPR
1924 && opcode != BIT_AND_EXPR)
1925 return false;
1927 hash_map<tree, v_info_ptr> v_info_map;
1928 operand_entry *oe1;
1929 unsigned i;
1931 /* Find the summands coming from VECTOR BIT_FIELD_REFs in the addition
1932 chain and put the information into the map. */
1933 FOR_EACH_VEC_ELT (*ops, i, oe1)
1935 enum tree_code dcode;
1936 gimple *oe1def;
1938 if (TREE_CODE (oe1->op) != SSA_NAME)
1939 continue;
1940 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1941 if (!is_gimple_assign (oe1def))
1942 continue;
1943 dcode = gimple_assign_rhs_code (oe1def);
1944 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1945 continue;
1947 tree rhs = gimple_assign_rhs1 (oe1def);
1948 tree vec = TREE_OPERAND (rhs, 0);
1949 tree vec_type = TREE_TYPE (vec);
1951 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1952 continue;
1954 /* Ignore it if target machine can't support this VECTOR type. */
1955 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1956 continue;
1958 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1959 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1960 continue;
1962 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1963 || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1964 continue;
1966 /* The type of BIT_FIELD_REF might not be equal to the element type of
1967 the vector. We want to use a vector type with element type the
1968 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1969 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1971 machine_mode simd_mode;
1972 unsigned HOST_WIDE_INT size, nunits;
1973 unsigned HOST_WIDE_INT elem_size
1974 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1975 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1976 continue;
1977 if (size <= elem_size || (size % elem_size) != 0)
1978 continue;
1979 nunits = size / elem_size;
1980 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1981 nunits).exists (&simd_mode))
1982 continue;
1983 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1985 /* Ignore it if target machine can't support this VECTOR type. */
1986 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1987 continue;
1989 /* Only handle vector types with a constant number of elements; this
1990 constrains the BIT_FIELD_REF offset and size. */
1991 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1992 continue;
1994 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
1995 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
1996 continue;
1999 tree elem_type = TREE_TYPE (vec_type);
2000 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
2001 if (maybe_ne (bit_field_size (rhs), elem_size))
2002 continue;
2004 unsigned idx;
2005 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
2006 continue;
2008 /* Ignore it if target machine can't support this type of VECTOR
2009 operation. */
2010 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
2011 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
2012 continue;
2014 bool existed;
2015 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
2016 if (!existed)
2018 info = new v_info;
2019 info->vec_type = vec_type;
2021 else if (!types_compatible_p (vec_type, info->vec_type))
2022 continue;
2023 info->vec.safe_push (std::make_pair (idx, i));
2026 /* We need at least two VECTORs to combine. */
2027 if (v_info_map.elements () <= 1)
2029 cleanup_vinfo_map (v_info_map);
2030 return false;
2033 /* Verify all VECTOR candidates by checking two conditions:
2034 1) sorted offsets are adjacent, no holes.
2035 2) can fill the whole VECTOR perfectly.
2036 And add the valid candidates to a vector for further handling. */
2037 auto_vec<tree> valid_vecs (v_info_map.elements ());
2038 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
2039 it != v_info_map.end (); ++it)
2041 tree cand_vec = (*it).first;
2042 v_info_ptr cand_info = (*it).second;
2043 unsigned int num_elems
2044 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
2045 if (cand_info->vec.length () != num_elems)
2046 continue;
2047 sbitmap holes = sbitmap_alloc (num_elems);
2048 bitmap_ones (holes);
2049 bool valid = true;
2050 v_info_elem *curr;
2051 FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
2053 if (!bitmap_bit_p (holes, curr->first))
2055 valid = false;
2056 break;
2058 else
2059 bitmap_clear_bit (holes, curr->first);
2061 if (valid && bitmap_empty_p (holes))
2062 valid_vecs.quick_push (cand_vec);
2063 sbitmap_free (holes);
2066 /* We need at least two VECTORs to combine. */
2067 if (valid_vecs.length () <= 1)
2069 cleanup_vinfo_map (v_info_map);
2070 return false;
2073 valid_vecs.qsort (sort_by_mach_mode);
2074 /* Go through all candidates in machine mode order; combine the candidates
2075 that share a mode and skip modes with only a single candidate. */
2076 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2078 tree tvec = valid_vecs[i];
2079 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2081 /* Skip modes with only a single candidate. */
2082 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2083 continue;
2085 unsigned int idx, j;
2086 gimple *sum = NULL;
2087 tree sum_vec = tvec;
2088 v_info_ptr info_ptr = *(v_info_map.get (tvec));
2089 v_info_elem *elem;
2090 tree vec_type = info_ptr->vec_type;
2092 /* Build the sum for all candidates with the same mode. */
2095 sum = build_and_add_sum (vec_type, sum_vec,
2096 valid_vecs[i + 1], opcode);
2097 if (!useless_type_conversion_p (vec_type,
2098 TREE_TYPE (valid_vecs[i + 1])))
2100 /* Update the operands only after build_and_add_sum,
2101 so that we don't have to repeat the placement algorithm
2102 of build_and_add_sum. */
2103 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2104 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2105 valid_vecs[i + 1]);
2106 tree lhs = make_ssa_name (vec_type);
2107 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2108 gimple_set_uid (g, gimple_uid (sum));
2109 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2110 gimple_assign_set_rhs2 (sum, lhs);
2111 if (sum_vec == tvec)
2113 vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2114 lhs = make_ssa_name (vec_type);
2115 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2116 gimple_set_uid (g, gimple_uid (sum));
2117 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2118 gimple_assign_set_rhs1 (sum, lhs);
2120 update_stmt (sum);
2122 sum_vec = gimple_get_lhs (sum);
2123 info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2124 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2125 /* Update the ops related to the current candidate VECTOR. */
2126 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2128 idx = elem->second;
2129 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2130 /* Mark the def as visited so the op definition will get DCEd later. */
2131 gimple_set_visited (def, true);
2132 if (opcode == PLUS_EXPR
2133 || opcode == BIT_XOR_EXPR
2134 || opcode == BIT_IOR_EXPR)
2135 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2136 else if (opcode == MULT_EXPR)
2137 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2138 else
2140 gcc_assert (opcode == BIT_AND_EXPR);
2141 (*ops)[idx]->op
2142 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2144 (*ops)[idx]->rank = 0;
2146 if (dump_file && (dump_flags & TDF_DETAILS))
2148 fprintf (dump_file, "Generating addition -> ");
2149 print_gimple_stmt (dump_file, sum, 0);
2151 i++;
2153 while ((i < valid_vecs.length () - 1)
2154 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2156 /* Referring to the first valid VECTOR with this mode, generate the
2157 BIT_FIELD_REF statements accordingly. */
2158 info_ptr = *(v_info_map.get (tvec));
2159 gcc_assert (sum);
2160 tree elem_type = TREE_TYPE (vec_type);
2161 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2163 idx = elem->second;
2164 tree dst = make_ssa_name (elem_type);
2165 tree pos = bitsize_int (elem->first
2166 * tree_to_uhwi (TYPE_SIZE (elem_type)));
2167 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2168 TYPE_SIZE (elem_type), pos);
2169 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2170 insert_stmt_after (gs, sum);
2171 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2172 /* Mark the def as visited so the op definition will get DCEd later. */
2173 gimple_set_visited (def, true);
2174 (*ops)[idx]->op = gimple_assign_lhs (gs);
2175 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2176 if (dump_file && (dump_flags & TDF_DETAILS))
2178 fprintf (dump_file, "Generating bit_field_ref -> ");
2179 print_gimple_stmt (dump_file, gs, 0);
2184 if (dump_file && (dump_flags & TDF_DETAILS))
2185 fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2187 cleanup_vinfo_map (v_info_map);
2189 return true;
2192 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2193 expression, examine the other OPS to see if any of them are comparisons
2194 of the same values, which we may be able to combine or eliminate.
2195 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
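/* An additional illustration (added for exposition, not in the original
   comment): for BIT_AND_EXPR the analogous folding can rewrite
   (a <= b) & (a >= b) as (a == b). */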
2197 static bool
2198 eliminate_redundant_comparison (enum tree_code opcode,
2199 vec<operand_entry *> *ops,
2200 unsigned int currindex,
2201 operand_entry *curr)
2203 tree op1, op2;
2204 enum tree_code lcode, rcode;
2205 gimple *def1, *def2;
2206 int i;
2207 operand_entry *oe;
2209 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2210 return false;
2212 /* Check that CURR is a comparison. */
2213 if (TREE_CODE (curr->op) != SSA_NAME)
2214 return false;
2215 def1 = SSA_NAME_DEF_STMT (curr->op);
2216 if (!is_gimple_assign (def1))
2217 return false;
2218 lcode = gimple_assign_rhs_code (def1);
2219 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2220 return false;
2221 op1 = gimple_assign_rhs1 (def1);
2222 op2 = gimple_assign_rhs2 (def1);
2224 /* Now look for a similar comparison in the remaining OPS. */
2225 for (i = currindex + 1; ops->iterate (i, &oe); i++)
2227 tree t;
2229 if (TREE_CODE (oe->op) != SSA_NAME)
2230 continue;
2231 def2 = SSA_NAME_DEF_STMT (oe->op);
2232 if (!is_gimple_assign (def2))
2233 continue;
2234 rcode = gimple_assign_rhs_code (def2);
2235 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2236 continue;
2238 /* If we got here, we have a match. See if we can combine the
2239 two comparisons. */
2240 tree type = TREE_TYPE (gimple_assign_lhs (def1));
2241 if (opcode == BIT_IOR_EXPR)
2242 t = maybe_fold_or_comparisons (type,
2243 lcode, op1, op2,
2244 rcode, gimple_assign_rhs1 (def2),
2245 gimple_assign_rhs2 (def2));
2246 else
2247 t = maybe_fold_and_comparisons (type,
2248 lcode, op1, op2,
2249 rcode, gimple_assign_rhs1 (def2),
2250 gimple_assign_rhs2 (def2));
2251 if (!t)
2252 continue;
2254 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2255 always give us a boolean_type_node value back. If the original
2256 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2257 we need to convert. */
2258 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2259 t = fold_convert (TREE_TYPE (curr->op), t);
2261 if (TREE_CODE (t) != INTEGER_CST
2262 && !operand_equal_p (t, curr->op, 0))
2264 enum tree_code subcode;
2265 tree newop1, newop2;
2266 if (!COMPARISON_CLASS_P (t))
2267 continue;
2268 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2269 STRIP_USELESS_TYPE_CONVERSION (newop1);
2270 STRIP_USELESS_TYPE_CONVERSION (newop2);
2271 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2272 continue;
2275 if (dump_file && (dump_flags & TDF_DETAILS))
2277 fprintf (dump_file, "Equivalence: ");
2278 print_generic_expr (dump_file, curr->op);
2279 fprintf (dump_file, " %s ", op_symbol_code (opcode));
2280 print_generic_expr (dump_file, oe->op);
2281 fprintf (dump_file, " -> ");
2282 print_generic_expr (dump_file, t);
2283 fprintf (dump_file, "\n");
2286 /* Now we can delete oe, as it has been subsumed by the new combined
2287 expression t. */
2288 ops->ordered_remove (i);
2289 reassociate_stats.ops_eliminated ++;
2291 /* If t is the same as curr->op, we're done. Otherwise we must
2292 replace curr->op with t. A special case is when we get a constant
2293 back, in which case we add it to the end instead of in place of
2294 the current entry. */
2295 if (TREE_CODE (t) == INTEGER_CST)
2297 ops->ordered_remove (currindex);
2298 add_to_ops_vec (ops, t);
2300 else if (!operand_equal_p (t, curr->op, 0))
2302 gimple *sum;
2303 enum tree_code subcode;
2304 tree newop1;
2305 tree newop2;
2306 gcc_assert (COMPARISON_CLASS_P (t));
2307 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2308 STRIP_USELESS_TYPE_CONVERSION (newop1);
2309 STRIP_USELESS_TYPE_CONVERSION (newop2);
2310 gcc_checking_assert (is_gimple_val (newop1)
2311 && is_gimple_val (newop2));
2312 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2313 curr->op = gimple_get_lhs (sum);
2315 return true;
2318 return false;
2322 /* Transform repeated addition of same values into multiply with
2323 constant. */
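/* For illustration (a note added for exposition, not in the original
   comment): an operand list equivalent to x + x + x + y is rewritten as
   x * 3 + y; for floating-point operands this is only done when
   -funsafe-math-optimizations is in effect. */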
2324 static bool
2325 transform_add_to_multiply (vec<operand_entry *> *ops)
2327 operand_entry *oe;
2328 tree op = NULL_TREE;
2329 int j;
2330 int i, start = -1, end = 0, count = 0;
2331 auto_vec<std::pair <int, int> > indxs;
2332 bool changed = false;
2334 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2335 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2336 || !flag_unsafe_math_optimizations))
2337 return false;
2339 /* Look for repeated operands. */
2340 FOR_EACH_VEC_ELT (*ops, i, oe)
2342 if (start == -1)
2344 count = 1;
2345 op = oe->op;
2346 start = i;
2348 else if (operand_equal_p (oe->op, op, 0))
2350 count++;
2351 end = i;
2353 else
2355 if (count > 1)
2356 indxs.safe_push (std::make_pair (start, end));
2357 count = 1;
2358 op = oe->op;
2359 start = i;
2363 if (count > 1)
2364 indxs.safe_push (std::make_pair (start, end));
2366 for (j = indxs.length () - 1; j >= 0; --j)
2368 /* Convert repeated operand addition to multiplication. */
2369 start = indxs[j].first;
2370 end = indxs[j].second;
2371 op = (*ops)[start]->op;
2372 count = end - start + 1;
2373 for (i = end; i >= start; --i)
2374 ops->unordered_remove (i);
2375 tree tmp = make_ssa_name (TREE_TYPE (op));
2376 tree cst = build_int_cst (integer_type_node, count);
2377 gassign *mul_stmt
2378 = gimple_build_assign (tmp, MULT_EXPR,
2379 op, fold_convert (TREE_TYPE (op), cst));
2380 gimple_set_visited (mul_stmt, true);
2381 add_to_ops_vec (ops, tmp, mul_stmt);
2382 changed = true;
2385 return changed;
2389 /* Perform various identities and other optimizations on the list of
2390 operand entries, stored in OPS. The tree code for the binary
2391 operation between all the operands is OPCODE. */
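/* For illustration (a note added for exposition, not in the original
   comment): for a PLUS_EXPR chain such as a + 5 + 3 the two trailing
   constants are folded into a single 8, and the eliminations invoked
   below can drop identities such as x + (-x) -> 0 in PLUS_EXPR chains
   or b & b -> b in BIT_AND_EXPR chains. */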
2393 static void
2394 optimize_ops_list (enum tree_code opcode,
2395 vec<operand_entry *> *ops)
2397 unsigned int length = ops->length ();
2398 unsigned int i;
2399 operand_entry *oe;
2400 operand_entry *oelast = NULL;
2401 bool iterate = false;
2403 if (length == 1)
2404 return;
2406 oelast = ops->last ();
2408 /* If the last two are constants, pop the constants off, merge them
2409 and try the next two. */
2410 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2412 operand_entry *oelm1 = (*ops)[length - 2];
2414 if (oelm1->rank == 0
2415 && is_gimple_min_invariant (oelm1->op)
2416 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2417 TREE_TYPE (oelast->op)))
2419 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2420 oelm1->op, oelast->op);
2422 if (folded && is_gimple_min_invariant (folded))
2424 if (dump_file && (dump_flags & TDF_DETAILS))
2425 fprintf (dump_file, "Merging constants\n");
2427 ops->pop ();
2428 ops->pop ();
2430 add_to_ops_vec (ops, folded);
2431 reassociate_stats.constants_eliminated++;
2433 optimize_ops_list (opcode, ops);
2434 return;
2439 eliminate_using_constants (opcode, ops);
2440 oelast = NULL;
2442 for (i = 0; ops->iterate (i, &oe);)
2444 bool done = false;
2446 if (eliminate_not_pairs (opcode, ops, i, oe))
2447 return;
2448 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2449 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2450 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2452 if (done)
2453 return;
2454 iterate = true;
2455 oelast = NULL;
2456 continue;
2458 oelast = oe;
2459 i++;
2462 if (iterate)
2463 optimize_ops_list (opcode, ops);
2466 /* The following functions are subroutines to optimize_range_tests and allow
2467 it to try to change a logical combination of comparisons into a range
2468 test.
2470 For example, both
2471 X == 2 || X == 5 || X == 3 || X == 4
2472 and
2473 X >= 2 && X <= 5
2474 are converted to
2475 (unsigned) (X - 2) <= 3
2477 For more information see comments above fold_test_range in fold-const.c,
2478 this implementation is for GIMPLE. */
2482 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
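/* A note added for exposition: the printed form is EXP +[LOW, HIGH] when
   the entry's in_p is set (EXP is within the range) and EXP -[LOW, HIGH]
   when it is not (EXP is outside the range). */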
2484 void
2485 dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
2487 if (!skip_exp)
2488 print_generic_expr (file, r->exp);
2489 fprintf (file, " %c[", r->in_p ? '+' : '-');
2490 print_generic_expr (file, r->low);
2491 fputs (", ", file);
2492 print_generic_expr (file, r->high);
2493 fputc (']', file);
2496 /* Dump the range entry R to STDERR. */
2498 DEBUG_FUNCTION void
2499 debug_range_entry (struct range_entry *r)
2501 dump_range_entry (stderr, r, false);
2502 fputc ('\n', stderr);
2505 /* This is similar to make_range in fold-const.c, but on top of
2506 GIMPLE instead of trees. If EXP is non-NULL, it should be
2507 an SSA_NAME and the STMT argument is ignored; otherwise the STMT
2508 argument should be a GIMPLE_COND. */
2510 void
2511 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2513 int in_p;
2514 tree low, high;
2515 bool is_bool, strict_overflow_p;
2517 r->exp = NULL_TREE;
2518 r->in_p = false;
2519 r->strict_overflow_p = false;
2520 r->low = NULL_TREE;
2521 r->high = NULL_TREE;
2522 if (exp != NULL_TREE
2523 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2524 return;
2526 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2527 and see if we can refine the range. Some of the cases below may not
2528 happen, but it doesn't seem worth worrying about this. We "continue"
2529 the outer loop when we've changed something; otherwise we "break"
2530 the switch, which will "break" the while. */
2531 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2532 high = low;
2533 in_p = 0;
2534 strict_overflow_p = false;
2535 is_bool = false;
2536 if (exp == NULL_TREE)
2537 is_bool = true;
2538 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2540 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2541 is_bool = true;
2542 else
2543 return;
2545 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2546 is_bool = true;
2548 while (1)
2550 enum tree_code code;
2551 tree arg0, arg1, exp_type;
2552 tree nexp;
2553 location_t loc;
2555 if (exp != NULL_TREE)
2557 if (TREE_CODE (exp) != SSA_NAME
2558 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2559 break;
2561 stmt = SSA_NAME_DEF_STMT (exp);
2562 if (!is_gimple_assign (stmt))
2563 break;
2565 code = gimple_assign_rhs_code (stmt);
2566 arg0 = gimple_assign_rhs1 (stmt);
2567 arg1 = gimple_assign_rhs2 (stmt);
2568 exp_type = TREE_TYPE (exp);
2570 else
2572 code = gimple_cond_code (stmt);
2573 arg0 = gimple_cond_lhs (stmt);
2574 arg1 = gimple_cond_rhs (stmt);
2575 exp_type = boolean_type_node;
2578 if (TREE_CODE (arg0) != SSA_NAME
2579 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2580 break;
2581 loc = gimple_location (stmt);
2582 switch (code)
2584 case BIT_NOT_EXPR:
2585 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2586 /* Ensure the range is either +[-,0], +[0,0],
2587 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2588 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2589 or similar expression of unconditional true or
2590 false, it should not be negated. */
2591 && ((high && integer_zerop (high))
2592 || (low && integer_onep (low))))
2594 in_p = !in_p;
2595 exp = arg0;
2596 continue;
2598 break;
2599 case SSA_NAME:
2600 exp = arg0;
2601 continue;
2602 CASE_CONVERT:
2603 if (is_bool)
2605 if ((TYPE_PRECISION (exp_type) == 1
2606 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2607 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2608 return;
2610 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2612 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2613 is_bool = true;
2614 else
2615 return;
2617 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2618 is_bool = true;
2619 goto do_default;
2620 case EQ_EXPR:
2621 case NE_EXPR:
2622 case LT_EXPR:
2623 case LE_EXPR:
2624 case GE_EXPR:
2625 case GT_EXPR:
2626 is_bool = true;
2627 /* FALLTHRU */
2628 default:
2629 if (!is_bool)
2630 return;
2631 do_default:
2632 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2633 &low, &high, &in_p,
2634 &strict_overflow_p);
2635 if (nexp != NULL_TREE)
2637 exp = nexp;
2638 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2639 continue;
2641 break;
2643 break;
2645 if (is_bool)
2647 r->exp = exp;
2648 r->in_p = in_p;
2649 r->low = low;
2650 r->high = high;
2651 r->strict_overflow_p = strict_overflow_p;
2655 /* Comparison function for qsort. Sort entries
2656 without an SSA_NAME exp first, then entries with SSA_NAMEs sorted
2657 by increasing SSA_NAME_VERSION; for the same SSA_NAME, sort
2658 by increasing ->low, and if ->low is the same, by increasing
2659 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2660 means maximum. */
2662 static int
2663 range_entry_cmp (const void *a, const void *b)
2665 const struct range_entry *p = (const struct range_entry *) a;
2666 const struct range_entry *q = (const struct range_entry *) b;
2668 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2670 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2672 /* Group range_entries for the same SSA_NAME together. */
2673 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2674 return -1;
2675 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2676 return 1;
2677 /* If ->low is different, NULL low goes first, then by
2678 ascending low. */
2679 if (p->low != NULL_TREE)
2681 if (q->low != NULL_TREE)
2683 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2684 p->low, q->low);
2685 if (tem && integer_onep (tem))
2686 return -1;
2687 tem = fold_binary (GT_EXPR, boolean_type_node,
2688 p->low, q->low);
2689 if (tem && integer_onep (tem))
2690 return 1;
2692 else
2693 return 1;
2695 else if (q->low != NULL_TREE)
2696 return -1;
2697 /* If ->high is different, NULL high goes last, before that by
2698 ascending high. */
2699 if (p->high != NULL_TREE)
2701 if (q->high != NULL_TREE)
2703 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2704 p->high, q->high);
2705 if (tem && integer_onep (tem))
2706 return -1;
2707 tem = fold_binary (GT_EXPR, boolean_type_node,
2708 p->high, q->high);
2709 if (tem && integer_onep (tem))
2710 return 1;
2712 else
2713 return -1;
2715 else if (q->high != NULL_TREE)
2716 return 1;
2717 /* If both ranges are the same, sort below by ascending idx. */
2719 else
2720 return 1;
2722 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2723 return -1;
2725 if (p->idx < q->idx)
2726 return -1;
2727 else
2729 gcc_checking_assert (p->idx > q->idx);
2730 return 1;
2734 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2735 insert needed statements BEFORE or after GSI. */
2737 static tree
2738 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2740 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2741 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2742 if (TREE_CODE (ret) != SSA_NAME)
2744 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2745 if (before)
2746 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2747 else
2748 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2749 ret = gimple_assign_lhs (g);
2751 return ret;
2754 /* Helper routine of optimize_range_tests.
2755 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2756 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2757 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2758 is NULL, OTHERRANGEP must not be NULL, and then OTHERRANGEP points to
2759 an array of COUNT pointers to other ranges. Return
2760 true if the range merge has been successful.
2761 If OPCODE is ERROR_MARK, this is called from within
2762 maybe_optimize_range_tests and is performing inter-bb range optimization.
2763 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2764 oe->rank. */
2766 static bool
2767 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2768 struct range_entry **otherrangep,
2769 unsigned int count, enum tree_code opcode,
2770 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2771 bool in_p, tree low, tree high, bool strict_overflow_p)
2773 operand_entry *oe = (*ops)[range->idx];
2774 tree op = oe->op;
2775 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2776 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2777 location_t loc = gimple_location (stmt);
2778 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2779 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2780 in_p, low, high);
2781 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2782 gimple_stmt_iterator gsi;
2783 unsigned int i, uid;
2785 if (tem == NULL_TREE)
2786 return false;
2788 /* If op is default def SSA_NAME, there is no place to insert the
2789 new comparison. Give up, unless we can use OP itself as the
2790 range test. */
2791 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2793 if (op == range->exp
2794 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2795 || TREE_CODE (optype) == BOOLEAN_TYPE)
2796 && (op == tem
2797 || (TREE_CODE (tem) == EQ_EXPR
2798 && TREE_OPERAND (tem, 0) == op
2799 && integer_onep (TREE_OPERAND (tem, 1))))
2800 && opcode != BIT_IOR_EXPR
2801 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2803 stmt = NULL;
2804 tem = op;
2806 else
2807 return false;
2810 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2811 warning_at (loc, OPT_Wstrict_overflow,
2812 "assuming signed overflow does not occur "
2813 "when simplifying range test");
2815 if (dump_file && (dump_flags & TDF_DETAILS))
2817 struct range_entry *r;
2818 fprintf (dump_file, "Optimizing range tests ");
2819 dump_range_entry (dump_file, range, false);
2820 for (i = 0; i < count; i++)
2822 if (otherrange)
2823 r = otherrange + i;
2824 else
2825 r = otherrangep[i];
2826 if (r->exp
2827 && r->exp != range->exp
2828 && TREE_CODE (r->exp) == SSA_NAME)
2830 fprintf (dump_file, " and ");
2831 dump_range_entry (dump_file, r, false);
2833 else
2835 fprintf (dump_file, " and");
2836 dump_range_entry (dump_file, r, true);
2839 fprintf (dump_file, "\n into ");
2840 print_generic_expr (dump_file, tem);
2841 fprintf (dump_file, "\n");
2844 if (opcode == BIT_IOR_EXPR
2845 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2846 tem = invert_truthvalue_loc (loc, tem);
2848 tem = fold_convert_loc (loc, optype, tem);
2849 if (stmt)
2851 gsi = gsi_for_stmt (stmt);
2852 uid = gimple_uid (stmt);
2854 else
2856 gsi = gsi_none ();
2857 uid = 0;
2859 if (stmt == NULL)
2860 gcc_checking_assert (tem == op);
2861 /* In rare cases range->exp can be equal to the lhs of stmt.
2862 In that case we have to insert after the stmt rather than before
2863 it. If stmt is a PHI, insert it at the start of the basic block. */
2864 else if (op != range->exp)
2866 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2867 tem = force_into_ssa_name (&gsi, tem, true);
2868 gsi_prev (&gsi);
2870 else if (gimple_code (stmt) != GIMPLE_PHI)
2872 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2873 tem = force_into_ssa_name (&gsi, tem, false);
2875 else
2877 gsi = gsi_after_labels (gimple_bb (stmt));
2878 if (!gsi_end_p (gsi))
2879 uid = gimple_uid (gsi_stmt (gsi));
2880 else
2882 gsi = gsi_start_bb (gimple_bb (stmt));
2883 uid = 1;
2884 while (!gsi_end_p (gsi))
2886 uid = gimple_uid (gsi_stmt (gsi));
2887 gsi_next (&gsi);
2890 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2891 tem = force_into_ssa_name (&gsi, tem, true);
2892 if (gsi_end_p (gsi))
2893 gsi = gsi_last_bb (gimple_bb (stmt));
2894 else
2895 gsi_prev (&gsi);
2897 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2898 if (gimple_uid (gsi_stmt (gsi)))
2899 break;
2900 else
2901 gimple_set_uid (gsi_stmt (gsi), uid);
2903 oe->op = tem;
2904 range->exp = exp;
2905 range->low = low;
2906 range->high = high;
2907 range->in_p = in_p;
2908 range->strict_overflow_p = false;
2910 for (i = 0; i < count; i++)
2912 if (otherrange)
2913 range = otherrange + i;
2914 else
2915 range = otherrangep[i];
2916 oe = (*ops)[range->idx];
2917 /* Now change all the other range test immediate uses, so that
2918 those tests will be optimized away. */
2919 if (opcode == ERROR_MARK)
2921 if (oe->op)
2922 oe->op = build_int_cst (TREE_TYPE (oe->op),
2923 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2924 else
2925 oe->op = (oe->rank == BIT_IOR_EXPR
2926 ? boolean_false_node : boolean_true_node);
2928 else
2929 oe->op = error_mark_node;
2930 range->exp = NULL_TREE;
2931 range->low = NULL_TREE;
2932 range->high = NULL_TREE;
2934 return true;
2937 /* Optimize X == CST1 || X == CST2
2938 if popcount (CST1 ^ CST2) == 1 into
2939 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2940 Similarly for ranges. E.g.
2941 X != 2 && X != 3 && X != 10 && X != 11
2942 will be transformed by the previous optimization into
2943 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2944 and this loop can transform that into
2945 !(((X & ~8) - 2U) <= 1U). */
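/* A concrete instance (added for illustration, not in the original
   comment): for X == 43 || X == 47 we have 43 ^ 47 == 4, which has
   popcount 1, so the pair becomes (X & ~4) == 43. */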
2947 static bool
2948 optimize_range_tests_xor (enum tree_code opcode, tree type,
2949 tree lowi, tree lowj, tree highi, tree highj,
2950 vec<operand_entry *> *ops,
2951 struct range_entry *rangei,
2952 struct range_entry *rangej)
2954 tree lowxor, highxor, tem, exp;
2955 /* Check lowi ^ lowj == highi ^ highj and
2956 popcount (lowi ^ lowj) == 1. */
2957 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2958 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2959 return false;
2960 if (!integer_pow2p (lowxor))
2961 return false;
2962 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2963 if (!tree_int_cst_equal (lowxor, highxor))
2964 return false;
2966 exp = rangei->exp;
2967 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2968 int prec = GET_MODE_PRECISION (mode);
2969 if (TYPE_PRECISION (type) < prec
2970 || (wi::to_wide (TYPE_MIN_VALUE (type))
2971 != wi::min_value (prec, TYPE_SIGN (type)))
2972 || (wi::to_wide (TYPE_MAX_VALUE (type))
2973 != wi::max_value (prec, TYPE_SIGN (type))))
2975 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
2976 exp = fold_convert (type, exp);
2977 lowxor = fold_convert (type, lowxor);
2978 lowi = fold_convert (type, lowi);
2979 highi = fold_convert (type, highi);
2981 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2982 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
2983 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2984 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2985 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2986 NULL, rangei->in_p, lowj, highj,
2987 rangei->strict_overflow_p
2988 || rangej->strict_overflow_p))
2989 return true;
2990 return false;
2993 /* Optimize X == CST1 || X == CST2
2994 if popcount (CST2 - CST1) == 1 into
2995 ((X - CST1) & ~(CST2 - CST1)) == 0.
2996 Similarly for ranges. E.g.
2997 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2998 || X == 75 || X == 45
2999 will be transformed by the previous optimization into
3000 (X - 43U) <= 3U || (X - 75U) <= 3U
3001 and this loop can transform that into
3002 ((X - 43U) & ~(75U - 43U)) <= 3U. */
3003 static bool
3004 optimize_range_tests_diff (enum tree_code opcode, tree type,
3005 tree lowi, tree lowj, tree highi, tree highj,
3006 vec<operand_entry *> *ops,
3007 struct range_entry *rangei,
3008 struct range_entry *rangej)
3010 tree tem1, tem2, mask;
3011 /* Check highi - lowi == highj - lowj. */
3012 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
3013 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3014 return false;
3015 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
3016 if (!tree_int_cst_equal (tem1, tem2))
3017 return false;
3018 /* Check popcount (lowj - lowi) == 1. */
3019 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
3020 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3021 return false;
3022 if (!integer_pow2p (tem1))
3023 return false;
3025 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3026 int prec = GET_MODE_PRECISION (mode);
3027 if (TYPE_PRECISION (type) < prec
3028 || (wi::to_wide (TYPE_MIN_VALUE (type))
3029 != wi::min_value (prec, TYPE_SIGN (type)))
3030 || (wi::to_wide (TYPE_MAX_VALUE (type))
3031 != wi::max_value (prec, TYPE_SIGN (type))))
3032 type = build_nonstandard_integer_type (prec, 1);
3033 else
3034 type = unsigned_type_for (type);
3035 tem1 = fold_convert (type, tem1);
3036 tem2 = fold_convert (type, tem2);
3037 lowi = fold_convert (type, lowi);
3038 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
3039 tem1 = fold_build2 (MINUS_EXPR, type,
3040 fold_convert (type, rangei->exp), lowi);
3041 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
3042 lowj = build_int_cst (type, 0);
3043 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
3044 NULL, rangei->in_p, lowj, tem2,
3045 rangei->strict_overflow_p
3046 || rangej->strict_overflow_p))
3047 return true;
3048 return false;
3051 /* Perform the checks common to optimize_range_tests_xor and
3052 optimize_range_tests_diff.
3053 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
3054 else call optimize_range_tests_diff. */
3056 static bool
3057 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
3058 bool optimize_xor, vec<operand_entry *> *ops,
3059 struct range_entry *ranges)
3061 int i, j;
3062 bool any_changes = false;
3063 for (i = first; i < length; i++)
3065 tree lowi, highi, lowj, highj, type, tem;
3067 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3068 continue;
3069 type = TREE_TYPE (ranges[i].exp);
3070 if (!INTEGRAL_TYPE_P (type))
3071 continue;
3072 lowi = ranges[i].low;
3073 if (lowi == NULL_TREE)
3074 lowi = TYPE_MIN_VALUE (type);
3075 highi = ranges[i].high;
3076 if (highi == NULL_TREE)
3077 continue;
3078 for (j = i + 1; j < length && j < i + 64; j++)
3080 bool changes;
3081 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3082 continue;
3083 lowj = ranges[j].low;
3084 if (lowj == NULL_TREE)
3085 continue;
3086 highj = ranges[j].high;
3087 if (highj == NULL_TREE)
3088 highj = TYPE_MAX_VALUE (type);
3089 /* Check lowj > highi. */
3090 tem = fold_binary (GT_EXPR, boolean_type_node,
3091 lowj, highi);
3092 if (tem == NULL_TREE || !integer_onep (tem))
3093 continue;
3094 if (optimize_xor)
3095 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3096 highi, highj, ops,
3097 ranges + i, ranges + j);
3098 else
3099 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3100 highi, highj, ops,
3101 ranges + i, ranges + j);
3102 if (changes)
3104 any_changes = true;
3105 break;
3109 return any_changes;
3112 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3113 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3114 EXP on success, NULL otherwise. */
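/* For illustration (added, not in the original comment): with PREC 64 and
   TOTALLOW 43, the range [43, 46] yields mask 0xf (bits 0 .. 3), while a
   subsequent range [75, 78] yields mask 0xf << (75 - 43), i.e. bits
   32 .. 35 of the same word. */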
3116 static tree
3117 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3118 wide_int *mask, tree *totallowp)
3120 tree tem = int_const_binop (MINUS_EXPR, high, low);
3121 if (tem == NULL_TREE
3122 || TREE_CODE (tem) != INTEGER_CST
3123 || TREE_OVERFLOW (tem)
3124 || tree_int_cst_sgn (tem) == -1
3125 || compare_tree_int (tem, prec) != -1)
3126 return NULL_TREE;
3128 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3129 *mask = wi::shifted_mask (0, max, false, prec);
3130 if (TREE_CODE (exp) == BIT_AND_EXPR
3131 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3133 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3134 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3135 if (wi::popcount (msk) == 1
3136 && wi::ltu_p (msk, prec - max))
3138 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3139 max += msk.to_uhwi ();
3140 exp = TREE_OPERAND (exp, 0);
3141 if (integer_zerop (low)
3142 && TREE_CODE (exp) == PLUS_EXPR
3143 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3145 tree ret = TREE_OPERAND (exp, 0);
3146 STRIP_NOPS (ret);
3147 widest_int bias
3148 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3149 TYPE_PRECISION (TREE_TYPE (low))));
3150 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3151 if (totallowp)
3153 *totallowp = tbias;
3154 return ret;
3156 else if (!tree_int_cst_lt (totallow, tbias))
3157 return NULL_TREE;
3158 bias = wi::to_widest (tbias);
3159 bias -= wi::to_widest (totallow);
3160 if (bias >= 0 && bias < prec - max)
3162 *mask = wi::lshift (*mask, bias);
3163 return ret;
3168 if (totallowp)
3169 return exp;
3170 if (!tree_int_cst_lt (totallow, low))
3171 return exp;
3172 tem = int_const_binop (MINUS_EXPR, low, totallow);
3173 if (tem == NULL_TREE
3174 || TREE_CODE (tem) != INTEGER_CST
3175 || TREE_OVERFLOW (tem)
3176 || compare_tree_int (tem, prec - max) == 1)
3177 return NULL_TREE;
3179 *mask = wi::lshift (*mask, wi::to_widest (tem));
3180 return exp;
3183 /* Attempt to optimize small range tests using bit test.
3184 E.g.
3185 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3186 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3187 has been optimized by earlier optimizations into:
3188 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3189 As all the 43 through 82 range is less than 64 numbers,
3190 for 64-bit word targets optimize that into:
3191 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
3193 static bool
3194 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3195 vec<operand_entry *> *ops,
3196 struct range_entry *ranges)
3198 int i, j;
3199 bool any_changes = false;
3200 int prec = GET_MODE_BITSIZE (word_mode);
3201 auto_vec<struct range_entry *, 64> candidates;
3203 for (i = first; i < length - 1; i++)
3205 tree lowi, highi, lowj, highj, type;
3207 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3208 continue;
3209 type = TREE_TYPE (ranges[i].exp);
3210 if (!INTEGRAL_TYPE_P (type))
3211 continue;
3212 lowi = ranges[i].low;
3213 if (lowi == NULL_TREE)
3214 lowi = TYPE_MIN_VALUE (type);
3215 highi = ranges[i].high;
3216 if (highi == NULL_TREE)
3217 continue;
3218 wide_int mask;
3219 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3220 highi, &mask, &lowi);
3221 if (exp == NULL_TREE)
3222 continue;
3223 bool strict_overflow_p = ranges[i].strict_overflow_p;
3224 candidates.truncate (0);
3225 int end = MIN (i + 64, length);
3226 for (j = i + 1; j < end; j++)
3228 tree exp2;
3229 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3230 continue;
3231 if (ranges[j].exp == exp)
3233 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3235 exp2 = TREE_OPERAND (ranges[j].exp, 0);
3236 if (exp2 == exp)
3238 else if (TREE_CODE (exp2) == PLUS_EXPR)
3240 exp2 = TREE_OPERAND (exp2, 0);
3241 STRIP_NOPS (exp2);
3242 if (exp2 != exp)
3243 continue;
3245 else
3246 continue;
3248 else
3249 continue;
3250 lowj = ranges[j].low;
3251 if (lowj == NULL_TREE)
3252 continue;
3253 highj = ranges[j].high;
3254 if (highj == NULL_TREE)
3255 highj = TYPE_MAX_VALUE (type);
3256 wide_int mask2;
3257 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3258 highj, &mask2, NULL);
3259 if (exp2 != exp)
3260 continue;
3261 mask |= mask2;
3262 strict_overflow_p |= ranges[j].strict_overflow_p;
3263 candidates.safe_push (&ranges[j]);
3266 /* If every possible relative value of the expression is a valid shift
3267 amount, then we can merge the entry test into the bit test. In this
3268 case, if we would otherwise need 2 or more comparisons, then use
3269 the bit test; in the other cases, the threshold is 3 comparisons. */
3270 bool entry_test_needed;
3271 value_range r;
3272 if (TREE_CODE (exp) == SSA_NAME
3273 && get_range_query (cfun)->range_of_expr (r, exp)
3274 && r.kind () == VR_RANGE
3275 && wi::leu_p (r.upper_bound () - r.lower_bound (), prec - 1))
3277 wide_int min = r.lower_bound ();
3278 wide_int ilowi = wi::to_wide (lowi);
3279 if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3281 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3282 mask = wi::lshift (mask, ilowi - min);
3284 else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3286 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3287 mask = wi::lrshift (mask, min - ilowi);
3289 entry_test_needed = false;
3291 else
3292 entry_test_needed = true;
3293 if (candidates.length () >= (entry_test_needed ? 2 : 1))
3295 tree high = wide_int_to_tree (TREE_TYPE (lowi),
3296 wi::to_widest (lowi)
3297 + prec - 1 - wi::clz (mask));
3298 operand_entry *oe = (*ops)[ranges[i].idx];
3299 tree op = oe->op;
3300 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3301 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3302 location_t loc = gimple_location (stmt);
3303 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3305 /* See if it isn't cheaper to pretend the minimum value of the
3306 range is 0, if the maximum value is small enough.
3307 We can then avoid the subtraction of the minimum value, but the
3308 mask constant could perhaps be more expensive. */
3309 if (compare_tree_int (lowi, 0) > 0
3310 && compare_tree_int (high, prec) < 0)
3312 int cost_diff;
3313 HOST_WIDE_INT m = tree_to_uhwi (lowi);
3314 rtx reg = gen_raw_REG (word_mode, 10000);
3315 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3316 cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
3317 GEN_INT (-m)),
3318 word_mode, speed_p);
3319 rtx r = immed_wide_int_const (mask, word_mode);
3320 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3321 word_mode, speed_p);
3322 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3323 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3324 word_mode, speed_p);
3325 if (cost_diff > 0)
3327 mask = wi::lshift (mask, m);
3328 lowi = build_zero_cst (TREE_TYPE (lowi));
3332 tree tem;
3333 if (entry_test_needed)
3335 tem = build_range_check (loc, optype, unshare_expr (exp),
3336 false, lowi, high);
3337 if (tem == NULL_TREE || is_gimple_val (tem))
3338 continue;
3340 else
3341 tem = NULL_TREE;
3342 tree etype = unsigned_type_for (TREE_TYPE (exp));
3343 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3344 fold_convert_loc (loc, etype, exp),
3345 fold_convert_loc (loc, etype, lowi));
3346 exp = fold_convert_loc (loc, integer_type_node, exp);
3347 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3348 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3349 build_int_cst (word_type, 1), exp);
3350 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3351 wide_int_to_tree (word_type, mask));
3352 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3353 build_zero_cst (word_type));
3354 if (is_gimple_val (exp))
3355 continue;
3357 /* The shift might have undefined behavior if TEM is true,
3358 but reassociate_bb isn't prepared to have basic blocks
3359 split when it is running. So, temporarily emit code
3360 with BIT_IOR_EXPR instead of &&, and fix it up in
3361 branch_fixup. */
3362 gimple_seq seq = NULL;
3363 if (tem)
3365 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3366 gcc_assert (TREE_CODE (tem) == SSA_NAME);
3367 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3369 gimple_seq seq2;
3370 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3371 gimple_seq_add_seq_without_update (&seq, seq2);
3372 gcc_assert (TREE_CODE (exp) == SSA_NAME);
3373 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3374 if (tem)
3376 gimple *g = gimple_build_assign (make_ssa_name (optype),
3377 BIT_IOR_EXPR, tem, exp);
3378 gimple_set_location (g, loc);
3379 gimple_seq_add_stmt_without_update (&seq, g);
3380 exp = gimple_assign_lhs (g);
3382 tree val = build_zero_cst (optype);
3383 if (update_range_test (&ranges[i], NULL, candidates.address (),
3384 candidates.length (), opcode, ops, exp,
3385 seq, false, val, val, strict_overflow_p))
3387 any_changes = true;
3388 if (tem)
3389 reassoc_branch_fixups.safe_push (tem);
3391 else
3392 gimple_seq_discard (seq);
3395 return any_changes;
3398 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3399 and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.
3400 Also, handle x < C && y < C && z < C where C is power of two as
3401 (x | y | z) < C. And also handle signed x < 0 && y < 0 && z < 0
3402 as (x | y | z) < 0. */
3404 static bool
3405 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3406 vec<operand_entry *> *ops,
3407 struct range_entry *ranges)
3409 int i;
3410 unsigned int b;
3411 bool any_changes = false;
3412 auto_vec<int, 128> buckets;
3413 auto_vec<int, 32> chains;
3414 auto_vec<struct range_entry *, 32> candidates;
3416 for (i = first; i < length; i++)
3418 int idx;
3420 if (ranges[i].exp == NULL_TREE
3421 || TREE_CODE (ranges[i].exp) != SSA_NAME
3422 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3423 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
3424 continue;
3426 if (ranges[i].low != NULL_TREE
3427 && ranges[i].high != NULL_TREE
3428 && ranges[i].in_p
3429 && tree_int_cst_equal (ranges[i].low, ranges[i].high))
3431 idx = !integer_zerop (ranges[i].low);
3432 if (idx && !integer_all_onesp (ranges[i].low))
3433 continue;
3435 else if (ranges[i].high != NULL_TREE
3436 && TREE_CODE (ranges[i].high) == INTEGER_CST
3437 && ranges[i].in_p)
3439 wide_int w = wi::to_wide (ranges[i].high);
3440 int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
3441 int l = wi::clz (w);
3442 idx = 2;
3443 if (l <= 0
3444 || l >= prec
3445 || w != wi::mask (prec - l, false, prec))
3446 continue;
3447 if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3448 && ranges[i].low == NULL_TREE)
3449 || (ranges[i].low
3450 && integer_zerop (ranges[i].low))))
3451 continue;
3453 else if (ranges[i].high == NULL_TREE
3454 && ranges[i].low != NULL_TREE
3455 /* Perform this optimization only in the last
3456 reassoc pass, as it interferes with the reassociation
3457 itself and could also interfere with VRP etc., which might not
3458 be able to virtually undo the optimization. */
3459 && !reassoc_insert_powi_p
3460 && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3461 && integer_zerop (ranges[i].low))
3462 idx = 3;
3463 else
3464 continue;
3466 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx;
3467 if (buckets.length () <= b)
3468 buckets.safe_grow_cleared (b + 1, true);
3469 if (chains.length () <= (unsigned) i)
3470 chains.safe_grow (i + 1, true);
3471 chains[i] = buckets[b];
3472 buckets[b] = i + 1;
3475 FOR_EACH_VEC_ELT (buckets, b, i)
3476 if (i && chains[i - 1])
3478 int j, k = i;
3479 if ((b % 4) == 2)
3481 /* When ranges[X - 1].high + 1 is a power of two,
3482 we need to process the same bucket up to
3483 precision - 1 times, each time splitting the entries
3484 with the same high bound into one chain and the
3485 rest into another one to be processed later. */
3486 int this_prev = i;
3487 int other_prev = 0;
3488 for (j = chains[i - 1]; j; j = chains[j - 1])
3490 if (tree_int_cst_equal (ranges[i - 1].high,
3491 ranges[j - 1].high))
3493 chains[this_prev - 1] = j;
3494 this_prev = j;
3496 else if (other_prev == 0)
3498 buckets[b] = j;
3499 other_prev = j;
3501 else
3503 chains[other_prev - 1] = j;
3504 other_prev = j;
3507 chains[this_prev - 1] = 0;
3508 if (other_prev)
3509 chains[other_prev - 1] = 0;
3510 if (chains[i - 1] == 0)
3512 if (other_prev)
3513 b--;
3514 continue;
3517 for (j = chains[i - 1]; j; j = chains[j - 1])
3519 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3520 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3521 if (reassoc_stmt_dominates_stmt_p (gk, gj))
3522 k = j;
3524 tree type1 = TREE_TYPE (ranges[k - 1].exp);
3525 tree type2 = NULL_TREE;
3526 bool strict_overflow_p = false;
3527 candidates.truncate (0);
3528 for (j = i; j; j = chains[j - 1])
3530 tree type = TREE_TYPE (ranges[j - 1].exp);
3531 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3532 if ((b % 4) == 3)
3534 /* For the signed < 0 cases, the types should be
3535 really compatible (all signed with the same precision); the
3536 candidates are instead split by in_p, putting ranges whose in_p
3537 differs from k's first. */
3538 if (!useless_type_conversion_p (type1, type))
3539 continue;
3540 if (ranges[j - 1].in_p != ranges[k - 1].in_p)
3541 candidates.safe_push (&ranges[j - 1]);
3542 type2 = type1;
3543 continue;
3545 if (j == k
3546 || useless_type_conversion_p (type1, type))
3548 else if (type2 == NULL_TREE
3549 || useless_type_conversion_p (type2, type))
3551 if (type2 == NULL_TREE)
3552 type2 = type;
3553 candidates.safe_push (&ranges[j - 1]);
3556 unsigned l = candidates.length ();
3557 for (j = i; j; j = chains[j - 1])
3559 tree type = TREE_TYPE (ranges[j - 1].exp);
3560 if (j == k)
3561 continue;
3562 if ((b % 4) == 3)
3564 if (!useless_type_conversion_p (type1, type))
3565 continue;
3566 if (ranges[j - 1].in_p == ranges[k - 1].in_p)
3567 candidates.safe_push (&ranges[j - 1]);
3568 continue;
3570 if (useless_type_conversion_p (type1, type))
3572 else if (type2 == NULL_TREE
3573 || useless_type_conversion_p (type2, type))
3574 continue;
3575 candidates.safe_push (&ranges[j - 1]);
3577 gimple_seq seq = NULL;
3578 tree op = NULL_TREE;
3579 unsigned int id;
3580 struct range_entry *r;
3581 candidates.safe_push (&ranges[k - 1]);
3582 FOR_EACH_VEC_ELT (candidates, id, r)
3584 gimple *g;
3585 enum tree_code code;
3586 if (id == 0)
3588 op = r->exp;
3589 continue;
3591 if (id == l)
3593 code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR;
3594 g = gimple_build_assign (make_ssa_name (type1), code, op);
3595 gimple_seq_add_stmt_without_update (&seq, g);
3596 op = gimple_assign_lhs (g);
3598 tree type = TREE_TYPE (r->exp);
3599 tree exp = r->exp;
3600 if (id >= l && !useless_type_conversion_p (type1, type))
3602 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3603 gimple_seq_add_stmt_without_update (&seq, g);
3604 exp = gimple_assign_lhs (g);
3606 if ((b % 4) == 3)
3607 code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR;
3608 else
3609 code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR;
3610 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3611 code, op, exp);
3612 gimple_seq_add_stmt_without_update (&seq, g);
3613 op = gimple_assign_lhs (g);
3615 candidates.pop ();
3616 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3617 candidates.length (), opcode, ops, op,
3618 seq, ranges[k - 1].in_p, ranges[k - 1].low,
3619 ranges[k - 1].high, strict_overflow_p))
3620 any_changes = true;
3621 else
3622 gimple_seq_discard (seq);
3623 if ((b % 4) == 2 && buckets[b] != i)
3624 /* There is more work to do for this bucket. */
3625 b--;
3628 return any_changes;
3631 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3632 a >= 0 && a < b into (unsigned) a < (unsigned) b
3633 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
3635 static bool
3636 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3637 vec<operand_entry *> *ops,
3638 struct range_entry *ranges,
3639 basic_block first_bb)
3641 int i;
3642 bool any_changes = false;
3643 hash_map<tree, int> *map = NULL;
3645 for (i = first; i < length; i++)
3647 if (ranges[i].exp == NULL_TREE
3648 || TREE_CODE (ranges[i].exp) != SSA_NAME
3649 || !ranges[i].in_p)
3650 continue;
3652 tree type = TREE_TYPE (ranges[i].exp);
3653 if (!INTEGRAL_TYPE_P (type)
3654 || TYPE_UNSIGNED (type)
3655 || ranges[i].low == NULL_TREE
3656 || !integer_zerop (ranges[i].low)
3657 || ranges[i].high != NULL_TREE)
3658 continue;
3659 /* EXP >= 0 here. */
3660 if (map == NULL)
3661 map = new hash_map <tree, int>;
3662 map->put (ranges[i].exp, i);
3665 if (map == NULL)
3666 return false;
3668 for (i = 0; i < length; i++)
3670 bool in_p = ranges[i].in_p;
3671 if (ranges[i].low == NULL_TREE
3672 || ranges[i].high == NULL_TREE)
3673 continue;
3674 if (!integer_zerop (ranges[i].low)
3675 || !integer_zerop (ranges[i].high))
3677 if (ranges[i].exp
3678 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3679 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3680 && integer_onep (ranges[i].low)
3681 && integer_onep (ranges[i].high))
3682 in_p = !in_p;
3683 else
3684 continue;
3687 gimple *stmt;
3688 tree_code ccode;
3689 tree rhs1, rhs2;
3690 if (ranges[i].exp)
3692 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3693 continue;
3694 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3695 if (!is_gimple_assign (stmt))
3696 continue;
3697 ccode = gimple_assign_rhs_code (stmt);
3698 rhs1 = gimple_assign_rhs1 (stmt);
3699 rhs2 = gimple_assign_rhs2 (stmt);
3701 else
3703 operand_entry *oe = (*ops)[ranges[i].idx];
3704 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3705 if (gimple_code (stmt) != GIMPLE_COND)
3706 continue;
3707 ccode = gimple_cond_code (stmt);
3708 rhs1 = gimple_cond_lhs (stmt);
3709 rhs2 = gimple_cond_rhs (stmt);
3712 if (TREE_CODE (rhs1) != SSA_NAME
3713 || rhs2 == NULL_TREE
3714 || TREE_CODE (rhs2) != SSA_NAME)
3715 continue;
3717 switch (ccode)
3719 case GT_EXPR:
3720 case GE_EXPR:
3721 case LT_EXPR:
3722 case LE_EXPR:
3723 break;
3724 default:
3725 continue;
3727 if (in_p)
3728 ccode = invert_tree_comparison (ccode, false);
3729 switch (ccode)
3731 case GT_EXPR:
3732 case GE_EXPR:
3733 std::swap (rhs1, rhs2);
3734 ccode = swap_tree_comparison (ccode);
3735 break;
3736 case LT_EXPR:
3737 case LE_EXPR:
3738 break;
3739 default:
3740 gcc_unreachable ();
3743 int *idx = map->get (rhs1);
3744 if (idx == NULL)
3745 continue;
3747 /* maybe_optimize_range_tests allows statements without side-effects
3748 in the basic blocks as long as they are consumed in the same bb.
3749 Make sure rhs2's def stmt is not among them, otherwise we can't
3750 safely use get_nonzero_bits on it. E.g. in:
3751 # RANGE [-83, 1] NONZERO 173
3752 # k_32 = PHI <k_47(13), k_12(9)>
3754 if (k_32 >= 0)
3755 goto <bb 5>; [26.46%]
3756 else
3757 goto <bb 9>; [73.54%]
3759 <bb 5> [local count: 140323371]:
3760 # RANGE [0, 1] NONZERO 1
3761 _5 = (int) k_32;
3762 # RANGE [0, 4] NONZERO 4
3763 _21 = _5 << 2;
3764 # RANGE [0, 4] NONZERO 4
3765 iftmp.0_44 = (char) _21;
3766 if (k_32 < iftmp.0_44)
3767 goto <bb 6>; [84.48%]
3768 else
3769 goto <bb 9>; [15.52%]
3770 the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3771 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3772 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3773 those stmts even for negative k_32 and the value ranges would no
3774 longer be guaranteed, and so the optimization would be invalid. */
3775 while (opcode == ERROR_MARK)
3777 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3778 basic_block bb2 = gimple_bb (g);
3779 if (bb2
3780 && bb2 != first_bb
3781 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3783 /* As an exception, handle a few common cases. */
3784 if (gimple_assign_cast_p (g)
3785 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3787 tree op0 = gimple_assign_rhs1 (g);
3788 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3789 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3790 > TYPE_PRECISION (TREE_TYPE (op0))))
3791 /* Zero-extension is always ok. */
3792 break;
3793 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3794 == TYPE_PRECISION (TREE_TYPE (op0))
3795 && TREE_CODE (op0) == SSA_NAME)
3797 /* Cast from signed to unsigned or vice versa. Retry
3798 with the op0 as new rhs2. */
3799 rhs2 = op0;
3800 continue;
3803 else if (is_gimple_assign (g)
3804 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3805 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3806 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3807 /* Masking with INTEGER_CST with MSB clear is always ok
3808 too. */
3809 break;
3810 rhs2 = NULL_TREE;
3812 break;
3814 if (rhs2 == NULL_TREE)
3815 continue;
3817 wide_int nz = get_nonzero_bits (rhs2);
3818 if (wi::neg_p (nz))
3819 continue;
3821 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3822 and RHS2 is known to be >= 0. */
3823 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3825 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3826 if ((ranges[*idx].strict_overflow_p
3827 || ranges[i].strict_overflow_p)
3828 && issue_strict_overflow_warning (wc))
3829 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3830 "assuming signed overflow does not occur "
3831 "when simplifying range test");
3833 if (dump_file && (dump_flags & TDF_DETAILS))
3835 struct range_entry *r = &ranges[*idx];
3836 fprintf (dump_file, "Optimizing range test ");
3837 print_generic_expr (dump_file, r->exp);
3838 fprintf (dump_file, " +[");
3839 print_generic_expr (dump_file, r->low);
3840 fprintf (dump_file, ", ");
3841 print_generic_expr (dump_file, r->high);
3842 fprintf (dump_file, "] and comparison ");
3843 print_generic_expr (dump_file, rhs1);
3844 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3845 print_generic_expr (dump_file, rhs2);
3846 fprintf (dump_file, "\n into (");
3847 print_generic_expr (dump_file, utype);
3848 fprintf (dump_file, ") ");
3849 print_generic_expr (dump_file, rhs1);
3850 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3851 print_generic_expr (dump_file, utype);
3852 fprintf (dump_file, ") ");
3853 print_generic_expr (dump_file, rhs2);
3854 fprintf (dump_file, "\n");
3857 operand_entry *oe = (*ops)[ranges[i].idx];
3858 ranges[i].in_p = 0;
3859 if (opcode == BIT_IOR_EXPR
3860 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3862 ranges[i].in_p = 1;
3863 ccode = invert_tree_comparison (ccode, false);
3866 unsigned int uid = gimple_uid (stmt);
3867 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3868 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3869 gimple_set_uid (g, uid);
3870 rhs1 = gimple_assign_lhs (g);
3871 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3872 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
3874 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3875 gimple_set_uid (g, uid);
3876 rhs2 = gimple_assign_lhs (g);
3877 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3879 if (tree_swap_operands_p (rhs1, rhs2))
3881 std::swap (rhs1, rhs2);
3882 ccode = swap_tree_comparison (ccode);
3884 if (gimple_code (stmt) == GIMPLE_COND)
3886 gcond *c = as_a <gcond *> (stmt);
3887 gimple_cond_set_code (c, ccode);
3888 gimple_cond_set_lhs (c, rhs1);
3889 gimple_cond_set_rhs (c, rhs2);
3890 update_stmt (stmt);
3892 else
3894 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3895 if (!INTEGRAL_TYPE_P (ctype)
3896 || (TREE_CODE (ctype) != BOOLEAN_TYPE
3897 && TYPE_PRECISION (ctype) != 1))
3898 ctype = boolean_type_node;
3899 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3900 gimple_set_uid (g, uid);
3901 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3902 if (oe->op && ctype != TREE_TYPE (oe->op))
3904 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3905 NOP_EXPR, gimple_assign_lhs (g));
3906 gimple_set_uid (g, uid);
3907 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3909 ranges[i].exp = gimple_assign_lhs (g);
3910 oe->op = ranges[i].exp;
3911 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3912 ranges[i].high = ranges[i].low;
3914 ranges[i].strict_overflow_p = false;
3915 oe = (*ops)[ranges[*idx].idx];
3916 /* Now change all the other range test immediate uses, so that
3917 those tests will be optimized away. */
3918 if (opcode == ERROR_MARK)
3920 if (oe->op)
3921 oe->op = build_int_cst (TREE_TYPE (oe->op),
3922 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3923 else
3924 oe->op = (oe->rank == BIT_IOR_EXPR
3925 ? boolean_false_node : boolean_true_node);
3927 else
3928 oe->op = error_mark_node;
3929 ranges[*idx].exp = NULL_TREE;
3930 ranges[*idx].low = NULL_TREE;
3931 ranges[*idx].high = NULL_TREE;
3932 any_changes = true;
3935 delete map;
3936 return any_changes;
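/* Illustrative sketch (added for exposition, not part of the original
   sources; names are hypothetical).  The transformation above turns a
   signed comparison guarded by a nonnegativity range test into a single
   unsigned comparison, roughly:

     if (x >= 0 && x < y)   // with y known to be >= 0

   into

     if ((unsigned int) x < (unsigned int) y)

   which lets the separate x >= 0 range test be dropped.  */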
3939 /* Optimize range tests, similarly to how fold_range_test optimizes
3940 them on trees. The tree code for the binary
3941 operation between all the operands is OPCODE.
3942 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3943 maybe_optimize_range_tests for inter-bb range optimization.
3944 In that case, if oe->op is NULL, oe->id is the index of the bb whose
3945 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3946 the actual opcode.
3947 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
3949 static bool
3950 optimize_range_tests (enum tree_code opcode,
3951 vec<operand_entry *> *ops, basic_block first_bb)
3953 unsigned int length = ops->length (), i, j, first;
3954 operand_entry *oe;
3955 struct range_entry *ranges;
3956 bool any_changes = false;
3958 if (length == 1)
3959 return false;
3961 ranges = XNEWVEC (struct range_entry, length);
3962 for (i = 0; i < length; i++)
3964 oe = (*ops)[i];
3965 ranges[i].idx = i;
3966 init_range_entry (ranges + i, oe->op,
3967 oe->op
3968 ? NULL
3969 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3970 /* For |, invert it now; we will invert it again before emitting
3971 the optimized expression. */
3972 if (opcode == BIT_IOR_EXPR
3973 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3974 ranges[i].in_p = !ranges[i].in_p;
3977 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3978 for (i = 0; i < length; i++)
3979 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3980 break;
3982 /* Try to merge ranges. */
3983 for (first = i; i < length; i++)
3985 tree low = ranges[i].low;
3986 tree high = ranges[i].high;
3987 int in_p = ranges[i].in_p;
3988 bool strict_overflow_p = ranges[i].strict_overflow_p;
3989 int update_fail_count = 0;
3991 for (j = i + 1; j < length; j++)
3993 if (ranges[i].exp != ranges[j].exp)
3994 break;
3995 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3996 ranges[j].in_p, ranges[j].low, ranges[j].high))
3997 break;
3998 strict_overflow_p |= ranges[j].strict_overflow_p;
4001 if (j == i + 1)
4002 continue;
4004 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
4005 opcode, ops, ranges[i].exp, NULL, in_p,
4006 low, high, strict_overflow_p))
4008 i = j - 1;
4009 any_changes = true;
4011 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
4012 while update_range_test would fail. */
4013 else if (update_fail_count == 64)
4014 i = j - 1;
4015 else
4016 ++update_fail_count;
4019 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
4020 ops, ranges);
4022 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
4023 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
4024 ops, ranges);
4025 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
4026 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
4027 ops, ranges);
4028 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
4029 ranges, first_bb);
4030 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
4031 ops, ranges);
4033 if (any_changes && opcode != ERROR_MARK)
4035 j = 0;
4036 FOR_EACH_VEC_ELT (*ops, i, oe)
4038 if (oe->op == error_mark_node)
4039 continue;
4040 else if (i != j)
4041 (*ops)[j] = oe;
4042 j++;
4044 ops->truncate (j);
4047 XDELETEVEC (ranges);
4048 return any_changes;
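/* Illustrative sketch (added for exposition, not part of the original
   sources; names are hypothetical).  optimize_range_tests merges
   comparisons of the same operand that were combined with && or ||,
   e.g. at the source level something like

     if (a == 1 || a == 2 || a == 3 || a == 4)

   can be rewritten as a single range check along the lines of

     if ((unsigned int) (a - 1) <= 3U)

   with the now-redundant tests folded away.  */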
4051 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
4052 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
4053 otherwise the comparison code. TYPE is a return value that is set
4054 to type of comparison. */
4056 static tree_code
4057 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
4058 tree *lhs, tree *rhs, gassign **vcond)
4060 if (TREE_CODE (var) != SSA_NAME)
4061 return ERROR_MARK;
4063 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
4064 if (stmt == NULL)
4065 return ERROR_MARK;
4066 if (vcond)
4067 *vcond = stmt;
4069 /* ??? If we start creating more COND_EXPR, we could perform
4070 this same optimization with them. For now, simplify. */
4071 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
4072 return ERROR_MARK;
4074 tree cond = gimple_assign_rhs1 (stmt);
4075 tree_code cmp = TREE_CODE (cond);
4076 if (cmp != SSA_NAME)
4077 return ERROR_MARK;
4079 gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
4080 if (assign == NULL
4081 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
4082 return ERROR_MARK;
4084 cmp = gimple_assign_rhs_code (assign);
4085 if (lhs)
4086 *lhs = gimple_assign_rhs1 (assign);
4087 if (rhs)
4088 *rhs = gimple_assign_rhs2 (assign);
4090 /* ??? For now, allow only canonical true and false result vectors.
4091 We could expand this to other constants should the need arise,
4092 but at the moment we don't create them. */
4093 tree t = gimple_assign_rhs2 (stmt);
4094 tree f = gimple_assign_rhs3 (stmt);
4095 bool inv;
4096 if (integer_all_onesp (t))
4097 inv = false;
4098 else if (integer_all_onesp (f))
4100 cmp = invert_tree_comparison (cmp, false);
4101 inv = true;
4103 else
4104 return ERROR_MARK;
4105 if (!integer_zerop (f))
4106 return ERROR_MARK;
4108 /* Success! */
4109 if (rets)
4110 *rets = assign;
4111 if (reti)
4112 *reti = inv;
4113 if (type)
4114 *type = TREE_TYPE (cond);
4115 return cmp;
4118 /* Optimize the condition of VEC_COND_EXPRs which have been combined
4119 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
4121 static bool
4122 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
4124 unsigned int length = ops->length (), i, j;
4125 bool any_changes = false;
4127 if (length == 1)
4128 return false;
4130 for (i = 0; i < length; ++i)
4132 tree elt0 = (*ops)[i]->op;
4134 gassign *stmt0, *vcond0;
4135 bool invert;
4136 tree type, lhs0, rhs0;
4137 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
4138 &rhs0, &vcond0);
4139 if (cmp0 == ERROR_MARK)
4140 continue;
4142 for (j = i + 1; j < length; ++j)
4144 tree &elt1 = (*ops)[j]->op;
4146 gassign *stmt1, *vcond1;
4147 tree lhs1, rhs1;
4148 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
4149 &rhs1, &vcond1);
4150 if (cmp1 == ERROR_MARK)
4151 continue;
4153 tree comb;
4154 if (opcode == BIT_AND_EXPR)
4155 comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
4156 cmp1, lhs1, rhs1);
4157 else if (opcode == BIT_IOR_EXPR)
4158 comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
4159 cmp1, lhs1, rhs1);
4160 else
4161 gcc_unreachable ();
4162 if (comb == NULL)
4163 continue;
4165 /* Success! */
4166 if (dump_file && (dump_flags & TDF_DETAILS))
4168 fprintf (dump_file, "Transforming ");
4169 print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
4170 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
4171 print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
4172 fprintf (dump_file, " into ");
4173 print_generic_expr (dump_file, comb);
4174 fputc ('\n', dump_file);
4177 gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
4178 tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE,
4179 true, GSI_SAME_STMT);
4180 if (invert)
4181 swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0),
4182 gimple_assign_rhs3_ptr (vcond0));
4183 gimple_assign_set_rhs1 (vcond0, exp);
4184 update_stmt (vcond0);
4186 elt1 = error_mark_node;
4187 any_changes = true;
4191 if (any_changes)
4193 operand_entry *oe;
4194 j = 0;
4195 FOR_EACH_VEC_ELT (*ops, i, oe)
4197 if (oe->op == error_mark_node)
4198 continue;
4199 else if (i != j)
4200 (*ops)[j] = oe;
4201 j++;
4203 ops->truncate (j);
4206 return any_changes;
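/* Illustrative sketch (added for exposition, not part of the original
   sources; names are hypothetical).  optimize_vec_cond_expr looks for
   two VEC_COND_EXPRs whose results are combined with & or |, e.g.

     c1_1 = a_2 < b_3;
     v1_4 = VEC_COND_EXPR <c1_1, { -1, ... }, { 0, ... }>;
     c2_5 = a_2 <= b_3;
     v2_6 = VEC_COND_EXPR <c2_5, { -1, ... }, { 0, ... }>;
     r_7 = v1_4 & v2_6;

   and, when the two comparisons fold (here a_2 < b_3 && a_2 <= b_3 is
   just a_2 < b_3), rewrites the first VEC_COND_EXPR to use the combined
   comparison and drops the second entry from the ops vector.  */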
4209 /* Return true if STMT is a cast like:
4210 <bb N>:
4212 _123 = (int) _234;
4214 <bb M>:
4215 # _345 = PHI <_123(N), 1(...), 1(...)>
4216 where _234 has bool type, _123 has a single use and
4217 bb N has a single successor M. This is commonly used in
4218 the last block of a range test.
4220 Also return true if STMT is a tcc_comparison like:
4221 <bb N>:
4223 _234 = a_2(D) == 2;
4225 <bb M>:
4226 # _345 = PHI <_234(N), 1(...), 1(...)>
4227 _346 = (int) _345;
4228 where _234 has bool type, a single use, and
4229 bb N has a single successor M. This is commonly used in
4230 the last block of a range test. */
4232 static bool
4233 final_range_test_p (gimple *stmt)
4235 basic_block bb, rhs_bb, lhs_bb;
4236 edge e;
4237 tree lhs, rhs;
4238 use_operand_p use_p;
4239 gimple *use_stmt;
4241 if (!gimple_assign_cast_p (stmt)
4242 && (!is_gimple_assign (stmt)
4243 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4244 != tcc_comparison)))
4245 return false;
4246 bb = gimple_bb (stmt);
4247 if (!single_succ_p (bb))
4248 return false;
4249 e = single_succ_edge (bb);
4250 if (e->flags & EDGE_COMPLEX)
4251 return false;
4253 lhs = gimple_assign_lhs (stmt);
4254 rhs = gimple_assign_rhs1 (stmt);
4255 if (gimple_assign_cast_p (stmt)
4256 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4257 || TREE_CODE (rhs) != SSA_NAME
4258 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4259 return false;
4261 if (!gimple_assign_cast_p (stmt)
4262 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4263 return false;
4265 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4266 if (!single_imm_use (lhs, &use_p, &use_stmt))
4267 return false;
4269 if (gimple_code (use_stmt) != GIMPLE_PHI
4270 || gimple_bb (use_stmt) != e->dest)
4271 return false;
4273 /* And that the rhs is defined in the same loop. */
4274 if (gimple_assign_cast_p (stmt))
4276 if (TREE_CODE (rhs) != SSA_NAME
4277 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4278 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4279 return false;
4281 else
4283 if (TREE_CODE (lhs) != SSA_NAME
4284 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4285 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4286 return false;
4289 return true;
4292 /* Return true if BB is suitable basic block for inter-bb range test
4293 optimization. If BACKWARD is true, BB should be the only predecessor
4294 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
4295 or compared against to find a common basic block to which all conditions
4296 branch if true resp. false. If BACKWARD is false, TEST_BB should
4297 be the only predecessor of BB. *TEST_SWAPPED_P is set to true if
4298 TEST_BB is a bb ending in condition where the edge to non-*OTHER_BB
4299 block points to an empty block that falls through into *OTHER_BB and
4300 the phi args match that path. */
4302 static bool
4303 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4304 bool *test_swapped_p, bool backward)
4306 edge_iterator ei, ei2;
4307 edge e, e2;
4308 gimple *stmt;
4309 gphi_iterator gsi;
4310 bool other_edge_seen = false;
4311 bool is_cond;
4313 if (test_bb == bb)
4314 return false;
4315 /* Check last stmt first. */
4316 stmt = last_stmt (bb);
4317 if (stmt == NULL
4318 || (gimple_code (stmt) != GIMPLE_COND
4319 && (backward || !final_range_test_p (stmt)))
4320 || gimple_visited_p (stmt)
4321 || stmt_could_throw_p (cfun, stmt)
4322 || *other_bb == bb)
4323 return false;
4324 is_cond = gimple_code (stmt) == GIMPLE_COND;
4325 if (is_cond)
4327 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4328 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4329 to *OTHER_BB (if not set yet, try to find it out). */
4330 if (EDGE_COUNT (bb->succs) != 2)
4331 return false;
4332 FOR_EACH_EDGE (e, ei, bb->succs)
4334 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4335 return false;
4336 if (e->dest == test_bb)
4338 if (backward)
4339 continue;
4340 else
4341 return false;
4343 if (e->dest == bb)
4344 return false;
4345 if (*other_bb == NULL)
4347 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4348 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4349 return false;
4350 else if (e->dest == e2->dest)
4351 *other_bb = e->dest;
4352 if (*other_bb == NULL)
4353 return false;
4355 if (e->dest == *other_bb)
4356 other_edge_seen = true;
4357 else if (backward)
4358 return false;
4360 if (*other_bb == NULL || !other_edge_seen)
4361 return false;
4363 else if (single_succ (bb) != *other_bb)
4364 return false;
4366 /* Now check all PHIs of *OTHER_BB. */
4367 e = find_edge (bb, *other_bb);
4368 e2 = find_edge (test_bb, *other_bb);
4369 retry:;
4370 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4372 gphi *phi = gsi.phi ();
4373 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4374 corresponding to BB and TEST_BB predecessor must be the same. */
4375 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4376 gimple_phi_arg_def (phi, e2->dest_idx), 0))
4378 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4379 one of the PHIs should have the lhs of the last stmt in
4380 that block as PHI arg and that PHI should have 0 or 1
4381 corresponding to it in all other range test basic blocks
4382 considered. */
4383 if (!is_cond)
4385 if (gimple_phi_arg_def (phi, e->dest_idx)
4386 == gimple_assign_lhs (stmt)
4387 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4388 || integer_onep (gimple_phi_arg_def (phi,
4389 e2->dest_idx))))
4390 continue;
4392 else
4394 gimple *test_last = last_stmt (test_bb);
4395 if (gimple_code (test_last) == GIMPLE_COND)
4397 if (backward ? e2->src != test_bb : e->src != bb)
4398 return false;
4400 /* For last_bb, handle also:
4401 if (x_3(D) == 3)
4402 goto <bb 6>; [34.00%]
4403 else
4404 goto <bb 7>; [66.00%]
4406 <bb 6> [local count: 79512730]:
4408 <bb 7> [local count: 1073741824]:
4409 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4410 where bb 7 is *OTHER_BB, but the PHI values from the
4411 earlier bbs match the path through the empty bb
4412 in between. */
4413 edge e3;
4414 if (backward)
4415 e3 = EDGE_SUCC (test_bb,
4416 e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0);
4417 else
4418 e3 = EDGE_SUCC (bb,
4419 e == EDGE_SUCC (bb, 0) ? 1 : 0);
4420 if (empty_block_p (e3->dest)
4421 && single_succ_p (e3->dest)
4422 && single_succ (e3->dest) == *other_bb
4423 && single_pred_p (e3->dest)
4424 && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
4426 if (backward)
4427 e2 = single_succ_edge (e3->dest);
4428 else
4429 e = single_succ_edge (e3->dest);
4430 if (test_swapped_p)
4431 *test_swapped_p = true;
4432 goto retry;
4435 else if (gimple_phi_arg_def (phi, e2->dest_idx)
4436 == gimple_assign_lhs (test_last)
4437 && (integer_zerop (gimple_phi_arg_def (phi,
4438 e->dest_idx))
4439 || integer_onep (gimple_phi_arg_def (phi,
4440 e->dest_idx))))
4441 continue;
4444 return false;
4447 return true;
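/* Illustrative sketch (added for exposition, not part of the original
   sources; block numbers and names are hypothetical).  The CFG shape
   suitable_cond_bb accepts looks roughly like

     <bb 2>: if (a_1 == 1) goto <bb 5>; else goto <bb 3>;
     <bb 3>: if (a_1 == 3) goto <bb 5>; else goto <bb 4>;
     <bb 4>: _2 = a_1 == 5;
     <bb 5>: # ans_3 = PHI <1(2), 1(3), _2(4)>

   where bb 5 is *OTHER_BB: every conditional block branches to it
   directly (or through an empty fallthru block), and the PHI arguments
   on those edges are the constants 0/1 or the last statement's lhs.  */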
4450 /* Return true if BB doesn't have side-effects that would disallow
4451 range test optimization, all SSA_NAMEs set in the bb are consumed
4452 in the bb and there are no PHIs. */
4454 bool
4455 no_side_effect_bb (basic_block bb)
4457 gimple_stmt_iterator gsi;
4458 gimple *last;
4460 if (!gimple_seq_empty_p (phi_nodes (bb)))
4461 return false;
4462 last = last_stmt (bb);
4463 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4465 gimple *stmt = gsi_stmt (gsi);
4466 tree lhs;
4467 imm_use_iterator imm_iter;
4468 use_operand_p use_p;
4470 if (is_gimple_debug (stmt))
4471 continue;
4472 if (gimple_has_side_effects (stmt))
4473 return false;
4474 if (stmt == last)
4475 return true;
4476 if (!is_gimple_assign (stmt))
4477 return false;
4478 lhs = gimple_assign_lhs (stmt);
4479 if (TREE_CODE (lhs) != SSA_NAME)
4480 return false;
4481 if (gimple_assign_rhs_could_trap_p (stmt))
4482 return false;
4483 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4485 gimple *use_stmt = USE_STMT (use_p);
4486 if (is_gimple_debug (use_stmt))
4487 continue;
4488 if (gimple_bb (use_stmt) != bb)
4489 return false;
4492 return false;
4495 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4496 return true and fill in *OPS recursively. */
4498 static bool
4499 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4500 class loop *loop)
4502 gimple *stmt = SSA_NAME_DEF_STMT (var);
4503 tree rhs[2];
4504 int i;
4506 if (!is_reassociable_op (stmt, code, loop))
4507 return false;
4509 rhs[0] = gimple_assign_rhs1 (stmt);
4510 rhs[1] = gimple_assign_rhs2 (stmt);
4511 gimple_set_visited (stmt, true);
4512 for (i = 0; i < 2; i++)
4513 if (TREE_CODE (rhs[i]) == SSA_NAME
4514 && !get_ops (rhs[i], code, ops, loop)
4515 && has_single_use (rhs[i]))
4517 operand_entry *oe = operand_entry_pool.allocate ();
4519 oe->op = rhs[i];
4520 oe->rank = code;
4521 oe->id = 0;
4522 oe->count = 1;
4523 oe->stmt_to_insert = NULL;
4524 ops->safe_push (oe);
4526 return true;
4529 /* Find the ops that were added by get_ops starting from VAR, see if
4530 they were changed during update_range_test and if yes, create new
4531 stmts. */
4533 static tree
4534 update_ops (tree var, enum tree_code code, const vec<operand_entry *> &ops,
4535 unsigned int *pidx, class loop *loop)
4537 gimple *stmt = SSA_NAME_DEF_STMT (var);
4538 tree rhs[4];
4539 int i;
4541 if (!is_reassociable_op (stmt, code, loop))
4542 return NULL;
4544 rhs[0] = gimple_assign_rhs1 (stmt);
4545 rhs[1] = gimple_assign_rhs2 (stmt);
4546 rhs[2] = rhs[0];
4547 rhs[3] = rhs[1];
4548 for (i = 0; i < 2; i++)
4549 if (TREE_CODE (rhs[i]) == SSA_NAME)
4551 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4552 if (rhs[2 + i] == NULL_TREE)
4554 if (has_single_use (rhs[i]))
4555 rhs[2 + i] = ops[(*pidx)++]->op;
4556 else
4557 rhs[2 + i] = rhs[i];
4560 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4561 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4563 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4564 var = make_ssa_name (TREE_TYPE (var));
4565 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4566 rhs[2], rhs[3]);
4567 gimple_set_uid (g, gimple_uid (stmt));
4568 gimple_set_visited (g, true);
4569 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4571 return var;
4574 /* Structure to track the initial value passed to get_ops and
4575 the range in the ops vector for each basic block. */
4577 struct inter_bb_range_test_entry
4579 tree op;
4580 unsigned int first_idx, last_idx;
4583 /* Inter-bb range test optimization.
4585 Returns TRUE if a gimple conditional is optimized to a true/false
4586 constant, otherwise returns FALSE.
4588 This indicates to the caller that it should run a CFG cleanup pass
4589 once reassociation is completed. */
4591 static bool
4592 maybe_optimize_range_tests (gimple *stmt)
4594 basic_block first_bb = gimple_bb (stmt);
4595 basic_block last_bb = first_bb;
4596 basic_block other_bb = NULL;
4597 basic_block bb;
4598 edge_iterator ei;
4599 edge e;
4600 auto_vec<operand_entry *> ops;
4601 auto_vec<inter_bb_range_test_entry> bbinfo;
4602 bool any_changes = false;
4603 bool cfg_cleanup_needed = false;
4605 /* Consider only basic blocks that end with GIMPLE_COND or
4606 a cast statement satisfying final_range_test_p. All
4607 but the last bb in the first_bb .. last_bb range
4608 should end with GIMPLE_COND. */
4609 if (gimple_code (stmt) == GIMPLE_COND)
4611 if (EDGE_COUNT (first_bb->succs) != 2)
4612 return cfg_cleanup_needed;
4614 else if (final_range_test_p (stmt))
4615 other_bb = single_succ (first_bb);
4616 else
4617 return cfg_cleanup_needed;
4619 if (stmt_could_throw_p (cfun, stmt))
4620 return cfg_cleanup_needed;
4622 /* As relative ordering of post-dominator sons isn't fixed,
4623 maybe_optimize_range_tests can be called first on any
4624 bb in the range we want to optimize. So, start searching
4625 backwards, if first_bb can be set to a predecessor. */
4626 while (single_pred_p (first_bb))
4628 basic_block pred_bb = single_pred (first_bb);
4629 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
4630 break;
4631 if (!no_side_effect_bb (first_bb))
4632 break;
4633 first_bb = pred_bb;
4635 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4636 Before starting forward search in last_bb successors, find
4637 out the other_bb. */
4638 if (first_bb == last_bb)
4640 other_bb = NULL;
4641 /* As non-GIMPLE_COND last stmt always terminates the range,
4642 if forward search didn't discover anything, just give up. */
4643 if (gimple_code (stmt) != GIMPLE_COND)
4644 return cfg_cleanup_needed;
4645 /* Look at both successors. Either it ends with a GIMPLE_COND
4646 and satisfies suitable_cond_bb, or ends with a cast and
4647 other_bb is that cast's successor. */
4648 FOR_EACH_EDGE (e, ei, first_bb->succs)
4649 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4650 || e->dest == first_bb)
4651 return cfg_cleanup_needed;
4652 else if (single_pred_p (e->dest))
4654 stmt = last_stmt (e->dest);
4655 if (stmt
4656 && gimple_code (stmt) == GIMPLE_COND
4657 && EDGE_COUNT (e->dest->succs) == 2)
4659 if (suitable_cond_bb (first_bb, e->dest, &other_bb,
4660 NULL, true))
4661 break;
4662 else
4663 other_bb = NULL;
4665 else if (stmt
4666 && final_range_test_p (stmt)
4667 && find_edge (first_bb, single_succ (e->dest)))
4669 other_bb = single_succ (e->dest);
4670 if (other_bb == first_bb)
4671 other_bb = NULL;
4674 if (other_bb == NULL)
4675 return cfg_cleanup_needed;
4677 /* Now do the forward search, moving last_bb to successor bbs
4678 that aren't other_bb. */
4679 while (EDGE_COUNT (last_bb->succs) == 2)
4681 FOR_EACH_EDGE (e, ei, last_bb->succs)
4682 if (e->dest != other_bb)
4683 break;
4684 if (e == NULL)
4685 break;
4686 if (!single_pred_p (e->dest))
4687 break;
4688 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false))
4689 break;
4690 if (!no_side_effect_bb (e->dest))
4691 break;
4692 last_bb = e->dest;
4694 if (first_bb == last_bb)
4695 return cfg_cleanup_needed;
4696 /* Here basic blocks first_bb through last_bb's predecessor
4697 end with GIMPLE_COND, all of them have one of the edges to
4698 other_bb and another to another block in the range,
4699 all blocks except first_bb don't have side-effects and
4700 last_bb ends with either GIMPLE_COND, or cast satisfying
4701 final_range_test_p. */
4702 for (bb = last_bb; ; bb = single_pred (bb))
4704 enum tree_code code;
4705 tree lhs, rhs;
4706 inter_bb_range_test_entry bb_ent;
4708 bb_ent.op = NULL_TREE;
4709 bb_ent.first_idx = ops.length ();
4710 bb_ent.last_idx = bb_ent.first_idx;
4711 e = find_edge (bb, other_bb);
4712 stmt = last_stmt (bb);
4713 gimple_set_visited (stmt, true);
4714 if (gimple_code (stmt) != GIMPLE_COND)
4716 use_operand_p use_p;
4717 gimple *phi;
4718 edge e2;
4719 unsigned int d;
4721 lhs = gimple_assign_lhs (stmt);
4722 rhs = gimple_assign_rhs1 (stmt);
4723 gcc_assert (bb == last_bb);
4725 /* stmt is
4726 _123 = (int) _234;
4728 _234 = a_2(D) == 2;
4730 followed by:
4731 <bb M>:
4732 # _345 = PHI <_123(N), 1(...), 1(...)>
4734 or 0 instead of 1. If it is 0, the _234
4735 range test is anded together with all the
4736 other range tests; if it is 1, it is ored with
4737 them. */
4738 single_imm_use (lhs, &use_p, &phi);
4739 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4740 e2 = find_edge (first_bb, other_bb);
4741 d = e2->dest_idx;
4742 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4743 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4744 code = BIT_AND_EXPR;
4745 else
4747 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4748 code = BIT_IOR_EXPR;
4751 /* If _234's SSA_NAME_DEF_STMT is
4752 _234 = _567 | _789;
4753 (or &, corresponding to 1/0 in the phi arguments),
4754 push into ops the individual range test arguments
4755 of the bitwise or resp. and, recursively. */
4756 if (TREE_CODE (rhs) == SSA_NAME
4757 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4758 != tcc_comparison)
4759 && !get_ops (rhs, code, &ops,
4760 loop_containing_stmt (stmt))
4761 && has_single_use (rhs))
4763 /* Otherwise, push the _234 range test itself. */
4764 operand_entry *oe = operand_entry_pool.allocate ();
4766 oe->op = rhs;
4767 oe->rank = code;
4768 oe->id = 0;
4769 oe->count = 1;
4770 oe->stmt_to_insert = NULL;
4771 ops.safe_push (oe);
4772 bb_ent.last_idx++;
4773 bb_ent.op = rhs;
4775 else if (is_gimple_assign (stmt)
4776 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4777 == tcc_comparison)
4778 && !get_ops (lhs, code, &ops,
4779 loop_containing_stmt (stmt))
4780 && has_single_use (lhs))
4782 operand_entry *oe = operand_entry_pool.allocate ();
4783 oe->op = lhs;
4784 oe->rank = code;
4785 oe->id = 0;
4786 oe->count = 1;
4787 ops.safe_push (oe);
4788 bb_ent.last_idx++;
4789 bb_ent.op = lhs;
4791 else
4793 bb_ent.last_idx = ops.length ();
4794 bb_ent.op = rhs;
4796 bbinfo.safe_push (bb_ent);
4797 continue;
4799 else if (bb == last_bb)
4801 /* For last_bb, handle also:
4802 if (x_3(D) == 3)
4803 goto <bb 6>; [34.00%]
4804 else
4805 goto <bb 7>; [66.00%]
4807 <bb 6> [local count: 79512730]:
4809 <bb 7> [local count: 1073741824]:
4810 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4811 where bb 7 is OTHER_BB, but the PHI values from the
4812 earlier bbs match the path through the empty bb
4813 in between. */
4814 bool test_swapped_p = false;
4815 bool ok = suitable_cond_bb (single_pred (last_bb), last_bb,
4816 &other_bb, &test_swapped_p, true);
4817 gcc_assert (ok);
4818 if (test_swapped_p)
4819 e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0);
4821 /* Otherwise stmt is GIMPLE_COND. */
4822 code = gimple_cond_code (stmt);
4823 lhs = gimple_cond_lhs (stmt);
4824 rhs = gimple_cond_rhs (stmt);
4825 if (TREE_CODE (lhs) == SSA_NAME
4826 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4827 && ((code != EQ_EXPR && code != NE_EXPR)
4828 || rhs != boolean_false_node
4829 /* Either push into ops the individual bitwise
4830 or resp. and operands, depending on which
4831 edge is other_bb. */
4832 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4833 ^ (code == EQ_EXPR))
4834 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4835 loop_containing_stmt (stmt))))
4837 /* Or push the GIMPLE_COND stmt itself. */
4838 operand_entry *oe = operand_entry_pool.allocate ();
4840 oe->op = NULL;
4841 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4842 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4843 /* oe->op = NULL signals that there is no SSA_NAME
4844 for the range test, and oe->id instead is the
4845 number of the basic block at whose end the
4846 GIMPLE_COND is. */
4847 oe->id = bb->index;
4848 oe->count = 1;
4849 oe->stmt_to_insert = NULL;
4850 ops.safe_push (oe);
4851 bb_ent.op = NULL;
4852 bb_ent.last_idx++;
4854 else if (ops.length () > bb_ent.first_idx)
4856 bb_ent.op = lhs;
4857 bb_ent.last_idx = ops.length ();
4859 bbinfo.safe_push (bb_ent);
4860 if (bb == first_bb)
4861 break;
4863 if (ops.length () > 1)
4864 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4865 if (any_changes)
4867 unsigned int idx, max_idx = 0;
4868 /* update_ops relies on the has_single_use predicates returning the
4869 same values as they did during get_ops earlier. Additionally it
4870 never removes statements, only adds new ones, and it should walk
4871 from the single imm use and check the predicate already before
4872 making those changes.
4873 On the other hand, the handling of GIMPLE_COND directly can turn
4874 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4875 it needs to be done in a separate loop afterwards. */
4876 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4878 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4879 && bbinfo[idx].op != NULL_TREE)
4881 tree new_op;
4883 max_idx = idx;
4884 stmt = last_stmt (bb);
4885 new_op = update_ops (bbinfo[idx].op,
4886 (enum tree_code)
4887 ops[bbinfo[idx].first_idx]->rank,
4888 ops, &bbinfo[idx].first_idx,
4889 loop_containing_stmt (stmt));
4890 if (new_op == NULL_TREE)
4892 gcc_assert (bb == last_bb);
4893 new_op = ops[bbinfo[idx].first_idx++]->op;
4895 if (bbinfo[idx].op != new_op)
4897 imm_use_iterator iter;
4898 use_operand_p use_p;
4899 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
4901 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
4902 if (is_gimple_debug (use_stmt))
4903 continue;
4904 else if (gimple_code (use_stmt) == GIMPLE_COND
4905 || gimple_code (use_stmt) == GIMPLE_PHI)
4906 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4907 SET_USE (use_p, new_op);
4908 else if ((is_gimple_assign (use_stmt)
4909 && (TREE_CODE_CLASS
4910 (gimple_assign_rhs_code (use_stmt))
4911 == tcc_comparison)))
4912 cast_or_tcc_cmp_stmt = use_stmt;
4913 else if (gimple_assign_cast_p (use_stmt))
4914 cast_or_tcc_cmp_stmt = use_stmt;
4915 else
4916 gcc_unreachable ();
4918 if (cast_or_tcc_cmp_stmt)
4920 gcc_assert (bb == last_bb);
4921 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
4922 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
4923 enum tree_code rhs_code
4924 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4925 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4926 : CONVERT_EXPR;
4927 gassign *g;
4928 if (is_gimple_min_invariant (new_op))
4930 new_op = fold_convert (TREE_TYPE (lhs), new_op);
4931 g = gimple_build_assign (new_lhs, new_op);
4933 else
4934 g = gimple_build_assign (new_lhs, rhs_code, new_op);
4935 gimple_stmt_iterator gsi
4936 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
4937 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
4938 gimple_set_visited (g, true);
4939 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4940 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4941 if (is_gimple_debug (use_stmt))
4942 continue;
4943 else if (gimple_code (use_stmt) == GIMPLE_COND
4944 || gimple_code (use_stmt) == GIMPLE_PHI)
4945 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4946 SET_USE (use_p, new_lhs);
4947 else
4948 gcc_unreachable ();
4952 if (bb == first_bb)
4953 break;
4955 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4957 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4958 && bbinfo[idx].op == NULL_TREE
4959 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4961 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4963 if (idx > max_idx)
4964 max_idx = idx;
4966 /* If we collapse the conditional to a true/false
4967 condition, then bubble that knowledge up to our caller. */
4968 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4970 gimple_cond_make_false (cond_stmt);
4971 cfg_cleanup_needed = true;
4973 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4975 gimple_cond_make_true (cond_stmt);
4976 cfg_cleanup_needed = true;
4978 else
4980 gimple_cond_set_code (cond_stmt, NE_EXPR);
4981 gimple_cond_set_lhs (cond_stmt,
4982 ops[bbinfo[idx].first_idx]->op);
4983 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4985 update_stmt (cond_stmt);
4987 if (bb == first_bb)
4988 break;
4991 /* The above changes could result in basic blocks after the first
4992 modified one, up to and including last_bb, being executed even if
4993 they would not be in the original program. If the value ranges of
4994 assignment lhs' in those bbs were dependent on the conditions
4995 guarding those basic blocks which now can change, the VRs might
4996 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4997 are only used within the same bb, it should not be a big deal if
4998 we just reset all the VRs in those bbs. See PR68671. */
4999 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
5000 reset_flow_sensitive_info_in_bb (bb);
5002 return cfg_cleanup_needed;
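/* Illustrative sketch (added for exposition, not part of the original
   sources; names are hypothetical).  At the source level,
   maybe_optimize_range_tests targets short-circuited chains such as

     if (x == 0 || x == 1 || x == 2 || x == 3)
       foo ();

   where each subcondition ends up as a GIMPLE_COND in its own basic
   block.  After the ops are merged (here into something equivalent to
   (unsigned int) x <= 3U), the now-redundant GIMPLE_CONDs are turned
   into constant true/false conditions, which is why the caller is told
   to schedule a CFG cleanup.  */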
5005 /* Return true if OPERAND is defined by a PHI node which uses the LHS
5006 of STMT in its operands. This is also known as a "destructive
5007 update" operation. */
5009 static bool
5010 is_phi_for_stmt (gimple *stmt, tree operand)
5012 gimple *def_stmt;
5013 gphi *def_phi;
5014 tree lhs;
5015 use_operand_p arg_p;
5016 ssa_op_iter i;
5018 if (TREE_CODE (operand) != SSA_NAME)
5019 return false;
5021 lhs = gimple_assign_lhs (stmt);
5023 def_stmt = SSA_NAME_DEF_STMT (operand);
5024 def_phi = dyn_cast <gphi *> (def_stmt);
5025 if (!def_phi)
5026 return false;
5028 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
5029 if (lhs == USE_FROM_PTR (arg_p))
5030 return true;
5031 return false;
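/* Illustrative sketch (added for exposition, not part of the original
   sources; names are hypothetical).  A "destructive update" is the
   accumulation pattern found in reduction loops, e.g.

     <bb loop>:
       # sum_1 = PHI <0(preheader), sum_2(loop)>
       ...
       sum_2 = x_3 + sum_1;

   Here is_phi_for_stmt (stmt = "sum_2 = x_3 + sum_1", operand = sum_1)
   returns true, because sum_1 is defined by a PHI that uses sum_2, the
   lhs of STMT.  Keeping sum_1 last preserves the shape the vectorizer
   recognizes as a sum reduction.  */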
5034 /* Remove def stmt of VAR if VAR has zero uses and recurse
5035 on rhs1 operand if so. */
5037 static void
5038 remove_visited_stmt_chain (tree var)
5040 gimple *stmt;
5041 gimple_stmt_iterator gsi;
5043 while (1)
5045 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
5046 return;
5047 stmt = SSA_NAME_DEF_STMT (var);
5048 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
5050 var = gimple_assign_rhs1 (stmt);
5051 gsi = gsi_for_stmt (stmt);
5052 reassoc_remove_stmt (&gsi);
5053 release_defs (stmt);
5055 else
5056 return;
5060 /* This function checks three consecutive operands in
5061 the passed operands vector OPS starting from OPINDEX and
5062 swaps two operands if it is profitable for the binary operation
5063 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
5065 We pair ops with the same rank if possible.
5067 The alternative we try is to see if STMT is a destructive
5068 update style statement, which is like:
5069 b = phi (a, ...)
5070 a = c + b;
5071 In that case, we want to use the destructive update form to
5072 expose the possible vectorizer sum reduction opportunity.
5073 In that case, the third operand will be the phi node. This
5074 check is not performed if STMT is null.
5076 We could, of course, try to be better as noted above, and do a
5077 lot of work to try to find these opportunities in >3 operand
5078 cases, but it is unlikely to be worth it. */
5080 static void
5081 swap_ops_for_binary_stmt (const vec<operand_entry *> &ops,
5082 unsigned int opindex, gimple *stmt)
5084 operand_entry *oe1, *oe2, *oe3;
5086 oe1 = ops[opindex];
5087 oe2 = ops[opindex + 1];
5088 oe3 = ops[opindex + 2];
5090 if ((oe1->rank == oe2->rank
5091 && oe2->rank != oe3->rank)
5092 || (stmt && is_phi_for_stmt (stmt, oe3->op)
5093 && !is_phi_for_stmt (stmt, oe1->op)
5094 && !is_phi_for_stmt (stmt, oe2->op)))
5095 std::swap (*oe1, *oe3);
5096 else if ((oe1->rank == oe3->rank
5097 && oe2->rank != oe3->rank)
5098 || (stmt && is_phi_for_stmt (stmt, oe2->op)
5099 && !is_phi_for_stmt (stmt, oe1->op)
5100 && !is_phi_for_stmt (stmt, oe3->op)))
5101 std::swap (*oe1, *oe2);
5104 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
5105 two definitions, otherwise return STMT. */
5107 static inline gimple *
5108 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
5110 if (TREE_CODE (rhs1) == SSA_NAME
5111 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
5112 stmt = SSA_NAME_DEF_STMT (rhs1);
5113 if (TREE_CODE (rhs2) == SSA_NAME
5114 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
5115 stmt = SSA_NAME_DEF_STMT (rhs2);
5116 return stmt;
5119 /* If the stmt that defines operand has to be inserted, insert it
5120 before the use. */
5121 static void
5122 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
5124 gcc_assert (is_gimple_assign (stmt_to_insert));
5125 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
5126 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
5127 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
5128 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
5129 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
5131 /* If the insert point is not stmt, then insert_point would be
5132 the point where operand rhs1 or rhs2 is defined. In this case,
5133 stmt_to_insert has to be inserted afterwards. This would
5134 only happen when the stmt insertion point is flexible. */
5135 if (stmt == insert_point)
5136 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
5137 else
5138 insert_stmt_after (stmt_to_insert, insert_point);
5142 /* Recursively rewrite our linearized statements so that the operators
5143 match those in OPS[OPINDEX], putting the computation in rank
5144 order. Return new lhs.
5145 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
5146 the current stmt and during recursive invocations.
5147 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
5148 recursive invocations. */
5150 static tree
5151 rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
5152 const vec<operand_entry *> &ops, bool changed,
5153 bool next_changed)
5155 tree rhs1 = gimple_assign_rhs1 (stmt);
5156 tree rhs2 = gimple_assign_rhs2 (stmt);
5157 tree lhs = gimple_assign_lhs (stmt);
5158 operand_entry *oe;
5160 /* The final recursion case for this function is that you have
5161 exactly two operations left.
5162 If we had exactly one op in the entire list to start with, we
5163 would have never called this function, and the tail recursion
5164 rewrites them one at a time. */
5165 if (opindex + 2 == ops.length ())
5167 operand_entry *oe1, *oe2;
5169 oe1 = ops[opindex];
5170 oe2 = ops[opindex + 1];
5172 if (rhs1 != oe1->op || rhs2 != oe2->op)
5174 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5175 unsigned int uid = gimple_uid (stmt);
5177 if (dump_file && (dump_flags & TDF_DETAILS))
5179 fprintf (dump_file, "Transforming ");
5180 print_gimple_stmt (dump_file, stmt, 0);
5183 /* If the stmt that defines operand has to be inserted, insert it
5184 before the use. */
5185 if (oe1->stmt_to_insert)
5186 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
5187 if (oe2->stmt_to_insert)
5188 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
5189 /* Even when changed is false, reassociation could have e.g. removed
5190 some redundant operations, so unless we are just swapping the
5191 arguments or unless there is no change at all (then we just
5192 return lhs), force creation of a new SSA_NAME. */
5193 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
5195 gimple *insert_point
5196 = find_insert_point (stmt, oe1->op, oe2->op);
5197 lhs = make_ssa_name (TREE_TYPE (lhs));
5198 stmt
5199 = gimple_build_assign (lhs, rhs_code,
5200 oe1->op, oe2->op);
5201 gimple_set_uid (stmt, uid);
5202 gimple_set_visited (stmt, true);
5203 if (insert_point == gsi_stmt (gsi))
5204 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5205 else
5206 insert_stmt_after (stmt, insert_point);
5208 else
5210 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
5211 == stmt);
5212 gimple_assign_set_rhs1 (stmt, oe1->op);
5213 gimple_assign_set_rhs2 (stmt, oe2->op);
5214 update_stmt (stmt);
5217 if (rhs1 != oe1->op && rhs1 != oe2->op)
5218 remove_visited_stmt_chain (rhs1);
5220 if (dump_file && (dump_flags & TDF_DETAILS))
5222 fprintf (dump_file, " into ");
5223 print_gimple_stmt (dump_file, stmt, 0);
5226 return lhs;
5229 /* If we hit here, we should have 3 or more ops left. */
5230 gcc_assert (opindex + 2 < ops.length ());
5232 /* Rewrite the next operator. */
5233 oe = ops[opindex];
5235 /* If the stmt that defines operand has to be inserted, insert it
5236 before the use. */
5237 if (oe->stmt_to_insert)
5238 insert_stmt_before_use (stmt, oe->stmt_to_insert);
5240 /* Recurse on the LHS of the binary operator, which is guaranteed to
5241 be the non-leaf side. */
5242 tree new_rhs1
5243 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
5244 changed || oe->op != rhs2 || next_changed,
5245 false);
5247 if (oe->op != rhs2 || new_rhs1 != rhs1)
5249 if (dump_file && (dump_flags & TDF_DETAILS))
5251 fprintf (dump_file, "Transforming ");
5252 print_gimple_stmt (dump_file, stmt, 0);
5255 /* If changed is false, this is either opindex == 0
5256 or all outer rhs2's were equal to corresponding oe->op,
5257 and powi_result is NULL.
5258 That means lhs is equivalent before and after reassociation.
5259 Otherwise ensure the old lhs SSA_NAME is not reused and
5260 create a new stmt as well, so that any debug stmts will be
5261 properly adjusted. */
5262 if (changed)
5264 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5265 unsigned int uid = gimple_uid (stmt);
5266 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
5268 lhs = make_ssa_name (TREE_TYPE (lhs));
5269 stmt = gimple_build_assign (lhs, rhs_code,
5270 new_rhs1, oe->op);
5271 gimple_set_uid (stmt, uid);
5272 gimple_set_visited (stmt, true);
5273 if (insert_point == gsi_stmt (gsi))
5274 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5275 else
5276 insert_stmt_after (stmt, insert_point);
5278 else
5280 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
5281 == stmt);
5282 gimple_assign_set_rhs1 (stmt, new_rhs1);
5283 gimple_assign_set_rhs2 (stmt, oe->op);
5284 update_stmt (stmt);
5287 if (dump_file && (dump_flags & TDF_DETAILS))
5289 fprintf (dump_file, " into ");
5290 print_gimple_stmt (dump_file, stmt, 0);
5293 return lhs;
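/* Illustrative sketch (added for exposition, not part of the original
   sources; names are hypothetical).  Given the linearized chain

     _1 = x_4 + x_5;
     _2 = _1 + x_6;
     _3 = _2 + x_7;

   and a sorted ops vector {a, b, c, d}, rewrite_expr_tree recurses down
   the rhs1 chain and rewrites it so that the last two entries feed the
   innermost statement and ops[0] feeds the root:

     _8 = c + d;
     _9 = _8 + b;
     _3 = _9 + a;

   creating new SSA names when operands actually change so that debug
   statements can be adjusted.  */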
5296 /* Find out how many cycles we need to compute the statements chain.
5297 OPS_NUM holds the number of statements in a chain. CPU_WIDTH is the
5298 maximum number of independent statements we may execute per cycle. */
5300 static int
5301 get_required_cycles (int ops_num, int cpu_width)
5303 int res;
5304 int elog;
5305 unsigned int rest;
5307 /* While we have more than 2 * cpu_width operands
5308 we may reduce the number of operands by cpu_width
5309 per cycle. */
5310 res = ops_num / (2 * cpu_width);
5312 /* The remaining operand count may be halved each cycle
5313 until we have only one operand. */
5314 rest = (unsigned)(ops_num - res * cpu_width);
5315 elog = exact_log2 (rest);
5316 if (elog >= 0)
5317 res += elog;
5318 else
5319 res += floor_log2 (rest) + 1;
5321 return res;
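/* Worked example (added for exposition, not part of the original
   sources).  With ops_num == 10 and cpu_width == 2:
     res  = 10 / (2 * 2) = 2   (two cycles shrinking by cpu_width each)
     rest = 10 - 2 * 2   = 6
   6 is not a power of two, so res += floor_log2 (6) + 1 = 3, giving a
   total of 5 cycles for the whole chain.  */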
5324 /* Returns an optimal number of registers to use for computation of
5325 given statements. */
5327 static int
5328 get_reassociation_width (int ops_num, enum tree_code opc,
5329 machine_mode mode)
5331 int param_width = param_tree_reassoc_width;
5332 int width;
5333 int width_min;
5334 int cycles_best;
5336 if (param_width > 0)
5337 width = param_width;
5338 else
5339 width = targetm.sched.reassociation_width (opc, mode);
5341 if (width == 1)
5342 return width;
5344 /* Get the minimal time required for sequence computation. */
5345 cycles_best = get_required_cycles (ops_num, width);
5347 /* Check if we may use a smaller width and still compute the sequence
5348 in the same time. That will allow us to reduce register usage.
5349 get_required_cycles is monotonically increasing with lower width
5350 so we can perform a binary search for the minimal width that still
5351 results in the optimal cycle count. */
5352 width_min = 1;
5353 while (width > width_min)
5355 int width_mid = (width + width_min) / 2;
5357 if (get_required_cycles (ops_num, width_mid) == cycles_best)
5358 width = width_mid;
5359 else if (width_min < width_mid)
5360 width_min = width_mid;
5361 else
5362 break;
5365 return width;
5368 /* Recursively rewrite our linearized statements so that the operators
5369 match those in OPS[OPINDEX], putting the computation in rank
5370 order and trying to allow operations to be executed in
5371 parallel. */
5373 static void
5374 rewrite_expr_tree_parallel (gassign *stmt, int width,
5375 const vec<operand_entry *> &ops)
5377 enum tree_code opcode = gimple_assign_rhs_code (stmt);
5378 int op_num = ops.length ();
5379 gcc_assert (op_num > 0);
5380 int stmt_num = op_num - 1;
5381 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5382 int op_index = op_num - 1;
5383 int stmt_index = 0;
5384 int ready_stmts_end = 0;
5385 int i = 0;
5386 gimple *stmt1 = NULL, *stmt2 = NULL;
5387 tree last_rhs1 = gimple_assign_rhs1 (stmt);
5389 /* We start expression rewriting from the top statements.
5390 So, in this loop we create a full list of statements
5391 we will work with. */
5392 stmts[stmt_num - 1] = stmt;
5393 for (i = stmt_num - 2; i >= 0; i--)
5394 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5396 for (i = 0; i < stmt_num; i++)
5398 tree op1, op2;
5400 /* Determine whether we should use results of
5401 already handled statements or not. */
5402 if (ready_stmts_end == 0
5403 && (i - stmt_index >= width || op_index < 1))
5404 ready_stmts_end = i;
5406 /* Now we choose operands for the next statement. A nonzero
5407 value in ready_stmts_end means that we should use
5408 the result of already generated statements as a new operand. */
5409 if (ready_stmts_end > 0)
5411 op1 = gimple_assign_lhs (stmts[stmt_index++]);
5412 if (ready_stmts_end > stmt_index)
5413 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5414 else if (op_index >= 0)
5416 operand_entry *oe = ops[op_index--];
5417 stmt2 = oe->stmt_to_insert;
5418 op2 = oe->op;
5420 else
5422 gcc_assert (stmt_index < i);
5423 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5426 if (stmt_index >= ready_stmts_end)
5427 ready_stmts_end = 0;
5429 else
5431 if (op_index > 1)
5432 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
5433 operand_entry *oe2 = ops[op_index--];
5434 operand_entry *oe1 = ops[op_index--];
5435 op2 = oe2->op;
5436 stmt2 = oe2->stmt_to_insert;
5437 op1 = oe1->op;
5438 stmt1 = oe1->stmt_to_insert;
5441 /* If we emit the last statement then we should put
5442 operands into the last statement. It will also
5443 break the loop. */
5444 if (op_index < 0 && stmt_index == i)
5445 i = stmt_num - 1;
5447 if (dump_file && (dump_flags & TDF_DETAILS))
5449 fprintf (dump_file, "Transforming ");
5450 print_gimple_stmt (dump_file, stmts[i], 0);
5453 /* If the stmt that defines operand has to be inserted, insert it
5454 before the use. */
5455 if (stmt1)
5456 insert_stmt_before_use (stmts[i], stmt1);
5457 if (stmt2)
5458 insert_stmt_before_use (stmts[i], stmt2);
5459 stmt1 = stmt2 = NULL;
5461 /* We keep original statement only for the last one. All
5462 others are recreated. */
5463 if (i == stmt_num - 1)
5465 gimple_assign_set_rhs1 (stmts[i], op1);
5466 gimple_assign_set_rhs2 (stmts[i], op2);
5467 update_stmt (stmts[i]);
5469 else
5471 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
5472 gimple_set_visited (stmts[i], true);
5474 if (dump_file && (dump_flags & TDF_DETAILS))
5476 fprintf (dump_file, " into ");
5477 print_gimple_stmt (dump_file, stmts[i], 0);
5481 remove_visited_stmt_chain (last_rhs1);
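/* Illustrative sketch (added for exposition, not part of the original
   sources; names are hypothetical).  With width == 2 and four operands
   {a, b, c, d}, the parallel rewrite produces two independent partial
   sums that can issue in the same cycle, followed by their combination:

     _1 = a + b;
     _2 = c + d;
     _3 = _1 + _2;

   instead of the serial chain ((a + b) + c) + d, which has a longer
   dependency height.  */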
5484 /* Transform STMT, which is really (A + B) + (C + D), into the left
5485 linear form, ((A+B)+C)+D.
5486 Recurse on D if necessary. */
5488 static void
5489 linearize_expr (gimple *stmt)
5491 gimple_stmt_iterator gsi;
5492 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5493 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5494 gimple *oldbinrhs = binrhs;
5495 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5496 gimple *newbinrhs = NULL;
5497 class loop *loop = loop_containing_stmt (stmt);
5498 tree lhs = gimple_assign_lhs (stmt);
5500 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5501 && is_reassociable_op (binrhs, rhscode, loop));
5503 gsi = gsi_for_stmt (stmt);
5505 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5506 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5507 gimple_assign_rhs_code (binrhs),
5508 gimple_assign_lhs (binlhs),
5509 gimple_assign_rhs2 (binrhs));
5510 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5511 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5512 gimple_set_uid (binrhs, gimple_uid (stmt));
5514 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5515 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5517 if (dump_file && (dump_flags & TDF_DETAILS))
5519 fprintf (dump_file, "Linearized: ");
5520 print_gimple_stmt (dump_file, stmt, 0);
5523 reassociate_stats.linearized++;
5524 update_stmt (stmt);
5526 gsi = gsi_for_stmt (oldbinrhs);
5527 reassoc_remove_stmt (&gsi);
5528 release_defs (oldbinrhs);
5530 gimple_set_visited (stmt, true);
5531 gimple_set_visited (binlhs, true);
5532 gimple_set_visited (binrhs, true);
5534 /* Tail recurse on the new rhs if it still needs reassociation. */
5535 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5536 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5537 want to change the algorithm while converting to tuples. */
5538 linearize_expr (stmt);
5541 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5542 it. Otherwise, return NULL. */
5544 static gimple *
5545 get_single_immediate_use (tree lhs)
5547 use_operand_p immuse;
5548 gimple *immusestmt;
5550 if (TREE_CODE (lhs) == SSA_NAME
5551 && single_imm_use (lhs, &immuse, &immusestmt)
5552 && is_gimple_assign (immusestmt))
5553 return immusestmt;
5555 return NULL;
5558 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5559 representing the negated value. Insertions of any necessary
5560 instructions go before GSI.
5561 This function is recursive in that, if you hand it "a_5" as the
5562 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5563 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5565 static tree
5566 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5568 gimple *negatedefstmt = NULL;
5569 tree resultofnegate;
5570 gimple_stmt_iterator gsi;
5571 unsigned int uid;
5573 /* If we are trying to negate a name, defined by an add, negate the
5574 add operands instead. */
5575 if (TREE_CODE (tonegate) == SSA_NAME)
5576 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5577 if (TREE_CODE (tonegate) == SSA_NAME
5578 && is_gimple_assign (negatedefstmt)
5579 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5580 && has_single_use (gimple_assign_lhs (negatedefstmt))
5581 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5583 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5584 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5585 tree lhs = gimple_assign_lhs (negatedefstmt);
5586 gimple *g;
5588 gsi = gsi_for_stmt (negatedefstmt);
5589 rhs1 = negate_value (rhs1, &gsi);
5591 gsi = gsi_for_stmt (negatedefstmt);
5592 rhs2 = negate_value (rhs2, &gsi);
5594 gsi = gsi_for_stmt (negatedefstmt);
5595 lhs = make_ssa_name (TREE_TYPE (lhs));
5596 gimple_set_visited (negatedefstmt, true);
5597 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5598 gimple_set_uid (g, gimple_uid (negatedefstmt));
5599 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5600 return lhs;
5603 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5604 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5605 NULL_TREE, true, GSI_SAME_STMT);
5606 gsi = *gsip;
5607 uid = gimple_uid (gsi_stmt (gsi));
5608 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5610 gimple *stmt = gsi_stmt (gsi);
5611 if (gimple_uid (stmt) != 0)
5612 break;
5613 gimple_set_uid (stmt, uid);
5615 return resultofnegate;
5618 /* Return true if we should break up the subtract in STMT into an add
5619 with negate. This is true when the subtract operands are really
5620 adds, or the subtract itself is used in an add expression. In
5621 either case, breaking up the subtract into an add with negate
5622 exposes the adds to reassociation. */
5624 static bool
5625 should_break_up_subtract (gimple *stmt)
5627 tree lhs = gimple_assign_lhs (stmt);
5628 tree binlhs = gimple_assign_rhs1 (stmt);
5629 tree binrhs = gimple_assign_rhs2 (stmt);
5630 gimple *immusestmt;
5631 class loop *loop = loop_containing_stmt (stmt);
5633 if (TREE_CODE (binlhs) == SSA_NAME
5634 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5635 return true;
5637 if (TREE_CODE (binrhs) == SSA_NAME
5638 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5639 return true;
5641 if (TREE_CODE (lhs) == SSA_NAME
5642 && (immusestmt = get_single_immediate_use (lhs))
5643 && is_gimple_assign (immusestmt)
5644 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
5645 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
5646 && gimple_assign_rhs1 (immusestmt) == lhs)
5647 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
5648 return true;
5649 return false;
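/* Illustrative sketch (added for exposition, not part of the original
   sources; names are hypothetical).  Breaking up a subtract pays off
   when the result feeds further additions, e.g.

     x_1 = a_2 - b_3;
     y_4 = x_1 + c_5;

   becomes, after break_up_subtract,

     t_6 = -b_3;
     x_1 = a_2 + t_6;
     y_4 = x_1 + c_5;

   so the whole computation is a single reassociable PLUS chain over
   {a_2, -b_3, c_5}.  */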
5652 /* Transform STMT from A - B into A + -B. */
5654 static void
5655 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
5657 tree rhs1 = gimple_assign_rhs1 (stmt);
5658 tree rhs2 = gimple_assign_rhs2 (stmt);
5660 if (dump_file && (dump_flags & TDF_DETAILS))
5662 fprintf (dump_file, "Breaking up subtract ");
5663 print_gimple_stmt (dump_file, stmt, 0);
5666 rhs2 = negate_value (rhs2, gsip);
5667 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
5668 update_stmt (stmt);
5671 /* Determine whether STMT is a builtin call that raises an SSA name
5672 to an integer power and has only one use. If so, and this is early
5673 reassociation and unsafe math optimizations are permitted, place
5674 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5675 If any of these conditions does not hold, return FALSE. */
5677 static bool
5678 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
5680 tree arg1;
5681 REAL_VALUE_TYPE c, cint;
5683 switch (gimple_call_combined_fn (stmt))
5685 CASE_CFN_POW:
5686 if (flag_errno_math)
5687 return false;
5689 *base = gimple_call_arg (stmt, 0);
5690 arg1 = gimple_call_arg (stmt, 1);
5692 if (TREE_CODE (arg1) != REAL_CST)
5693 return false;
5695 c = TREE_REAL_CST (arg1);
5697 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5698 return false;
5700 *exponent = real_to_integer (&c);
5701 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5702 if (!real_identical (&c, &cint))
5703 return false;
5705 break;
5707 CASE_CFN_POWI:
5708 *base = gimple_call_arg (stmt, 0);
5709 arg1 = gimple_call_arg (stmt, 1);
5711 if (!tree_fits_shwi_p (arg1))
5712 return false;
5714 *exponent = tree_to_shwi (arg1);
5715 break;
5717 default:
5718 return false;
5721 /* Expanding negative exponents is generally unproductive, so we don't
5722 complicate matters with those. Exponents of zero and one should
5723 have been handled by expression folding. */
5724 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5725 return false;
5727 return true;
5730 /* Try to derive and add operand entry for OP to *OPS. Return false if
5731 unsuccessful. */
5733 static bool
5734 try_special_add_to_ops (vec<operand_entry *> *ops,
5735 enum tree_code code,
5736 tree op, gimple* def_stmt)
5738 tree base = NULL_TREE;
5739 HOST_WIDE_INT exponent = 0;
5741 if (TREE_CODE (op) != SSA_NAME
5742 || ! has_single_use (op))
5743 return false;
5745 if (code == MULT_EXPR
5746 && reassoc_insert_powi_p
5747 && flag_unsafe_math_optimizations
5748 && is_gimple_call (def_stmt)
5749 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5751 add_repeat_to_ops_vec (ops, base, exponent);
5752 gimple_set_visited (def_stmt, true);
5753 return true;
5755 else if (code == MULT_EXPR
5756 && is_gimple_assign (def_stmt)
5757 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5758 && !HONOR_SNANS (TREE_TYPE (op))
5759 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5760 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
5762 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5763 tree cst = build_minus_one_cst (TREE_TYPE (op));
5764 add_to_ops_vec (ops, rhs1);
5765 add_to_ops_vec (ops, cst);
5766 gimple_set_visited (def_stmt, true);
5767 return true;
5770 return false;
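/* Illustrative sketch (added for exposition, not part of the original
   sources; names are hypothetical).  For the NEGATE_EXPR case above, a
   multiplication chain such as

     t_1 = -y_2;
     r_3 = x_4 * t_1;

   contributes the entries {y_2, -1} instead of {t_1}, so the ops vector
   for r_3 becomes {x_4, y_2, -1} and the sign can be folded with other
   negations or constants during reassociation.  */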
5773 /* Recursively linearize a binary expression that is the RHS of STMT.
5774 Place the operands of the expression tree in the vector named OPS. */
5776 static void
5777 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5778 bool is_associative, bool set_visited)
5780 tree binlhs = gimple_assign_rhs1 (stmt);
5781 tree binrhs = gimple_assign_rhs2 (stmt);
5782 gimple *binlhsdef = NULL, *binrhsdef = NULL;
5783 bool binlhsisreassoc = false;
5784 bool binrhsisreassoc = false;
5785 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5786 class loop *loop = loop_containing_stmt (stmt);
5788 if (set_visited)
5789 gimple_set_visited (stmt, true);
5791 if (TREE_CODE (binlhs) == SSA_NAME)
5793 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5794 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5795 && !stmt_could_throw_p (cfun, binlhsdef));
5798 if (TREE_CODE (binrhs) == SSA_NAME)
5800 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5801 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5802 && !stmt_could_throw_p (cfun, binrhsdef));
5805 /* If the LHS is not reassociable, but the RHS is, we need to swap
5806 them. If neither is reassociable, there is nothing we can do, so
5807 just put them in the ops vector. If the LHS is reassociable,
5808 linearize it. If both are reassociable, then linearize the RHS
5809 and the LHS. */
5811 if (!binlhsisreassoc)
5813 /* If this is not an associative operation (e.g. it is a division), give up. */
5814 if (!is_associative)
5816 add_to_ops_vec (ops, binrhs);
5817 return;
5820 if (!binrhsisreassoc)
5822 bool swap = false;
5823 if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5824 /* If we add ops for the rhs we expect to be able to recurse
5825    to it via the lhs during expression rewrite, so swap the
5826    operands. */
5827 swap = true;
5828 else
5829 add_to_ops_vec (ops, binrhs);
5831 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5832 add_to_ops_vec (ops, binlhs);
5834 if (!swap)
5835 return;
5838 if (dump_file && (dump_flags & TDF_DETAILS))
5840 fprintf (dump_file, "swapping operands of ");
5841 print_gimple_stmt (dump_file, stmt, 0);
5844 swap_ssa_operands (stmt,
5845 gimple_assign_rhs1_ptr (stmt),
5846 gimple_assign_rhs2_ptr (stmt));
5847 update_stmt (stmt);
5849 if (dump_file && (dump_flags & TDF_DETAILS))
5851 fprintf (dump_file, " is now ");
5852 print_gimple_stmt (dump_file, stmt, 0);
5854 if (!binrhsisreassoc)
5855 return;
5857 /* We want to make it so the lhs is always the reassociative op,
5858 so swap. */
5859 std::swap (binlhs, binrhs);
5861 else if (binrhsisreassoc)
5863 linearize_expr (stmt);
5864 binlhs = gimple_assign_rhs1 (stmt);
5865 binrhs = gimple_assign_rhs2 (stmt);
5868 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5869 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5870 rhscode, loop));
5871 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5872 is_associative, set_visited);
5874 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5875 add_to_ops_vec (ops, binrhs);
5878 /* Repropagate the negates back into subtracts, since no other pass
5879 currently does it. */
5881 static void
5882 repropagate_negates (void)
5884 unsigned int i = 0;
5885 tree negate;
5887 FOR_EACH_VEC_ELT (plus_negates, i, negate)
5889 gimple *user = get_single_immediate_use (negate);
5891 if (!user || !is_gimple_assign (user))
5892 continue;
5894 /* The negate operand can be either operand of a PLUS_EXPR
5895 (it can be the LHS if the RHS is a constant for example).
5897 Force the negate operand to the RHS of the PLUS_EXPR, then
5898 transform the PLUS_EXPR into a MINUS_EXPR. */
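/* Illustrative example, not part of the original source:

     x = -a;
     y = x + b;

   is canonicalized by the swap below to y = b + x and then rewritten
   as y = b - a; the now-unused negation is left for DCE.  */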
5899 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
5901 /* If the negated operand appears on the LHS of the
5902 PLUS_EXPR, exchange the operands of the PLUS_EXPR
5903 to force the negated operand to the RHS of the PLUS_EXPR. */
5904 if (gimple_assign_rhs1 (user) == negate)
5906 swap_ssa_operands (user,
5907 gimple_assign_rhs1_ptr (user),
5908 gimple_assign_rhs2_ptr (user));
5911 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5912 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
5913 if (gimple_assign_rhs2 (user) == negate)
5915 tree rhs1 = gimple_assign_rhs1 (user);
5916 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
5917 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5918 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
5919 update_stmt (user);
5922 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5924 if (gimple_assign_rhs1 (user) == negate)
5926 /* We have
5927 x = -a
5928 y = x - b
5929 which we transform into
5930 x = a + b
5931 y = -x .
5932 This pushes down the negate, which we may be able to merge
5933 into some other operation; hence insert it into the
5934 plus_negates vector. */
5935 gimple *feed = SSA_NAME_DEF_STMT (negate);
5936 tree a = gimple_assign_rhs1 (feed);
5937 tree b = gimple_assign_rhs2 (user);
5938 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5939 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
5940 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
5941 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
5942 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5943 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
5944 user = gsi_stmt (gsi2);
5945 update_stmt (user);
5946 reassoc_remove_stmt (&gsi);
5947 release_defs (feed);
5948 plus_negates.safe_push (gimple_assign_lhs (user));
5950 else
5952 /* Transform "x = -a; y = b - x" into "y = b + a", getting
5953 rid of one operation. */
5954 gimple *feed = SSA_NAME_DEF_STMT (negate);
5955 tree a = gimple_assign_rhs1 (feed);
5956 tree rhs1 = gimple_assign_rhs1 (user);
5957 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5958 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
5959 update_stmt (gsi_stmt (gsi));
5965 /* Break up subtract operations in block BB.
5967 We do this top down because we don't know whether the subtract is
5968 part of a possible chain of reassociation except at the top.
5970 IE given
5971 d = f + g
5972 c = a + e
5973 b = c - d
5974 q = b - r
5975 k = t - q
5977 we want to break up k = t - q, but we won't until we've transformed q
5978 = b - r, which won't be broken up until we transform b = c - d.
5980 En passant, clear the GIMPLE visited flag on every statement
5981 and set UIDs within each basic block. */
5983 static void
5984 break_up_subtract_bb (basic_block bb)
5986 gimple_stmt_iterator gsi;
5987 basic_block son;
5988 unsigned int uid = 1;
5990 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5992 gimple *stmt = gsi_stmt (gsi);
5993 gimple_set_visited (stmt, false);
5994 gimple_set_uid (stmt, uid++);
5996 if (!is_gimple_assign (stmt)
5997 || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt)))
5998 || !can_reassociate_op_p (gimple_assign_lhs (stmt)))
5999 continue;
6001 /* Look for simple gimple subtract operations. */
6002 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
6004 if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt))
6005 || !can_reassociate_op_p (gimple_assign_rhs2 (stmt)))
6006 continue;
6008 /* Check for a subtract used only in an addition. If this
6009 is the case, transform it into an add of a negate for better
6010 reassociation. IE transform C = A-B into C = A + -B if C
6011 is only used in an addition. */
6012 if (should_break_up_subtract (stmt))
6013 break_up_subtract (stmt, &gsi);
6015 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
6016 && can_reassociate_op_p (gimple_assign_rhs1 (stmt)))
6017 plus_negates.safe_push (gimple_assign_lhs (stmt));
6019 for (son = first_dom_son (CDI_DOMINATORS, bb);
6020 son;
6021 son = next_dom_son (CDI_DOMINATORS, son))
6022 break_up_subtract_bb (son);
6025 /* Used for repeated factor analysis. */
6026 struct repeat_factor
6028 /* An SSA name that occurs in a multiply chain. */
6029 tree factor;
6031 /* Cached rank of the factor. */
6032 unsigned rank;
6034 /* Number of occurrences of the factor in the chain. */
6035 HOST_WIDE_INT count;
6037 /* An SSA name representing the product of this factor and
6038 all factors appearing later in the repeated factor vector. */
6039 tree repr;
6043 static vec<repeat_factor> repeat_factor_vec;
6045 /* Used for sorting the repeat factor vector. Sort primarily by
6046 ascending occurrence count, secondarily by descending rank. */
6048 static int
6049 compare_repeat_factors (const void *x1, const void *x2)
6051 const repeat_factor *rf1 = (const repeat_factor *) x1;
6052 const repeat_factor *rf2 = (const repeat_factor *) x2;
6054 if (rf1->count != rf2->count)
6055 return rf1->count - rf2->count;
6057 return rf2->rank - rf1->rank;
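/* Illustrative example, not part of the original source: factors

     a (count 2, rank 5), b (count 2, rank 9), c (count 3, rank 1)

   sort as b, a, c -- ascending occurrence count first, and within the
   equal-count pair the higher-ranked b before the lower-ranked a.  */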
6060 /* Look for repeated operands in OPS in the multiply tree rooted at
6061 STMT. Replace them with an optimal sequence of multiplies and powi
6062 builtin calls, and remove the used operands from OPS. Return an
6063 SSA name representing the value of the replacement sequence. */
6065 static tree
6066 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
6068 unsigned i, j, vec_len;
6069 int ii;
6070 operand_entry *oe;
6071 repeat_factor *rf1, *rf2;
6072 repeat_factor rfnew;
6073 tree result = NULL_TREE;
6074 tree target_ssa, iter_result;
6075 tree type = TREE_TYPE (gimple_get_lhs (stmt));
6076 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
6077 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
6078 gimple *mul_stmt, *pow_stmt;
6080 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
6081 target, unless type is integral. */
6082 if (!powi_fndecl && !INTEGRAL_TYPE_P (type))
6083 return NULL_TREE;
6085 /* Allocate the repeated factor vector. */
6086 repeat_factor_vec.create (10);
6088 /* Scan the OPS vector for all SSA names in the product and build
6089 up a vector of occurrence counts for each factor. */
6090 FOR_EACH_VEC_ELT (*ops, i, oe)
6092 if (TREE_CODE (oe->op) == SSA_NAME)
6094 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6096 if (rf1->factor == oe->op)
6098 rf1->count += oe->count;
6099 break;
6103 if (j >= repeat_factor_vec.length ())
6105 rfnew.factor = oe->op;
6106 rfnew.rank = oe->rank;
6107 rfnew.count = oe->count;
6108 rfnew.repr = NULL_TREE;
6109 repeat_factor_vec.safe_push (rfnew);
6114 /* Sort the repeated factor vector by (a) increasing occurrence count,
6115 and (b) decreasing rank. */
6116 repeat_factor_vec.qsort (compare_repeat_factors);
6118 /* It is generally best to combine as many base factors as possible
6119 into a product before applying __builtin_powi to the result.
6120 However, the sort order chosen for the repeated factor vector
6121 allows us to cache partial results for the product of the base
6122 factors for subsequent use. When we already have a cached partial
6123 result from a previous iteration, it is best to make use of it
6124 before looking for another __builtin_powi opportunity.
6126 As an example, consider x * x * y * y * y * z * z * z * z.
6127 We want to first compose the product x * y * z, raise it to the
6128 second power, then multiply this by y * z, and finally multiply
6129 by z. This can be done in 5 multiplies provided we cache y * z
6130 for use in both expressions:
6132 t1 = y * z
6133 t2 = t1 * x
6134 t3 = t2 * t2
6135 t4 = t1 * t3
6136 result = t4 * z
6138 If we instead ignored the cached y * z and first multiplied by
6139 the __builtin_powi opportunity z * z, we would get the inferior:
6141 t1 = y * z
6142 t2 = t1 * x
6143 t3 = t2 * t2
6144 t4 = z * z
6145 t5 = t3 * t4
6146 result = t5 * y */
6148 vec_len = repeat_factor_vec.length ();
6150 /* Repeatedly look for opportunities to create a builtin_powi call. */
6151 while (true)
6153 HOST_WIDE_INT power;
6155 /* First look for the largest cached product of factors from
6156 preceding iterations. If found, create a builtin_powi for
6157 it if the minimum occurrence count for its factors is at
6158 least 2, or just use this cached product as our next
6159 multiplicand if the minimum occurrence count is 1. */
6160 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6162 if (rf1->repr && rf1->count > 0)
6163 break;
6166 if (j < vec_len)
6168 power = rf1->count;
6170 if (power == 1)
6172 iter_result = rf1->repr;
6174 if (dump_file && (dump_flags & TDF_DETAILS))
6176 unsigned elt;
6177 repeat_factor *rf;
6178 fputs ("Multiplying by cached product ", dump_file);
6179 for (elt = j; elt < vec_len; elt++)
6181 rf = &repeat_factor_vec[elt];
6182 print_generic_expr (dump_file, rf->factor);
6183 if (elt < vec_len - 1)
6184 fputs (" * ", dump_file);
6186 fputs ("\n", dump_file);
6189 else
6191 if (INTEGRAL_TYPE_P (type))
6193 gcc_assert (power > 1);
6194 gimple_stmt_iterator gsip = gsi;
6195 gsi_prev (&gsip);
6196 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6197 rf1->repr, power);
6198 gimple_stmt_iterator gsic = gsi;
6199 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6201 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6202 gimple_set_visited (gsi_stmt (gsic), true);
6203 gsi_prev (&gsic);
6206 else
6208 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6209 pow_stmt
6210 = gimple_build_call (powi_fndecl, 2, rf1->repr,
6211 build_int_cst (integer_type_node,
6212 power));
6213 gimple_call_set_lhs (pow_stmt, iter_result);
6214 gimple_set_location (pow_stmt, gimple_location (stmt));
6215 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6216 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6219 if (dump_file && (dump_flags & TDF_DETAILS))
6221 unsigned elt;
6222 repeat_factor *rf;
6223 fputs ("Building __builtin_pow call for cached product (",
6224 dump_file);
6225 for (elt = j; elt < vec_len; elt++)
6227 rf = &repeat_factor_vec[elt];
6228 print_generic_expr (dump_file, rf->factor);
6229 if (elt < vec_len - 1)
6230 fputs (" * ", dump_file);
6232 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
6233 power);
6237 else
6239 /* Otherwise, find the first factor in the repeated factor
6240 vector whose occurrence count is at least 2. If no such
6241 factor exists, there are no builtin_powi opportunities
6242 remaining. */
6243 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6245 if (rf1->count >= 2)
6246 break;
6249 if (j >= vec_len)
6250 break;
6252 power = rf1->count;
6254 if (dump_file && (dump_flags & TDF_DETAILS))
6256 unsigned elt;
6257 repeat_factor *rf;
6258 fputs ("Building __builtin_pow call for (", dump_file);
6259 for (elt = j; elt < vec_len; elt++)
6261 rf = &repeat_factor_vec[elt];
6262 print_generic_expr (dump_file, rf->factor);
6263 if (elt < vec_len - 1)
6264 fputs (" * ", dump_file);
6266 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
6269 reassociate_stats.pows_created++;
6271 /* Visit each element of the vector in reverse order (so that
6272 high-occurrence elements are visited first, and within the
6273 same occurrence count, lower-ranked elements are visited
6274 first). Form a linear product of all elements in this order
6275 whose occurrence count is at least that of element J.
6276 Record the SSA name representing the product of each element
6277 with all subsequent elements in the vector. */
6278 if (j == vec_len - 1)
6279 rf1->repr = rf1->factor;
6280 else
6282 for (ii = vec_len - 2; ii >= (int)j; ii--)
6284 tree op1, op2;
6286 rf1 = &repeat_factor_vec[ii];
6287 rf2 = &repeat_factor_vec[ii + 1];
6289 /* Init the last factor's representative to be itself. */
6290 if (!rf2->repr)
6291 rf2->repr = rf2->factor;
6293 op1 = rf1->factor;
6294 op2 = rf2->repr;
6296 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
6297 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
6298 op1, op2);
6299 gimple_set_location (mul_stmt, gimple_location (stmt));
6300 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6301 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6302 rf1->repr = target_ssa;
6304 /* Don't reprocess the multiply we just introduced. */
6305 gimple_set_visited (mul_stmt, true);
6309 /* Form a call to __builtin_powi for the maximum product
6310 just formed, raised to the power obtained earlier. */
6311 rf1 = &repeat_factor_vec[j];
6312 if (INTEGRAL_TYPE_P (type))
6314 gcc_assert (power > 1);
6315 gimple_stmt_iterator gsip = gsi;
6316 gsi_prev (&gsip);
6317 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6318 rf1->repr, power);
6319 gimple_stmt_iterator gsic = gsi;
6320 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6322 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6323 gimple_set_visited (gsi_stmt (gsic), true);
6324 gsi_prev (&gsic);
6327 else
6329 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6330 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6331 build_int_cst (integer_type_node,
6332 power));
6333 gimple_call_set_lhs (pow_stmt, iter_result);
6334 gimple_set_location (pow_stmt, gimple_location (stmt));
6335 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6336 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6340 /* If we previously formed at least one other builtin_powi call,
6341 form the product of this one and those others. */
6342 if (result)
6344 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6345 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6346 result, iter_result);
6347 gimple_set_location (mul_stmt, gimple_location (stmt));
6348 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6349 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6350 gimple_set_visited (mul_stmt, true);
6351 result = new_result;
6353 else
6354 result = iter_result;
6356 /* Decrement the occurrence count of each element in the product
6357 by the count found above, and remove this many copies of each
6358 factor from OPS. */
6359 for (i = j; i < vec_len; i++)
6361 unsigned k = power;
6362 unsigned n;
6364 rf1 = &repeat_factor_vec[i];
6365 rf1->count -= power;
6367 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6369 if (oe->op == rf1->factor)
6371 if (oe->count <= k)
6373 ops->ordered_remove (n);
6374 k -= oe->count;
6376 if (k == 0)
6377 break;
6379 else
6381 oe->count -= k;
6382 break;
6389 /* At this point all elements in the repeated factor vector have a
6390 remaining occurrence count of 0 or 1, and those with a count of 1
6391 don't have cached representatives. Re-sort the ops vector and
6392 clean up. */
6393 ops->qsort (sort_by_operand_rank);
6394 repeat_factor_vec.release ();
6396 /* Return the final product computed herein. Note that there may
6397 still be some elements with single occurrence count left in OPS;
6398 those will be handled by the normal reassociation logic. */
6399 return result;
6402 /* Attempt to optimize
6403 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6404 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
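/* Illustrative example, not part of the original source: for

     t = __builtin_copysign (3.0, y);
     r = 2.0 * t;

   the two constants fold and the multiplication is replaced by

     r = __builtin_copysign (6.0, y);

   a negative CST1 would additionally negate the new call's result.  */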
6406 static void
6407 attempt_builtin_copysign (vec<operand_entry *> *ops)
6409 operand_entry *oe;
6410 unsigned int i;
6411 unsigned int length = ops->length ();
6412 tree cst = ops->last ()->op;
6414 if (length == 1 || TREE_CODE (cst) != REAL_CST)
6415 return;
6417 FOR_EACH_VEC_ELT (*ops, i, oe)
6419 if (TREE_CODE (oe->op) == SSA_NAME
6420 && has_single_use (oe->op))
6422 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6423 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6425 tree arg0, arg1;
6426 switch (gimple_call_combined_fn (old_call))
6428 CASE_CFN_COPYSIGN:
6429 CASE_CFN_COPYSIGN_FN:
6430 arg0 = gimple_call_arg (old_call, 0);
6431 arg1 = gimple_call_arg (old_call, 1);
6432 /* The first argument of copysign must be a constant,
6433 otherwise there's nothing to do. */
6434 if (TREE_CODE (arg0) == REAL_CST)
6436 tree type = TREE_TYPE (arg0);
6437 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6438 /* If we couldn't fold to a single constant, skip it.
6439 That happens e.g. for inexact multiplication when
6440 -frounding-math. */
6441 if (mul == NULL_TREE)
6442 break;
6443 /* Instead of adjusting OLD_CALL, let's build a new
6444 call to not leak the LHS and prevent keeping bogus
6445 debug statements. DCE will clean up the old call. */
6446 gcall *new_call;
6447 if (gimple_call_internal_p (old_call))
6448 new_call = gimple_build_call_internal
6449 (IFN_COPYSIGN, 2, mul, arg1);
6450 else
6451 new_call = gimple_build_call
6452 (gimple_call_fndecl (old_call), 2, mul, arg1);
6453 tree lhs = make_ssa_name (type);
6454 gimple_call_set_lhs (new_call, lhs);
6455 gimple_set_location (new_call,
6456 gimple_location (old_call));
6457 insert_stmt_after (new_call, old_call);
6458 /* We've used the constant, get rid of it. */
6459 ops->pop ();
6460 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6461 /* Handle the CST1 < 0 case by negating the result. */
6462 if (cst1_neg)
6464 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6465 gimple *negate_stmt
6466 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6467 insert_stmt_after (negate_stmt, new_call);
6468 oe->op = negrhs;
6470 else
6471 oe->op = lhs;
6472 if (dump_file && (dump_flags & TDF_DETAILS))
6474 fprintf (dump_file, "Optimizing copysign: ");
6475 print_generic_expr (dump_file, cst);
6476 fprintf (dump_file, " * COPYSIGN (");
6477 print_generic_expr (dump_file, arg0);
6478 fprintf (dump_file, ", ");
6479 print_generic_expr (dump_file, arg1);
6480 fprintf (dump_file, ") into %sCOPYSIGN (",
6481 cst1_neg ? "-" : "");
6482 print_generic_expr (dump_file, mul);
6483 fprintf (dump_file, ", ");
6484 print_generic_expr (dump_file, arg1);
6485 fprintf (dump_file, ")\n");
6487 return;
6489 break;
6490 default:
6491 break;
6498 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6500 static void
6501 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6503 tree rhs1;
6505 if (dump_file && (dump_flags & TDF_DETAILS))
6507 fprintf (dump_file, "Transforming ");
6508 print_gimple_stmt (dump_file, stmt, 0);
6511 rhs1 = gimple_assign_rhs1 (stmt);
6512 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6513 update_stmt (stmt);
6514 remove_visited_stmt_chain (rhs1);
6516 if (dump_file && (dump_flags & TDF_DETAILS))
6518 fprintf (dump_file, " into ");
6519 print_gimple_stmt (dump_file, stmt, 0);
6523 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6525 static void
6526 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6527 tree rhs1, tree rhs2)
6529 if (dump_file && (dump_flags & TDF_DETAILS))
6531 fprintf (dump_file, "Transforming ");
6532 print_gimple_stmt (dump_file, stmt, 0);
6535 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6536 update_stmt (gsi_stmt (*gsi));
6537 remove_visited_stmt_chain (rhs1);
6539 if (dump_file && (dump_flags & TDF_DETAILS))
6541 fprintf (dump_file, " into ");
6542 print_gimple_stmt (dump_file, stmt, 0);
6546 /* Reassociate expressions in basic block BB and its children in the
6547 post-dominator tree.
6549 Bubble up return status from maybe_optimize_range_tests. */
6551 static bool
6552 reassociate_bb (basic_block bb)
6554 gimple_stmt_iterator gsi;
6555 basic_block son;
6556 gimple *stmt = last_stmt (bb);
6557 bool cfg_cleanup_needed = false;
6559 if (stmt && !gimple_visited_p (stmt))
6560 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
6562 bool do_prev = false;
6563 for (gsi = gsi_last_bb (bb);
6564 !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
6566 do_prev = true;
6567 stmt = gsi_stmt (gsi);
6569 if (is_gimple_assign (stmt)
6570 && !stmt_could_throw_p (cfun, stmt))
6572 tree lhs, rhs1, rhs2;
6573 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
6575 /* If this was part of an already processed statement,
6576 we don't need to touch it again. */
6577 if (gimple_visited_p (stmt))
6579 /* This statement might have become dead because of previous
6580 reassociations. */
6581 if (has_zero_uses (gimple_get_lhs (stmt)))
6583 reassoc_remove_stmt (&gsi);
6584 release_defs (stmt);
6585 /* We might end up removing the last stmt above, which
6586 leaves the iterator at the end of the sequence.
6587 In that case reset it to the last stmt of the block
6588 and make sure we don't do gsi_prev afterwards. */
6589 if (gsi_end_p (gsi))
6591 gsi = gsi_last_bb (bb);
6592 do_prev = false;
6595 continue;
6598 /* If this is not a gimple binary expression, there is
6599 nothing for us to do with it. */
6600 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
6601 continue;
6603 lhs = gimple_assign_lhs (stmt);
6604 rhs1 = gimple_assign_rhs1 (stmt);
6605 rhs2 = gimple_assign_rhs2 (stmt);
6607 /* For operations other than bitwise and min/max ones we cannot
6608 reassociate all types. Verify that here. */
6609 if ((rhs_code != BIT_IOR_EXPR
6610 && rhs_code != BIT_AND_EXPR
6611 && rhs_code != BIT_XOR_EXPR
6612 && rhs_code != MIN_EXPR
6613 && rhs_code != MAX_EXPR
6614 && !can_reassociate_type_p (TREE_TYPE (lhs)))
6615 || !can_reassociate_op_p (rhs1)
6616 || !can_reassociate_op_p (rhs2))
6617 continue;
6619 if (associative_tree_code (rhs_code))
6621 auto_vec<operand_entry *> ops;
6622 tree powi_result = NULL_TREE;
6623 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
6625 /* There may be no immediate uses left by the time we
6626 get here because we may have eliminated them all. */
6627 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
6628 continue;
6630 gimple_set_visited (stmt, true);
6631 linearize_expr_tree (&ops, stmt, true, true);
6632 ops.qsort (sort_by_operand_rank);
6633 int orig_len = ops.length ();
6634 optimize_ops_list (rhs_code, &ops);
6635 if (undistribute_ops_list (rhs_code, &ops,
6636 loop_containing_stmt (stmt)))
6638 ops.qsort (sort_by_operand_rank);
6639 optimize_ops_list (rhs_code, &ops);
6641 if (undistribute_bitref_for_vector (rhs_code, &ops,
6642 loop_containing_stmt (stmt)))
6644 ops.qsort (sort_by_operand_rank);
6645 optimize_ops_list (rhs_code, &ops);
6647 if (rhs_code == PLUS_EXPR
6648 && transform_add_to_multiply (&ops))
6649 ops.qsort (sort_by_operand_rank);
6651 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
6653 if (is_vector)
6654 optimize_vec_cond_expr (rhs_code, &ops);
6655 else
6656 optimize_range_tests (rhs_code, &ops, NULL);
6659 if (rhs_code == MULT_EXPR && !is_vector)
6661 attempt_builtin_copysign (&ops);
6663 if (reassoc_insert_powi_p
6664 && (flag_unsafe_math_optimizations
6665 || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
6666 powi_result = attempt_builtin_powi (stmt, &ops);
6669 operand_entry *last;
6670 bool negate_result = false;
6671 if (ops.length () > 1
6672 && rhs_code == MULT_EXPR)
6674 last = ops.last ();
6675 if ((integer_minus_onep (last->op)
6676 || real_minus_onep (last->op))
6677 && !HONOR_SNANS (TREE_TYPE (lhs))
6678 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
6679 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
6681 ops.pop ();
6682 negate_result = true;
6686 tree new_lhs = lhs;
6687 /* If the operand vector is now empty, all operands were
6688 consumed by the __builtin_powi optimization. */
6689 if (ops.length () == 0)
6690 transform_stmt_to_copy (&gsi, stmt, powi_result);
6691 else if (ops.length () == 1)
6693 tree last_op = ops.last ()->op;
6695 /* If the stmt that defines the operand has to be inserted, insert it
6696 before the use. */
6697 if (ops.last ()->stmt_to_insert)
6698 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
6699 if (powi_result)
6700 transform_stmt_to_multiply (&gsi, stmt, last_op,
6701 powi_result);
6702 else
6703 transform_stmt_to_copy (&gsi, stmt, last_op);
6705 else
6707 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
6708 int ops_num = ops.length ();
6709 int width;
6711 /* For binary bit operations, if there are at least 3
6712 operands and the last operand in OPS is a constant,
6713 move it to the front. This helps ensure that we generate
6714 (X & Y) & C rather than (X & C) & Y. The former will
6715 often match a canonical bit test when we get to RTL. */
6716 if (ops.length () > 2
6717 && (rhs_code == BIT_AND_EXPR
6718 || rhs_code == BIT_IOR_EXPR
6719 || rhs_code == BIT_XOR_EXPR)
6720 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
6721 std::swap (*ops[0], *ops[ops_num - 1]);
6723 /* Only rewrite the expression tree into a parallel form in the
6724 last reassoc pass, to avoid useless back-and-forth work
6725 with the initial linearization. */
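/* Illustrative example, not part of the original source: with a
   reassociation width of 2, the chain a + b + c + d is emitted as

     t1 = a + b;  t2 = c + d;  r = t1 + t2;

   rather than the serial (((a + b) + c) + d), exposing independent
   additions to the instruction scheduler.  */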
6726 if (!reassoc_insert_powi_p
6727 && ops.length () > 3
6728 && (width = get_reassociation_width (ops_num, rhs_code,
6729 mode)) > 1)
6731 if (dump_file && (dump_flags & TDF_DETAILS))
6732 fprintf (dump_file,
6733 "Width = %d was chosen for reassociation\n",
6734 width);
6735 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
6736 width, ops);
6738 else
6740 /* When there are three operands left, we want
6741 to make sure the ones that get the double
6742 binary op are chosen wisely. */
6743 int len = ops.length ();
6744 if (len >= 3)
6745 swap_ops_for_binary_stmt (ops, len - 3, stmt);
6747 new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
6748 powi_result != NULL
6749 || negate_result,
6750 len != orig_len);
6753 /* If we combined some repeated factors into a
6754 __builtin_powi call, multiply that result by the
6755 reassociated operands. */
6756 if (powi_result)
6758 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6759 tree type = TREE_TYPE (lhs);
6760 tree target_ssa = make_temp_ssa_name (type, NULL,
6761 "reassocpow");
6762 gimple_set_lhs (lhs_stmt, target_ssa);
6763 update_stmt (lhs_stmt);
6764 if (lhs != new_lhs)
6766 target_ssa = new_lhs;
6767 new_lhs = lhs;
6769 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6770 powi_result, target_ssa);
6771 gimple_set_location (mul_stmt, gimple_location (stmt));
6772 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6773 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6777 if (negate_result)
6779 stmt = SSA_NAME_DEF_STMT (lhs);
6780 tree tmp = make_ssa_name (TREE_TYPE (lhs));
6781 gimple_set_lhs (stmt, tmp);
6782 if (lhs != new_lhs)
6783 tmp = new_lhs;
6784 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6785 tmp);
6786 gimple_set_uid (neg_stmt, gimple_uid (stmt));
6787 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6788 update_stmt (stmt);
6793 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6794 son;
6795 son = next_dom_son (CDI_POST_DOMINATORS, son))
6796 cfg_cleanup_needed |= reassociate_bb (son);
6798 return cfg_cleanup_needed;
6801 /* Add jumps around shifts for range tests turned into bit tests.
6802 For each SSA_NAME VAR we have code like:
6803 VAR = ...; // final stmt of range comparison
6804 // bit test here...;
6805 OTHERVAR = ...; // final stmt of the bit test sequence
6806 RES = VAR | OTHERVAR;
6807 Turn the above into:
6808 VAR = ...;
6809 if (VAR != 0)
6810 goto <l3>;
6811 else
6812 goto <l2>;
6813 <l2>:
6814 // bit test here...;
6815 OTHERVAR = ...;
6816 <l3>:
6817 # RES = PHI<1(l1), OTHERVAR(l2)>; */
6819 static void
6820 branch_fixup (void)
6822 tree var;
6823 unsigned int i;
6825 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6827 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6828 gimple *use_stmt;
6829 use_operand_p use;
6830 bool ok = single_imm_use (var, &use, &use_stmt);
6831 gcc_assert (ok
6832 && is_gimple_assign (use_stmt)
6833 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6834 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6836 basic_block cond_bb = gimple_bb (def_stmt);
6837 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6838 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6840 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6841 gimple *g = gimple_build_cond (NE_EXPR, var,
6842 build_zero_cst (TREE_TYPE (var)),
6843 NULL_TREE, NULL_TREE);
6844 location_t loc = gimple_location (use_stmt);
6845 gimple_set_location (g, loc);
6846 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6848 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6849 etrue->probability = profile_probability::even ();
6850 edge efalse = find_edge (cond_bb, then_bb);
6851 efalse->flags = EDGE_FALSE_VALUE;
6852 efalse->probability -= etrue->probability;
6853 then_bb->count -= etrue->count ();
6855 tree othervar = NULL_TREE;
6856 if (gimple_assign_rhs1 (use_stmt) == var)
6857 othervar = gimple_assign_rhs2 (use_stmt);
6858 else if (gimple_assign_rhs2 (use_stmt) == var)
6859 othervar = gimple_assign_rhs1 (use_stmt);
6860 else
6861 gcc_unreachable ();
6862 tree lhs = gimple_assign_lhs (use_stmt);
6863 gphi *phi = create_phi_node (lhs, merge_bb);
6864 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6865 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6866 gsi = gsi_for_stmt (use_stmt);
6867 gsi_remove (&gsi, true);
6869 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6870 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6872 reassoc_branch_fixups.release ();
6875 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6876 void debug_ops_vector (vec<operand_entry *> ops);
6878 /* Dump the operand entry vector OPS to FILE. */
6880 void
6881 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
6883 operand_entry *oe;
6884 unsigned int i;
6886 FOR_EACH_VEC_ELT (ops, i, oe)
6888 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
6889 print_generic_expr (file, oe->op);
6890 fprintf (file, "\n");
6894 /* Dump the operand entry vector OPS to STDERR. */
6896 DEBUG_FUNCTION void
6897 debug_ops_vector (vec<operand_entry *> ops)
6899 dump_ops_vector (stderr, ops);
6902 /* Bubble up return status from reassociate_bb. */
6904 static bool
6905 do_reassoc (void)
6907 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6908 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
6911 /* Initialize the reassociation pass. */
6913 static void
6914 init_reassoc (void)
6916 int i;
6917 int64_t rank = 2;
6918 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
6920 /* Find the loops, so that we can prevent moving calculations in
6921 them. */
6922 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
6924 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
6926 next_operand_entry_id = 0;
6928 /* A reverse post order (RPO) walk gives us an ordering in which
6929 deeper loops come later. */
6930 pre_and_rev_post_order_compute (NULL, bbs, false);
6931 bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
6932 operand_rank = new hash_map<tree, int64_t>;
6934 /* Give each default definition a distinct rank. This includes
6935 parameters and the static chain. Walk backwards over all
6936 SSA names so that we get proper rank ordering according
6937 to tree_swap_operands_p. */
6938 for (i = num_ssa_names - 1; i > 0; --i)
6940 tree name = ssa_name (i);
6941 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
6942 insert_operand_rank (name, ++rank);
6945 /* Set up rank for each BB */
6946 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
6947 bb_rank[bbs[i]] = ++rank << 16;
6949 free (bbs);
6950 calculate_dominance_info (CDI_POST_DOMINATORS);
6951 plus_negates = vNULL;
6954 /* Clean up after the reassociation pass, and print stats if
6955 requested. */
6957 static void
6958 fini_reassoc (void)
6960 statistics_counter_event (cfun, "Linearized",
6961 reassociate_stats.linearized);
6962 statistics_counter_event (cfun, "Constants eliminated",
6963 reassociate_stats.constants_eliminated);
6964 statistics_counter_event (cfun, "Ops eliminated",
6965 reassociate_stats.ops_eliminated);
6966 statistics_counter_event (cfun, "Statements rewritten",
6967 reassociate_stats.rewritten);
6968 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
6969 reassociate_stats.pows_encountered);
6970 statistics_counter_event (cfun, "Built-in powi calls created",
6971 reassociate_stats.pows_created);
6973 delete operand_rank;
6974 bitmap_clear (biased_names);
6975 operand_entry_pool.release ();
6976 free (bb_rank);
6977 plus_negates.release ();
6978 free_dominance_info (CDI_POST_DOMINATORS);
6979 loop_optimizer_finalize ();
6982 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
6983 insertion of __builtin_powi calls.
6985 Returns TODO_cleanup_cfg if a CFG cleanup pass is desired due to
6986 optimization of a gimple conditional. Otherwise returns zero. */
6988 static unsigned int
6989 execute_reassoc (bool insert_powi_p, bool bias_loop_carried_phi_ranks_p)
6991 reassoc_insert_powi_p = insert_powi_p;
6992 reassoc_bias_loop_carried_phi_ranks_p = bias_loop_carried_phi_ranks_p;
6994 init_reassoc ();
6996 bool cfg_cleanup_needed;
6997 cfg_cleanup_needed = do_reassoc ();
6998 repropagate_negates ();
6999 branch_fixup ();
7001 fini_reassoc ();
7002 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
7005 namespace {
7007 const pass_data pass_data_reassoc =
7009 GIMPLE_PASS, /* type */
7010 "reassoc", /* name */
7011 OPTGROUP_NONE, /* optinfo_flags */
7012 TV_TREE_REASSOC, /* tv_id */
7013 ( PROP_cfg | PROP_ssa ), /* properties_required */
7014 0, /* properties_provided */
7015 0, /* properties_destroyed */
7016 0, /* todo_flags_start */
7017 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
7020 class pass_reassoc : public gimple_opt_pass
7022 public:
7023 pass_reassoc (gcc::context *ctxt)
7024 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
7027 /* opt_pass methods: */
7028 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
7029 void set_pass_param (unsigned int n, bool param)
7031 gcc_assert (n == 0);
7032 insert_powi_p = param;
7033 bias_loop_carried_phi_ranks_p = !param;
7035 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
7036 virtual unsigned int execute (function *)
7038 return execute_reassoc (insert_powi_p, bias_loop_carried_phi_ranks_p);
7041 private:
7042 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
7043 point 3a in the pass header comment. */
7044 bool insert_powi_p;
7045 bool bias_loop_carried_phi_ranks_p;
7046 }; // class pass_reassoc
7048 } // anon namespace
7050 gimple_opt_pass *
7051 make_pass_reassoc (gcc::context *ctxt)
7053 return new pass_reassoc (ctxt);