gcc/tree-ssa-reassoc.cc
1 /* Reassociation for trees.
2 Copyright (C) 2005-2024 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-iterator.h"
42 #include "gimple-fold.h"
43 #include "tree-eh.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
54 #include "tree-ssa-reassoc.h"
55 #include "tree-ssa-math-opts.h"
56 #include "gimple-range.h"
57 #include "internal-fn.h"
59 /* This is a simple global reassociation pass. It is, in part, based
60 on the LLVM pass of the same name (They do some things more/less
61 than we do, in different orders, etc).
63 It consists of five steps:
65 1. Breaking up subtract operations into addition + negate, where
66 it would promote the reassociation of adds.
68 2. Left linearization of the expression trees, so that (A+B)+(C+D)
69 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
70 During linearization, we place the operands of the binary
71 expressions into a vector of operand_entry_*
73 3. Optimization of the operand lists, eliminating things like a +
74 -a, a & a, etc.
76 3a. Combine repeated factors with the same occurrence counts
77 into a __builtin_powi call that will later be optimized into
78 an optimal number of multiplies.
80 4. Rewrite the expression trees we linearized and optimized so
81 they are in proper rank order.
83 5. Repropagate negates, as nothing else will clean it up ATM.
85 A bit of theory on #4, since nobody seems to write anything down
86 about why it makes sense to do it the way they do it:
88 We could do this much nicer theoretically, but don't (for reasons
89 explained after how to do it theoretically nice :P).
91 In order to promote the most redundancy elimination, you want
92 binary expressions whose operands are the same rank (or
93 preferably, the same value) exposed to the redundancy eliminator,
94 for possible elimination.
96 So the way to do this if we really cared, is to build the new op
97 tree from the leaves to the roots, merging as you go, and putting the
98 new op on the end of the worklist, until you are left with one
99 thing on the worklist.
101 IE if you have to rewrite the following set of operands (listed with
102 rank in parentheses), with opcode PLUS_EXPR:
104 a (1), b (1), c (1), d (2), e (2)
107 We start with our merge worklist empty, and the ops list with all of
108 those on it.
110 You want to first merge all leaves of the same rank, as much as
111 possible.
113 So first build a binary op of
115 mergetmp = a + b, and put "mergetmp" on the merge worklist.
117 Because there is no three operand form of PLUS_EXPR, c is not going to
118 be exposed to redundancy elimination as a rank 1 operand.
120 So you might as well throw it on the merge worklist (you could also
121 consider it to now be a rank two operand, and merge it with d and e,
122 but in this case, you then have evicted e from a binary op. So at
123 least in this situation, you can't win.)
125 Then build a binary op of d + e
126 mergetmp2 = d + e
128 and put mergetmp2 on the merge worklist.
130 so merge worklist = {mergetmp, c, mergetmp2}
132 Continue building binary ops of these operations until you have only
133 one operation left on the worklist.
135 So we have
137 build binary op
138 mergetmp3 = mergetmp + c
140 worklist = {mergetmp2, mergetmp3}
142 mergetmp4 = mergetmp2 + mergetmp3
144 worklist = {mergetmp4}
146 because we have one operation left, we can now just set the original
147 statement equal to the result of that operation.
149 This will at least expose a + b and d + e to redundancy elimination
150 as binary operations.
152 For extra points, you can reuse the old statements to build the
153 mergetmps, since you shouldn't run out.
155 So why don't we do this?
157 Because it's expensive, and rarely will help. Most trees we are
158 reassociating have 3 or fewer ops. If they have 2 ops, they already
159 will be written into a nice single binary op. If you have 3 ops, a
160 single simple check suffices to tell you whether the first two are of the
161 same rank. If so, you know to order it
163 mergetmp = op1 + op2
164 newstmt = mergetmp + op3
166 instead of
167 mergetmp = op2 + op3
168 newstmt = mergetmp + op1
170 If all three are of the same rank, you can't expose them all in a
171 single binary operator anyway, so the above is *still* the best you
172 can do.
174 Thus, this is what we do. When we have three ops left, we check to see
175 what order to put them in, and call it a day. As a nod to vector sum
176 reduction, we check if any of the ops are really a phi node that is a
177 destructive update for the associating op, and keep the destructive
178 update together for vector sum reduction recognition. */
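/* Editorial note, not from the original sources: a small illustration of
   steps 1-3 above.  Given

     t = a - b
     r = t + b

   step 1 rewrites the subtract as an addition of a negate (t = a + (-b)),
   step 2 linearizes r into the operand list {a, -b, b}, and step 3
   (eliminate_plus_minus_pair below) cancels the (-b, b) pair, so the
   whole expression collapses to r = a before the tree is rewritten.  */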
180 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
181 point 3a in the pass header comment. */
182 static bool reassoc_insert_powi_p;
184 /* Enable biasing ranks of loop accumulators. We don't want this before
185 vectorization, since it interferes with reduction chains. */
186 static bool reassoc_bias_loop_carried_phi_ranks_p;
188 /* Statistics */
189 static struct
191 int linearized;
192 int constants_eliminated;
193 int ops_eliminated;
194 int rewritten;
195 int pows_encountered;
196 int pows_created;
197 } reassociate_stats;
200 static object_allocator<operand_entry> operand_entry_pool
201 ("operand entry pool");
203 /* This is used to assign a unique ID to each struct operand_entry
204 so that qsort results are identical on different hosts. */
205 static unsigned int next_operand_entry_id;
207 /* Starting rank number for a given basic block, so that we can rank
208 operations using unmovable instructions in that BB based on the bb
209 depth. */
210 static int64_t *bb_rank;
212 /* Operand->rank hashtable. */
213 static hash_map<tree, int64_t> *operand_rank;
215 /* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be
216 biased. */
217 static auto_bitmap biased_names;
219 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
220 all basic blocks, the CFG should be adjusted - the basic block is
221 split right after that SSA_NAME's definition statement and before
222 the only use, which must be a bit ior. */
223 static vec<tree> reassoc_branch_fixups;
225 /* Forward decls. */
226 static int64_t get_rank (tree);
227 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
229 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
230 possibly added by gsi_remove. */
232 static bool
233 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
235 gimple *stmt = gsi_stmt (*gsi);
237 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
238 return gsi_remove (gsi, true);
240 gimple_stmt_iterator prev = *gsi;
241 gsi_prev (&prev);
242 unsigned uid = gimple_uid (stmt);
243 basic_block bb = gimple_bb (stmt);
244 bool ret = gsi_remove (gsi, true);
245 if (!gsi_end_p (prev))
246 gsi_next (&prev);
247 else
248 prev = gsi_start_bb (bb);
249 gimple *end_stmt = gsi_stmt (*gsi);
250 while ((stmt = gsi_stmt (prev)) != end_stmt)
252 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
253 gimple_set_uid (stmt, uid);
254 gsi_next (&prev);
256 return ret;
259 /* Bias amount for loop-carried phis. We want this to be larger than
260 the depth of any reassociation tree we can see, but not larger than
261 the rank difference between two blocks. */
262 #define PHI_LOOP_BIAS (1 << 15)
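/* Editorial note: basic block ranks are spaced 1 << 16 apart (note the
   bb_rank[...] >> 16 comparison in sort_by_operand_rank below), so a bias
   of 1 << 15 dwarfs any realistic expression-tree depth while still
   fitting inside the gap between two consecutive blocks' ranks.  */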
264 /* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's
265 operands to the STMT's left-hand side. The goal is to preserve bias in code
266 like this:
268 x_1 = phi(x_0, x_2)
269 a = x_1 | 1
270 b = a ^ 2
271 .MEM = b
272 c = b + d
273 x_2 = c + e
275 That is, we need to preserve bias along single-use chains originating from
276 loop-carried phis. Only GIMPLE_ASSIGNs to SSA_NAMEs are considered to be
277 uses, because only they participate in rank propagation. */
278 static bool
279 propagate_bias_p (gimple *stmt)
281 use_operand_p use;
282 imm_use_iterator use_iter;
283 gimple *single_use_stmt = NULL;
285 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_reference)
286 return false;
288 FOR_EACH_IMM_USE_FAST (use, use_iter, gimple_assign_lhs (stmt))
290 gimple *current_use_stmt = USE_STMT (use);
292 if (is_gimple_assign (current_use_stmt)
293 && TREE_CODE (gimple_assign_lhs (current_use_stmt)) == SSA_NAME)
295 if (single_use_stmt != NULL && single_use_stmt != current_use_stmt)
296 return false;
297 single_use_stmt = current_use_stmt;
301 if (single_use_stmt == NULL)
302 return false;
304 if (gimple_bb (stmt)->loop_father
305 != gimple_bb (single_use_stmt)->loop_father)
306 return false;
308 return true;
311 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
312 an innermost loop, and the phi has only a single use which is inside
313 the loop, then the rank is the block rank of the loop latch plus an
314 extra bias for the loop-carried dependence. This causes expressions
315 calculated into an accumulator variable to be independent for each
316 iteration of the loop. If STMT is some other phi, the rank is the
317 block rank of its containing block. */
318 static int64_t
319 phi_rank (gimple *stmt)
321 basic_block bb = gimple_bb (stmt);
322 class loop *father = bb->loop_father;
323 tree res;
324 unsigned i;
325 use_operand_p use;
326 gimple *use_stmt;
328 if (!reassoc_bias_loop_carried_phi_ranks_p)
329 return bb_rank[bb->index];
331 /* We only care about real loops (those with a latch). */
332 if (!father->latch)
333 return bb_rank[bb->index];
335 /* Interesting phis must be in headers of innermost loops. */
336 if (bb != father->header
337 || father->inner)
338 return bb_rank[bb->index];
340 /* Ignore virtual SSA_NAMEs. */
341 res = gimple_phi_result (stmt);
342 if (virtual_operand_p (res))
343 return bb_rank[bb->index];
345 /* The phi definition must have a single use, and that use must be
346 within the loop. Otherwise this isn't an accumulator pattern. */
347 if (!single_imm_use (res, &use, &use_stmt)
348 || gimple_bb (use_stmt)->loop_father != father)
349 return bb_rank[bb->index];
351 /* Look for phi arguments from within the loop. If found, bias this phi. */
352 for (i = 0; i < gimple_phi_num_args (stmt); i++)
354 tree arg = gimple_phi_arg_def (stmt, i);
355 if (TREE_CODE (arg) == SSA_NAME
356 && !SSA_NAME_IS_DEFAULT_DEF (arg))
358 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
359 if (gimple_bb (def_stmt)->loop_father == father)
360 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
364 /* Must be an uninteresting phi. */
365 return bb_rank[bb->index];
368 /* Return the maximum of RANK and the rank that should be propagated
369 from expression OP. For most operands, this is just the rank of OP.
370 For loop-carried phis, the value is zero to avoid undoing the bias
371 in favor of the phi. */
372 static int64_t
373 propagate_rank (int64_t rank, tree op, bool *maybe_biased_p)
375 int64_t op_rank;
377 op_rank = get_rank (op);
379 /* Check whether op is biased after the get_rank () call, since it might have
380 updated biased_names. */
381 if (TREE_CODE (op) == SSA_NAME
382 && bitmap_bit_p (biased_names, SSA_NAME_VERSION (op)))
384 if (maybe_biased_p == NULL)
385 return rank;
386 *maybe_biased_p = true;
389 return MAX (rank, op_rank);
392 /* Look up the operand rank structure for expression E. */
394 static inline int64_t
395 find_operand_rank (tree e)
397 int64_t *slot = operand_rank->get (e);
398 return slot ? *slot : -1;
401 /* Insert {E,RANK} into the operand rank hashtable. */
403 static inline void
404 insert_operand_rank (tree e, int64_t rank)
406 gcc_assert (rank > 0);
407 gcc_assert (!operand_rank->put (e, rank));
410 /* Given an expression E, return the rank of the expression. */
412 static int64_t
413 get_rank (tree e)
415 /* SSA_NAME's have the rank of the expression they are the result of.
417 For globals and uninitialized values, the rank is 0.
418 For function arguments, use the pre-setup rank.
419 For PHI nodes, stores, asm statements, etc, we use the rank of
420 the BB.
421 For simple operations, the rank is the maximum rank of any of
422 its operands, or the bb_rank, whichever is less.
423 I make no claims that this is optimal, however, it gives good
424 results. */
426 /* We make an exception to the normal ranking system to break
427 dependences of accumulator variables in loops. Suppose we
428 have a simple one-block loop containing:
430 x_1 = phi(x_0, x_2)
431 b = a + x_1
432 c = b + d
433 x_2 = c + e
435 As shown, each iteration of the calculation into x is fully
436 dependent upon the iteration before it. We would prefer to
437 see this in the form:
439 x_1 = phi(x_0, x_2)
440 b = a + d
441 c = b + e
442 x_2 = c + x_1
444 If the loop is unrolled, the calculations of b and c from
445 different iterations can be interleaved.
447 To obtain this result during reassociation, we bias the rank
448 of the phi definition x_1 upward, when it is recognized as an
449 accumulator pattern. The artificial rank causes it to be
450 added last, providing the desired independence. */
452 if (TREE_CODE (e) == SSA_NAME)
454 ssa_op_iter iter;
455 gimple *stmt;
456 int64_t rank;
457 tree op;
459 /* If we already have a rank for this expression, use that. */
460 rank = find_operand_rank (e);
461 if (rank != -1)
462 return rank;
464 stmt = SSA_NAME_DEF_STMT (e);
465 if (gimple_code (stmt) == GIMPLE_PHI)
467 rank = phi_rank (stmt);
468 if (rank != bb_rank[gimple_bb (stmt)->index])
469 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
472 else if (!is_gimple_assign (stmt))
473 rank = bb_rank[gimple_bb (stmt)->index];
475 else
477 bool biased_p = false;
478 bool *maybe_biased_p = propagate_bias_p (stmt) ? &biased_p : NULL;
480 /* Otherwise, find the maximum rank for the operands. As an
481 exception, remove the bias from loop-carried phis when propagating
482 the rank so that dependent operations are not also biased. */
483 /* Simply walk over all SSA uses - this takes advantage of the
484 fact that non-SSA operands satisfy is_gimple_min_invariant and
485 thus have rank 0. */
486 rank = 0;
487 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
488 rank = propagate_rank (rank, op, maybe_biased_p);
490 rank += 1;
491 if (biased_p)
492 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
495 if (dump_file && (dump_flags & TDF_DETAILS))
497 fprintf (dump_file, "Rank for ");
498 print_generic_expr (dump_file, e);
499 fprintf (dump_file, " is %" PRId64 "\n", rank);
502 /* Note the rank in the hashtable so we don't recompute it. */
503 insert_operand_rank (e, rank);
504 return rank;
507 /* Constants, globals, etc., are rank 0 */
508 return 0;
512 /* We want integer ones to end up last no matter what, since they are
513 the ones we can do the most with. */
514 #define INTEGER_CONST_TYPE 1 << 4
515 #define FLOAT_ONE_CONST_TYPE 1 << 3
516 #define FLOAT_CONST_TYPE 1 << 2
517 #define OTHER_CONST_TYPE 1 << 1
519 /* Classify an invariant tree into integer, float, or other, so that
520 we can sort them to be near other constants of the same type. */
521 static inline int
522 constant_type (tree t)
524 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
525 return INTEGER_CONST_TYPE;
526 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
528 /* Sort -1.0 and 1.0 constants last: while in some cases
529 const_binop can't optimize some inexact operations, multiplication
530 by -1.0 or 1.0 can always be merged with other operands. */
531 if (real_onep (t) || real_minus_onep (t))
532 return FLOAT_ONE_CONST_TYPE;
533 return FLOAT_CONST_TYPE;
535 else
536 return OTHER_CONST_TYPE;
539 /* qsort comparison function to sort operand entries PA and PB by rank
540 so that the sorted array is ordered by rank in decreasing order. */
541 static int
542 sort_by_operand_rank (const void *pa, const void *pb)
544 const operand_entry *oea = *(const operand_entry *const *)pa;
545 const operand_entry *oeb = *(const operand_entry *const *)pb;
547 if (oeb->rank != oea->rank)
548 return oeb->rank > oea->rank ? 1 : -1;
550 /* It's nicer for optimize_expression if constants that are likely
551 to fold when added/multiplied/whatever are put next to each
552 other. Since all constants have rank 0, order them by type. */
553 if (oea->rank == 0)
555 if (constant_type (oeb->op) != constant_type (oea->op))
556 return constant_type (oea->op) - constant_type (oeb->op);
557 else
558 /* To make sorting result stable, we use unique IDs to determine
559 order. */
560 return oeb->id > oea->id ? 1 : -1;
563 if (TREE_CODE (oea->op) != SSA_NAME)
565 if (TREE_CODE (oeb->op) != SSA_NAME)
566 return oeb->id > oea->id ? 1 : -1;
567 else
568 return 1;
570 else if (TREE_CODE (oeb->op) != SSA_NAME)
571 return -1;
573 /* Lastly, make sure the versions that are the same go next to each
574 other. */
575 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
577 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
578 versions of removed SSA_NAMEs, so if possible, prefer to sort
579 based on the basic block and gimple_uid of the SSA_NAME_DEF_STMT.
580 See PR60418. */
581 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
582 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
583 basic_block bba = gimple_bb (stmta);
584 basic_block bbb = gimple_bb (stmtb);
585 if (bbb != bba)
587 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
588 but the other might not be. */
589 if (!bba)
590 return 1;
591 if (!bbb)
592 return -1;
593 /* If neither is, compare bb_rank. */
594 if (bb_rank[bbb->index] != bb_rank[bba->index])
595 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
598 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
599 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
600 if (da != db)
601 return da ? 1 : -1;
603 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
606 return oeb->id > oea->id ? 1 : -1;
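/* Editorial note, illustrative only: the net effect of the comparator above
   is that operands are sorted by decreasing rank, all rank-0 constants sink
   to the end, and among the constants those with a larger constant_type
   value come last, so integer constants end up at the very back, e.g.
   {x_7 (rank 3), y_2 (rank 1), 2.5, 7} for a PLUS_EXPR chain.  */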
609 /* Add an operand entry to *OPS for the tree operand OP. */
611 static void
612 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
614 operand_entry *oe = operand_entry_pool.allocate ();
616 oe->op = op;
617 oe->rank = get_rank (op);
618 oe->id = next_operand_entry_id++;
619 oe->count = 1;
620 oe->stmt_to_insert = stmt_to_insert;
621 ops->safe_push (oe);
624 /* Add an operand entry to *OPS for the tree operand OP with repeat
625 count REPEAT. */
627 static void
628 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
629 HOST_WIDE_INT repeat)
631 operand_entry *oe = operand_entry_pool.allocate ();
633 oe->op = op;
634 oe->rank = get_rank (op);
635 oe->id = next_operand_entry_id++;
636 oe->count = repeat;
637 oe->stmt_to_insert = NULL;
638 ops->safe_push (oe);
640 reassociate_stats.pows_encountered++;
643 /* Returns true if we can associate the SSA def OP. */
645 static bool
646 can_reassociate_op_p (tree op)
648 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
649 return false;
650 /* Uninitialized variables can't participate in reassociation. */
651 if (TREE_CODE (op) == SSA_NAME && ssa_name_maybe_undef_p (op))
652 return false;
653 /* Make sure asm goto outputs do not participate in reassociation since
654 we have no way to find an insertion place after asm goto. */
655 if (TREE_CODE (op) == SSA_NAME
656 && gimple_code (SSA_NAME_DEF_STMT (op)) == GIMPLE_ASM
657 && gimple_asm_nlabels (as_a <gasm *> (SSA_NAME_DEF_STMT (op))) != 0)
658 return false;
659 return true;
662 /* Returns true if we can reassociate operations of TYPE.
663 That is for integral or non-saturating fixed-point types, and for
664 floating point type when associative-math is enabled. */
666 static bool
667 can_reassociate_type_p (tree type)
669 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
670 || NON_SAT_FIXED_POINT_TYPE_P (type)
671 || (flag_associative_math && FLOAT_TYPE_P (type)))
672 return true;
673 return false;
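/* Editorial note, illustrative only: with default flags this accepts
   'unsigned int' (wrapping overflow) but rejects plain 'int' unless -fwrapv
   is in effect (no TYPE_OVERFLOW_WRAPS), and it accepts 'double' only when
   -fassociative-math (e.g. via -ffast-math) is enabled.  */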
676 /* Return true if STMT is a reassociable operation containing a binary
677 operation with tree code CODE, and is inside LOOP. */
679 static bool
680 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
682 basic_block bb = gimple_bb (stmt);
684 if (gimple_bb (stmt) == NULL)
685 return false;
687 if (!flow_bb_inside_loop_p (loop, bb))
688 return false;
690 if (is_gimple_assign (stmt)
691 && gimple_assign_rhs_code (stmt) == code
692 && has_single_use (gimple_assign_lhs (stmt)))
694 tree rhs1 = gimple_assign_rhs1 (stmt);
695 tree rhs2 = gimple_assign_rhs2 (stmt);
696 if (!can_reassociate_op_p (rhs1)
697 || (rhs2 && !can_reassociate_op_p (rhs2)))
698 return false;
699 return true;
702 return false;
706 /* Return true if STMT is a nop-conversion. */
708 static bool
709 gimple_nop_conversion_p (gimple *stmt)
711 if (gassign *ass = dyn_cast <gassign *> (stmt))
713 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
714 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
715 TREE_TYPE (gimple_assign_rhs1 (ass))))
716 return true;
718 return false;
721 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
722 operand of that unary operation. Otherwise, return NULL. */
724 static tree
725 get_unary_op (tree name, enum tree_code opcode)
727 gimple *stmt = SSA_NAME_DEF_STMT (name);
729 /* Look through nop conversions (sign changes). */
730 if (gimple_nop_conversion_p (stmt)
731 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
732 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
734 if (!is_gimple_assign (stmt))
735 return NULL_TREE;
737 if (gimple_assign_rhs_code (stmt) == opcode)
738 return gimple_assign_rhs1 (stmt);
739 return NULL_TREE;
742 /* Return true if OP1 and OP2 have the same value when cast to either type. */
744 static bool
745 ops_equal_values_p (tree op1, tree op2)
747 if (op1 == op2)
748 return true;
750 tree orig_op1 = op1;
751 if (TREE_CODE (op1) == SSA_NAME)
753 gimple *stmt = SSA_NAME_DEF_STMT (op1);
754 if (gimple_nop_conversion_p (stmt))
756 op1 = gimple_assign_rhs1 (stmt);
757 if (op1 == op2)
758 return true;
762 if (TREE_CODE (op2) == SSA_NAME)
764 gimple *stmt = SSA_NAME_DEF_STMT (op2);
765 if (gimple_nop_conversion_p (stmt))
767 op2 = gimple_assign_rhs1 (stmt);
768 if (op1 == op2
769 || orig_op1 == op2)
770 return true;
774 return false;
778 /* If CURR and LAST are a pair of ops that OPCODE allows us to
779 eliminate through equivalences, do so, remove them from OPS, and
780 return true. Otherwise, return false. */
782 static bool
783 eliminate_duplicate_pair (enum tree_code opcode,
784 vec<operand_entry *> *ops,
785 bool *all_done,
786 unsigned int i,
787 operand_entry *curr,
788 operand_entry *last)
791 /* If we have two of the same op, and the opcode is & |, min, or max,
792 we can eliminate one of them.
793 If we have two of the same op, and the opcode is ^, we can
794 eliminate both of them. */
796 if (last && last->op == curr->op)
798 switch (opcode)
800 case MAX_EXPR:
801 case MIN_EXPR:
802 case BIT_IOR_EXPR:
803 case BIT_AND_EXPR:
804 if (dump_file && (dump_flags & TDF_DETAILS))
806 fprintf (dump_file, "Equivalence: ");
807 print_generic_expr (dump_file, curr->op);
808 fprintf (dump_file, " [&|minmax] ");
809 print_generic_expr (dump_file, last->op);
810 fprintf (dump_file, " -> ");
811 print_generic_stmt (dump_file, last->op);
814 ops->ordered_remove (i);
815 reassociate_stats.ops_eliminated ++;
817 return true;
819 case BIT_XOR_EXPR:
820 if (dump_file && (dump_flags & TDF_DETAILS))
822 fprintf (dump_file, "Equivalence: ");
823 print_generic_expr (dump_file, curr->op);
824 fprintf (dump_file, " ^ ");
825 print_generic_expr (dump_file, last->op);
826 fprintf (dump_file, " -> nothing\n");
829 reassociate_stats.ops_eliminated += 2;
831 if (ops->length () == 2)
833 ops->truncate (0);
834 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
835 *all_done = true;
837 else
839 ops->ordered_remove (i-1);
840 ops->ordered_remove (i-1);
843 return true;
845 default:
846 break;
849 return false;
852 static vec<tree> plus_negates;
854 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
855 expression, look in OPS for a corresponding positive operation to cancel
856 it out. If we find one, remove the other from OPS, replace
857 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
858 return false. */
860 static bool
861 eliminate_plus_minus_pair (enum tree_code opcode,
862 vec<operand_entry *> *ops,
863 unsigned int currindex,
864 operand_entry *curr)
866 tree negateop;
867 tree notop;
868 unsigned int i;
869 operand_entry *oe;
871 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
872 return false;
874 negateop = get_unary_op (curr->op, NEGATE_EXPR);
875 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
876 if (negateop == NULL_TREE && notop == NULL_TREE)
877 return false;
879 /* Any non-negated version will have a rank that is one less than
880 the current rank. So once we hit those ranks, if we don't find
881 one, we can stop. */
883 for (i = currindex + 1;
884 ops->iterate (i, &oe)
885 && oe->rank >= curr->rank - 1 ;
886 i++)
888 if (negateop
889 && ops_equal_values_p (oe->op, negateop))
891 if (dump_file && (dump_flags & TDF_DETAILS))
893 fprintf (dump_file, "Equivalence: ");
894 print_generic_expr (dump_file, negateop);
895 fprintf (dump_file, " + -");
896 print_generic_expr (dump_file, oe->op);
897 fprintf (dump_file, " -> 0\n");
900 ops->ordered_remove (i);
901 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
902 ops->ordered_remove (currindex);
903 reassociate_stats.ops_eliminated ++;
905 return true;
907 else if (notop
908 && ops_equal_values_p (oe->op, notop))
910 tree op_type = TREE_TYPE (oe->op);
912 if (dump_file && (dump_flags & TDF_DETAILS))
914 fprintf (dump_file, "Equivalence: ");
915 print_generic_expr (dump_file, notop);
916 fprintf (dump_file, " + ~");
917 print_generic_expr (dump_file, oe->op);
918 fprintf (dump_file, " -> -1\n");
921 ops->ordered_remove (i);
922 add_to_ops_vec (ops, build_all_ones_cst (op_type));
923 ops->ordered_remove (currindex);
924 reassociate_stats.ops_eliminated ++;
926 return true;
930 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
931 save it for later inspection in repropagate_negates(). */
932 if (negateop != NULL_TREE
933 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
934 plus_negates.safe_push (curr->op);
936 return false;
939 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
940 bitwise not expression, look in OPS for a corresponding operand to
941 cancel it out. If we find one, remove the other from OPS, replace
942 OPS[CURRINDEX] with -1 or 0, respectively, and return true. Otherwise,
943 return false. */
945 static bool
946 eliminate_not_pairs (enum tree_code opcode,
947 vec<operand_entry *> *ops,
948 unsigned int currindex,
949 operand_entry *curr)
951 tree notop;
952 unsigned int i;
953 operand_entry *oe;
955 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
956 || TREE_CODE (curr->op) != SSA_NAME)
957 return false;
959 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
960 if (notop == NULL_TREE)
961 return false;
963 /* Any non-not version will have a rank that is one less than
964 the current rank. So once we hit those ranks, if we don't find
965 one, we can stop. */
967 for (i = currindex + 1;
968 ops->iterate (i, &oe)
969 && oe->rank >= curr->rank - 1;
970 i++)
972 if (oe->op == notop)
974 if (dump_file && (dump_flags & TDF_DETAILS))
976 fprintf (dump_file, "Equivalence: ");
977 print_generic_expr (dump_file, notop);
978 if (opcode == BIT_AND_EXPR)
979 fprintf (dump_file, " & ~");
980 else if (opcode == BIT_IOR_EXPR)
981 fprintf (dump_file, " | ~");
982 print_generic_expr (dump_file, oe->op);
983 if (opcode == BIT_AND_EXPR)
984 fprintf (dump_file, " -> 0\n");
985 else if (opcode == BIT_IOR_EXPR)
986 fprintf (dump_file, " -> -1\n");
989 if (opcode == BIT_AND_EXPR)
990 oe->op = build_zero_cst (TREE_TYPE (oe->op));
991 else if (opcode == BIT_IOR_EXPR)
992 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
994 reassociate_stats.ops_eliminated += ops->length () - 1;
995 ops->truncate (0);
996 ops->quick_push (oe);
997 return true;
1001 return false;
1004 /* Use constant value that may be present in OPS to try to eliminate
1005 operands. Note that this function is only really used when we've
1006 eliminated ops for other reasons, or merged constants. Across
1007 single statements, fold already does all of this, plus more. There
1008 is little point in duplicating logic, so I've only included the
1009 identities that I could ever construct testcases to trigger. */
1011 static void
1012 eliminate_using_constants (enum tree_code opcode,
1013 vec<operand_entry *> *ops)
1015 operand_entry *oelast = ops->last ();
1016 tree type = TREE_TYPE (oelast->op);
1018 if (oelast->rank == 0
1019 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
1021 switch (opcode)
1023 case BIT_AND_EXPR:
1024 if (integer_zerop (oelast->op))
1026 if (ops->length () != 1)
1028 if (dump_file && (dump_flags & TDF_DETAILS))
1029 fprintf (dump_file, "Found & 0, removing all other ops\n");
1031 reassociate_stats.ops_eliminated += ops->length () - 1;
1033 ops->truncate (0);
1034 ops->quick_push (oelast);
1035 return;
1038 else if (integer_all_onesp (oelast->op))
1040 if (ops->length () != 1)
1042 if (dump_file && (dump_flags & TDF_DETAILS))
1043 fprintf (dump_file, "Found & -1, removing\n");
1044 ops->pop ();
1045 reassociate_stats.ops_eliminated++;
1048 break;
1049 case BIT_IOR_EXPR:
1050 if (integer_all_onesp (oelast->op))
1052 if (ops->length () != 1)
1054 if (dump_file && (dump_flags & TDF_DETAILS))
1055 fprintf (dump_file, "Found | -1, removing all other ops\n");
1057 reassociate_stats.ops_eliminated += ops->length () - 1;
1059 ops->truncate (0);
1060 ops->quick_push (oelast);
1061 return;
1064 else if (integer_zerop (oelast->op))
1066 if (ops->length () != 1)
1068 if (dump_file && (dump_flags & TDF_DETAILS))
1069 fprintf (dump_file, "Found | 0, removing\n");
1070 ops->pop ();
1071 reassociate_stats.ops_eliminated++;
1074 break;
1075 case MULT_EXPR:
1076 if (integer_zerop (oelast->op)
1077 || (FLOAT_TYPE_P (type)
1078 && !HONOR_NANS (type)
1079 && !HONOR_SIGNED_ZEROS (type)
1080 && real_zerop (oelast->op)))
1082 if (ops->length () != 1)
1084 if (dump_file && (dump_flags & TDF_DETAILS))
1085 fprintf (dump_file, "Found * 0, removing all other ops\n");
1087 reassociate_stats.ops_eliminated += ops->length () - 1;
1088 ops->truncate (0);
1089 ops->quick_push (oelast);
1090 return;
1093 else if (integer_onep (oelast->op)
1094 || (FLOAT_TYPE_P (type)
1095 && !HONOR_SNANS (type)
1096 && real_onep (oelast->op)))
1098 if (ops->length () != 1)
1100 if (dump_file && (dump_flags & TDF_DETAILS))
1101 fprintf (dump_file, "Found * 1, removing\n");
1102 ops->pop ();
1103 reassociate_stats.ops_eliminated++;
1104 return;
1107 break;
1108 case BIT_XOR_EXPR:
1109 case PLUS_EXPR:
1110 case MINUS_EXPR:
1111 if (integer_zerop (oelast->op)
1112 || (FLOAT_TYPE_P (type)
1113 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1114 && fold_real_zero_addition_p (type, 0, oelast->op,
1115 opcode == MINUS_EXPR)))
1117 if (ops->length () != 1)
1119 if (dump_file && (dump_flags & TDF_DETAILS))
1120 fprintf (dump_file, "Found [|^+] 0, removing\n");
1121 ops->pop ();
1122 reassociate_stats.ops_eliminated++;
1123 return;
1126 break;
1127 default:
1128 break;
1134 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1135 bool, bool);
1137 /* Structure for tracking and counting operands. */
1138 struct oecount {
1139 unsigned int cnt;
1140 unsigned int id;
1141 enum tree_code oecode;
1142 tree op;
1146 /* The heap for the oecount hashtable and the sorted list of operands. */
1147 static vec<oecount> cvec;
1150 /* Oecount hashtable helpers. */
1152 struct oecount_hasher : int_hash <int, 0, 1>
1154 static inline hashval_t hash (int);
1155 static inline bool equal (int, int);
1158 /* Hash function for oecount. */
1160 inline hashval_t
1161 oecount_hasher::hash (int p)
1163 const oecount *c = &cvec[p - 42];
1164 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1167 /* Comparison function for oecount. */
1169 inline bool
1170 oecount_hasher::equal (int p1, int p2)
1172 const oecount *c1 = &cvec[p1 - 42];
1173 const oecount *c2 = &cvec[p2 - 42];
1174 return c1->oecode == c2->oecode && c1->op == c2->op;
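/* Editorial note: the integers stored in the oecount hash table are indices
   into cvec biased by 42 (undistribute_ops_list inserts cvec.length () + 41
   right after pushing an element), which is why hash and equal above subtract
   42; the bias also keeps stored values away from the 0 (empty) and 1
   (deleted) markers declared via int_hash <int, 0, 1>.  */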
1177 /* Comparison function for qsort sorting oecount elements by count. */
1179 static int
1180 oecount_cmp (const void *p1, const void *p2)
1182 const oecount *c1 = (const oecount *)p1;
1183 const oecount *c2 = (const oecount *)p2;
1184 if (c1->cnt != c2->cnt)
1185 return c1->cnt > c2->cnt ? 1 : -1;
1186 else
1187 /* If counts are identical, use unique IDs to stabilize qsort. */
1188 return c1->id > c2->id ? 1 : -1;
1191 /* Return TRUE iff STMT represents a builtin call that raises OP
1192 to some exponent. */
1194 static bool
1195 stmt_is_power_of_op (gimple *stmt, tree op)
1197 if (!is_gimple_call (stmt))
1198 return false;
1200 switch (gimple_call_combined_fn (stmt))
1202 CASE_CFN_POW:
1203 CASE_CFN_POWI:
1204 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1206 default:
1207 return false;
1211 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1212 in place and return the result. Assumes that stmt_is_power_of_op
1213 was previously called for STMT and returned TRUE. */
1215 static HOST_WIDE_INT
1216 decrement_power (gimple *stmt)
1218 REAL_VALUE_TYPE c, cint;
1219 HOST_WIDE_INT power;
1220 tree arg1;
1222 switch (gimple_call_combined_fn (stmt))
1224 CASE_CFN_POW:
1225 arg1 = gimple_call_arg (stmt, 1);
1226 c = TREE_REAL_CST (arg1);
1227 power = real_to_integer (&c) - 1;
1228 real_from_integer (&cint, VOIDmode, power, SIGNED);
1229 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1230 return power;
1232 CASE_CFN_POWI:
1233 arg1 = gimple_call_arg (stmt, 1);
1234 power = TREE_INT_CST_LOW (arg1) - 1;
1235 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1236 return power;
1238 default:
1239 gcc_unreachable ();
1243 /* Replace the SSA name defined by STMT with a new SSA name, and replace
1244 all uses of the old name with the new one. Also return the new SSA. */
1246 static tree
1247 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1249 gimple *use_stmt;
1250 use_operand_p use;
1251 imm_use_iterator iter;
1252 tree new_lhs, new_debug_lhs = NULL_TREE;
1253 tree lhs = gimple_get_lhs (stmt);
1255 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1256 gimple_set_lhs (stmt, new_lhs);
1258 /* Also need to update GIMPLE_DEBUGs. */
1259 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1261 tree repl = new_lhs;
1262 if (is_gimple_debug (use_stmt))
1264 if (new_debug_lhs == NULL_TREE)
1266 new_debug_lhs = build_debug_expr_decl (TREE_TYPE (lhs));
1267 gdebug *def_temp
1268 = gimple_build_debug_bind (new_debug_lhs,
1269 build2 (opcode, TREE_TYPE (lhs),
1270 new_lhs, op),
1271 stmt);
1272 gimple_set_uid (def_temp, gimple_uid (stmt));
1273 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1274 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1276 repl = new_debug_lhs;
1278 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1279 SET_USE (use, repl);
1280 update_stmt (use_stmt);
1282 return new_lhs;
1285 /* Replace all SSA names defined by the stmts in STMTS_TO_FIX and replace
1286 their uses with new SSA names. Also do this for the stmt that defines DEF
1287 if *DEF is not OP. */
1289 static void
1290 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1291 vec<gimple *> &stmts_to_fix)
1293 unsigned i;
1294 gimple *stmt;
1296 if (*def != op
1297 && TREE_CODE (*def) == SSA_NAME
1298 && (stmt = SSA_NAME_DEF_STMT (*def))
1299 && gimple_code (stmt) != GIMPLE_NOP)
1300 *def = make_new_ssa_for_def (stmt, opcode, op);
1302 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1303 make_new_ssa_for_def (stmt, opcode, op);
1306 /* Find the single immediate use of STMT's LHS, and replace it
1307 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1308 replace *DEF with OP as well. */
1310 static void
1311 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1313 tree lhs;
1314 gimple *use_stmt;
1315 use_operand_p use;
1316 gimple_stmt_iterator gsi;
1318 if (is_gimple_call (stmt))
1319 lhs = gimple_call_lhs (stmt);
1320 else
1321 lhs = gimple_assign_lhs (stmt);
1323 gcc_assert (has_single_use (lhs));
1324 single_imm_use (lhs, &use, &use_stmt);
1325 if (lhs == *def)
1326 *def = op;
1327 SET_USE (use, op);
1328 if (TREE_CODE (op) != SSA_NAME)
1329 update_stmt (use_stmt);
1330 gsi = gsi_for_stmt (stmt);
1331 unlink_stmt_vdef (stmt);
1332 reassoc_remove_stmt (&gsi);
1333 release_defs (stmt);
1336 /* Walks the linear chain with result *DEF searching for an operation
1337 with operand OP and code OPCODE removing that from the chain. *DEF
1338 is updated if there is only one operand but no operation left. */
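/* Editorial illustration, values invented: with *DEF = t2_5 where
   t2_5 = t1_4 * x_1 and t1_4 = a_2 * b_3, calling
   zero_one_operation (&def, MULT_EXPR, x_1) finds the multiplication by x_1
   and forwards t1_4 (i.e. a_2 * b_3) into t2_5's single use, updating *DEF
   accordingly.  When the operand is found deeper in the chain, the statements
   walked on the way get fresh LHS names (see the PR72835 note below) so that
   the old values are not reused for different computations.  */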
1340 static void
1341 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1343 tree orig_def = *def;
1344 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1345 /* PR72835 - Record the stmt chain that has to be updated such that
1346 we don't use the same LHS when the values computed are different. */
1347 auto_vec<gimple *, 64> stmts_to_fix;
1351 tree name;
1353 if (opcode == MULT_EXPR)
1355 if (stmt_is_power_of_op (stmt, op))
1357 if (decrement_power (stmt) == 1)
1359 if (stmts_to_fix.length () > 0)
1360 stmts_to_fix.pop ();
1361 propagate_op_to_single_use (op, stmt, def);
1363 break;
1365 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1367 if (gimple_assign_rhs1 (stmt) == op)
1369 tree cst = build_minus_one_cst (TREE_TYPE (op));
1370 if (stmts_to_fix.length () > 0)
1371 stmts_to_fix.pop ();
1372 propagate_op_to_single_use (cst, stmt, def);
1373 break;
1375 else if (integer_minus_onep (op)
1376 || real_minus_onep (op))
1378 gimple_assign_set_rhs_code
1379 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1380 break;
1385 name = gimple_assign_rhs1 (stmt);
1387 /* If this is the operation we look for and one of the operands
1388 is ours, simply propagate the other operand into the stmt's
1389 single use. */
1390 if (gimple_assign_rhs_code (stmt) == opcode
1391 && (name == op
1392 || gimple_assign_rhs2 (stmt) == op))
1394 if (name == op)
1395 name = gimple_assign_rhs2 (stmt);
1396 if (stmts_to_fix.length () > 0)
1397 stmts_to_fix.pop ();
1398 propagate_op_to_single_use (name, stmt, def);
1399 break;
1402 /* We might have a multiply of two __builtin_pow* calls, and
1403 the operand might be hiding in the rightmost one. Likewise
1404 this can happen for a negate. */
1405 if (opcode == MULT_EXPR
1406 && gimple_assign_rhs_code (stmt) == opcode
1407 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1408 && has_single_use (gimple_assign_rhs2 (stmt)))
1410 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1411 if (stmt_is_power_of_op (stmt2, op))
1413 if (decrement_power (stmt2) == 1)
1414 propagate_op_to_single_use (op, stmt2, def);
1415 else
1416 stmts_to_fix.safe_push (stmt2);
1417 break;
1419 else if (is_gimple_assign (stmt2)
1420 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1422 if (gimple_assign_rhs1 (stmt2) == op)
1424 tree cst = build_minus_one_cst (TREE_TYPE (op));
1425 propagate_op_to_single_use (cst, stmt2, def);
1426 break;
1428 else if (integer_minus_onep (op)
1429 || real_minus_onep (op))
1431 stmts_to_fix.safe_push (stmt2);
1432 gimple_assign_set_rhs_code
1433 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1434 break;
1439 /* Continue walking the chain. */
1440 gcc_assert (name != op
1441 && TREE_CODE (name) == SSA_NAME);
1442 stmt = SSA_NAME_DEF_STMT (name);
1443 stmts_to_fix.safe_push (stmt);
1445 while (1);
1447 if (stmts_to_fix.length () > 0 || *def == orig_def)
1448 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1451 /* Returns true if statement S1 dominates statement S2. Like
1452 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1454 static bool
1455 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1457 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1459 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1460 SSA_NAME. Assume it lives at the beginning of function and
1461 thus dominates everything. */
1462 if (!bb1 || s1 == s2)
1463 return true;
1465 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1466 if (!bb2)
1467 return false;
1469 if (bb1 == bb2)
1471 /* PHIs in the same basic block are assumed to be
1472 executed all in parallel, if only one stmt is a PHI,
1473 it dominates the other stmt in the same basic block. */
1474 if (gimple_code (s1) == GIMPLE_PHI)
1475 return true;
1477 if (gimple_code (s2) == GIMPLE_PHI)
1478 return false;
1480 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1482 if (gimple_uid (s1) < gimple_uid (s2))
1483 return true;
1485 if (gimple_uid (s1) > gimple_uid (s2))
1486 return false;
1488 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1489 unsigned int uid = gimple_uid (s1);
1490 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1492 gimple *s = gsi_stmt (gsi);
1493 if (gimple_uid (s) != uid)
1494 break;
1495 if (s == s2)
1496 return true;
1499 return false;
1502 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1505 /* Insert STMT after INSERT_POINT. */
1507 static void
1508 insert_stmt_after (gimple *stmt, gimple *insert_point)
1510 gimple_stmt_iterator gsi;
1511 basic_block bb;
1513 if (gimple_code (insert_point) == GIMPLE_PHI)
1514 bb = gimple_bb (insert_point);
1515 else if (!stmt_ends_bb_p (insert_point))
1517 gsi = gsi_for_stmt (insert_point);
1518 gimple_set_uid (stmt, gimple_uid (insert_point));
1519 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1520 return;
1522 else if (gimple_code (insert_point) == GIMPLE_ASM
1523 && gimple_asm_nlabels (as_a <gasm *> (insert_point)) != 0)
1524 /* We have no idea where to insert - it depends on where the
1525 uses will be placed. */
1526 gcc_unreachable ();
1527 else
1528 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1529 thus if it must end a basic block, it should be a call that can
1530 throw, or some assignment that can throw. If it throws, the LHS
1531 of it will not be initialized though, so only valid places using
1532 the SSA_NAME should be dominated by the fallthru edge. */
1533 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1534 gsi = gsi_after_labels (bb);
1535 if (gsi_end_p (gsi))
1537 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1538 gimple_set_uid (stmt,
1539 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1541 else
1542 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1543 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1546 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1547 the result. Places the statement after the definition of either
1548 OP1 or OP2. Returns the new statement. */
1550 static gimple *
1551 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1553 gimple *op1def = NULL, *op2def = NULL;
1554 gimple_stmt_iterator gsi;
1555 tree op;
1556 gassign *sum;
1558 /* Create the addition statement. */
1559 op = make_ssa_name (type);
1560 sum = gimple_build_assign (op, opcode, op1, op2);
1562 /* Find an insertion place and insert. */
1563 if (TREE_CODE (op1) == SSA_NAME)
1564 op1def = SSA_NAME_DEF_STMT (op1);
1565 if (TREE_CODE (op2) == SSA_NAME)
1566 op2def = SSA_NAME_DEF_STMT (op2);
1567 if ((!op1def || gimple_nop_p (op1def))
1568 && (!op2def || gimple_nop_p (op2def)))
1570 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1571 if (!gsi_end_p (gsi)
1572 && is_gimple_call (gsi_stmt (gsi))
1573 && (gimple_call_flags (gsi_stmt (gsi)) & ECF_RETURNS_TWICE))
1575 /* Don't add statements before a returns_twice call at the start
1576 of a function. */
1577 split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1578 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1580 if (gsi_end_p (gsi))
1582 gimple_stmt_iterator gsi2
1583 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1584 gimple_set_uid (sum,
1585 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1587 else
1588 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1589 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1591 else
1593 gimple *insert_point;
1594 if ((!op1def || gimple_nop_p (op1def))
1595 || (op2def && !gimple_nop_p (op2def)
1596 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1597 insert_point = op2def;
1598 else
1599 insert_point = op1def;
1600 insert_stmt_after (sum, insert_point);
1602 update_stmt (sum);
1604 return sum;
1607 /* Perform un-distribution of divisions and multiplications.
1608 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1609 to (A + B) / X for real X.
1611 The algorithm is organized as follows.
1613 - First we walk the addition chain *OPS looking for summands that
1614 are defined by a multiplication or a real division. This results
1615 in the candidates bitmap with relevant indices into *OPS.
1617 - Second we build the chains of multiplications or divisions for
1618 these candidates, counting the number of occurrences of (operand, code)
1619 pairs in all of the candidates chains.
1621 - Third we sort the (operand, code) pairs by number of occurrence and
1622 process them starting with the pair with the most uses.
1624 * For each such pair we walk the candidates again to build a
1625 second candidate bitmap noting all multiplication/division chains
1626 that have at least one occurrence of (operand, code).
1628 * We build an alternate addition chain only covering these
1629 candidates with one (operand, code) operation removed from their
1630 multiplication/division chain.
1632 * The first candidate gets replaced by the alternate addition chain
1633 multiplied/divided by the operand.
1635 * All candidate chains get disabled for further processing and
1636 processing of (operand, code) pairs continues.
1638 The alternate addition chains built are re-processed by the main
1639 reassociation algorithm which allows optimizing a * x * y + b * y * x
1640 to (a + b ) * x * y in one invocation of the reassociation pass. */
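/* Editorial illustration, values invented: for the chain
   a_1 * x_3 + b_2 * x_3 + c_4 the two multiplications become candidates and
   the counting table holds (x_3, *) with count 2 plus (a_1, *) and (b_2, *)
   with count 1 each.  Processing (x_3, *) strips x_3 from both chains via
   zero_one_operation, builds s_5 = a_1 + b_2 and p_6 = s_5 * x_3, replaces
   the first candidate with p_6 and zeroes out the second, leaving
   p_6 + 0 + c_4 for the normal reassociation machinery to finish.  */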
1642 static bool
1643 undistribute_ops_list (enum tree_code opcode,
1644 vec<operand_entry *> *ops, class loop *loop)
1646 unsigned int length = ops->length ();
1647 operand_entry *oe1;
1648 unsigned i, j;
1649 unsigned nr_candidates, nr_candidates2;
1650 sbitmap_iterator sbi0;
1651 vec<operand_entry *> *subops;
1652 bool changed = false;
1653 unsigned int next_oecount_id = 0;
1655 if (length <= 1
1656 || opcode != PLUS_EXPR)
1657 return false;
1659 /* Build a list of candidates to process. */
1660 auto_sbitmap candidates (length);
1661 bitmap_clear (candidates);
1662 nr_candidates = 0;
1663 FOR_EACH_VEC_ELT (*ops, i, oe1)
1665 enum tree_code dcode;
1666 gimple *oe1def;
1668 if (TREE_CODE (oe1->op) != SSA_NAME)
1669 continue;
1670 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1671 if (!is_gimple_assign (oe1def))
1672 continue;
1673 dcode = gimple_assign_rhs_code (oe1def);
1674 if ((dcode != MULT_EXPR
1675 && dcode != RDIV_EXPR)
1676 || !is_reassociable_op (oe1def, dcode, loop))
1677 continue;
1679 bitmap_set_bit (candidates, i);
1680 nr_candidates++;
1683 if (nr_candidates < 2)
1684 return false;
1686 if (dump_file && (dump_flags & TDF_DETAILS))
1688 fprintf (dump_file, "searching for un-distribute opportunities ");
1689 print_generic_expr (dump_file,
1690 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1691 fprintf (dump_file, " %d\n", nr_candidates);
1694 /* Build linearized sub-operand lists and the counting table. */
1695 cvec.create (0);
1697 hash_table<oecount_hasher> ctable (15);
1699 /* ??? Macro arguments cannot have multi-argument template types in
1700 them. This typedef is needed to workaround that limitation. */
1701 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1702 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1703 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1705 gimple *oedef;
1706 enum tree_code oecode;
1707 unsigned j;
1709 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1710 oecode = gimple_assign_rhs_code (oedef);
1711 linearize_expr_tree (&subops[i], oedef,
1712 associative_tree_code (oecode), false);
1714 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1716 oecount c;
1717 int *slot;
1718 int idx;
1719 c.oecode = oecode;
1720 c.cnt = 1;
1721 c.id = next_oecount_id++;
1722 c.op = oe1->op;
1723 cvec.safe_push (c);
1724 idx = cvec.length () + 41;
1725 slot = ctable.find_slot (idx, INSERT);
1726 if (!*slot)
1728 *slot = idx;
1730 else
1732 cvec.pop ();
1733 cvec[*slot - 42].cnt++;
1738 /* Sort the counting table. */
1739 cvec.qsort (oecount_cmp);
1741 if (dump_file && (dump_flags & TDF_DETAILS))
1743 oecount *c;
1744 fprintf (dump_file, "Candidates:\n");
1745 FOR_EACH_VEC_ELT (cvec, j, c)
1747 fprintf (dump_file, " %u %s: ", c->cnt,
1748 c->oecode == MULT_EXPR
1749 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1750 print_generic_expr (dump_file, c->op);
1751 fprintf (dump_file, "\n");
1755 /* Process the (operand, code) pairs in order of most occurrence. */
1756 auto_sbitmap candidates2 (length);
1757 while (!cvec.is_empty ())
1759 oecount *c = &cvec.last ();
1760 if (c->cnt < 2)
1761 break;
1763 /* Now collect the operands in the outer chain that contain
1764 the common operand in their inner chain. */
1765 bitmap_clear (candidates2);
1766 nr_candidates2 = 0;
1767 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1769 gimple *oedef;
1770 enum tree_code oecode;
1771 unsigned j;
1772 tree op = (*ops)[i]->op;
1774 /* If we undistributed in this chain already this may be
1775 a constant. */
1776 if (TREE_CODE (op) != SSA_NAME)
1777 continue;
1779 oedef = SSA_NAME_DEF_STMT (op);
1780 oecode = gimple_assign_rhs_code (oedef);
1781 if (oecode != c->oecode)
1782 continue;
1784 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1786 if (oe1->op == c->op)
1788 bitmap_set_bit (candidates2, i);
1789 ++nr_candidates2;
1790 break;
1795 if (nr_candidates2 >= 2)
1797 operand_entry *oe1, *oe2;
1798 gimple *prod;
1799 int first = bitmap_first_set_bit (candidates2);
1801 /* Build the new addition chain. */
1802 oe1 = (*ops)[first];
1803 if (dump_file && (dump_flags & TDF_DETAILS))
1805 fprintf (dump_file, "Building (");
1806 print_generic_expr (dump_file, oe1->op);
1808 zero_one_operation (&oe1->op, c->oecode, c->op);
1809 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1811 gimple *sum;
1812 oe2 = (*ops)[i];
1813 if (dump_file && (dump_flags & TDF_DETAILS))
1815 fprintf (dump_file, " + ");
1816 print_generic_expr (dump_file, oe2->op);
1818 zero_one_operation (&oe2->op, c->oecode, c->op);
1819 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1820 oe1->op, oe2->op, opcode);
1821 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1822 oe2->rank = 0;
1823 oe1->op = gimple_get_lhs (sum);
1826 /* Apply the multiplication/division. */
1827 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1828 oe1->op, c->op, c->oecode);
1829 if (dump_file && (dump_flags & TDF_DETAILS))
1831 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1832 print_generic_expr (dump_file, c->op);
1833 fprintf (dump_file, "\n");
1836 /* Record it in the addition chain and disable further
1837 undistribution with this op. */
1838 oe1->op = gimple_assign_lhs (prod);
1839 oe1->rank = get_rank (oe1->op);
1840 subops[first].release ();
1842 changed = true;
1845 cvec.pop ();
1848 for (i = 0; i < ops->length (); ++i)
1849 subops[i].release ();
1850 free (subops);
1851 cvec.release ();
1853 return changed;
1856 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1857 first: element index for each relevant BIT_FIELD_REF.
1858 second: the index into the operand vec *OPS for each relevant BIT_FIELD_REF. */
1859 typedef std::pair<unsigned, unsigned> v_info_elem;
1860 struct v_info {
1861 tree vec_type;
1862 auto_vec<v_info_elem, 32> vec;
1864 typedef v_info *v_info_ptr;
1866 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1867 static int
1868 sort_by_mach_mode (const void *p_i, const void *p_j)
1870 const tree tr1 = *((const tree *) p_i);
1871 const tree tr2 = *((const tree *) p_j);
1872 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1873 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1874 if (mode1 > mode2)
1875 return 1;
1876 else if (mode1 < mode2)
1877 return -1;
1878 if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1879 return -1;
1880 else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1881 return 1;
1882 return 0;
1885 /* Cleanup hash map for VECTOR information. */
1886 static void
1887 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1889 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1890 it != info_map.end (); ++it)
1892 v_info_ptr info = (*it).second;
1893 delete info;
1894 (*it).second = NULL;
1898 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1899 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1900 is transformed to
1901 Vs = (V1 + V2 + ... + Vn)
1902 Vs[0] + Vs[1] + ... + Vs[k]
1904 The basic steps are listed below:
1906 1) Check the addition chain *OPS, looking for summands that come from a
1907 BIT_FIELD_REF on a VECTOR type. Put the information into
1908 v_info_map for each satisfying summand, using the VECTOR SSA_NAME as key.
1910 2) For each key (VECTOR SSA_NAME), validate that all its BIT_FIELD_REFs are
1911 contiguous and cover the whole VECTOR perfectly, without any holes.
1912 Obtain one VECTOR list which contains the candidates to be transformed.
1914 3) Sort the VECTOR list by machine mode of the VECTOR type; for each group of
1915 candidates with the same mode, build the addition statements for them and
1916 generate BIT_FIELD_REFs accordingly.
1918 TODO:
1919 The current implementation requires that each whole VECTOR be fully
1920 covered, but it could be extended to support partial coverage (adjacent
1921 elements that do not fill the whole vector); that would likely need some
1922 cost model to decide whether the transformation pays off. */
1924 static bool
1925 undistribute_bitref_for_vector (enum tree_code opcode,
1926 vec<operand_entry *> *ops, struct loop *loop)
1928 if (ops->length () <= 1)
1929 return false;
1931 if (opcode != PLUS_EXPR
1932 && opcode != MULT_EXPR
1933 && opcode != BIT_XOR_EXPR
1934 && opcode != BIT_IOR_EXPR
1935 && opcode != BIT_AND_EXPR)
1936 return false;
1938 hash_map<tree, v_info_ptr> v_info_map;
1939 operand_entry *oe1;
1940 unsigned i;
1942 /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put the
1943 information into map. */
1944 FOR_EACH_VEC_ELT (*ops, i, oe1)
1946 enum tree_code dcode;
1947 gimple *oe1def;
1949 if (TREE_CODE (oe1->op) != SSA_NAME)
1950 continue;
1951 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1952 if (!is_gimple_assign (oe1def))
1953 continue;
1954 dcode = gimple_assign_rhs_code (oe1def);
1955 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1956 continue;
1958 tree rhs = gimple_assign_rhs1 (oe1def);
1959 tree vec = TREE_OPERAND (rhs, 0);
1960 tree vec_type = TREE_TYPE (vec);
1962 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1963 continue;
1965 /* Ignore it if target machine can't support this VECTOR type. */
1966 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1967 continue;
1969 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1970 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1971 continue;
1973 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1974 || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1975 continue;
1977 /* The type of BIT_FIELD_REF might not be equal to the element type of
1978 the vector. We want to use a vector type with element type the
1979 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1980 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1982 machine_mode simd_mode;
1983 unsigned HOST_WIDE_INT size, nunits;
1984 unsigned HOST_WIDE_INT elem_size
1985 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1986 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1987 continue;
1988 if (size <= elem_size || (size % elem_size) != 0)
1989 continue;
1990 nunits = size / elem_size;
1991 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1992 nunits).exists (&simd_mode))
1993 continue;
1994 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1996 /* Ignore it if target machine can't support this VECTOR type. */
1997 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1998 continue;
2000 /* Check const vector type, constrain BIT_FIELD_REF offset and
2001 size. */
2002 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
2003 continue;
2005 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
2006 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
2007 continue;
2010 tree elem_type = TREE_TYPE (vec_type);
2011 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
2012 if (maybe_ne (bit_field_size (rhs), elem_size))
2013 continue;
2015 unsigned idx;
2016 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
2017 continue;
2019 /* Ignore it if target machine can't support this type of VECTOR
2020 operation. */
2021 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
2022 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
2023 continue;
2025 bool existed;
2026 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
2027 if (!existed)
2029 info = new v_info;
2030 info->vec_type = vec_type;
2032 else if (!types_compatible_p (vec_type, info->vec_type))
2033 continue;
2034 info->vec.safe_push (std::make_pair (idx, i));
2037 /* We need at least two VECTORs to combine. */
2038 if (v_info_map.elements () <= 1)
2040 cleanup_vinfo_map (v_info_map);
2041 return false;
2044 /* Verify all VECTOR candidates by checking two conditions:
2045 1) sorted offsets are adjacent, no holes.
2046 2) can fill the whole VECTOR perfectly.
2047 And add the valid candidates to a vector for further handling. */
2048 auto_vec<tree> valid_vecs (v_info_map.elements ());
2049 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
2050 it != v_info_map.end (); ++it)
2052 tree cand_vec = (*it).first;
2053 v_info_ptr cand_info = (*it).second;
2054 unsigned int num_elems
2055 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
2056 if (cand_info->vec.length () != num_elems)
2057 continue;
2058 sbitmap holes = sbitmap_alloc (num_elems);
2059 bitmap_ones (holes);
2060 bool valid = true;
2061 v_info_elem *curr;
2062 FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
2064 if (!bitmap_bit_p (holes, curr->first))
2066 valid = false;
2067 break;
2069 else
2070 bitmap_clear_bit (holes, curr->first);
2072 if (valid && bitmap_empty_p (holes))
2073 valid_vecs.quick_push (cand_vec);
2074 sbitmap_free (holes);
2077 /* We need at least two VECTORs to combine. */
2078 if (valid_vecs.length () <= 1)
2080 cleanup_vinfo_map (v_info_map);
2081 return false;
2084 valid_vecs.qsort (sort_by_mach_mode);
2085 /* Go through all candidates in machine mode order; a mode that has only
2086 a single candidate is skipped. */
2087 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2089 tree tvec = valid_vecs[i];
2090 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2092 /* Skip modes with only a single candidate. */
2093 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2094 continue;
2096 unsigned int idx, j;
2097 gimple *sum = NULL;
2098 tree sum_vec = tvec;
2099 v_info_ptr info_ptr = *(v_info_map.get (tvec));
2100 v_info_elem *elem;
2101 tree vec_type = info_ptr->vec_type;
2103 /* Build the sum for all candidates with same mode. */
2106 sum = build_and_add_sum (vec_type, sum_vec,
2107 valid_vecs[i + 1], opcode);
2108 /* Update the operands only after build_and_add_sum,
2109 so that we don't have to repeat the placement algorithm
2110 of build_and_add_sum. */
2111 if (sum_vec == tvec
2112 && !useless_type_conversion_p (vec_type, TREE_TYPE (sum_vec)))
2114 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2115 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2116 tree lhs = make_ssa_name (vec_type);
2117 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2118 gimple_set_uid (g, gimple_uid (sum));
2119 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2120 gimple_assign_set_rhs1 (sum, lhs);
2121 update_stmt (sum);
2123 if (!useless_type_conversion_p (vec_type,
2124 TREE_TYPE (valid_vecs[i + 1])))
2126 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2127 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2128 valid_vecs[i + 1]);
2129 tree lhs = make_ssa_name (vec_type);
2130 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2131 gimple_set_uid (g, gimple_uid (sum));
2132 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2133 gimple_assign_set_rhs2 (sum, lhs);
2134 update_stmt (sum);
2136 sum_vec = gimple_get_lhs (sum);
2137 info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2138 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2139 /* Update those related ops of current candidate VECTOR. */
2140 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2142 idx = elem->second;
2143 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2144 /* Set this so that the op definition will get DCEd later. */
2145 gimple_set_visited (def, true);
2146 if (opcode == PLUS_EXPR
2147 || opcode == BIT_XOR_EXPR
2148 || opcode == BIT_IOR_EXPR)
2149 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2150 else if (opcode == MULT_EXPR)
2151 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2152 else
2154 gcc_assert (opcode == BIT_AND_EXPR);
2155 (*ops)[idx]->op
2156 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2158 (*ops)[idx]->rank = 0;
2160 if (dump_file && (dump_flags & TDF_DETAILS))
2162 fprintf (dump_file, "Generating addition -> ");
2163 print_gimple_stmt (dump_file, sum, 0);
2165 i++;
2167 while ((i < valid_vecs.length () - 1)
2168 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2170 /* Referring to first valid VECTOR with this mode, generate the
2171 BIT_FIELD_REF statements accordingly. */
2172 info_ptr = *(v_info_map.get (tvec));
2173 gcc_assert (sum);
2174 tree elem_type = TREE_TYPE (vec_type);
2175 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2177 idx = elem->second;
2178 tree dst = make_ssa_name (elem_type);
2179 tree pos = bitsize_int (elem->first
2180 * tree_to_uhwi (TYPE_SIZE (elem_type)));
2181 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2182 TYPE_SIZE (elem_type), pos);
2183 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2184 insert_stmt_after (gs, sum);
2185 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2186 /* Set this so that the op definition will get DCEd later. */
2187 gimple_set_visited (def, true);
2188 (*ops)[idx]->op = gimple_assign_lhs (gs);
2189 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2190 if (dump_file && (dump_flags & TDF_DETAILS))
2192 fprintf (dump_file, "Generating bit_field_ref -> ");
2193 print_gimple_stmt (dump_file, gs, 0);
2198 if (dump_file && (dump_flags & TDF_DETAILS))
2199 fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2201 cleanup_vinfo_map (v_info_map);
2203 return true;
2206 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2207 expression, examine the other OPS to see if any of them are comparisons
2208 of the same values, which we may be able to combine or eliminate.
2209 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
2211 static bool
2212 eliminate_redundant_comparison (enum tree_code opcode,
2213 vec<operand_entry *> *ops,
2214 unsigned int currindex,
2215 operand_entry *curr)
2217 tree op1, op2;
2218 enum tree_code lcode, rcode;
2219 gimple *def1, *def2;
2220 int i;
2221 operand_entry *oe;
2223 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2224 return false;
2226 /* Check that CURR is a comparison. */
2227 if (TREE_CODE (curr->op) != SSA_NAME)
2228 return false;
2229 def1 = SSA_NAME_DEF_STMT (curr->op);
2230 if (!is_gimple_assign (def1))
2231 return false;
2232 lcode = gimple_assign_rhs_code (def1);
2233 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2234 return false;
2235 op1 = gimple_assign_rhs1 (def1);
2236 op2 = gimple_assign_rhs2 (def1);
2238 /* Now look for a similar comparison in the remaining OPS. */
2239 for (i = currindex + 1; ops->iterate (i, &oe); i++)
2241 tree t;
2243 if (TREE_CODE (oe->op) != SSA_NAME)
2244 continue;
2245 def2 = SSA_NAME_DEF_STMT (oe->op);
2246 if (!is_gimple_assign (def2))
2247 continue;
2248 rcode = gimple_assign_rhs_code (def2);
2249 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2250 continue;
2252 /* If we got here, we have a match. See if we can combine the
2253 two comparisons. */
2254 tree type = TREE_TYPE (gimple_assign_lhs (def1));
2255 if (opcode == BIT_IOR_EXPR)
2256 t = maybe_fold_or_comparisons (type,
2257 lcode, op1, op2,
2258 rcode, gimple_assign_rhs1 (def2),
2259 gimple_assign_rhs2 (def2));
2260 else
2261 t = maybe_fold_and_comparisons (type,
2262 lcode, op1, op2,
2263 rcode, gimple_assign_rhs1 (def2),
2264 gimple_assign_rhs2 (def2));
2265 if (!t)
2266 continue;
2268 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2269 always give us a boolean_type_node value back. If the original
2270 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2271 we need to convert. */
2272 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2274 if (!fold_convertible_p (TREE_TYPE (curr->op), t))
2275 continue;
2276 t = fold_convert (TREE_TYPE (curr->op), t);
2279 if (TREE_CODE (t) != INTEGER_CST
2280 && !operand_equal_p (t, curr->op, 0))
2282 enum tree_code subcode;
2283 tree newop1, newop2;
2284 if (!COMPARISON_CLASS_P (t))
2285 continue;
2286 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2287 STRIP_USELESS_TYPE_CONVERSION (newop1);
2288 STRIP_USELESS_TYPE_CONVERSION (newop2);
2289 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2290 continue;
2291 if (lcode == TREE_CODE (t)
2292 && operand_equal_p (op1, newop1, 0)
2293 && operand_equal_p (op2, newop2, 0))
2294 t = curr->op;
2295 else if ((TREE_CODE (newop1) == SSA_NAME
2296 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop1))
2297 || (TREE_CODE (newop2) == SSA_NAME
2298 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop2)))
2299 continue;
2302 if (dump_file && (dump_flags & TDF_DETAILS))
2304 fprintf (dump_file, "Equivalence: ");
2305 print_generic_expr (dump_file, curr->op);
2306 fprintf (dump_file, " %s ", op_symbol_code (opcode));
2307 print_generic_expr (dump_file, oe->op);
2308 fprintf (dump_file, " -> ");
2309 print_generic_expr (dump_file, t);
2310 fprintf (dump_file, "\n");
2313 /* Now we can delete oe, as it has been subsumed by the new combined
2314 expression t. */
2315 ops->ordered_remove (i);
2316 reassociate_stats.ops_eliminated ++;
2318 /* If t is the same as curr->op, we're done. Otherwise we must
2319 replace curr->op with t. Special case is if we got a constant
2320 back, in which case we add it to the end instead of in place of
2321 the current entry. */
2322 if (TREE_CODE (t) == INTEGER_CST)
2324 ops->ordered_remove (currindex);
2325 add_to_ops_vec (ops, t);
2327 else if (!operand_equal_p (t, curr->op, 0))
2329 gimple *sum;
2330 enum tree_code subcode;
2331 tree newop1;
2332 tree newop2;
2333 gcc_assert (COMPARISON_CLASS_P (t));
2334 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2335 STRIP_USELESS_TYPE_CONVERSION (newop1);
2336 STRIP_USELESS_TYPE_CONVERSION (newop2);
2337 gcc_checking_assert (is_gimple_val (newop1)
2338 && is_gimple_val (newop2));
2339 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2340 curr->op = gimple_get_lhs (sum);
2342 return true;
2345 return false;
2349 /* Transform repeated addition of same values into multiply with
2350 constant. */
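/* As a minimal illustration: given the operand list for x + x + x + y, the
   three repeated entries are removed and replaced by one new operand that
   holds x * 3, so the chain becomes x * 3 + y.  For floating-point operands
   this is only done under -funsafe-math-optimizations, as checked below.  */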
2351 static bool
2352 transform_add_to_multiply (vec<operand_entry *> *ops)
2354 operand_entry *oe;
2355 tree op = NULL_TREE;
2356 int j;
2357 int i, start = -1, end = 0, count = 0;
2358 auto_vec<std::pair <int, int> > indxs;
2359 bool changed = false;
2361 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2362 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2363 || !flag_unsafe_math_optimizations))
2364 return false;
2366 /* Look for repeated operands. */
2367 FOR_EACH_VEC_ELT (*ops, i, oe)
2369 if (start == -1)
2371 count = 1;
2372 op = oe->op;
2373 start = i;
2375 else if (operand_equal_p (oe->op, op, 0))
2377 count++;
2378 end = i;
2380 else
2382 if (count > 1)
2383 indxs.safe_push (std::make_pair (start, end));
2384 count = 1;
2385 op = oe->op;
2386 start = i;
2390 if (count > 1)
2391 indxs.safe_push (std::make_pair (start, end));
2393 for (j = indxs.length () - 1; j >= 0; --j)
2395 /* Convert repeated operand addition to multiplication. */
2396 start = indxs[j].first;
2397 end = indxs[j].second;
2398 op = (*ops)[start]->op;
2399 count = end - start + 1;
2400 for (i = end; i >= start; --i)
2401 ops->unordered_remove (i);
2402 tree tmp = make_ssa_name (TREE_TYPE (op));
2403 tree cst = build_int_cst (integer_type_node, count);
2404 gassign *mul_stmt
2405 = gimple_build_assign (tmp, MULT_EXPR,
2406 op, fold_convert (TREE_TYPE (op), cst));
2407 gimple_set_visited (mul_stmt, true);
2408 add_to_ops_vec (ops, tmp, mul_stmt);
2409 changed = true;
2412 return changed;
2416 /* Perform various identities and other optimizations on the list of
2417 operand entries, stored in OPS. The tree code for the binary
2418 operation between all the operands is OPCODE. */
2420 static void
2421 optimize_ops_list (enum tree_code opcode,
2422 vec<operand_entry *> *ops)
2424 unsigned int length = ops->length ();
2425 unsigned int i;
2426 operand_entry *oe;
2427 operand_entry *oelast = NULL;
2428 bool iterate = false;
2430 if (length == 1)
2431 return;
2433 oelast = ops->last ();
2435 /* If the last two are constants, pop the constants off, merge them
2436 and try the next two. */
2437 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2439 operand_entry *oelm1 = (*ops)[length - 2];
2441 if (oelm1->rank == 0
2442 && is_gimple_min_invariant (oelm1->op)
2443 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2444 TREE_TYPE (oelast->op)))
2446 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2447 oelm1->op, oelast->op);
2449 if (folded && is_gimple_min_invariant (folded))
2451 if (dump_file && (dump_flags & TDF_DETAILS))
2452 fprintf (dump_file, "Merging constants\n");
2454 ops->pop ();
2455 ops->pop ();
2457 add_to_ops_vec (ops, folded);
2458 reassociate_stats.constants_eliminated++;
2460 optimize_ops_list (opcode, ops);
2461 return;
2466 eliminate_using_constants (opcode, ops);
2467 oelast = NULL;
2469 for (i = 0; ops->iterate (i, &oe);)
2471 bool done = false;
2473 if (eliminate_not_pairs (opcode, ops, i, oe))
2474 return;
2475 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2476 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2477 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2479 if (done)
2480 return;
2481 iterate = true;
2482 oelast = NULL;
2483 continue;
2485 oelast = oe;
2486 i++;
2489 if (iterate)
2490 optimize_ops_list (opcode, ops);
2493 /* The following functions are subroutines to optimize_range_tests and allow
2494 it to try to change a logical combination of comparisons into a range
2495 test.
2497 For example, both
2498 X == 2 || X == 5 || X == 3 || X == 4
2499 and
2500 X >= 2 && X <= 5
2501 are converted to
2502 (unsigned) (X - 2) <= 3
2504 For more information see comments above fold_test_range in fold-const.cc,
2505 this implementation is for GIMPLE. */
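/* A brute-force sketch of the equivalence above, assuming X is a 32-bit
   int: X is in [2, 5] exactly when the unsigned difference X - 2 is at
   most 3, because for X < 2 the subtraction wraps around to a large
   unsigned value.  As a standalone check (not using any GCC internals):

     #include <assert.h>

     int
     main (void)
     {
       for (int x = -10; x <= 10; x++)
         {
           int slow = (x == 2 || x == 5 || x == 3 || x == 4);
           int fast = (unsigned) (x - 2) <= 3;
           assert (slow == fast);
         }
       return 0;
     }
   */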
2509 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
2511 void
2512 dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
2514 if (!skip_exp)
2515 print_generic_expr (file, r->exp);
2516 fprintf (file, " %c[", r->in_p ? '+' : '-');
2517 print_generic_expr (file, r->low);
2518 fputs (", ", file);
2519 print_generic_expr (file, r->high);
2520 fputc (']', file);
2523 /* Dump the range entry R to STDERR. */
2525 DEBUG_FUNCTION void
2526 debug_range_entry (struct range_entry *r)
2528 dump_range_entry (stderr, r, false);
2529 fputc ('\n', stderr);
2532 /* This is similar to make_range in fold-const.cc, but on top of
2533 GIMPLE instead of trees. If EXP is non-NULL, it should be
2534 an SSA_NAME and STMT argument is ignored, otherwise STMT
2535 argument should be a GIMPLE_COND. */
2537 void
2538 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2540 int in_p;
2541 tree low, high;
2542 bool is_bool, strict_overflow_p;
2544 r->exp = NULL_TREE;
2545 r->in_p = false;
2546 r->strict_overflow_p = false;
2547 r->low = NULL_TREE;
2548 r->high = NULL_TREE;
2549 if (exp != NULL_TREE
2550 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2551 return;
2553 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2554 and see if we can refine the range. Some of the cases below may not
2555 happen, but it doesn't seem worth worrying about this. We "continue"
2556 the outer loop when we've changed something; otherwise we "break"
2557 the switch, which will "break" the while. */
2558 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2559 high = low;
2560 in_p = 0;
2561 strict_overflow_p = false;
2562 is_bool = false;
2563 if (exp == NULL_TREE)
2564 is_bool = true;
2565 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2567 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2568 is_bool = true;
2569 else
2570 return;
2572 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2573 is_bool = true;
2575 while (1)
2577 enum tree_code code;
2578 tree arg0, arg1, exp_type;
2579 tree nexp;
2580 location_t loc;
2582 if (exp != NULL_TREE)
2584 if (TREE_CODE (exp) != SSA_NAME
2585 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2586 break;
2588 stmt = SSA_NAME_DEF_STMT (exp);
2589 if (!is_gimple_assign (stmt))
2590 break;
2592 code = gimple_assign_rhs_code (stmt);
2593 arg0 = gimple_assign_rhs1 (stmt);
2594 arg1 = gimple_assign_rhs2 (stmt);
2595 exp_type = TREE_TYPE (exp);
2597 else
2599 code = gimple_cond_code (stmt);
2600 arg0 = gimple_cond_lhs (stmt);
2601 arg1 = gimple_cond_rhs (stmt);
2602 exp_type = boolean_type_node;
2605 if (TREE_CODE (arg0) != SSA_NAME
2606 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0)
2607 || ssa_name_maybe_undef_p (arg0))
2608 break;
2609 loc = gimple_location (stmt);
2610 switch (code)
2612 case BIT_NOT_EXPR:
2613 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2614 /* Ensure the range is either +[-,0], +[0,0],
2615 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2616 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2617 or similar expression of unconditional true or
2618 false, it should not be negated. */
2619 && ((high && integer_zerop (high))
2620 || (low && integer_onep (low))))
2622 in_p = !in_p;
2623 exp = arg0;
2624 continue;
2626 break;
2627 case SSA_NAME:
2628 exp = arg0;
2629 continue;
2630 CASE_CONVERT:
2631 if (is_bool)
2633 if ((TYPE_PRECISION (exp_type) == 1
2634 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2635 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2636 return;
2638 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2640 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2641 is_bool = true;
2642 else
2643 return;
2645 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2646 is_bool = true;
2647 goto do_default;
2648 case EQ_EXPR:
2649 case NE_EXPR:
2650 case LT_EXPR:
2651 case LE_EXPR:
2652 case GE_EXPR:
2653 case GT_EXPR:
2654 is_bool = true;
2655 /* FALLTHRU */
2656 default:
2657 if (!is_bool)
2658 return;
2659 do_default:
2660 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2661 &low, &high, &in_p,
2662 &strict_overflow_p);
2663 if (nexp != NULL_TREE)
2665 exp = nexp;
2666 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2667 continue;
2669 break;
2671 break;
2673 if (is_bool)
2675 r->exp = exp;
2676 r->in_p = in_p;
2677 r->low = low;
2678 r->high = high;
2679 r->strict_overflow_p = strict_overflow_p;
2683 /* Comparison function for qsort. Sort entries
2684 without SSA_NAME exp first, then with SSA_NAMEs sorted
2685 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2686 by increasing ->low and if ->low is the same, by increasing
2687 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2688 maximum. */
2690 static int
2691 range_entry_cmp (const void *a, const void *b)
2693 const struct range_entry *p = (const struct range_entry *) a;
2694 const struct range_entry *q = (const struct range_entry *) b;
2696 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2698 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2700 /* Group range_entries for the same SSA_NAME together. */
2701 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2702 return -1;
2703 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2704 return 1;
2705 /* If ->low is different, NULL low goes first, then by
2706 ascending low. */
2707 if (p->low != NULL_TREE)
2709 if (q->low != NULL_TREE)
2711 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2712 p->low, q->low);
2713 if (tem && integer_onep (tem))
2714 return -1;
2715 tem = fold_binary (GT_EXPR, boolean_type_node,
2716 p->low, q->low);
2717 if (tem && integer_onep (tem))
2718 return 1;
2720 else
2721 return 1;
2723 else if (q->low != NULL_TREE)
2724 return -1;
2725 /* If ->high is different, NULL high goes last, before that by
2726 ascending high. */
2727 if (p->high != NULL_TREE)
2729 if (q->high != NULL_TREE)
2731 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2732 p->high, q->high);
2733 if (tem && integer_onep (tem))
2734 return -1;
2735 tem = fold_binary (GT_EXPR, boolean_type_node,
2736 p->high, q->high);
2737 if (tem && integer_onep (tem))
2738 return 1;
2740 else
2741 return -1;
2743 else if (q->high != NULL_TREE)
2744 return 1;
2745 /* If both ranges are the same, sort below by ascending idx. */
2747 else
2748 return 1;
2750 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2751 return -1;
2753 if (p->idx < q->idx)
2754 return -1;
2755 else
2757 gcc_checking_assert (p->idx > q->idx);
2758 return 1;
2762 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2763 insert needed statements BEFORE or after GSI. */
2765 static tree
2766 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2768 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2769 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2770 if (TREE_CODE (ret) != SSA_NAME)
2772 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2773 if (before)
2774 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2775 else
2776 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2777 ret = gimple_assign_lhs (g);
2779 return ret;
2782 /* Helper routine of optimize_range_test.
2783 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2784 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2785 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2786 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2787 an array of COUNT pointers to other ranges. Return
2788 true if the range merge has been successful.
2789 If OPCODE is ERROR_MARK, this is called from within
2790 maybe_optimize_range_tests and is performing inter-bb range optimization.
2791 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2792 oe->rank. */
2794 static bool
2795 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2796 struct range_entry **otherrangep,
2797 unsigned int count, enum tree_code opcode,
2798 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2799 bool in_p, tree low, tree high, bool strict_overflow_p)
2801 unsigned int idx = range->idx;
2802 struct range_entry *swap_with = NULL;
2803 basic_block rewrite_bb_first = NULL, rewrite_bb_last = NULL;
2804 if (opcode == ERROR_MARK)
2806 /* For inter-bb range test optimization, pick from the range tests
2807 the one which is tested in the earliest condition (one dominating
2808 the others), because otherwise there could be some UB (e.g. signed
2809 overflow) in following bbs that we'd expose which wasn't there in
2810 the original program. See PR104196. */
2811 basic_block orig_range_bb = BASIC_BLOCK_FOR_FN (cfun, (*ops)[idx]->id);
2812 basic_block range_bb = orig_range_bb;
2813 for (unsigned int i = 0; i < count; i++)
2815 struct range_entry *this_range;
2816 if (otherrange)
2817 this_range = otherrange + i;
2818 else
2819 this_range = otherrangep[i];
2820 operand_entry *oe = (*ops)[this_range->idx];
2821 basic_block this_bb = BASIC_BLOCK_FOR_FN (cfun, oe->id);
2822 if (range_bb != this_bb
2823 && dominated_by_p (CDI_DOMINATORS, range_bb, this_bb))
2825 swap_with = this_range;
2826 range_bb = this_bb;
2827 idx = this_range->idx;
2830 /* If seq is non-NULL, it can contain statements that use SSA_NAMEs
2831 only defined in later blocks. In this case we can't move the
2832 merged comparison earlier, so instead check if there are any stmts
2833 that might trigger signed integer overflow in between and rewrite
2834 them. But only after we check if the optimization is possible. */
2835 if (seq && swap_with)
2837 rewrite_bb_first = range_bb;
2838 rewrite_bb_last = orig_range_bb;
2839 idx = range->idx;
2840 swap_with = NULL;
2843 operand_entry *oe = (*ops)[idx];
2844 tree op = oe->op;
2845 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2846 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2847 location_t loc = gimple_location (stmt);
2848 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2849 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2850 in_p, low, high);
2851 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2852 gimple_stmt_iterator gsi;
2853 unsigned int i, uid;
2855 if (tem == NULL_TREE)
2856 return false;
2858 /* If op is default def SSA_NAME, there is no place to insert the
2859 new comparison. Give up, unless we can use OP itself as the
2860 range test. */
2861 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2863 if (op == range->exp
2864 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2865 || TREE_CODE (optype) == BOOLEAN_TYPE)
2866 && (op == tem
2867 || (TREE_CODE (tem) == EQ_EXPR
2868 && TREE_OPERAND (tem, 0) == op
2869 && integer_onep (TREE_OPERAND (tem, 1))))
2870 && opcode != BIT_IOR_EXPR
2871 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2873 stmt = NULL;
2874 tem = op;
2876 else
2877 return false;
2880 if (swap_with)
2881 std::swap (range->idx, swap_with->idx);
2883 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2884 warning_at (loc, OPT_Wstrict_overflow,
2885 "assuming signed overflow does not occur "
2886 "when simplifying range test");
2888 if (dump_file && (dump_flags & TDF_DETAILS))
2890 struct range_entry *r;
2891 fprintf (dump_file, "Optimizing range tests ");
2892 dump_range_entry (dump_file, range, false);
2893 for (i = 0; i < count; i++)
2895 if (otherrange)
2896 r = otherrange + i;
2897 else
2898 r = otherrangep[i];
2899 if (r->exp
2900 && r->exp != range->exp
2901 && TREE_CODE (r->exp) == SSA_NAME)
2903 fprintf (dump_file, " and ");
2904 dump_range_entry (dump_file, r, false);
2906 else
2908 fprintf (dump_file, " and");
2909 dump_range_entry (dump_file, r, true);
2912 fprintf (dump_file, "\n into ");
2913 print_generic_expr (dump_file, tem);
2914 fprintf (dump_file, "\n");
2917 /* In inter-bb range optimization mode, if we have a seq, we can't
2918 move the merged comparison to the earliest bb from the comparisons
2919 being replaced, so instead rewrite stmts that could trigger signed
2920 integer overflow. */
2921 for (basic_block bb = rewrite_bb_last;
2922 bb != rewrite_bb_first; bb = single_pred (bb))
2923 for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
2924 !gsi_end_p (gsi); gsi_next (&gsi))
2926 gimple *stmt = gsi_stmt (gsi);
2927 if (is_gimple_assign (stmt))
2928 if (tree lhs = gimple_assign_lhs (stmt))
2929 if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2930 || POINTER_TYPE_P (TREE_TYPE (lhs)))
2931 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs)))
2933 enum tree_code code = gimple_assign_rhs_code (stmt);
2934 if (arith_code_with_undefined_signed_overflow (code))
2936 gimple_stmt_iterator gsip = gsi;
2937 gimple_stmt_iterator gsin = gsi;
2938 gsi_prev (&gsip);
2939 gsi_next (&gsin);
2940 rewrite_to_defined_overflow (&gsi);
2941 unsigned uid = gimple_uid (stmt);
2942 if (gsi_end_p (gsip))
2943 gsip = gsi_after_labels (bb);
2944 else
2945 gsi_next (&gsip);
2946 for (; gsi_stmt (gsip) != gsi_stmt (gsin);
2947 gsi_next (&gsip))
2948 gimple_set_uid (gsi_stmt (gsip), uid);
2953 if (opcode == BIT_IOR_EXPR
2954 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2955 tem = invert_truthvalue_loc (loc, tem);
2957 tem = fold_convert_loc (loc, optype, tem);
2958 if (stmt)
2960 gsi = gsi_for_stmt (stmt);
2961 uid = gimple_uid (stmt);
2963 else
2965 gsi = gsi_none ();
2966 uid = 0;
2968 if (stmt == NULL)
2969 gcc_checking_assert (tem == op);
2970 /* In rare cases range->exp can be equal to the lhs of stmt.
2971 In that case we have to insert after the stmt rather than before
2972 it. If stmt is a PHI, insert it at the start of the basic block. */
2973 else if (op != range->exp)
2975 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2976 tem = force_into_ssa_name (&gsi, tem, true);
2977 gsi_prev (&gsi);
2979 else if (gimple_code (stmt) != GIMPLE_PHI)
2981 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2982 tem = force_into_ssa_name (&gsi, tem, false);
2984 else
2986 gsi = gsi_after_labels (gimple_bb (stmt));
2987 if (!gsi_end_p (gsi))
2988 uid = gimple_uid (gsi_stmt (gsi));
2989 else
2991 gsi = gsi_start_bb (gimple_bb (stmt));
2992 uid = 1;
2993 while (!gsi_end_p (gsi))
2995 uid = gimple_uid (gsi_stmt (gsi));
2996 gsi_next (&gsi);
2999 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
3000 tem = force_into_ssa_name (&gsi, tem, true);
3001 if (gsi_end_p (gsi))
3002 gsi = gsi_last_bb (gimple_bb (stmt));
3003 else
3004 gsi_prev (&gsi);
3006 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
3007 if (gimple_uid (gsi_stmt (gsi)))
3008 break;
3009 else
3010 gimple_set_uid (gsi_stmt (gsi), uid);
3012 oe->op = tem;
3013 range->exp = exp;
3014 range->low = low;
3015 range->high = high;
3016 range->in_p = in_p;
3017 range->strict_overflow_p = false;
3019 for (i = 0; i < count; i++)
3021 if (otherrange)
3022 range = otherrange + i;
3023 else
3024 range = otherrangep[i];
3025 oe = (*ops)[range->idx];
3026 /* Now change all the other range test immediate uses, so that
3027 those tests will be optimized away. */
3028 if (opcode == ERROR_MARK)
3030 if (oe->op)
3031 oe->op = build_int_cst (TREE_TYPE (oe->op),
3032 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3033 else
3034 oe->op = (oe->rank == BIT_IOR_EXPR
3035 ? boolean_false_node : boolean_true_node);
3037 else
3038 oe->op = error_mark_node;
3039 range->exp = NULL_TREE;
3040 range->low = NULL_TREE;
3041 range->high = NULL_TREE;
3043 return true;
3046 /* Optimize X == CST1 || X == CST2
3047 if popcount (CST1 ^ CST2) == 1 into
3048 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
3049 Similarly for ranges. E.g.
3050 X != 2 && X != 3 && X != 10 && X != 11
3051 will be transformed by the previous optimization into
3052 !((X - 2U) <= 1U || (X - 10U) <= 1U)
3053 and this loop can transform that into
3054 !(((X & ~8) - 2U) <= 1U). */
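/* A worked instance of the above: for X == 4 || X == 6 we have
   CST1 ^ CST2 == 4 ^ 6 == 2, a single bit, and the low and high bounds
   xor to the same value, so the test becomes (X & ~2) == (4 & ~2),
   i.e. (X & ~2) == 4, which accepts exactly the values 4 and 6.  */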
3056 static bool
3057 optimize_range_tests_xor (enum tree_code opcode, tree type,
3058 tree lowi, tree lowj, tree highi, tree highj,
3059 vec<operand_entry *> *ops,
3060 struct range_entry *rangei,
3061 struct range_entry *rangej)
3063 tree lowxor, highxor, tem, exp;
3064 /* Check lowi ^ lowj == highi ^ highj and
3065 popcount (lowi ^ lowj) == 1. */
3066 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
3067 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
3068 return false;
3069 if (!integer_pow2p (lowxor))
3070 return false;
3071 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
3072 if (!tree_int_cst_equal (lowxor, highxor))
3073 return false;
3075 exp = rangei->exp;
3076 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3077 int prec = GET_MODE_PRECISION (mode);
3078 if (TYPE_PRECISION (type) < prec
3079 || (wi::to_wide (TYPE_MIN_VALUE (type))
3080 != wi::min_value (prec, TYPE_SIGN (type)))
3081 || (wi::to_wide (TYPE_MAX_VALUE (type))
3082 != wi::max_value (prec, TYPE_SIGN (type))))
3084 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
3085 exp = fold_convert (type, exp);
3086 lowxor = fold_convert (type, lowxor);
3087 lowi = fold_convert (type, lowi);
3088 highi = fold_convert (type, highi);
3090 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
3091 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
3092 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
3093 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
3094 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
3095 NULL, rangei->in_p, lowj, highj,
3096 rangei->strict_overflow_p
3097 || rangej->strict_overflow_p))
3098 return true;
3099 return false;
3102 /* Optimize X == CST1 || X == CST2
3103 if popcount (CST2 - CST1) == 1 into
3104 ((X - CST1) & ~(CST2 - CST1)) == 0.
3105 Similarly for ranges. E.g.
3106 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
3107 || X == 75 || X == 45
3108 will be transformed by the previous optimization into
3109 (X - 43U) <= 3U || (X - 75U) <= 3U
3110 and this loop can transform that into
3111 ((X - 43U) & ~(75U - 43U)) <= 3U. */
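/* A worked instance of the above: CST2 - CST1 is 75U - 43U == 32U, a
   single bit, so the two subranges collapse into
   ((X - 43U) & ~32U) <= 3U; the values 43..46 map to 0..3 directly,
   while 75..78 map to 32..35, whose extra bit 5 is masked away by ~32U.  */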
3112 static bool
3113 optimize_range_tests_diff (enum tree_code opcode, tree type,
3114 tree lowi, tree lowj, tree highi, tree highj,
3115 vec<operand_entry *> *ops,
3116 struct range_entry *rangei,
3117 struct range_entry *rangej)
3119 tree tem1, tem2, mask;
3120 /* Check highi - lowi == highj - lowj. */
3121 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
3122 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3123 return false;
3124 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
3125 if (!tree_int_cst_equal (tem1, tem2))
3126 return false;
3127 /* Check popcount (lowj - lowi) == 1. */
3128 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
3129 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3130 return false;
3131 if (!integer_pow2p (tem1))
3132 return false;
3134 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3135 int prec = GET_MODE_PRECISION (mode);
3136 if (TYPE_PRECISION (type) < prec
3137 || (wi::to_wide (TYPE_MIN_VALUE (type))
3138 != wi::min_value (prec, TYPE_SIGN (type)))
3139 || (wi::to_wide (TYPE_MAX_VALUE (type))
3140 != wi::max_value (prec, TYPE_SIGN (type))))
3141 type = build_nonstandard_integer_type (prec, 1);
3142 else
3143 type = unsigned_type_for (type);
3144 tem1 = fold_convert (type, tem1);
3145 tem2 = fold_convert (type, tem2);
3146 lowi = fold_convert (type, lowi);
3147 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
3148 tem1 = fold_build2 (MINUS_EXPR, type,
3149 fold_convert (type, rangei->exp), lowi);
3150 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
3151 lowj = build_int_cst (type, 0);
3152 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
3153 NULL, rangei->in_p, lowj, tem2,
3154 rangei->strict_overflow_p
3155 || rangej->strict_overflow_p))
3156 return true;
3157 return false;
3160 /* Perform the checks common to optimize_range_tests_xor and
3161 optimize_range_tests_diff.
3162 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor.
3163 Else call optimize_range_tests_diff. */
3165 static bool
3166 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
3167 bool optimize_xor, vec<operand_entry *> *ops,
3168 struct range_entry *ranges)
3170 int i, j;
3171 bool any_changes = false;
3172 for (i = first; i < length; i++)
3174 tree lowi, highi, lowj, highj, type, tem;
3176 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3177 continue;
3178 type = TREE_TYPE (ranges[i].exp);
3179 if (!INTEGRAL_TYPE_P (type))
3180 continue;
3181 lowi = ranges[i].low;
3182 if (lowi == NULL_TREE)
3183 lowi = TYPE_MIN_VALUE (type);
3184 highi = ranges[i].high;
3185 if (highi == NULL_TREE)
3186 continue;
3187 for (j = i + 1; j < length && j < i + 64; j++)
3189 bool changes;
3190 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3191 continue;
3192 lowj = ranges[j].low;
3193 if (lowj == NULL_TREE)
3194 continue;
3195 highj = ranges[j].high;
3196 if (highj == NULL_TREE)
3197 highj = TYPE_MAX_VALUE (type);
3198 /* Check lowj > highi. */
3199 tem = fold_binary (GT_EXPR, boolean_type_node,
3200 lowj, highi);
3201 if (tem == NULL_TREE || !integer_onep (tem))
3202 continue;
3203 if (optimize_xor)
3204 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3205 highi, highj, ops,
3206 ranges + i, ranges + j);
3207 else
3208 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3209 highi, highj, ops,
3210 ranges + i, ranges + j);
3211 if (changes)
3213 any_changes = true;
3214 break;
3218 return any_changes;
3221 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3222 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3223 EXP on success, NULL otherwise. */
3225 static tree
3226 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3227 wide_int *mask, tree *totallowp)
3229 tree tem = int_const_binop (MINUS_EXPR, high, low);
3230 if (tem == NULL_TREE
3231 || TREE_CODE (tem) != INTEGER_CST
3232 || TREE_OVERFLOW (tem)
3233 || tree_int_cst_sgn (tem) == -1
3234 || compare_tree_int (tem, prec) != -1)
3235 return NULL_TREE;
3237 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3238 *mask = wi::shifted_mask (0, max, false, prec);
3239 if (TREE_CODE (exp) == BIT_AND_EXPR
3240 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3242 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3243 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3244 if (wi::popcount (msk) == 1
3245 && wi::ltu_p (msk, prec - max))
3247 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3248 max += msk.to_uhwi ();
3249 exp = TREE_OPERAND (exp, 0);
3250 if (integer_zerop (low)
3251 && TREE_CODE (exp) == PLUS_EXPR
3252 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3254 tree ret = TREE_OPERAND (exp, 0);
3255 STRIP_NOPS (ret);
3256 widest_int bias
3257 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3258 TYPE_PRECISION (TREE_TYPE (low))));
3259 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3260 if (totallowp)
3262 *totallowp = tbias;
3263 return ret;
3265 else if (!tree_int_cst_lt (totallow, tbias))
3266 return NULL_TREE;
3267 bias = wi::to_widest (tbias);
3268 bias -= wi::to_widest (totallow);
3269 if (bias >= 0 && bias < prec - max)
3271 *mask = wi::lshift (*mask, bias);
3272 return ret;
3277 if (totallowp)
3278 return exp;
3279 if (!tree_int_cst_lt (totallow, low))
3280 return exp;
3281 tem = int_const_binop (MINUS_EXPR, low, totallow);
3282 if (tem == NULL_TREE
3283 || TREE_CODE (tem) != INTEGER_CST
3284 || TREE_OVERFLOW (tem)
3285 || compare_tree_int (tem, prec - max) == 1)
3286 return NULL_TREE;
3288 *mask = wi::lshift (*mask, wi::to_widest (tem));
3289 return exp;
3292 /* Attempt to optimize small range tests using bit test.
3293 E.g.
3294 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3295 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3296 has already been optimized by earlier passes into:
3297 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3298 As all the 43 through 82 range is less than 64 numbers,
3299 for 64-bit word targets optimize that into:
3300 (X - 43U) > 40U || ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
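/* A note on the constant in the example above: 0x8F0000004FULL has one bit
   set for each tested value, at position VALUE - 43; offsets 0-3 and 6 give
   the low 0x4F, offsets 32-35 and 39 give the 0x8F in the upper word.  The
   bit test itself then costs one subtraction, one shift, one AND and one
   compare against zero on a 64-bit word target, plus the entry range
   check.  */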
3302 static bool
3303 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3304 vec<operand_entry *> *ops,
3305 struct range_entry *ranges)
3307 int i, j;
3308 bool any_changes = false;
3309 int prec = GET_MODE_BITSIZE (word_mode);
3310 auto_vec<struct range_entry *, 64> candidates;
3312 for (i = first; i < length - 1; i++)
3314 tree lowi, highi, lowj, highj, type;
3316 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3317 continue;
3318 type = TREE_TYPE (ranges[i].exp);
3319 if (!INTEGRAL_TYPE_P (type))
3320 continue;
3321 lowi = ranges[i].low;
3322 if (lowi == NULL_TREE)
3323 lowi = TYPE_MIN_VALUE (type);
3324 highi = ranges[i].high;
3325 if (highi == NULL_TREE)
3326 continue;
3327 wide_int mask;
3328 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3329 highi, &mask, &lowi);
3330 if (exp == NULL_TREE)
3331 continue;
3332 bool strict_overflow_p = ranges[i].strict_overflow_p;
3333 candidates.truncate (0);
3334 int end = MIN (i + 64, length);
3335 for (j = i + 1; j < end; j++)
3337 tree exp2;
3338 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3339 continue;
3340 if (ranges[j].exp == exp)
3342 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3344 exp2 = TREE_OPERAND (ranges[j].exp, 0);
3345 if (exp2 == exp)
3347 else if (TREE_CODE (exp2) == PLUS_EXPR)
3349 exp2 = TREE_OPERAND (exp2, 0);
3350 STRIP_NOPS (exp2);
3351 if (exp2 != exp)
3352 continue;
3354 else
3355 continue;
3357 else
3358 continue;
3359 lowj = ranges[j].low;
3360 if (lowj == NULL_TREE)
3361 continue;
3362 highj = ranges[j].high;
3363 if (highj == NULL_TREE)
3364 highj = TYPE_MAX_VALUE (type);
3365 wide_int mask2;
3366 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3367 highj, &mask2, NULL);
3368 if (exp2 != exp)
3369 continue;
3370 mask |= mask2;
3371 strict_overflow_p |= ranges[j].strict_overflow_p;
3372 candidates.safe_push (&ranges[j]);
3375 /* If every possible relative value of the expression is a valid shift
3376 amount, then we can merge the entry test into the bit test. In this
3377 case, if we would otherwise need 2 or more comparisons, then use
3378 the bit test; in the other cases, the threshold is 3 comparisons. */
3379 bool entry_test_needed;
3380 value_range r;
3381 if (TREE_CODE (exp) == SSA_NAME
3382 && get_range_query (cfun)->range_of_expr (r, exp)
3383 && !r.undefined_p ()
3384 && !r.varying_p ()
3385 && wi::leu_p (r.upper_bound () - r.lower_bound (), prec - 1))
3387 wide_int min = r.lower_bound ();
3388 wide_int ilowi = wi::to_wide (lowi);
3389 if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3391 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3392 mask = wi::lshift (mask, ilowi - min);
3394 else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3396 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3397 mask = wi::lrshift (mask, min - ilowi);
3399 entry_test_needed = false;
3401 else
3402 entry_test_needed = true;
3403 if (candidates.length () >= (entry_test_needed ? 2 : 1))
3405 tree high = wide_int_to_tree (TREE_TYPE (lowi),
3406 wi::to_widest (lowi)
3407 + prec - 1 - wi::clz (mask));
3408 operand_entry *oe = (*ops)[ranges[i].idx];
3409 tree op = oe->op;
3410 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3411 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN
3412 (cfun, oe->id));
3413 location_t loc = gimple_location (stmt);
3414 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3416 /* See if it isn't cheaper to pretend the minimum value of the
3417 range is 0, if the maximum value is small enough.
3418 We can then avoid the subtraction of the minimum value, but the
3419 mask constant could perhaps be more expensive.
3420 if (compare_tree_int (lowi, 0) > 0
3421 && compare_tree_int (high, prec) < 0)
3423 int cost_diff;
3424 HOST_WIDE_INT m = tree_to_uhwi (lowi);
3425 rtx reg = gen_raw_REG (word_mode, 10000);
3426 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3427 cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
3428 GEN_INT (-m)),
3429 word_mode, speed_p);
3430 rtx r = immed_wide_int_const (mask, word_mode);
3431 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3432 word_mode, speed_p);
3433 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3434 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3435 word_mode, speed_p);
3436 if (cost_diff > 0)
3438 mask = wi::lshift (mask, m);
3439 lowi = build_zero_cst (TREE_TYPE (lowi));
3443 tree tem;
3444 if (entry_test_needed)
3446 tem = build_range_check (loc, optype, unshare_expr (exp),
3447 false, lowi, high);
3448 if (tem == NULL_TREE || is_gimple_val (tem))
3449 continue;
3451 else
3452 tem = NULL_TREE;
3453 tree etype = unsigned_type_for (TREE_TYPE (exp));
3454 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3455 fold_convert_loc (loc, etype, exp),
3456 fold_convert_loc (loc, etype, lowi));
3457 exp = fold_convert_loc (loc, integer_type_node, exp);
3458 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3459 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3460 build_int_cst (word_type, 1), exp);
3461 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3462 wide_int_to_tree (word_type, mask));
3463 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3464 build_zero_cst (word_type));
3465 if (is_gimple_val (exp))
3466 continue;
3468 /* The shift might have undefined behavior if TEM is true,
3469 but reassociate_bb isn't prepared to have basic blocks
3470 split when it is running. So, temporarily emit a code
3471 with BIT_IOR_EXPR instead of &&, and fix it up in
3472 branch_fixup. */
3473 gimple_seq seq = NULL;
3474 if (tem)
3476 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3477 gcc_assert (TREE_CODE (tem) == SSA_NAME);
3478 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3480 gimple_seq seq2;
3481 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3482 gimple_seq_add_seq_without_update (&seq, seq2);
3483 gcc_assert (TREE_CODE (exp) == SSA_NAME);
3484 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3485 if (tem)
3487 gimple *g = gimple_build_assign (make_ssa_name (optype),
3488 BIT_IOR_EXPR, tem, exp);
3489 gimple_set_location (g, loc);
3490 gimple_seq_add_stmt_without_update (&seq, g);
3491 exp = gimple_assign_lhs (g);
3493 tree val = build_zero_cst (optype);
3494 if (update_range_test (&ranges[i], NULL, candidates.address (),
3495 candidates.length (), opcode, ops, exp,
3496 seq, false, val, val, strict_overflow_p))
3498 any_changes = true;
3499 if (tem)
3500 reassoc_branch_fixups.safe_push (tem);
3502 else
3503 gimple_seq_discard (seq);
3506 return any_changes;
3509 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3510 and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.
3511 Also, handle x < C && y < C && z < C where C is power of two as
3512 (x | y | z) < C. And also handle signed x < 0 && y < 0 && z < 0
3513 as (x | y | z) < 0. */
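/* As an illustration of the power-of-two case above, assuming unsigned
   x, y and z and C == 16: each comparison x < 16 merely states that all
   bits above bit 3 are clear, so the three tests hold simultaneously
   exactly when (x | y | z) < 16, replacing three compare-and-branches by
   two ORs and a single comparison.  */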
3515 static bool
3516 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3517 vec<operand_entry *> *ops,
3518 struct range_entry *ranges)
3520 int i;
3521 unsigned int b;
3522 bool any_changes = false;
3523 auto_vec<int, 128> buckets;
3524 auto_vec<int, 32> chains;
3525 auto_vec<struct range_entry *, 32> candidates;
3527 for (i = first; i < length; i++)
3529 int idx;
3531 if (ranges[i].exp == NULL_TREE
3532 || TREE_CODE (ranges[i].exp) != SSA_NAME
3533 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3534 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
3535 continue;
3537 if (ranges[i].low != NULL_TREE
3538 && ranges[i].high != NULL_TREE
3539 && ranges[i].in_p
3540 && tree_int_cst_equal (ranges[i].low, ranges[i].high))
3542 idx = !integer_zerop (ranges[i].low);
3543 if (idx && !integer_all_onesp (ranges[i].low))
3544 continue;
3546 else if (ranges[i].high != NULL_TREE
3547 && TREE_CODE (ranges[i].high) == INTEGER_CST
3548 && ranges[i].in_p)
3550 wide_int w = wi::to_wide (ranges[i].high);
3551 int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
3552 int l = wi::clz (w);
3553 idx = 2;
3554 if (l <= 0
3555 || l >= prec
3556 || w != wi::mask (prec - l, false, prec))
3557 continue;
3558 if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3559 && ranges[i].low == NULL_TREE)
3560 || (ranges[i].low
3561 && integer_zerop (ranges[i].low))))
3562 continue;
3564 else if (ranges[i].high == NULL_TREE
3565 && ranges[i].low != NULL_TREE
3566 /* Perform this optimization only in the last
3567 reassoc pass, as it interferes with the reassociation
3568 itself and could also interfere with VRP etc., which might not
3569 be able to virtually undo the optimization. */
3570 && !reassoc_insert_powi_p
3571 && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3572 && integer_zerop (ranges[i].low))
3573 idx = 3;
3574 else
3575 continue;
3577 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx;
3578 if (buckets.length () <= b)
3579 buckets.safe_grow_cleared (b + 1, true);
3580 if (chains.length () <= (unsigned) i)
3581 chains.safe_grow (i + 1, true);
3582 chains[i] = buckets[b];
3583 buckets[b] = i + 1;
3586 FOR_EACH_VEC_ELT (buckets, b, i)
3587 if (i && chains[i - 1])
3589 int j, k = i;
3590 if ((b % 4) == 2)
3592 /* When ranges[X - 1].high + 1 is a power of two,
3593 we need to process the same bucket up to
3594 precision - 1 times, each time split the entries
3595 with the same high bound into one chain and the
3596 rest into another one to be processed later. */
3597 int this_prev = i;
3598 int other_prev = 0;
3599 for (j = chains[i - 1]; j; j = chains[j - 1])
3601 if (tree_int_cst_equal (ranges[i - 1].high,
3602 ranges[j - 1].high))
3604 chains[this_prev - 1] = j;
3605 this_prev = j;
3607 else if (other_prev == 0)
3609 buckets[b] = j;
3610 other_prev = j;
3612 else
3614 chains[other_prev - 1] = j;
3615 other_prev = j;
3618 chains[this_prev - 1] = 0;
3619 if (other_prev)
3620 chains[other_prev - 1] = 0;
3621 if (chains[i - 1] == 0)
3623 if (other_prev)
3624 b--;
3625 continue;
3628 for (j = chains[i - 1]; j; j = chains[j - 1])
3630 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3631 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3632 if (reassoc_stmt_dominates_stmt_p (gk, gj))
3633 k = j;
3635 tree type1 = TREE_TYPE (ranges[k - 1].exp);
3636 tree type2 = NULL_TREE;
3637 bool strict_overflow_p = false;
3638 candidates.truncate (0);
3639 if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
3640 type1 = pointer_sized_int_node;
3641 for (j = i; j; j = chains[j - 1])
3643 tree type = TREE_TYPE (ranges[j - 1].exp);
3644 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3645 if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
3646 type = pointer_sized_int_node;
3647 if ((b % 4) == 3)
3649 /* For the signed < 0 cases, the types should be
3650 really compatible (all signed with the same precision);
3651 instead put ranges that have different in_p from
3652 k first. */
3653 if (!useless_type_conversion_p (type1, type))
3654 continue;
3655 if (ranges[j - 1].in_p != ranges[k - 1].in_p)
3656 candidates.safe_push (&ranges[j - 1]);
3657 type2 = type1;
3658 continue;
3660 if (j == k
3661 || useless_type_conversion_p (type1, type))
3663 else if (type2 == NULL_TREE
3664 || useless_type_conversion_p (type2, type))
3666 if (type2 == NULL_TREE)
3667 type2 = type;
3668 candidates.safe_push (&ranges[j - 1]);
3671 unsigned l = candidates.length ();
3672 for (j = i; j; j = chains[j - 1])
3674 tree type = TREE_TYPE (ranges[j - 1].exp);
3675 if (j == k)
3676 continue;
3677 if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
3678 type = pointer_sized_int_node;
3679 if ((b % 4) == 3)
3681 if (!useless_type_conversion_p (type1, type))
3682 continue;
3683 if (ranges[j - 1].in_p == ranges[k - 1].in_p)
3684 candidates.safe_push (&ranges[j - 1]);
3685 continue;
3687 if (useless_type_conversion_p (type1, type))
3689 else if (type2 == NULL_TREE
3690 || useless_type_conversion_p (type2, type))
3691 continue;
3692 candidates.safe_push (&ranges[j - 1]);
3694 gimple_seq seq = NULL;
3695 tree op = NULL_TREE;
3696 unsigned int id;
3697 struct range_entry *r;
3698 candidates.safe_push (&ranges[k - 1]);
3699 FOR_EACH_VEC_ELT (candidates, id, r)
3701 gimple *g;
3702 enum tree_code code;
3703 if (id == 0)
3705 op = r->exp;
3706 continue;
3708 if (id == l
3709 || POINTER_TYPE_P (TREE_TYPE (op))
3710 || TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
3712 code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR;
3713 tree type3 = id >= l ? type1 : pointer_sized_int_node;
3714 if (code == BIT_NOT_EXPR
3715 && TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
3717 g = gimple_build_assign (make_ssa_name (type3),
3718 NOP_EXPR, op);
3719 gimple_seq_add_stmt_without_update (&seq, g);
3720 op = gimple_assign_lhs (g);
3722 g = gimple_build_assign (make_ssa_name (type3), code, op);
3723 gimple_seq_add_stmt_without_update (&seq, g);
3724 op = gimple_assign_lhs (g);
3726 tree type = TREE_TYPE (r->exp);
3727 tree exp = r->exp;
3728 if (POINTER_TYPE_P (type)
3729 || TREE_CODE (type) == OFFSET_TYPE
3730 || (id >= l && !useless_type_conversion_p (type1, type)))
3732 tree type3 = id >= l ? type1 : pointer_sized_int_node;
3733 g = gimple_build_assign (make_ssa_name (type3), NOP_EXPR, exp);
3734 gimple_seq_add_stmt_without_update (&seq, g);
3735 exp = gimple_assign_lhs (g);
3737 if ((b % 4) == 3)
3738 code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR;
3739 else
3740 code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR;
3741 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3742 code, op, exp);
3743 gimple_seq_add_stmt_without_update (&seq, g);
3744 op = gimple_assign_lhs (g);
3746 type1 = TREE_TYPE (ranges[k - 1].exp);
3747 if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
3749 gimple *g
3750 = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3751 gimple_seq_add_stmt_without_update (&seq, g);
3752 op = gimple_assign_lhs (g);
3754 candidates.pop ();
3755 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3756 candidates.length (), opcode, ops, op,
3757 seq, ranges[k - 1].in_p, ranges[k - 1].low,
3758 ranges[k - 1].high, strict_overflow_p))
3759 any_changes = true;
3760 else
3761 gimple_seq_discard (seq);
3762 if ((b % 4) == 2 && buckets[b] != i)
3763 /* There is more work to do for this bucket. */
3764 b--;
3767 return any_changes;
3770 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3771 a >= 0 && a < b into (unsigned) a < (unsigned) b
3772 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
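/* A sketch of why the rewrite above is safe, assuming 32-bit int: if
   a >= 0, both operands reinterpret to the same numeric values, so the
   unsigned comparison agrees with the signed one; if a < 0, (unsigned) a
   is at least 0x80000000 while (unsigned) b is at most INT_MAX because
   b >= 0, so the unsigned comparison is false, matching the original
   conjunction a >= 0 && a < b.  */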
3774 static bool
3775 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3776 vec<operand_entry *> *ops,
3777 struct range_entry *ranges,
3778 basic_block first_bb)
3780 int i;
3781 bool any_changes = false;
3782 hash_map<tree, int> *map = NULL;
3784 for (i = first; i < length; i++)
3786 if (ranges[i].exp == NULL_TREE
3787 || TREE_CODE (ranges[i].exp) != SSA_NAME
3788 || !ranges[i].in_p)
3789 continue;
3791 tree type = TREE_TYPE (ranges[i].exp);
3792 if (!INTEGRAL_TYPE_P (type)
3793 || TYPE_UNSIGNED (type)
3794 || ranges[i].low == NULL_TREE
3795 || !integer_zerop (ranges[i].low)
3796 || ranges[i].high != NULL_TREE)
3797 continue;
3798 /* EXP >= 0 here. */
3799 if (map == NULL)
3800 map = new hash_map <tree, int>;
3801 map->put (ranges[i].exp, i);
3804 if (map == NULL)
3805 return false;
3807 for (i = 0; i < length; i++)
3809 bool in_p = ranges[i].in_p;
3810 if (ranges[i].low == NULL_TREE
3811 || ranges[i].high == NULL_TREE)
3812 continue;
3813 if (!integer_zerop (ranges[i].low)
3814 || !integer_zerop (ranges[i].high))
3816 if (ranges[i].exp
3817 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3818 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3819 && integer_onep (ranges[i].low)
3820 && integer_onep (ranges[i].high))
3821 in_p = !in_p;
3822 else
3823 continue;
3826 gimple *stmt;
3827 tree_code ccode;
3828 tree rhs1, rhs2;
3829 if (ranges[i].exp)
3831 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3832 continue;
3833 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3834 if (!is_gimple_assign (stmt))
3835 continue;
3836 ccode = gimple_assign_rhs_code (stmt);
3837 rhs1 = gimple_assign_rhs1 (stmt);
3838 rhs2 = gimple_assign_rhs2 (stmt);
3840 else
3842 operand_entry *oe = (*ops)[ranges[i].idx];
3843 stmt = last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3844 if (gimple_code (stmt) != GIMPLE_COND)
3845 continue;
3846 ccode = gimple_cond_code (stmt);
3847 rhs1 = gimple_cond_lhs (stmt);
3848 rhs2 = gimple_cond_rhs (stmt);
3851 if (TREE_CODE (rhs1) != SSA_NAME
3852 || rhs2 == NULL_TREE
3853 || TREE_CODE (rhs2) != SSA_NAME)
3854 continue;
3856 switch (ccode)
3858 case GT_EXPR:
3859 case GE_EXPR:
3860 case LT_EXPR:
3861 case LE_EXPR:
3862 break;
3863 default:
3864 continue;
3866 if (in_p)
3867 ccode = invert_tree_comparison (ccode, false);
3868 switch (ccode)
3870 case GT_EXPR:
3871 case GE_EXPR:
3872 std::swap (rhs1, rhs2);
3873 ccode = swap_tree_comparison (ccode);
3874 break;
3875 case LT_EXPR:
3876 case LE_EXPR:
3877 break;
3878 default:
3879 gcc_unreachable ();
3882 int *idx = map->get (rhs1);
3883 if (idx == NULL)
3884 continue;
3886 /* maybe_optimize_range_tests allows statements without side-effects
3887 in the basic blocks as long as they are consumed in the same bb.
3888 Make sure rhs2's def stmt is not among them, otherwise we can't
3889 safely use get_nonzero_bits on it. E.g. in:
3890 # RANGE [-83, 1] NONZERO 173
3891 # k_32 = PHI <k_47(13), k_12(9)>
3893 if (k_32 >= 0)
3894 goto <bb 5>; [26.46%]
3895 else
3896 goto <bb 9>; [73.54%]
3898 <bb 5> [local count: 140323371]:
3899 # RANGE [0, 1] NONZERO 1
3900 _5 = (int) k_32;
3901 # RANGE [0, 4] NONZERO 4
3902 _21 = _5 << 2;
3903 # RANGE [0, 4] NONZERO 4
3904 iftmp.0_44 = (char) _21;
3905 if (k_32 < iftmp.0_44)
3906 goto <bb 6>; [84.48%]
3907 else
3908 goto <bb 9>; [15.52%]
3909 the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3910 k_32 >= 0. If we optimized k_32 >= 0 to true and k_32 < iftmp.0_44
3911 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3912 those stmts even for negative k_32, the value ranges would no longer
3913 be guaranteed, and so the optimization would be invalid. */
3914 while (opcode == ERROR_MARK)
3916 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3917 basic_block bb2 = gimple_bb (g);
3918 if (bb2
3919 && bb2 != first_bb
3920 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3922 /* As an exception, handle a few common cases. */
3923 if (gimple_assign_cast_p (g)
3924 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3926 tree op0 = gimple_assign_rhs1 (g);
3927 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3928 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3929 > TYPE_PRECISION (TREE_TYPE (op0))))
3930 /* Zero-extension is always ok. */
3931 break;
3932 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3933 == TYPE_PRECISION (TREE_TYPE (op0))
3934 && TREE_CODE (op0) == SSA_NAME)
3936 /* Cast from signed to unsigned or vice versa. Retry
3937 with the op0 as new rhs2. */
3938 rhs2 = op0;
3939 continue;
3942 else if (is_gimple_assign (g)
3943 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3944 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3945 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3946 /* Masking with INTEGER_CST with MSB clear is always ok
3947 too. */
3948 break;
3949 rhs2 = NULL_TREE;
3951 break;
3953 if (rhs2 == NULL_TREE)
3954 continue;
3956 wide_int nz = get_nonzero_bits (rhs2);
3957 if (wi::neg_p (nz))
3958 continue;
3960 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3961 and RHS2 is known to be RHS2 >= 0. */
3962 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3964 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3965 if ((ranges[*idx].strict_overflow_p
3966 || ranges[i].strict_overflow_p)
3967 && issue_strict_overflow_warning (wc))
3968 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3969 "assuming signed overflow does not occur "
3970 "when simplifying range test");
3972 if (dump_file && (dump_flags & TDF_DETAILS))
3974 struct range_entry *r = &ranges[*idx];
3975 fprintf (dump_file, "Optimizing range test ");
3976 print_generic_expr (dump_file, r->exp);
3977 fprintf (dump_file, " +[");
3978 print_generic_expr (dump_file, r->low);
3979 fprintf (dump_file, ", ");
3980 print_generic_expr (dump_file, r->high);
3981 fprintf (dump_file, "] and comparison ");
3982 print_generic_expr (dump_file, rhs1);
3983 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3984 print_generic_expr (dump_file, rhs2);
3985 fprintf (dump_file, "\n into (");
3986 print_generic_expr (dump_file, utype);
3987 fprintf (dump_file, ") ");
3988 print_generic_expr (dump_file, rhs1);
3989 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3990 print_generic_expr (dump_file, utype);
3991 fprintf (dump_file, ") ");
3992 print_generic_expr (dump_file, rhs2);
3993 fprintf (dump_file, "\n");
3996 operand_entry *oe = (*ops)[ranges[i].idx];
3997 ranges[i].in_p = 0;
3998 if (opcode == BIT_IOR_EXPR
3999 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
4001 ranges[i].in_p = 1;
4002 ccode = invert_tree_comparison (ccode, false);
4005 unsigned int uid = gimple_uid (stmt);
4006 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4007 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
4008 gimple_set_uid (g, uid);
4009 rhs1 = gimple_assign_lhs (g);
4010 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4011 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
4013 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
4014 gimple_set_uid (g, uid);
4015 rhs2 = gimple_assign_lhs (g);
4016 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4018 if (tree_swap_operands_p (rhs1, rhs2))
4020 std::swap (rhs1, rhs2);
4021 ccode = swap_tree_comparison (ccode);
4023 if (gimple_code (stmt) == GIMPLE_COND)
4025 gcond *c = as_a <gcond *> (stmt);
4026 gimple_cond_set_code (c, ccode);
4027 gimple_cond_set_lhs (c, rhs1);
4028 gimple_cond_set_rhs (c, rhs2);
4029 update_stmt (stmt);
4031 else
4033 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
4034 if (!INTEGRAL_TYPE_P (ctype)
4035 || (TREE_CODE (ctype) != BOOLEAN_TYPE
4036 && TYPE_PRECISION (ctype) != 1))
4037 ctype = boolean_type_node;
4038 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
4039 gimple_set_uid (g, uid);
4040 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4041 if (oe->op && ctype != TREE_TYPE (oe->op))
4043 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
4044 NOP_EXPR, gimple_assign_lhs (g));
4045 gimple_set_uid (g, uid);
4046 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4048 ranges[i].exp = gimple_assign_lhs (g);
4049 oe->op = ranges[i].exp;
4050 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
4051 ranges[i].high = ranges[i].low;
4053 ranges[i].strict_overflow_p = false;
4054 oe = (*ops)[ranges[*idx].idx];
4055 /* Now change all the other range test immediate uses, so that
4056 those tests will be optimized away. */
4057 if (opcode == ERROR_MARK)
4059 if (oe->op)
4060 oe->op = build_int_cst (TREE_TYPE (oe->op),
4061 oe->rank == BIT_IOR_EXPR ? 0 : 1);
4062 else
4063 oe->op = (oe->rank == BIT_IOR_EXPR
4064 ? boolean_false_node : boolean_true_node);
4066 else
4067 oe->op = error_mark_node;
4068 ranges[*idx].exp = NULL_TREE;
4069 ranges[*idx].low = NULL_TREE;
4070 ranges[*idx].high = NULL_TREE;
4071 any_changes = true;
4074 delete map;
4075 return any_changes;
4078 /* Optimize range tests, similarly how fold_range_test optimizes
4079 it on trees. The tree code for the binary
4080 operation between all the operands is OPCODE.
4081 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
4082 maybe_optimize_range_tests for inter-bb range optimization.
4083 In that case, if oe->op is NULL, oe->id is the index of the basic
4084 block whose GIMPLE_COND is &&ed or ||ed into the test, and oe->rank
4085 says the actual opcode.
4086 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
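/* As an illustrative example (not taken from any testcase), the
   operand list for
     a == 2 || a == 3 || a == 5
   becomes the range entries [2, 2], [3, 3] and [5, 5] on A;
   merge_ranges combines the first two into [2, 3], which
   update_range_test then emits as roughly (unsigned) (a - 2) <= 1,
   leaving that test plus the [5, 5] test in the operand vector.  */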
4088 static bool
4089 optimize_range_tests (enum tree_code opcode,
4090 vec<operand_entry *> *ops, basic_block first_bb)
4092 unsigned int length = ops->length (), i, j, first;
4093 operand_entry *oe;
4094 struct range_entry *ranges;
4095 bool any_changes = false;
4097 if (length == 1)
4098 return false;
4100 ranges = XNEWVEC (struct range_entry, length);
4101 for (i = 0; i < length; i++)
4103 oe = (*ops)[i];
4104 ranges[i].idx = i;
4105 init_range_entry (ranges + i, oe->op,
4106 oe->op
4107 ? NULL
4108 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
4109 /* For |, invert it now; we will invert it again before emitting
4110 the optimized expression. */
4111 if (opcode == BIT_IOR_EXPR
4112 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
4113 ranges[i].in_p = !ranges[i].in_p;
4116 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
4117 for (i = 0; i < length; i++)
4118 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
4119 break;
4121 /* Try to merge ranges. */
4122 for (first = i; i < length; i++)
4124 tree low = ranges[i].low;
4125 tree high = ranges[i].high;
4126 int in_p = ranges[i].in_p;
4127 bool strict_overflow_p = ranges[i].strict_overflow_p;
4128 int update_fail_count = 0;
4130 for (j = i + 1; j < length; j++)
4132 if (ranges[i].exp != ranges[j].exp)
4133 break;
4134 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
4135 ranges[j].in_p, ranges[j].low, ranges[j].high))
4136 break;
4137 strict_overflow_p |= ranges[j].strict_overflow_p;
4140 if (j == i + 1)
4141 continue;
4143 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
4144 opcode, ops, ranges[i].exp, NULL, in_p,
4145 low, high, strict_overflow_p))
4147 i = j - 1;
4148 any_changes = true;
4150 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
4151 while update_range_test would fail. */
4152 else if (update_fail_count == 64)
4153 i = j - 1;
4154 else
4155 ++update_fail_count;
4158 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
4159 ops, ranges);
4161 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
4162 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
4163 ops, ranges);
4164 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
4165 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
4166 ops, ranges);
4167 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
4168 ranges, first_bb);
4169 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
4170 ops, ranges);
4172 if (any_changes && opcode != ERROR_MARK)
4174 j = 0;
4175 FOR_EACH_VEC_ELT (*ops, i, oe)
4177 if (oe->op == error_mark_node)
4178 continue;
4179 else if (i != j)
4180 (*ops)[j] = oe;
4181 j++;
4183 ops->truncate (j);
4186 XDELETEVEC (ranges);
4187 return any_changes;
4190 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
4191 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
4192 otherwise the comparison code. TYPE is a return value that is set
4193 to type of comparison. */
4195 static tree_code
4196 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
4197 tree *lhs, tree *rhs, gassign **vcond)
4199 if (TREE_CODE (var) != SSA_NAME)
4200 return ERROR_MARK;
4202 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
4203 if (stmt == NULL)
4204 return ERROR_MARK;
4205 if (vcond)
4206 *vcond = stmt;
4208 /* ??? If we start creating more COND_EXPR, we could perform
4209 this same optimization with them. For now, simplify. */
4210 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
4211 return ERROR_MARK;
4213 tree cond = gimple_assign_rhs1 (stmt);
4214 tree_code cmp = TREE_CODE (cond);
4215 if (cmp != SSA_NAME)
4216 return ERROR_MARK;
4218 gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
4219 if (assign == NULL
4220 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
4221 return ERROR_MARK;
4223 cmp = gimple_assign_rhs_code (assign);
4224 if (lhs)
4225 *lhs = gimple_assign_rhs1 (assign);
4226 if (rhs)
4227 *rhs = gimple_assign_rhs2 (assign);
4229 /* ??? For now, allow only canonical true and false result vectors.
4230 We could expand this to other constants should the need arise,
4231 but at the moment we don't create them. */
4232 tree t = gimple_assign_rhs2 (stmt);
4233 tree f = gimple_assign_rhs3 (stmt);
4234 bool inv;
4235 if (integer_all_onesp (t))
4236 inv = false;
4237 else if (integer_all_onesp (f))
4239 cmp = invert_tree_comparison (cmp, false);
4240 inv = true;
4242 else
4243 return ERROR_MARK;
4244 if (!integer_zerop (f))
4245 return ERROR_MARK;
4247 /* Success! */
4248 if (rets)
4249 *rets = assign;
4250 if (reti)
4251 *reti = inv;
4252 if (type)
4253 *type = TREE_TYPE (cond);
4254 return cmp;
4257 /* Optimize the condition of VEC_COND_EXPRs which have been combined
4258 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
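/* Illustrative sketch with hypothetical SSA names: for

     c1_1 = a_2 < b_3;
     v1_4 = VEC_COND_EXPR <c1_1, { -1, ... }, { 0, ... }>;
     c2_5 = a_2 <= b_3;
     v2_6 = VEC_COND_EXPR <c2_5, { -1, ... }, { 0, ... }>;
     x_7 = v1_4 & v2_6;

   maybe_fold_and_comparisons can fold a_2 < b_3 with a_2 <= b_3 into
   the single comparison a_2 < b_3, so the first VEC_COND_EXPR is
   reused with the combined condition, and v2_6's entry in OPS is
   marked with error_mark_node and later truncated away.  */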
4260 static bool
4261 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
4263 unsigned int length = ops->length (), i, j;
4264 bool any_changes = false;
4266 if (length == 1)
4267 return false;
4269 for (i = 0; i < length; ++i)
4271 tree elt0 = (*ops)[i]->op;
4273 gassign *stmt0, *vcond0;
4274 bool invert;
4275 tree type, lhs0, rhs0;
4276 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
4277 &rhs0, &vcond0);
4278 if (cmp0 == ERROR_MARK)
4279 continue;
4281 for (j = i + 1; j < length; ++j)
4283 tree &elt1 = (*ops)[j]->op;
4285 gassign *stmt1, *vcond1;
4286 tree lhs1, rhs1;
4287 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
4288 &rhs1, &vcond1);
4289 if (cmp1 == ERROR_MARK)
4290 continue;
4292 tree comb;
4293 if (opcode == BIT_AND_EXPR)
4294 comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
4295 cmp1, lhs1, rhs1);
4296 else if (opcode == BIT_IOR_EXPR)
4297 comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
4298 cmp1, lhs1, rhs1);
4299 else
4300 gcc_unreachable ();
4301 if (comb == NULL)
4302 continue;
4304 /* Success! */
4305 if (dump_file && (dump_flags & TDF_DETAILS))
4307 fprintf (dump_file, "Transforming ");
4308 print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
4309 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
4310 print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
4311 fprintf (dump_file, " into ");
4312 print_generic_expr (dump_file, comb);
4313 fputc ('\n', dump_file);
4316 gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
4317 tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE,
4318 true, GSI_SAME_STMT);
4319 if (invert)
4320 swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0),
4321 gimple_assign_rhs3_ptr (vcond0));
4322 gimple_assign_set_rhs1 (vcond0, exp);
4323 update_stmt (vcond0);
4325 elt1 = error_mark_node;
4326 any_changes = true;
4330 if (any_changes)
4332 operand_entry *oe;
4333 j = 0;
4334 FOR_EACH_VEC_ELT (*ops, i, oe)
4336 if (oe->op == error_mark_node)
4337 continue;
4338 else if (i != j)
4339 (*ops)[j] = oe;
4340 j++;
4342 ops->truncate (j);
4345 return any_changes;
4348 /* Return true if STMT is a cast like:
4349 <bb N>:
4351 _123 = (int) _234;
4353 <bb M>:
4354 # _345 = PHI <_123(N), 1(...), 1(...)>
4355 where _234 has bool type, _123 has a single use, and
4356 bb N has a single successor M. This is commonly used in
4357 the last block of a range test.
4359 Also return true if STMT is a tcc_comparison like:
4360 <bb N>:
4362 _234 = a_2(D) == 2;
4364 <bb M>:
4365 # _345 = PHI <_234(N), 1(...), 1(...)>
4366 _346 = (int) _345;
4367 where _234 has bool type and a single use, and
4368 bb N has a single successor M. This is commonly used in
4369 the last block of a range test. */
4371 static bool
4372 final_range_test_p (gimple *stmt)
4374 basic_block bb, rhs_bb, lhs_bb;
4375 edge e;
4376 tree lhs, rhs;
4377 use_operand_p use_p;
4378 gimple *use_stmt;
4380 if (!gimple_assign_cast_p (stmt)
4381 && (!is_gimple_assign (stmt)
4382 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4383 != tcc_comparison)))
4384 return false;
4385 bb = gimple_bb (stmt);
4386 if (!single_succ_p (bb))
4387 return false;
4388 e = single_succ_edge (bb);
4389 if (e->flags & EDGE_COMPLEX)
4390 return false;
4392 lhs = gimple_assign_lhs (stmt);
4393 rhs = gimple_assign_rhs1 (stmt);
4394 if (gimple_assign_cast_p (stmt)
4395 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4396 || TREE_CODE (rhs) != SSA_NAME
4397 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4398 return false;
4400 if (!gimple_assign_cast_p (stmt)
4401 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4402 return false;
4404 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4405 if (!single_imm_use (lhs, &use_p, &use_stmt))
4406 return false;
4408 if (gimple_code (use_stmt) != GIMPLE_PHI
4409 || gimple_bb (use_stmt) != e->dest)
4410 return false;
4412 /* And that the rhs is defined in the same loop. */
4413 if (gimple_assign_cast_p (stmt))
4415 if (TREE_CODE (rhs) != SSA_NAME
4416 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4417 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4418 return false;
4420 else
4422 if (TREE_CODE (lhs) != SSA_NAME
4423 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4424 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4425 return false;
4428 return true;
4431 /* Return true if BB is a suitable basic block for inter-bb range test
4432 optimization. If BACKWARD is true, BB should be the only predecessor
4433 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
4434 or compared against to find a common basic block to which all conditions
4435 branch if true resp. false. If BACKWARD is false, TEST_BB should
4436 be the only predecessor of BB. *TEST_SWAPPED_P is set to true if
4437 TEST_BB is a bb ending in condition where the edge to non-*OTHER_BB
4438 block points to an empty block that falls through into *OTHER_BB and
4439 the phi args match that path. */
4441 static bool
4442 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4443 bool *test_swapped_p, bool backward)
4445 edge_iterator ei, ei2;
4446 edge e, e2;
4447 gimple *stmt;
4448 gphi_iterator gsi;
4449 bool other_edge_seen = false;
4450 bool is_cond;
4452 if (test_bb == bb)
4453 return false;
4454 /* Check last stmt first. */
4455 stmt = last_nondebug_stmt (bb);
4456 if (stmt == NULL
4457 || (gimple_code (stmt) != GIMPLE_COND
4458 && (backward || !final_range_test_p (stmt)))
4459 || gimple_visited_p (stmt)
4460 || stmt_could_throw_p (cfun, stmt)
4461 || *other_bb == bb)
4462 return false;
4463 is_cond = gimple_code (stmt) == GIMPLE_COND;
4464 if (is_cond)
4466 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4467 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4468 to *OTHER_BB (if not set yet, try to find it out). */
4469 if (EDGE_COUNT (bb->succs) != 2)
4470 return false;
4471 FOR_EACH_EDGE (e, ei, bb->succs)
4473 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4474 return false;
4475 if (e->dest == test_bb)
4477 if (backward)
4478 continue;
4479 else
4480 return false;
4482 if (e->dest == bb)
4483 return false;
4484 if (*other_bb == NULL)
4486 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4487 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4488 return false;
4489 else if (e->dest == e2->dest)
4490 *other_bb = e->dest;
4491 if (*other_bb == NULL)
4492 return false;
4494 if (e->dest == *other_bb)
4495 other_edge_seen = true;
4496 else if (backward)
4497 return false;
4499 if (*other_bb == NULL || !other_edge_seen)
4500 return false;
4502 else if (single_succ (bb) != *other_bb)
4503 return false;
4505 /* Now check all PHIs of *OTHER_BB. */
4506 e = find_edge (bb, *other_bb);
4507 e2 = find_edge (test_bb, *other_bb);
4508 retry:;
4509 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4511 gphi *phi = gsi.phi ();
4512 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4513 corresponding to BB and TEST_BB predecessor must be the same. */
4514 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4515 gimple_phi_arg_def (phi, e2->dest_idx), 0))
4517 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4518 one of the PHIs should have the lhs of the last stmt in
4519 that block as PHI arg and that PHI should have 0 or 1
4520 corresponding to it in all other range test basic blocks
4521 considered. */
4522 if (!is_cond)
4524 if (gimple_phi_arg_def (phi, e->dest_idx)
4525 == gimple_assign_lhs (stmt)
4526 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4527 || integer_onep (gimple_phi_arg_def (phi,
4528 e2->dest_idx))))
4529 continue;
4531 else
4533 gimple *test_last = last_nondebug_stmt (test_bb);
4534 if (gimple_code (test_last) == GIMPLE_COND)
4536 if (backward ? e2->src != test_bb : e->src != bb)
4537 return false;
4539 /* For last_bb, handle also:
4540 if (x_3(D) == 3)
4541 goto <bb 6>; [34.00%]
4542 else
4543 goto <bb 7>; [66.00%]
4545 <bb 6> [local count: 79512730]:
4547 <bb 7> [local count: 1073741824]:
4548 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4549 where bb 7 is *OTHER_BB, but the PHI values from the
4550 earlier bbs match the path through the empty bb
4551 in between. */
4552 edge e3;
4553 if (backward)
4554 e3 = EDGE_SUCC (test_bb,
4555 e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0);
4556 else
4557 e3 = EDGE_SUCC (bb,
4558 e == EDGE_SUCC (bb, 0) ? 1 : 0);
4559 if (empty_block_p (e3->dest)
4560 && single_succ_p (e3->dest)
4561 && single_succ (e3->dest) == *other_bb
4562 && single_pred_p (e3->dest)
4563 && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
4565 if (backward)
4566 e2 = single_succ_edge (e3->dest);
4567 else
4568 e = single_succ_edge (e3->dest);
4569 if (test_swapped_p)
4570 *test_swapped_p = true;
4571 goto retry;
4574 else if (gimple_phi_arg_def (phi, e2->dest_idx)
4575 == gimple_assign_lhs (test_last)
4576 && (integer_zerop (gimple_phi_arg_def (phi,
4577 e->dest_idx))
4578 || integer_onep (gimple_phi_arg_def (phi,
4579 e->dest_idx))))
4580 continue;
4583 return false;
4586 return true;
4589 /* Return true if BB doesn't have side-effects that would disallow
4590 range test optimization, all SSA_NAMEs set in the bb are consumed
4591 in the bb and there are no PHIs. */
4593 bool
4594 no_side_effect_bb (basic_block bb)
4596 gimple_stmt_iterator gsi;
4597 gimple *last;
4599 if (!gimple_seq_empty_p (phi_nodes (bb)))
4600 return false;
4601 last = last_nondebug_stmt (bb);
4602 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4604 gimple *stmt = gsi_stmt (gsi);
4605 tree lhs;
4606 imm_use_iterator imm_iter;
4607 use_operand_p use_p;
4609 if (is_gimple_debug (stmt))
4610 continue;
4611 if (gimple_has_side_effects (stmt))
4612 return false;
4613 if (stmt == last)
4614 return true;
4615 if (!is_gimple_assign (stmt))
4616 return false;
4617 lhs = gimple_assign_lhs (stmt);
4618 if (TREE_CODE (lhs) != SSA_NAME)
4619 return false;
4620 if (gimple_assign_rhs_could_trap_p (stmt))
4621 return false;
4622 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4624 gimple *use_stmt = USE_STMT (use_p);
4625 if (is_gimple_debug (use_stmt))
4626 continue;
4627 if (gimple_bb (use_stmt) != bb)
4628 return false;
4631 return false;
4634 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4635 return true and fill in *OPS recursively. */
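/* A small sketch of what get_ops collects (hypothetical SSA names),
   assuming _1, _2 and _3 each have a single use and are not themselves
   defined by a reassociable |:

     _5 = _1 | _2;
     _7 = _5 | _3;

   get_ops (_7, BIT_IOR_EXPR, ...) recurses through the chain, pushes
   _1, _2 and _3 as separate operand entries and marks the defining
   statements of _5 and _7 as visited.  */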
4637 static bool
4638 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4639 class loop *loop)
4641 gimple *stmt = SSA_NAME_DEF_STMT (var);
4642 tree rhs[2];
4643 int i;
4645 if (!is_reassociable_op (stmt, code, loop))
4646 return false;
4648 rhs[0] = gimple_assign_rhs1 (stmt);
4649 rhs[1] = gimple_assign_rhs2 (stmt);
4650 gimple_set_visited (stmt, true);
4651 for (i = 0; i < 2; i++)
4652 if (TREE_CODE (rhs[i]) == SSA_NAME
4653 && !get_ops (rhs[i], code, ops, loop)
4654 && has_single_use (rhs[i]))
4656 operand_entry *oe = operand_entry_pool.allocate ();
4658 oe->op = rhs[i];
4659 oe->rank = code;
4660 oe->id = 0;
4661 oe->count = 1;
4662 oe->stmt_to_insert = NULL;
4663 ops->safe_push (oe);
4665 return true;
4668 /* Find the ops that were added by get_ops starting from VAR, see if
4669 they were changed during update_range_test and if yes, create new
4670 stmts. */
4672 static tree
4673 update_ops (tree var, enum tree_code code, const vec<operand_entry *> &ops,
4674 unsigned int *pidx, class loop *loop)
4676 gimple *stmt = SSA_NAME_DEF_STMT (var);
4677 tree rhs[4];
4678 int i;
4680 if (!is_reassociable_op (stmt, code, loop))
4681 return NULL;
4683 rhs[0] = gimple_assign_rhs1 (stmt);
4684 rhs[1] = gimple_assign_rhs2 (stmt);
4685 rhs[2] = rhs[0];
4686 rhs[3] = rhs[1];
4687 for (i = 0; i < 2; i++)
4688 if (TREE_CODE (rhs[i]) == SSA_NAME)
4690 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4691 if (rhs[2 + i] == NULL_TREE)
4693 if (has_single_use (rhs[i]))
4694 rhs[2 + i] = ops[(*pidx)++]->op;
4695 else
4696 rhs[2 + i] = rhs[i];
4699 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4700 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4702 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4703 var = make_ssa_name (TREE_TYPE (var));
4704 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4705 rhs[2], rhs[3]);
4706 gimple_set_uid (g, gimple_uid (stmt));
4707 gimple_set_visited (g, true);
4708 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4709 gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
4710 if (fold_stmt_inplace (&gsi2))
4711 update_stmt (g);
4713 return var;
4716 /* Structure to track the initial value passed to get_ops and
4717 the range in the ops vector for each basic block. */
4719 struct inter_bb_range_test_entry
4721 tree op;
4722 unsigned int first_idx, last_idx;
4725 /* Inter-bb range test optimization.
4727 Returns TRUE if a gimple conditional is optimized to true or false,
4728 otherwise returns FALSE.
4730 This indicates to the caller that it should run a CFG cleanup pass
4731 once reassociation is completed. */
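/* Illustrative sketch of the inter-bb case this handles
   (hypothetical source):

     if (a < 0)      // bb 2, ends in GIMPLE_COND
       goto other;
     if (a > 9)      // bb 3, ends in GIMPLE_COND
       goto other;

   Both conditions branch to the same other_bb and bb 3 has no side
   effects, so the two tests are collected as ranges on A and can be
   rewritten into a single (unsigned) a > 9 test, with the now
   redundant GIMPLE_COND folded to a constant condition; that folding
   is why the caller may need a CFG cleanup afterwards.  */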
4733 static bool
4734 maybe_optimize_range_tests (gimple *stmt)
4736 basic_block first_bb = gimple_bb (stmt);
4737 basic_block last_bb = first_bb;
4738 basic_block other_bb = NULL;
4739 basic_block bb;
4740 edge_iterator ei;
4741 edge e;
4742 auto_vec<operand_entry *> ops;
4743 auto_vec<inter_bb_range_test_entry> bbinfo;
4744 bool any_changes = false;
4745 bool cfg_cleanup_needed = false;
4747 /* Consider only basic blocks that end with GIMPLE_COND or
4748 a cast statement satisfying final_range_test_p. All
4749 but the last bb in the first_bb .. last_bb range
4750 should end with GIMPLE_COND. */
4751 if (gimple_code (stmt) == GIMPLE_COND)
4753 if (EDGE_COUNT (first_bb->succs) != 2)
4754 return cfg_cleanup_needed;
4756 else if (final_range_test_p (stmt))
4757 other_bb = single_succ (first_bb);
4758 else
4759 return cfg_cleanup_needed;
4761 if (stmt_could_throw_p (cfun, stmt))
4762 return cfg_cleanup_needed;
4764 /* As relative ordering of post-dominator sons isn't fixed,
4765 maybe_optimize_range_tests can be called first on any
4766 bb in the range we want to optimize. So, start searching
4767 backwards, if first_bb can be set to a predecessor. */
4768 while (single_pred_p (first_bb))
4770 basic_block pred_bb = single_pred (first_bb);
4771 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
4772 break;
4773 if (!no_side_effect_bb (first_bb))
4774 break;
4775 first_bb = pred_bb;
4777 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4778 Before starting forward search in last_bb successors, find
4779 out the other_bb. */
4780 if (first_bb == last_bb)
4782 other_bb = NULL;
4783 /* As non-GIMPLE_COND last stmt always terminates the range,
4784 if forward search didn't discover anything, just give up. */
4785 if (gimple_code (stmt) != GIMPLE_COND)
4786 return cfg_cleanup_needed;
4787 /* Look at both successors. Either it ends with a GIMPLE_COND
4788 and satisfies suitable_cond_bb, or ends with a cast and
4789 other_bb is that cast's successor. */
4790 FOR_EACH_EDGE (e, ei, first_bb->succs)
4791 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4792 || e->dest == first_bb)
4793 return cfg_cleanup_needed;
4794 else if (single_pred_p (e->dest))
4796 stmt = last_nondebug_stmt (e->dest);
4797 if (stmt
4798 && gimple_code (stmt) == GIMPLE_COND
4799 && EDGE_COUNT (e->dest->succs) == 2)
4801 if (suitable_cond_bb (first_bb, e->dest, &other_bb,
4802 NULL, true))
4803 break;
4804 else
4805 other_bb = NULL;
4807 else if (stmt
4808 && final_range_test_p (stmt)
4809 && find_edge (first_bb, single_succ (e->dest)))
4811 other_bb = single_succ (e->dest);
4812 if (other_bb == first_bb)
4813 other_bb = NULL;
4816 if (other_bb == NULL)
4817 return cfg_cleanup_needed;
4819 /* Now do the forward search, moving last_bb to successor bbs
4820 that aren't other_bb. */
4821 while (EDGE_COUNT (last_bb->succs) == 2)
4823 FOR_EACH_EDGE (e, ei, last_bb->succs)
4824 if (e->dest != other_bb)
4825 break;
4826 if (e == NULL)
4827 break;
4828 if (!single_pred_p (e->dest))
4829 break;
4830 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false))
4831 break;
4832 if (!no_side_effect_bb (e->dest))
4833 break;
4834 last_bb = e->dest;
4836 if (first_bb == last_bb)
4837 return cfg_cleanup_needed;
4838 /* Here basic blocks first_bb through last_bb's predecessor
4839 end with GIMPLE_COND, all of them have one of the edges to
4840 other_bb and another to another block in the range,
4841 all blocks except first_bb have no side effects, and
4842 last_bb ends with either a GIMPLE_COND or a cast satisfying
4843 final_range_test_p. */
4844 for (bb = last_bb; ; bb = single_pred (bb))
4846 enum tree_code code;
4847 tree lhs, rhs;
4848 inter_bb_range_test_entry bb_ent;
4850 bb_ent.op = NULL_TREE;
4851 bb_ent.first_idx = ops.length ();
4852 bb_ent.last_idx = bb_ent.first_idx;
4853 e = find_edge (bb, other_bb);
4854 stmt = last_nondebug_stmt (bb);
4855 gimple_set_visited (stmt, true);
4856 if (gimple_code (stmt) != GIMPLE_COND)
4858 use_operand_p use_p;
4859 gimple *phi;
4860 edge e2;
4861 unsigned int d;
4863 lhs = gimple_assign_lhs (stmt);
4864 rhs = gimple_assign_rhs1 (stmt);
4865 gcc_assert (bb == last_bb);
4867 /* stmt is
4868 _123 = (int) _234;
4870 _234 = a_2(D) == 2;
4872 followed by:
4873 <bb M>:
4874 # _345 = PHI <_123(N), 1(...), 1(...)>
4876 or 0 instead of 1. If it is 0, the _234
4877 range test is anded together with all the
4878 other range tests, if it is 1, it is ored with
4879 them. */
4880 single_imm_use (lhs, &use_p, &phi);
4881 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4882 e2 = find_edge (first_bb, other_bb);
4883 d = e2->dest_idx;
4884 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4885 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4886 code = BIT_AND_EXPR;
4887 else
4889 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4890 code = BIT_IOR_EXPR;
4893 /* If _234 SSA_NAME_DEF_STMT is
4894 _234 = _567 | _789;
4895 (or &, corresponding to 1/0 in the phi arguments),
4896 push into ops the individual range test arguments
4897 of the bitwise or resp. and, recursively. */
4898 if (TREE_CODE (rhs) == SSA_NAME
4899 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4900 != tcc_comparison)
4901 && !get_ops (rhs, code, &ops,
4902 loop_containing_stmt (stmt))
4903 && has_single_use (rhs))
4905 /* Otherwise, push the _234 range test itself. */
4906 operand_entry *oe = operand_entry_pool.allocate ();
4908 oe->op = rhs;
4909 oe->rank = code;
4910 oe->id = 0;
4911 oe->count = 1;
4912 oe->stmt_to_insert = NULL;
4913 ops.safe_push (oe);
4914 bb_ent.last_idx++;
4915 bb_ent.op = rhs;
4917 else if (is_gimple_assign (stmt)
4918 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4919 == tcc_comparison)
4920 && !get_ops (lhs, code, &ops,
4921 loop_containing_stmt (stmt))
4922 && has_single_use (lhs))
4924 operand_entry *oe = operand_entry_pool.allocate ();
4925 oe->op = lhs;
4926 oe->rank = code;
4927 oe->id = 0;
4928 oe->count = 1;
4929 ops.safe_push (oe);
4930 bb_ent.last_idx++;
4931 bb_ent.op = lhs;
4933 else
4935 bb_ent.last_idx = ops.length ();
4936 bb_ent.op = rhs;
4938 bbinfo.safe_push (bb_ent);
4939 for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
4940 ops[i]->id = bb->index;
4941 continue;
4943 else if (bb == last_bb)
4945 /* For last_bb, handle also:
4946 if (x_3(D) == 3)
4947 goto <bb 6>; [34.00%]
4948 else
4949 goto <bb 7>; [66.00%]
4951 <bb 6> [local count: 79512730]:
4953 <bb 7> [local count: 1073741824]:
4954 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4955 where bb 7 is OTHER_BB, but the PHI values from the
4956 earlier bbs match the path through the empty bb
4957 in between. */
4958 bool test_swapped_p = false;
4959 bool ok = suitable_cond_bb (single_pred (last_bb), last_bb,
4960 &other_bb, &test_swapped_p, true);
4961 gcc_assert (ok);
4962 if (test_swapped_p)
4963 e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0);
4965 /* Otherwise stmt is GIMPLE_COND. */
4966 code = gimple_cond_code (stmt);
4967 lhs = gimple_cond_lhs (stmt);
4968 rhs = gimple_cond_rhs (stmt);
4969 if (TREE_CODE (lhs) == SSA_NAME
4970 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4971 && ((code != EQ_EXPR && code != NE_EXPR)
4972 || rhs != boolean_false_node
4973 /* Either push into ops the individual bitwise
4974 or resp. and operands, depending on which
4975 edge is other_bb. */
4976 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4977 ^ (code == EQ_EXPR))
4978 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4979 loop_containing_stmt (stmt))))
4981 /* Or push the GIMPLE_COND stmt itself. */
4982 operand_entry *oe = operand_entry_pool.allocate ();
4984 oe->op = NULL;
4985 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4986 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4987 /* oe->op = NULL signals that there is no SSA_NAME
4988 for the range test, and oe->id instead is the number
4989 of the basic block at whose end the GIMPLE_COND
4990 is. */
4991 oe->id = bb->index;
4992 oe->count = 1;
4993 oe->stmt_to_insert = NULL;
4994 ops.safe_push (oe);
4995 bb_ent.op = NULL;
4996 bb_ent.last_idx++;
4998 else if (ops.length () > bb_ent.first_idx)
5000 bb_ent.op = lhs;
5001 bb_ent.last_idx = ops.length ();
5003 bbinfo.safe_push (bb_ent);
5004 for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
5005 ops[i]->id = bb->index;
5006 if (bb == first_bb)
5007 break;
5009 if (ops.length () > 1)
5010 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
5011 if (any_changes)
5013 unsigned int idx, max_idx = 0;
5014 /* update_ops relies on the has_single_use predicates returning the
5015 same values as they did during get_ops earlier. Additionally it
5016 never removes statements, only adds new ones, and it should walk
5017 from the single imm use and check the predicate already before
5018 making those changes.
5019 On the other hand, the handling of GIMPLE_COND directly can turn
5020 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
5021 it needs to be done in a separate loop afterwards. */
5022 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
5024 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
5025 && bbinfo[idx].op != NULL_TREE)
5027 tree new_op;
5029 max_idx = idx;
5030 stmt = last_nondebug_stmt (bb);
5031 new_op = update_ops (bbinfo[idx].op,
5032 (enum tree_code)
5033 ops[bbinfo[idx].first_idx]->rank,
5034 ops, &bbinfo[idx].first_idx,
5035 loop_containing_stmt (stmt));
5036 if (new_op == NULL_TREE)
5038 gcc_assert (bb == last_bb);
5039 new_op = ops[bbinfo[idx].first_idx++]->op;
5041 if (bbinfo[idx].op != new_op)
5043 imm_use_iterator iter;
5044 use_operand_p use_p;
5045 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
5047 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
5048 if (is_gimple_debug (use_stmt))
5049 continue;
5050 else if (gimple_code (use_stmt) == GIMPLE_COND
5051 || gimple_code (use_stmt) == GIMPLE_PHI)
5052 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5053 SET_USE (use_p, new_op);
5054 else if ((is_gimple_assign (use_stmt)
5055 && (TREE_CODE_CLASS
5056 (gimple_assign_rhs_code (use_stmt))
5057 == tcc_comparison)))
5058 cast_or_tcc_cmp_stmt = use_stmt;
5059 else if (gimple_assign_cast_p (use_stmt))
5060 cast_or_tcc_cmp_stmt = use_stmt;
5061 else
5062 gcc_unreachable ();
5064 if (cast_or_tcc_cmp_stmt)
5066 gcc_assert (bb == last_bb);
5067 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
5068 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
5069 enum tree_code rhs_code
5070 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
5071 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
5072 : CONVERT_EXPR;
5073 gassign *g;
5074 if (is_gimple_min_invariant (new_op))
5076 new_op = fold_convert (TREE_TYPE (lhs), new_op);
5077 g = gimple_build_assign (new_lhs, new_op);
5079 else
5080 g = gimple_build_assign (new_lhs, rhs_code, new_op);
5081 gimple_stmt_iterator gsi
5082 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
5083 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
5084 gimple_set_visited (g, true);
5085 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5086 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
5087 if (is_gimple_debug (use_stmt))
5088 continue;
5089 else if (gimple_code (use_stmt) == GIMPLE_COND
5090 || gimple_code (use_stmt) == GIMPLE_PHI)
5091 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5092 SET_USE (use_p, new_lhs);
5093 else
5094 gcc_unreachable ();
5098 if (bb == first_bb)
5099 break;
5101 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
5103 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
5104 && bbinfo[idx].op == NULL_TREE
5105 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
5107 gcond *cond_stmt = as_a <gcond *> (*gsi_last_bb (bb));
5109 if (idx > max_idx)
5110 max_idx = idx;
5112 /* If we collapse the conditional to a true/false
5113 condition, then bubble that knowledge up to our caller. */
5114 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
5116 gimple_cond_make_false (cond_stmt);
5117 cfg_cleanup_needed = true;
5119 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
5121 gimple_cond_make_true (cond_stmt);
5122 cfg_cleanup_needed = true;
5124 else
5126 gimple_cond_set_code (cond_stmt, NE_EXPR);
5127 gimple_cond_set_lhs (cond_stmt,
5128 ops[bbinfo[idx].first_idx]->op);
5129 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
5131 update_stmt (cond_stmt);
5133 if (bb == first_bb)
5134 break;
5137 /* The above changes could result in basic blocks after the first
5138 modified one, up to and including last_bb, to be executed even if
5139 they would not be in the original program. If the value ranges of
5140 assignment lhs' in those bbs were dependent on the conditions
5141 guarding those basic blocks which now can change, the VRs might
5142 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
5143 are only used within the same bb, it should be not a big deal if
5144 we just reset all the VRs in those bbs. See PR68671. */
5145 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
5146 reset_flow_sensitive_info_in_bb (bb);
5148 return cfg_cleanup_needed;
5151 /* Remove the def stmt of VAR if VAR has zero uses, and if so recurse
5152 on its rhs1 operand. */
5154 static void
5155 remove_visited_stmt_chain (tree var)
5157 gimple *stmt;
5158 gimple_stmt_iterator gsi;
5160 while (1)
5162 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
5163 return;
5164 stmt = SSA_NAME_DEF_STMT (var);
5165 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
5167 var = gimple_assign_rhs1 (stmt);
5168 gsi = gsi_for_stmt (stmt);
5169 reassoc_remove_stmt (&gsi);
5170 release_defs (stmt);
5172 else
5173 return;
5177 /* This function checks three consecutive operands in
5178 the passed operand vector OPS starting from OPINDEX and
5179 swaps two operands if it is profitable for the binary operation
5180 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
5182 We pair ops with the same rank if possible. */
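/* For example (ranks are illustrative): with OPS[OPINDEX..] having
   ranks {3, 3, 5}, oe1 and oe3 are swapped to give {5, 3, 3}, so the
   binary statement consuming the last two operands pairs the two
   equal-rank entries and exposes them to redundancy elimination.  */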
5184 static void
5185 swap_ops_for_binary_stmt (const vec<operand_entry *> &ops,
5186 unsigned int opindex)
5188 operand_entry *oe1, *oe2, *oe3;
5190 oe1 = ops[opindex];
5191 oe2 = ops[opindex + 1];
5192 oe3 = ops[opindex + 2];
5194 if (oe1->rank == oe2->rank && oe2->rank != oe3->rank)
5195 std::swap (*oe1, *oe3);
5196 else if (oe1->rank == oe3->rank && oe2->rank != oe3->rank)
5197 std::swap (*oe1, *oe2);
5200 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
5201 two definitions, otherwise return STMT. Sets INSERT_BEFORE to indicate
5202 whether RHS1 op RHS2 can be inserted before or needs to be inserted
5203 after the returned stmt. */
5205 static inline gimple *
5206 find_insert_point (gimple *stmt, tree rhs1, tree rhs2, bool &insert_before)
5208 insert_before = true;
5209 if (TREE_CODE (rhs1) == SSA_NAME
5210 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
5212 stmt = SSA_NAME_DEF_STMT (rhs1);
5213 insert_before = false;
5215 if (TREE_CODE (rhs2) == SSA_NAME
5216 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
5218 stmt = SSA_NAME_DEF_STMT (rhs2);
5219 insert_before = false;
5221 return stmt;
5224 /* If the stmt that defines operand has to be inserted, insert it
5225 before the use. */
5226 static void
5227 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
5229 gcc_assert (is_gimple_assign (stmt_to_insert));
5230 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
5231 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
5232 bool insert_before;
5233 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2, insert_before);
5234 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
5235 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
5237 /* If the insert point is not stmt, then insert_point would be
5238 the point where operand rhs1 or rhs2 is defined. In this case,
5239 stmt_to_insert has to be inserted afterwards. This would
5240 only happen when the stmt insertion point is flexible. */
5241 if (insert_before)
5242 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
5243 else
5244 insert_stmt_after (stmt_to_insert, insert_point);
5248 /* Recursively rewrite our linearized statements so that the operators
5249 match those in OPS[OPINDEX], putting the computation in rank
5250 order. Return new lhs.
5251 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
5252 the current stmt and during recursive invocations.
5253 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
5254 recursive invocations. */
5256 static tree
5257 rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
5258 const vec<operand_entry *> &ops, bool changed,
5259 bool next_changed)
5261 tree rhs1 = gimple_assign_rhs1 (stmt);
5262 tree rhs2 = gimple_assign_rhs2 (stmt);
5263 tree lhs = gimple_assign_lhs (stmt);
5264 operand_entry *oe;
5266 /* The final recursion case for this function is that you have
5267 exactly two operations left.
5268 If we had exactly one op in the entire list to start with, we
5269 would have never called this function, and the tail recursion
5270 rewrites them one at a time. */
5271 if (opindex + 2 == ops.length ())
5273 operand_entry *oe1, *oe2;
5275 oe1 = ops[opindex];
5276 oe2 = ops[opindex + 1];
5278 if (rhs1 != oe1->op || rhs2 != oe2->op)
5280 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5281 unsigned int uid = gimple_uid (stmt);
5283 if (dump_file && (dump_flags & TDF_DETAILS))
5285 fprintf (dump_file, "Transforming ");
5286 print_gimple_stmt (dump_file, stmt, 0);
5289 /* If the stmt that defines operand has to be inserted, insert it
5290 before the use. */
5291 if (oe1->stmt_to_insert)
5292 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
5293 if (oe2->stmt_to_insert)
5294 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
5295 /* Even when changed is false, reassociation could have e.g. removed
5296 some redundant operations, so unless we are just swapping the
5297 arguments or unless there is no change at all (then we just
5298 return lhs), force creation of a new SSA_NAME. */
5299 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
5301 bool insert_before;
5302 gimple *insert_point
5303 = find_insert_point (stmt, oe1->op, oe2->op, insert_before);
5304 lhs = make_ssa_name (TREE_TYPE (lhs));
5305 stmt
5306 = gimple_build_assign (lhs, rhs_code,
5307 oe1->op, oe2->op);
5308 gimple_set_uid (stmt, uid);
5309 gimple_set_visited (stmt, true);
5310 if (insert_before)
5311 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5312 else
5313 insert_stmt_after (stmt, insert_point);
5315 else
5317 bool insert_before;
5318 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op,
5319 insert_before)
5320 == stmt);
5321 gimple_assign_set_rhs1 (stmt, oe1->op);
5322 gimple_assign_set_rhs2 (stmt, oe2->op);
5323 update_stmt (stmt);
5326 if (rhs1 != oe1->op && rhs1 != oe2->op)
5327 remove_visited_stmt_chain (rhs1);
5329 if (dump_file && (dump_flags & TDF_DETAILS))
5331 fprintf (dump_file, " into ");
5332 print_gimple_stmt (dump_file, stmt, 0);
5335 return lhs;
5338 /* If we hit here, we should have 3 or more ops left. */
5339 gcc_assert (opindex + 2 < ops.length ());
5341 /* Rewrite the next operator. */
5342 oe = ops[opindex];
5344 /* If the stmt that defines operand has to be inserted, insert it
5345 before the use. */
5346 if (oe->stmt_to_insert)
5347 insert_stmt_before_use (stmt, oe->stmt_to_insert);
5349 /* Recurse on the LHS of the binary operator, which is guaranteed to
5350 be the non-leaf side. */
5351 tree new_rhs1
5352 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
5353 changed || oe->op != rhs2 || next_changed,
5354 false);
5356 if (oe->op != rhs2 || new_rhs1 != rhs1)
5358 if (dump_file && (dump_flags & TDF_DETAILS))
5360 fprintf (dump_file, "Transforming ");
5361 print_gimple_stmt (dump_file, stmt, 0);
5364 /* If changed is false, this is either opindex == 0
5365 or all outer rhs2's were equal to corresponding oe->op,
5366 and powi_result is NULL.
5367 That means lhs is equivalent before and after reassociation.
5368 Otherwise ensure the old lhs SSA_NAME is not reused and
5369 create a new stmt as well, so that any debug stmts will be
5370 properly adjusted. */
5371 if (changed)
5373 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5374 unsigned int uid = gimple_uid (stmt);
5375 bool insert_before;
5376 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op,
5377 insert_before);
5379 lhs = make_ssa_name (TREE_TYPE (lhs));
5380 stmt = gimple_build_assign (lhs, rhs_code,
5381 new_rhs1, oe->op);
5382 gimple_set_uid (stmt, uid);
5383 gimple_set_visited (stmt, true);
5384 if (insert_before)
5385 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5386 else
5387 insert_stmt_after (stmt, insert_point);
5389 else
5391 bool insert_before;
5392 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op,
5393 insert_before)
5394 == stmt);
5395 gimple_assign_set_rhs1 (stmt, new_rhs1);
5396 gimple_assign_set_rhs2 (stmt, oe->op);
5397 update_stmt (stmt);
5400 if (dump_file && (dump_flags & TDF_DETAILS))
5402 fprintf (dump_file, " into ");
5403 print_gimple_stmt (dump_file, stmt, 0);
5406 return lhs;
5409 /* Find out how many cycles we need to compute a statement chain.
5410 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
5411 maximum number of independent statements we may execute per cycle. */
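/* A worked example of the computation below: for ops_num == 8 and
   cpu_width == 2, res = 8 / (2 * 2) = 2 cycles are spent reducing the
   operand count by cpu_width per cycle; the remaining
   rest = 8 - 2 * 2 = 4 operands are then halved each cycle, adding
   exact_log2 (4) = 2 more cycles, for a total of 4 cycles.  */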
5413 static int
5414 get_required_cycles (int ops_num, int cpu_width)
5416 int res;
5417 int elog;
5418 unsigned int rest;
5420 /* While we have more than 2 * cpu_width operands
5421 we may reduce the number of operands by cpu_width
5422 per cycle. */
5423 res = ops_num / (2 * cpu_width);
5425 /* The remaining operand count may be halved each cycle
5426 until we have only one operand left. */
5427 rest = (unsigned)(ops_num - res * cpu_width);
5428 elog = exact_log2 (rest);
5429 if (elog >= 0)
5430 res += elog;
5431 else
5432 res += floor_log2 (rest) + 1;
5434 return res;
5437 /* Given that the target fully pipelines FMA instructions, return the latency
5438 of MULT_EXPRs that can't be hidden by the FMAs. WIDTH is the number of
5439 pipes. */
5441 static inline int
5442 get_mult_latency_consider_fma (int ops_num, int mult_num, int width)
5444 gcc_checking_assert (mult_num && mult_num <= ops_num);
5446 /* For each partition, if mult_num == ops_num, there's latency(MULT)*2.
5447 e.g:
5449 A * B + C * D
5451 _1 = A * B;
5452 _2 = .FMA (C, D, _1);
5454 Otherwise there's latency(MULT)*1 in the first FMA. */
5455 return CEIL (ops_num, width) == CEIL (mult_num, width) ? 2 : 1;
5458 /* Returns an optimal number of registers to use for computation of
5459 given statements.
5461 LHS is the result ssa name of OPS. MULT_NUM is number of sub-expressions
5462 that are MULT_EXPRs, when OPS are PLUS_EXPRs or MINUS_EXPRs. */
5464 static int
5465 get_reassociation_width (vec<operand_entry *> *ops, int mult_num, tree lhs,
5466 enum tree_code opc, machine_mode mode)
5468 int param_width = param_tree_reassoc_width;
5469 int width;
5470 int width_min;
5471 int cycles_best;
5472 int ops_num = ops->length ();
5474 if (param_width > 0)
5475 width = param_width;
5476 else
5477 width = targetm.sched.reassociation_width (opc, mode);
5479 if (width == 1)
5480 return width;
5482 /* Get the minimal time required for sequence computation. */
5483 cycles_best = get_required_cycles (ops_num, width);
5485 /* Check if we may use less width and still compute sequence for
5486 the same time. It will allow us to reduce registers usage.
5487 get_required_cycles is monotonically increasing with lower width
5488 so we can perform a binary search for the minimal width that still
5489 results in the optimal cycle count. */
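/* Sketch of the search with illustrative numbers: for ops_num == 6
   and an initial width of 4, cycles_best is 3; trying width_mid == 2
   still gives get_required_cycles (6, 2) == 3, so width drops to 2,
   while width 1 would need 5 cycles, so the search stops at 2.  */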
5490 width_min = 1;
5492 /* If the target fully pipelines FMA instruction, the multiply part can start
5493 already if its operands are ready. Assuming symmetric pipes are used for
5494 FMUL/FADD/FMA, then for a sequence of FMA like:
5496 _8 = .FMA (_2, _3, _1);
5497 _9 = .FMA (_5, _4, _8);
5498 _10 = .FMA (_7, _6, _9);
5500 , if width=1, the latency is latency(MULT) + latency(ADD)*3.
5501 While with width=2:
5503 _8 = _4 * _5;
5504 _9 = .FMA (_2, _3, _1);
5505 _10 = .FMA (_6, _7, _8);
5506 _11 = _9 + _10;
5508 , it is latency(MULT)*2 + latency(ADD)*2. Assuming latency(MULT) >=
5509 latency(ADD), the first variant is preferred.
5511 Find out if we can get a smaller width considering FMA. */
5512 if (width > 1 && mult_num && param_fully_pipelined_fma)
5514 /* When param_fully_pipelined_fma is set, assume FMUL and FMA use the
5515 same units that can also do FADD. For other scenarios, such as when
5516 FMUL and FADD are using separate units, the following code may not
5517 apply. */
5518 int width_mult = targetm.sched.reassociation_width (MULT_EXPR, mode);
5519 gcc_checking_assert (width_mult <= width);
5521 /* Latency of MULT_EXPRs. */
5522 int lat_mul
5523 = get_mult_latency_consider_fma (ops_num, mult_num, width_mult);
5525 /* Quick search might not apply. So start from 1. */
5526 for (int i = 1; i < width_mult; i++)
5528 int lat_mul_new
5529 = get_mult_latency_consider_fma (ops_num, mult_num, i);
5530 int lat_add_new = get_required_cycles (ops_num, i);
5532 /* Assume latency(MULT) >= latency(ADD). */
5533 if (lat_mul - lat_mul_new >= lat_add_new - cycles_best)
5535 width = i;
5536 break;
5540 else
5542 while (width > width_min)
5544 int width_mid = (width + width_min) / 2;
5546 if (get_required_cycles (ops_num, width_mid) == cycles_best)
5547 width = width_mid;
5548 else if (width_min < width_mid)
5549 width_min = width_mid;
5550 else
5551 break;
5555 /* If there's a loop-dependent FMA result, return width=2 to avoid it. This is
5556 better than skipping these FMA candidates in widening_mul. */
5557 if (width == 1
5558 && maybe_le (tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (lhs))),
5559 param_avoid_fma_max_bits))
5561 /* Look for cross backedge dependency:
5562 1. LHS is a phi argument in the same basic block in which it is defined.
5563 2. And the result of the phi node is used in OPS. */
5564 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (lhs));
5566 use_operand_p use_p;
5567 imm_use_iterator iter;
5568 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
5569 if (gphi *phi = dyn_cast<gphi *> (USE_STMT (use_p)))
5571 if (gimple_phi_arg_edge (phi, phi_arg_index_from_use (use_p))->src
5572 != bb)
5573 continue;
5574 tree phi_result = gimple_phi_result (phi);
5575 operand_entry *oe;
5576 unsigned int j;
5577 FOR_EACH_VEC_ELT (*ops, j, oe)
5579 if (TREE_CODE (oe->op) != SSA_NAME)
5580 continue;
5582 /* The result of the phi is an operand of the PLUS_EXPR. */
5583 if (oe->op == phi_result)
5584 return 2;
5586 /* Check if the result of the phi is an operand of a MULT_EXPR. */
5587 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5588 if (is_gimple_assign (def_stmt)
5589 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR)
5591 tree rhs = gimple_assign_rhs1 (def_stmt);
5592 if (TREE_CODE (rhs) == SSA_NAME)
5594 if (rhs == phi_result)
5595 return 2;
5596 def_stmt = SSA_NAME_DEF_STMT (rhs);
5599 if (is_gimple_assign (def_stmt)
5600 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
5602 if (gimple_assign_rhs1 (def_stmt) == phi_result
5603 || gimple_assign_rhs2 (def_stmt) == phi_result)
5604 return 2;
5610 return width;
5613 #define SPECIAL_BIASED_END_STMT 0 /* It is the end stmt of all ops. */
5614 #define BIASED_END_STMT 1 /* It is the end stmt of normal or biased ops. */
5615 #define NORMAL_END_STMT 2 /* It is the end stmt of normal ops. */
5617 /* Rewrite statements with a dependency chain with regard to the chance to
5618 generate FMA.
5619 For a chain with FMA: try to keep the FMA opportunity as much as possible.
5620 For a chain without FMA: put the computation in rank order and try
5621 to allow operations to be executed in parallel.
5622 E.g.
5623 e + f + a * b + c * d;
5625 ssa1 = e + a * b;
5626 ssa2 = f + c * d;
5627 ssa3 = ssa1 + ssa2;
5629 This reassociation approach preserves the chance of fma generation as much
5630 as possible.
5632 Another thing is to avoid adding loop-carried ops to long chains, otherwise
5633 the whole chain will have dependencies across the loop iteration. Just keep
5634 loop-carried ops in a separate chain.
5635 E.g.
5636 x_1 = phi (x_0, x_2)
5637 y_1 = phi (y_0, y_2)
5639 a + b + c + d + e + x1 + y1
5641 SSA1 = a + b;
5642 SSA2 = c + d;
5643 SSA3 = SSA1 + e;
5644 SSA4 = SSA3 + SSA2;
5645 SSA5 = x1 + y1;
5646 SSA6 = SSA4 + SSA5;
5648 static void
5649 rewrite_expr_tree_parallel (gassign *stmt, int width, bool has_fma,
5650 const vec<operand_entry *> &ops)
5652 enum tree_code opcode = gimple_assign_rhs_code (stmt);
5653 int op_num = ops.length ();
5654 int op_normal_num = op_num;
5655 gcc_assert (op_num > 0);
5656 int stmt_num = op_num - 1;
5657 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5658 int i = 0, j = 0;
5659 tree tmp_op[2], op1;
5660 operand_entry *oe;
5661 gimple *stmt1 = NULL;
5662 tree last_rhs1 = gimple_assign_rhs1 (stmt);
5663 int last_rhs1_stmt_index = 0, last_rhs2_stmt_index = 0;
5664 int width_active = 0, width_count = 0;
5665 bool has_biased = false, ops_changed = false;
5666 auto_vec<operand_entry *> ops_normal;
5667 auto_vec<operand_entry *> ops_biased;
5668 vec<operand_entry *> *ops1;
5670 /* We start expression rewriting from the top statements.
5671 So, in this loop we create a full list of statements
5672 we will work with. */
5673 stmts[stmt_num - 1] = stmt;
5674 for (i = stmt_num - 2; i >= 0; i--)
5675 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5677 /* Avoid adding loop-carried ops to long chains; first filter out the
5678 loop-carried ones. But we need to make sure that the length of the
5679 remainder is not less than 4, which is the smallest number of ops for
5680 which we can break the dependency. */
5681 FOR_EACH_VEC_ELT (ops, i, oe)
5683 if (TREE_CODE (oe->op) == SSA_NAME
5684 && bitmap_bit_p (biased_names, SSA_NAME_VERSION (oe->op))
5685 && op_normal_num > 4)
5687 ops_biased.safe_push (oe);
5688 has_biased = true;
5689 op_normal_num --;
5691 else
5692 ops_normal.safe_push (oe);
5695 /* Width should not be larger than ops length / 2, since we cannot create
5696 more parallel dependency chains than that. */
5697 int width_normal = op_normal_num / 2;
5698 int width_biased = (op_num - op_normal_num) / 2;
5699 width_normal = width <= width_normal ? width : width_normal;
5700 width_biased = width <= width_biased ? width : width_biased;
5702 ops1 = &ops_normal;
5703 width_count = width_active = width_normal;
5705 /* Build parallel dependency chain according to width. */
5706 for (i = 0; i < stmt_num; i++)
5708 if (dump_file && (dump_flags & TDF_DETAILS))
5710 fprintf (dump_file, "Transforming ");
5711 print_gimple_stmt (dump_file, stmts[i], 0);
5714 /* When the work of normal ops is over, but the loop is not over,
5715 continue to do biased ops. */
5716 if (width_count == 0 && ops1 == &ops_normal)
5718 ops1 = &ops_biased;
5719 width_count = width_active = width_biased;
5720 ops_changed = true;
5723 /* Swap the operands if no FMA in the chain. */
5724 if (ops1->length () > 2 && !has_fma)
5725 swap_ops_for_binary_stmt (*ops1, ops1->length () - 3);
5727 if (i < width_active
5728 || (ops_changed && i <= (last_rhs1_stmt_index + width_active)))
5730 for (j = 0; j < 2; j++)
5732 oe = ops1->pop ();
5733 tmp_op[j] = oe->op;
5734 /* If the stmt that defines operand has to be inserted, insert it
5735 before the use. */
5736 stmt1 = oe->stmt_to_insert;
5737 if (stmt1)
5738 insert_stmt_before_use (stmts[i], stmt1);
5739 stmt1 = NULL;
5741 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1),
5742 tmp_op[1],
5743 tmp_op[0],
5744 opcode);
5745 gimple_set_visited (stmts[i], true);
5748 else
5750 /* We keep original statement only for the last one. All others are
5751 recreated. */
5752 if (!ops1->length ())
5754 /* For biased length equal to 2. */
5755 if (width_count == BIASED_END_STMT && !last_rhs2_stmt_index)
5756 last_rhs2_stmt_index = i - 1;
5758 /* When width_count == 2 and there is no biased, just finish. */
5759 if (width_count == NORMAL_END_STMT && !has_biased)
5761 last_rhs1_stmt_index = i - 1;
5762 last_rhs2_stmt_index = i - 2;
5764 if (last_rhs1_stmt_index && (last_rhs2_stmt_index || !has_biased))
5766 /* We keep original statement only for the last one. All
5767 others are recreated. */
5768 gimple_assign_set_rhs1 (stmts[i], gimple_assign_lhs
5769 (stmts[last_rhs1_stmt_index]));
5770 gimple_assign_set_rhs2 (stmts[i], gimple_assign_lhs
5771 (stmts[last_rhs2_stmt_index]));
5772 update_stmt (stmts[i]);
5774 else
5776 stmts[i] =
5777 build_and_add_sum (TREE_TYPE (last_rhs1),
5778 gimple_assign_lhs (stmts[i-width_count]),
5779 gimple_assign_lhs
5780 (stmts[i-width_count+1]),
5781 opcode);
5782 gimple_set_visited (stmts[i], true);
5783 width_count--;
5785 /* It is the end of normal or biased ops.
5786 last_rhs1_stmt_index is used to record the last stmt index
5787 for normal ops. last_rhs2_stmt_index is used to record the
5788 last stmt index for biased ops. */
5789 if (width_count == BIASED_END_STMT)
5791 gcc_assert (has_biased);
5792 if (ops_biased.length ())
5793 last_rhs1_stmt_index = i;
5794 else
5795 last_rhs2_stmt_index = i;
5796 width_count--;
5800 else
5802 /* Attach the remaining ops to the parallel dependency chain. */
5803 oe = ops1->pop ();
5804 op1 = oe->op;
5805 stmt1 = oe->stmt_to_insert;
5806 if (stmt1)
5807 insert_stmt_before_use (stmts[i], stmt1);
5808 stmt1 = NULL;
5810 /* For the case of a single biased op. */
5811 if (width_count == SPECIAL_BIASED_END_STMT)
5813 /* We keep original statement only for the last one. All
5814 others are recreated. */
5815 gcc_assert (has_biased);
5816 gimple_assign_set_rhs1 (stmts[i], gimple_assign_lhs
5817 (stmts[last_rhs1_stmt_index]));
5818 gimple_assign_set_rhs2 (stmts[i], op1);
5819 update_stmt (stmts[i]);
5821 else
5823 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1),
5824 gimple_assign_lhs
5825 (stmts[i-width_active]),
5826 op1,
5827 opcode);
5828 gimple_set_visited (stmts[i], true);
5833 if (dump_file && (dump_flags & TDF_DETAILS))
5835 fprintf (dump_file, " into ");
5836 print_gimple_stmt (dump_file, stmts[i], 0);
5840 remove_visited_stmt_chain (last_rhs1);
5843 /* Transform STMT, which is really (A + B) + (C + D), into the left
5844 linear form, ((A + B) + C) + D.
5845 Recurse on D if necessary. */
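/* A minimal GIMPLE sketch with hypothetical SSA names: for
     x = a + b;  y = c + d;  s = x + y;
   a new statement t = x + d is inserted and S is rewritten into
   s = t + c, i.e. ((a + b) + d) + c, after which the now-dead
   definition of y is removed. */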
5847 static void
5848 linearize_expr (gimple *stmt)
5850 gimple_stmt_iterator gsi;
5851 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5852 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5853 gimple *oldbinrhs = binrhs;
5854 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5855 gimple *newbinrhs = NULL;
5856 class loop *loop = loop_containing_stmt (stmt);
5857 tree lhs = gimple_assign_lhs (stmt);
5859 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5860 && is_reassociable_op (binrhs, rhscode, loop));
5862 gsi = gsi_for_stmt (stmt);
5864 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5865 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5866 gimple_assign_rhs_code (binrhs),
5867 gimple_assign_lhs (binlhs),
5868 gimple_assign_rhs2 (binrhs));
5869 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5870 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5871 gimple_set_uid (binrhs, gimple_uid (stmt));
5873 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5874 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5876 if (dump_file && (dump_flags & TDF_DETAILS))
5878 fprintf (dump_file, "Linearized: ");
5879 print_gimple_stmt (dump_file, stmt, 0);
5882 reassociate_stats.linearized++;
5883 update_stmt (stmt);
5885 gsi = gsi_for_stmt (oldbinrhs);
5886 reassoc_remove_stmt (&gsi);
5887 release_defs (oldbinrhs);
5889 gimple_set_visited (stmt, true);
5890 gimple_set_visited (binlhs, true);
5891 gimple_set_visited (binrhs, true);
5893 /* Tail recurse on the new rhs if it still needs reassociation. */
5894 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5895 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5896 want to change the algorithm while converting to tuples. */
5897 linearize_expr (stmt);
5900 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5901 it. Otherwise, return NULL. */
5903 static gimple *
5904 get_single_immediate_use (tree lhs)
5906 use_operand_p immuse;
5907 gimple *immusestmt;
5909 if (TREE_CODE (lhs) == SSA_NAME
5910 && single_imm_use (lhs, &immuse, &immusestmt)
5911 && is_gimple_assign (immusestmt))
5912 return immusestmt;
5914 return NULL;
5917 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5918 representing the negated value. Insertions of any necessary
5919 instructions go before GSI.
5920 This function is recursive in that, if you hand it "a_5" as the
5921 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5922 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
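/* A sketch of the result with hypothetical SSA names: negating a_5
   where a_5 = b_3 + b_4 emits something like
     t_6 = -b_3;
     t_7 = -b_4;
     a_8 = t_6 + t_7;
   and returns a_8; the old definition of a_5 is marked visited and
   left for later clean-up. */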
5924 static tree
5925 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5927 gimple *negatedefstmt = NULL;
5928 tree resultofnegate;
5929 gimple_stmt_iterator gsi;
5930 unsigned int uid;
5932 /* If we are trying to negate a name, defined by an add, negate the
5933 add operands instead. */
5934 if (TREE_CODE (tonegate) == SSA_NAME)
5935 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5936 if (TREE_CODE (tonegate) == SSA_NAME
5937 && is_gimple_assign (negatedefstmt)
5938 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5939 && has_single_use (gimple_assign_lhs (negatedefstmt))
5940 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5942 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5943 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5944 tree lhs = gimple_assign_lhs (negatedefstmt);
5945 gimple *g;
5947 gsi = gsi_for_stmt (negatedefstmt);
5948 rhs1 = negate_value (rhs1, &gsi);
5950 gsi = gsi_for_stmt (negatedefstmt);
5951 rhs2 = negate_value (rhs2, &gsi);
5953 gsi = gsi_for_stmt (negatedefstmt);
5954 lhs = make_ssa_name (TREE_TYPE (lhs));
5955 gimple_set_visited (negatedefstmt, true);
5956 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5957 gimple_set_uid (g, gimple_uid (negatedefstmt));
5958 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5959 return lhs;
5962 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5963 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5964 NULL_TREE, true, GSI_SAME_STMT);
5965 gsi = *gsip;
5966 uid = gimple_uid (gsi_stmt (gsi));
5967 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5969 gimple *stmt = gsi_stmt (gsi);
5970 if (gimple_uid (stmt) != 0)
5971 break;
5972 gimple_set_uid (stmt, uid);
5974 return resultofnegate;
5977 /* Return true if we should break up the subtract in STMT into an add
5978 with negate. This is true when the subtract operands are really
5979 adds, or the subtract itself is used in an add expression. In
5980 either case, breaking up the subtract into an add with negate
5981 exposes the adds to reassociation. */
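/* For example, with hypothetical statements
     c = a - b;
     d = c + e;
   C is used only in an addition, so the subtract is worth breaking up
   into c = a + -b, exposing the add to reassociation. */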
5983 static bool
5984 should_break_up_subtract (gimple *stmt)
5986 tree lhs = gimple_assign_lhs (stmt);
5987 tree binlhs = gimple_assign_rhs1 (stmt);
5988 tree binrhs = gimple_assign_rhs2 (stmt);
5989 gimple *immusestmt;
5990 class loop *loop = loop_containing_stmt (stmt);
5992 if (TREE_CODE (binlhs) == SSA_NAME
5993 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5994 return true;
5996 if (TREE_CODE (binrhs) == SSA_NAME
5997 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5998 return true;
6000 if (TREE_CODE (lhs) == SSA_NAME
6001 && (immusestmt = get_single_immediate_use (lhs))
6002 && is_gimple_assign (immusestmt)
6003 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
6004 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
6005 && gimple_assign_rhs1 (immusestmt) == lhs)
6006 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
6007 return true;
6008 return false;
6011 /* Transform STMT from A - B into A + -B. */
6013 static void
6014 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
6016 tree rhs1 = gimple_assign_rhs1 (stmt);
6017 tree rhs2 = gimple_assign_rhs2 (stmt);
6019 if (dump_file && (dump_flags & TDF_DETAILS))
6021 fprintf (dump_file, "Breaking up subtract ");
6022 print_gimple_stmt (dump_file, stmt, 0);
6025 rhs2 = negate_value (rhs2, gsip);
6026 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
6027 update_stmt (stmt);
6030 /* Determine whether STMT is a builtin call that raises an SSA name
6031 to an integer power and has only one use. If so, and this is early
6032 reassociation and unsafe math optimizations are permitted, place
6033 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
6034 If any of these conditions does not hold, return FALSE. */
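/* Hypothetical examples: pow (x_1, 4.0) yields *BASE = x_1 and
   *EXPONENT = 4; pow (x_1, 3.5) is rejected because the exponent is
   not integral; powi (x_1, 1) is rejected because exponents below 2
   are not worth expanding here. */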
6036 static bool
6037 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
6039 tree arg1;
6040 REAL_VALUE_TYPE c, cint;
6042 switch (gimple_call_combined_fn (stmt))
6044 CASE_CFN_POW:
6045 if (flag_errno_math)
6046 return false;
6048 *base = gimple_call_arg (stmt, 0);
6049 arg1 = gimple_call_arg (stmt, 1);
6051 if (TREE_CODE (arg1) != REAL_CST)
6052 return false;
6054 c = TREE_REAL_CST (arg1);
6056 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
6057 return false;
6059 *exponent = real_to_integer (&c);
6060 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
6061 if (!real_identical (&c, &cint))
6062 return false;
6064 break;
6066 CASE_CFN_POWI:
6067 *base = gimple_call_arg (stmt, 0);
6068 arg1 = gimple_call_arg (stmt, 1);
6070 if (!tree_fits_shwi_p (arg1))
6071 return false;
6073 *exponent = tree_to_shwi (arg1);
6074 break;
6076 default:
6077 return false;
6080 /* Expanding negative exponents is generally unproductive, so we don't
6081 complicate matters with those. Exponents of zero and one should
6082 have been handled by expression folding. */
6083 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
6084 return false;
6086 return true;
6089 /* Try to derive and add operand entry for OP to *OPS. Return false if
6090 unsuccessful. */
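/* Two illustrative cases with hypothetical operands: in a MULT_EXPR
   chain, an operand defined by t_3 = __builtin_powi (x_1, 3) is (with
   powi insertion and unsafe math optimizations enabled, as the
   conditions below require) recorded as repeated factor x_1 with
   count 3, and an operand defined by t_4 = -y_2 is replaced by the
   pair { y_2, -1 } so the negation can be reassociated like any other
   factor. */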
6092 static bool
6093 try_special_add_to_ops (vec<operand_entry *> *ops,
6094 enum tree_code code,
6095 tree op, gimple* def_stmt)
6097 tree base = NULL_TREE;
6098 HOST_WIDE_INT exponent = 0;
6100 if (TREE_CODE (op) != SSA_NAME
6101 || ! has_single_use (op))
6102 return false;
6104 if (code == MULT_EXPR
6105 && reassoc_insert_powi_p
6106 && flag_unsafe_math_optimizations
6107 && is_gimple_call (def_stmt)
6108 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
6110 add_repeat_to_ops_vec (ops, base, exponent);
6111 gimple_set_visited (def_stmt, true);
6112 return true;
6114 else if (code == MULT_EXPR
6115 && is_gimple_assign (def_stmt)
6116 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
6117 && !HONOR_SNANS (TREE_TYPE (op))
6118 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
6119 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op)))
6120 && (!FLOAT_TYPE_P (TREE_TYPE (op))
6121 || !DECIMAL_FLOAT_MODE_P (element_mode (op))))
6123 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6124 tree cst = build_minus_one_cst (TREE_TYPE (op));
6125 add_to_ops_vec (ops, rhs1);
6126 add_to_ops_vec (ops, cst);
6127 gimple_set_visited (def_stmt, true);
6128 return true;
6131 return false;
6134 /* Recursively linearize a binary expression that is the RHS of STMT.
6135 Place the operands of the expression tree in the vector named OPS. */
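/* E.g., with hypothetical SSA names, for s = x + y where x = a + b is
   reassociable and y is a leaf, the recursion walks the reassociable
   rhs1 chain and OPS ends up holding the leaves a, b and y; the caller
   sorts OPS by rank afterwards. */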
6137 static void
6138 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
6139 bool is_associative, bool set_visited)
6141 tree binlhs = gimple_assign_rhs1 (stmt);
6142 tree binrhs = gimple_assign_rhs2 (stmt);
6143 gimple *binlhsdef = NULL, *binrhsdef = NULL;
6144 bool binlhsisreassoc = false;
6145 bool binrhsisreassoc = false;
6146 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
6147 class loop *loop = loop_containing_stmt (stmt);
6149 if (set_visited)
6150 gimple_set_visited (stmt, true);
6152 if (TREE_CODE (binlhs) == SSA_NAME)
6154 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
6155 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
6156 && !stmt_could_throw_p (cfun, binlhsdef));
6159 if (TREE_CODE (binrhs) == SSA_NAME)
6161 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
6162 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
6163 && !stmt_could_throw_p (cfun, binrhsdef));
6166 /* If the LHS is not reassociable, but the RHS is, we need to swap
6167 them. If neither is reassociable, there is nothing we can do, so
6168 just put them in the ops vector. If the LHS is reassociable,
6169 linearize it. If both are reassociable, then linearize the RHS
6170 and the LHS. */
6172 if (!binlhsisreassoc)
6174 /* If this is not an associative operation, like division, give up. */
6175 if (!is_associative)
6177 add_to_ops_vec (ops, binrhs);
6178 return;
6181 if (!binrhsisreassoc)
6183 bool swap = false;
6184 if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
6185 /* If we add ops for the rhs we expect to be able to recurse
6186 to it via the lhs during expression rewrite, so swap
6187 the operands. */
6188 swap = true;
6189 else
6190 add_to_ops_vec (ops, binrhs);
6192 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
6193 add_to_ops_vec (ops, binlhs);
6195 if (!swap)
6196 return;
6199 if (dump_file && (dump_flags & TDF_DETAILS))
6201 fprintf (dump_file, "swapping operands of ");
6202 print_gimple_stmt (dump_file, stmt, 0);
6205 swap_ssa_operands (stmt,
6206 gimple_assign_rhs1_ptr (stmt),
6207 gimple_assign_rhs2_ptr (stmt));
6208 update_stmt (stmt);
6210 if (dump_file && (dump_flags & TDF_DETAILS))
6212 fprintf (dump_file, " is now ");
6213 print_gimple_stmt (dump_file, stmt, 0);
6215 if (!binrhsisreassoc)
6216 return;
6218 /* We want to make it so the lhs is always the reassociative op,
6219 so swap. */
6220 std::swap (binlhs, binrhs);
6222 else if (binrhsisreassoc)
6224 linearize_expr (stmt);
6225 binlhs = gimple_assign_rhs1 (stmt);
6226 binrhs = gimple_assign_rhs2 (stmt);
6229 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
6230 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
6231 rhscode, loop));
6232 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
6233 is_associative, set_visited);
6235 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
6236 add_to_ops_vec (ops, binrhs);
6239 /* Repropagate the negates back into subtracts, since no other pass
6240 currently does it. */
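/* For instance, with hypothetical statements, x = -a; y = b + x
   becomes y = b - a, while x = -a; y = x - b is turned into
   t = a + b; y = -t so that the negate keeps sinking and may combine
   with a later operation. */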
6242 static void
6243 repropagate_negates (void)
6245 unsigned int i = 0;
6246 tree negate;
6248 FOR_EACH_VEC_ELT (plus_negates, i, negate)
6250 gimple *user = get_single_immediate_use (negate);
6251 if (!user || !is_gimple_assign (user))
6252 continue;
6254 tree negateop = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
6255 if (TREE_CODE (negateop) == SSA_NAME
6256 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (negateop))
6257 continue;
6259 /* The negate operand can be either operand of a PLUS_EXPR
6260 (it can be the LHS if the RHS is a constant for example).
6262 Force the negate operand to the RHS of the PLUS_EXPR, then
6263 transform the PLUS_EXPR into a MINUS_EXPR. */
6264 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
6266 /* If the negated operand appears on the LHS of the
6267 PLUS_EXPR, exchange the operands of the PLUS_EXPR
6268 to force the negated operand to the RHS of the PLUS_EXPR. */
6269 if (gimple_assign_rhs1 (user) == negate)
6271 swap_ssa_operands (user,
6272 gimple_assign_rhs1_ptr (user),
6273 gimple_assign_rhs2_ptr (user));
6276 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
6277 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
6278 if (gimple_assign_rhs2 (user) == negate)
6280 tree rhs1 = gimple_assign_rhs1 (user);
6281 gimple_stmt_iterator gsi = gsi_for_stmt (user);
6282 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1,
6283 negateop);
6284 update_stmt (user);
6287 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
6289 if (gimple_assign_rhs1 (user) == negate)
6291 /* We have
6292 x = -negateop
6293 y = x - b
6294 which we transform into
6295 x = negateop + b
6296 y = -x .
6297 This pushes down the negate which we possibly can merge
6298 into some other operation, hence insert it into the
6299 plus_negates vector. */
6300 gimple *feed = SSA_NAME_DEF_STMT (negate);
6301 tree b = gimple_assign_rhs2 (user);
6302 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
6303 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
6304 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
6305 gimple *g = gimple_build_assign (x, PLUS_EXPR, negateop, b);
6306 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
6307 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
6308 user = gsi_stmt (gsi2);
6309 update_stmt (user);
6310 reassoc_remove_stmt (&gsi);
6311 release_defs (feed);
6312 plus_negates.safe_push (gimple_assign_lhs (user));
6314 else
6316 /* Transform "x = -negateop; y = b - x" into "y = b + negateop",
6317 getting rid of one operation. */
6318 tree rhs1 = gimple_assign_rhs1 (user);
6319 gimple_stmt_iterator gsi = gsi_for_stmt (user);
6320 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, negateop);
6321 update_stmt (gsi_stmt (gsi));
6327 /* Break up subtract operations in block BB.
6329 We do this top down because we don't know whether the subtract is
6330 part of a possible chain of reassociation except at the top.
6332 IE given
6333 d = f + g
6334 c = a + e
6335 b = c - d
6336 q = b - r
6337 k = t - q
6339 we want to break up k = t - q, but we won't until we've transformed q
6340 = b - r, which won't be broken up until we transform b = c - d.
6342 En passant, clear the GIMPLE visited flag on every statement
6343 and set UIDs within each basic block. */
6345 static void
6346 break_up_subtract_bb (basic_block bb)
6348 gimple_stmt_iterator gsi;
6349 basic_block son;
6350 unsigned int uid = 1;
6352 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6354 gimple *stmt = gsi_stmt (gsi);
6355 gimple_set_visited (stmt, false);
6356 gimple_set_uid (stmt, uid++);
6358 if (!is_gimple_assign (stmt)
6359 || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt)))
6360 || !can_reassociate_op_p (gimple_assign_lhs (stmt)))
6361 continue;
6363 /* Look for simple gimple subtract operations. */
6364 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
6366 if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt))
6367 || !can_reassociate_op_p (gimple_assign_rhs2 (stmt)))
6368 continue;
6370 /* Check for a subtract used only in an addition. If this
6371 is the case, transform it into add of a negate for better
6372 reassociation. IE transform C = A-B into C = A + -B if C
6373 is only used in an addition. */
6374 if (should_break_up_subtract (stmt))
6375 break_up_subtract (stmt, &gsi);
6377 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
6378 && can_reassociate_op_p (gimple_assign_rhs1 (stmt)))
6379 plus_negates.safe_push (gimple_assign_lhs (stmt));
6381 for (son = first_dom_son (CDI_DOMINATORS, bb);
6382 son;
6383 son = next_dom_son (CDI_DOMINATORS, son))
6384 break_up_subtract_bb (son);
6387 /* Used for repeated factor analysis. */
6388 struct repeat_factor
6390 /* An SSA name that occurs in a multiply chain. */
6391 tree factor;
6393 /* Cached rank of the factor. */
6394 unsigned rank;
6396 /* Number of occurrences of the factor in the chain. */
6397 HOST_WIDE_INT count;
6399 /* An SSA name representing the product of this factor and
6400 all factors appearing later in the repeated factor vector. */
6401 tree repr;
6405 static vec<repeat_factor> repeat_factor_vec;
6407 /* Used for sorting the repeat factor vector. Sort primarily by
6408 ascending occurrence count, secondarily by descending rank. */
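/* E.g., hypothetical factors {x: count 2, rank 5}, {y: count 3, rank 1}
   and {z: count 2, rank 9} sort to z, x, y: z and x share the lower
   count, and z has the higher rank. */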
6410 static int
6411 compare_repeat_factors (const void *x1, const void *x2)
6413 const repeat_factor *rf1 = (const repeat_factor *) x1;
6414 const repeat_factor *rf2 = (const repeat_factor *) x2;
6416 if (rf1->count != rf2->count)
6417 return rf1->count - rf2->count;
6419 return rf2->rank - rf1->rank;
6422 /* Look for repeated operands in OPS in the multiply tree rooted at
6423 STMT. Replace them with an optimal sequence of multiplies and powi
6424 builtin calls, and remove the used operands from OPS. Return an
6425 SSA name representing the value of the replacement sequence. */
6427 static tree
6428 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
6430 unsigned i, j, vec_len;
6431 int ii;
6432 operand_entry *oe;
6433 repeat_factor *rf1, *rf2;
6434 repeat_factor rfnew;
6435 tree result = NULL_TREE;
6436 tree target_ssa, iter_result;
6437 tree type = TREE_TYPE (gimple_get_lhs (stmt));
6438 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
6439 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
6440 gimple *mul_stmt, *pow_stmt;
6442 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
6443 target, unless type is integral. */
6444 if (!powi_fndecl && !INTEGRAL_TYPE_P (type))
6445 return NULL_TREE;
6447 /* Allocate the repeated factor vector. */
6448 repeat_factor_vec.create (10);
6450 /* Scan the OPS vector for all SSA names in the product and build
6451 up a vector of occurrence counts for each factor. */
6452 FOR_EACH_VEC_ELT (*ops, i, oe)
6454 if (TREE_CODE (oe->op) == SSA_NAME)
6456 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6458 if (rf1->factor == oe->op)
6460 rf1->count += oe->count;
6461 break;
6465 if (j >= repeat_factor_vec.length ())
6467 rfnew.factor = oe->op;
6468 rfnew.rank = oe->rank;
6469 rfnew.count = oe->count;
6470 rfnew.repr = NULL_TREE;
6471 repeat_factor_vec.safe_push (rfnew);
6476 /* Sort the repeated factor vector by (a) increasing occurrence count,
6477 and (b) decreasing rank. */
6478 repeat_factor_vec.qsort (compare_repeat_factors);
6480 /* It is generally best to combine as many base factors as possible
6481 into a product before applying __builtin_powi to the result.
6482 However, the sort order chosen for the repeated factor vector
6483 allows us to cache partial results for the product of the base
6484 factors for subsequent use. When we already have a cached partial
6485 result from a previous iteration, it is best to make use of it
6486 before looking for another __builtin_pow opportunity.
6488 As an example, consider x * x * y * y * y * z * z * z * z.
6489 We want to first compose the product x * y * z, raise it to the
6490 second power, then multiply this by y * z, and finally multiply
6491 by z. This can be done in 5 multiplies provided we cache y * z
6492 for use in both expressions:
6494 t1 = y * z
6495 t2 = t1 * x
6496 t3 = t2 * t2
6497 t4 = t1 * t3
6498 result = t4 * z
6500 If we instead ignored the cached y * z and first multiplied by
6501 the __builtin_pow opportunity z * z, we would get the inferior:
6503 t1 = y * z
6504 t2 = t1 * x
6505 t3 = t2 * t2
6506 t4 = z * z
6507 t5 = t3 * t4
6508 result = t5 * y */
6510 vec_len = repeat_factor_vec.length ();
6512 /* Repeatedly look for opportunities to create a builtin_powi call. */
6513 while (true)
6515 HOST_WIDE_INT power;
6517 /* First look for the largest cached product of factors from
6518 preceding iterations. If found, create a builtin_powi for
6519 it if the minimum occurrence count for its factors is at
6520 least 2, or just use this cached product as our next
6521 multiplicand if the minimum occurrence count is 1. */
6522 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6524 if (rf1->repr && rf1->count > 0)
6525 break;
6528 if (j < vec_len)
6530 power = rf1->count;
6532 if (power == 1)
6534 iter_result = rf1->repr;
6536 if (dump_file && (dump_flags & TDF_DETAILS))
6538 unsigned elt;
6539 repeat_factor *rf;
6540 fputs ("Multiplying by cached product ", dump_file);
6541 for (elt = j; elt < vec_len; elt++)
6543 rf = &repeat_factor_vec[elt];
6544 print_generic_expr (dump_file, rf->factor);
6545 if (elt < vec_len - 1)
6546 fputs (" * ", dump_file);
6548 fputs ("\n", dump_file);
6551 else
6553 if (INTEGRAL_TYPE_P (type))
6555 gcc_assert (power > 1);
6556 gimple_stmt_iterator gsip = gsi;
6557 gsi_prev (&gsip);
6558 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6559 rf1->repr, power);
6560 gimple_stmt_iterator gsic = gsi;
6561 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6563 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6564 gimple_set_visited (gsi_stmt (gsic), true);
6565 gsi_prev (&gsic);
6568 else
6570 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6571 pow_stmt
6572 = gimple_build_call (powi_fndecl, 2, rf1->repr,
6573 build_int_cst (integer_type_node,
6574 power));
6575 gimple_call_set_lhs (pow_stmt, iter_result);
6576 gimple_set_location (pow_stmt, gimple_location (stmt));
6577 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6578 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6581 if (dump_file && (dump_flags & TDF_DETAILS))
6583 unsigned elt;
6584 repeat_factor *rf;
6585 fputs ("Building __builtin_pow call for cached product (",
6586 dump_file);
6587 for (elt = j; elt < vec_len; elt++)
6589 rf = &repeat_factor_vec[elt];
6590 print_generic_expr (dump_file, rf->factor);
6591 if (elt < vec_len - 1)
6592 fputs (" * ", dump_file);
6594 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
6595 power);
6599 else
6601 /* Otherwise, find the first factor in the repeated factor
6602 vector whose occurrence count is at least 2. If no such
6603 factor exists, there are no builtin_powi opportunities
6604 remaining. */
6605 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6607 if (rf1->count >= 2)
6608 break;
6611 if (j >= vec_len)
6612 break;
6614 power = rf1->count;
6616 if (dump_file && (dump_flags & TDF_DETAILS))
6618 unsigned elt;
6619 repeat_factor *rf;
6620 fputs ("Building __builtin_pow call for (", dump_file);
6621 for (elt = j; elt < vec_len; elt++)
6623 rf = &repeat_factor_vec[elt];
6624 print_generic_expr (dump_file, rf->factor);
6625 if (elt < vec_len - 1)
6626 fputs (" * ", dump_file);
6628 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
6631 reassociate_stats.pows_created++;
6633 /* Visit each element of the vector in reverse order (so that
6634 high-occurrence elements are visited first, and within the
6635 same occurrence count, lower-ranked elements are visited
6636 first). Form a linear product of all elements in this order
6637 whose occurrence count is at least that of element J.
6638 Record the SSA name representing the product of each element
6639 with all subsequent elements in the vector. */
6640 if (j == vec_len - 1)
6641 rf1->repr = rf1->factor;
6642 else
6644 for (ii = vec_len - 2; ii >= (int)j; ii--)
6646 tree op1, op2;
6648 rf1 = &repeat_factor_vec[ii];
6649 rf2 = &repeat_factor_vec[ii + 1];
6651 /* Init the last factor's representative to be itself. */
6652 if (!rf2->repr)
6653 rf2->repr = rf2->factor;
6655 op1 = rf1->factor;
6656 op2 = rf2->repr;
6658 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
6659 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
6660 op1, op2);
6661 gimple_set_location (mul_stmt, gimple_location (stmt));
6662 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6663 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6664 rf1->repr = target_ssa;
6666 /* Don't reprocess the multiply we just introduced. */
6667 gimple_set_visited (mul_stmt, true);
6671 /* Form a call to __builtin_powi for the maximum product
6672 just formed, raised to the power obtained earlier. */
6673 rf1 = &repeat_factor_vec[j];
6674 if (INTEGRAL_TYPE_P (type))
6676 gcc_assert (power > 1);
6677 gimple_stmt_iterator gsip = gsi;
6678 gsi_prev (&gsip);
6679 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6680 rf1->repr, power);
6681 gimple_stmt_iterator gsic = gsi;
6682 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6684 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6685 gimple_set_visited (gsi_stmt (gsic), true);
6686 gsi_prev (&gsic);
6689 else
6691 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6692 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6693 build_int_cst (integer_type_node,
6694 power));
6695 gimple_call_set_lhs (pow_stmt, iter_result);
6696 gimple_set_location (pow_stmt, gimple_location (stmt));
6697 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6698 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6702 /* If we previously formed at least one other builtin_powi call,
6703 form the product of this one and those others. */
6704 if (result)
6706 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6707 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6708 result, iter_result);
6709 gimple_set_location (mul_stmt, gimple_location (stmt));
6710 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6711 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6712 gimple_set_visited (mul_stmt, true);
6713 result = new_result;
6715 else
6716 result = iter_result;
6718 /* Decrement the occurrence count of each element in the product
6719 by the count found above, and remove this many copies of each
6720 factor from OPS. */
6721 for (i = j; i < vec_len; i++)
6723 unsigned k = power;
6724 unsigned n;
6726 rf1 = &repeat_factor_vec[i];
6727 rf1->count -= power;
6729 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6731 if (oe->op == rf1->factor)
6733 if (oe->count <= k)
6735 ops->ordered_remove (n);
6736 k -= oe->count;
6738 if (k == 0)
6739 break;
6741 else
6743 oe->count -= k;
6744 break;
6751 /* At this point all elements in the repeated factor vector have a
6752 remaining occurrence count of 0 or 1, and those with a count of 1
6753 don't have cached representatives. Re-sort the ops vector and
6754 clean up. */
6755 ops->qsort (sort_by_operand_rank);
6756 repeat_factor_vec.release ();
6758 /* Return the final product computed herein. Note that there may
6759 still be some elements with single occurrence count left in OPS;
6760 those will be handled by the normal reassociation logic. */
6761 return result;
6764 /* Attempt to optimize
6765 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6766 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
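/* A worked example with hypothetical constants: 2.5 * copysign (3.0, y)
   becomes copysign (7.5, y); if the multiplier were negative, e.g. -2.5,
   the rebuilt copysign call would additionally be negated. */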
6768 static void
6769 attempt_builtin_copysign (vec<operand_entry *> *ops)
6771 operand_entry *oe;
6772 unsigned int i;
6773 unsigned int length = ops->length ();
6774 tree cst = ops->last ()->op;
6776 if (length == 1 || TREE_CODE (cst) != REAL_CST)
6777 return;
6779 FOR_EACH_VEC_ELT (*ops, i, oe)
6781 if (TREE_CODE (oe->op) == SSA_NAME
6782 && has_single_use (oe->op))
6784 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6785 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6787 tree arg0, arg1;
6788 switch (gimple_call_combined_fn (old_call))
6790 CASE_CFN_COPYSIGN:
6791 CASE_CFN_COPYSIGN_FN:
6792 arg0 = gimple_call_arg (old_call, 0);
6793 arg1 = gimple_call_arg (old_call, 1);
6794 /* The first argument of copysign must be a constant,
6795 otherwise there's nothing to do. */
6796 if (TREE_CODE (arg0) == REAL_CST)
6798 tree type = TREE_TYPE (arg0);
6799 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6800 /* If we couldn't fold to a single constant, skip it.
6801 That happens e.g. for inexact multiplication when
6802 -frounding-math. */
6803 if (mul == NULL_TREE)
6804 break;
6805 /* Instead of adjusting OLD_CALL, let's build a new
6806 call to not leak the LHS and prevent keeping bogus
6807 debug statements. DCE will clean up the old call. */
6808 gcall *new_call;
6809 if (gimple_call_internal_p (old_call))
6810 new_call = gimple_build_call_internal
6811 (IFN_COPYSIGN, 2, mul, arg1);
6812 else
6813 new_call = gimple_build_call
6814 (gimple_call_fndecl (old_call), 2, mul, arg1);
6815 tree lhs = make_ssa_name (type);
6816 gimple_call_set_lhs (new_call, lhs);
6817 gimple_set_location (new_call,
6818 gimple_location (old_call));
6819 insert_stmt_after (new_call, old_call);
6820 /* We've used the constant, get rid of it. */
6821 ops->pop ();
6822 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6823 /* Handle the CST1 < 0 case by negating the result. */
6824 if (cst1_neg)
6826 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6827 gimple *negate_stmt
6828 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6829 insert_stmt_after (negate_stmt, new_call);
6830 oe->op = negrhs;
6832 else
6833 oe->op = lhs;
6834 if (dump_file && (dump_flags & TDF_DETAILS))
6836 fprintf (dump_file, "Optimizing copysign: ");
6837 print_generic_expr (dump_file, cst);
6838 fprintf (dump_file, " * COPYSIGN (");
6839 print_generic_expr (dump_file, arg0);
6840 fprintf (dump_file, ", ");
6841 print_generic_expr (dump_file, arg1);
6842 fprintf (dump_file, ") into %sCOPYSIGN (",
6843 cst1_neg ? "-" : "");
6844 print_generic_expr (dump_file, mul);
6845 fprintf (dump_file, ", ");
6846 print_generic_expr (dump_file, arg1);
6847 fprintf (dump_file, ")\n");
6849 return;
6851 break;
6852 default:
6853 break;
6860 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6862 static void
6863 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6865 tree rhs1;
6867 if (dump_file && (dump_flags & TDF_DETAILS))
6869 fprintf (dump_file, "Transforming ");
6870 print_gimple_stmt (dump_file, stmt, 0);
6873 rhs1 = gimple_assign_rhs1 (stmt);
6874 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6875 update_stmt (stmt);
6876 remove_visited_stmt_chain (rhs1);
6878 if (dump_file && (dump_flags & TDF_DETAILS))
6880 fprintf (dump_file, " into ");
6881 print_gimple_stmt (dump_file, stmt, 0);
6885 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6887 static void
6888 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6889 tree rhs1, tree rhs2)
6891 if (dump_file && (dump_flags & TDF_DETAILS))
6893 fprintf (dump_file, "Transforming ");
6894 print_gimple_stmt (dump_file, stmt, 0);
6897 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6898 update_stmt (gsi_stmt (*gsi));
6899 remove_visited_stmt_chain (rhs1);
6901 if (dump_file && (dump_flags & TDF_DETAILS))
6903 fprintf (dump_file, " into ");
6904 print_gimple_stmt (dump_file, stmt, 0);
6908 /* Rearranging the ops can expose more FMAs when the chain contains more
6909 than 2 FMAs. Put non-mult ops and mult ops alternately at the end of
6910 the queue, which is conducive to generating more FMAs and reducing the
6911 loss of FMAs when breaking the chain.
6912 E.g.
6913 a * b + c * d + e generates:
6915 _4 = c_9(D) * d_10(D);
6916 _12 = .FMA (a_7(D), b_8(D), _4);
6917 _11 = e_6(D) + _12;
6919 Rearrange ops to -> e + a * b + c * d generates:
6921 _4 = .FMA (c_7(D), d_8(D), _3);
6922 _11 = .FMA (a_5(D), b_6(D), _4);
6924 Return the number of MULT_EXPRs in the chain. */
6925 static int
6926 rank_ops_for_fma (vec<operand_entry *> *ops)
6928 operand_entry *oe;
6929 unsigned int i;
6930 unsigned int ops_length = ops->length ();
6931 auto_vec<operand_entry *> ops_mult;
6932 auto_vec<operand_entry *> ops_others;
6934 FOR_EACH_VEC_ELT (*ops, i, oe)
6936 if (TREE_CODE (oe->op) == SSA_NAME)
6938 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6939 if (is_gimple_assign (def_stmt))
6941 if (gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
6942 ops_mult.safe_push (oe);
6943 /* A negate on the multiplication leads to FNMA. */
6944 else if (gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
6945 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
6947 gimple *neg_def_stmt
6948 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
6949 if (is_gimple_assign (neg_def_stmt)
6950 && gimple_bb (neg_def_stmt) == gimple_bb (def_stmt)
6951 && gimple_assign_rhs_code (neg_def_stmt) == MULT_EXPR)
6952 ops_mult.safe_push (oe);
6953 else
6954 ops_others.safe_push (oe);
6956 else
6957 ops_others.safe_push (oe);
6959 else
6960 ops_others.safe_push (oe);
6962 else
6963 ops_others.safe_push (oe);
6965 /* 1. When ops_mult.length == 2, as in the following case,
6967 a * b + c * d + e,
6969 we need to rearrange the ops.
6971 Putting ops that are not defined by a mult in front can generate more FMAs.
6973 2. If all ops are defined by mults, we don't need to rearrange them. */
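/* A sketch with hypothetical entries: for ops_mult = {m1, m2} and
   ops_others = {o1, o2, o3}, the code below rebuilds OPS as
   {o1, m1, o2, m2, o3}, so the end of the queue alternates between
   non-mult and mult operands as described above. */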
6974 unsigned mult_num = ops_mult.length ();
6975 if (mult_num >= 2 && mult_num != ops_length)
6977 /* Put non-mult ops and mult ops alternately at the end of the
6978 queue, which is conducive to generating more FMAs and reducing the
6979 loss of FMAs when breaking the chain. */
6980 ops->truncate (0);
6981 ops->splice (ops_mult);
6982 int j, opindex = ops->length ();
6983 int others_length = ops_others.length ();
6984 for (j = 0; j < others_length; j++)
6986 oe = ops_others.pop ();
6987 ops->quick_insert (opindex, oe);
6988 if (opindex > 0)
6989 opindex--;
6992 return mult_num;
6994 /* Reassociate expressions in basic block BB and its children in the
6995 post-dominator tree.
6997 Bubble up return status from maybe_optimize_range_tests. */
6999 static bool
7000 reassociate_bb (basic_block bb)
7002 gimple_stmt_iterator gsi;
7003 basic_block son;
7004 gimple *stmt = last_nondebug_stmt (bb);
7005 bool cfg_cleanup_needed = false;
7007 if (stmt && !gimple_visited_p (stmt))
7008 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
7010 bool do_prev = false;
7011 for (gsi = gsi_last_bb (bb);
7012 !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
7014 do_prev = true;
7015 stmt = gsi_stmt (gsi);
7017 if (is_gimple_assign (stmt)
7018 && !stmt_could_throw_p (cfun, stmt))
7020 tree lhs, rhs1, rhs2;
7021 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
7023 /* If this was part of an already processed statement,
7024 we don't need to touch it again. */
7025 if (gimple_visited_p (stmt))
7027 /* This statement might have become dead because of previous
7028 reassociations. */
7029 if (has_zero_uses (gimple_get_lhs (stmt)))
7031 reassoc_remove_stmt (&gsi);
7032 release_defs (stmt);
7033 /* We might end up removing the last stmt above which
7034 places the iterator to the end of the sequence.
7035 Reset it to the last stmt in this case and make sure
7036 we don't do gsi_prev in that case. */
7037 if (gsi_end_p (gsi))
7039 gsi = gsi_last_bb (bb);
7040 do_prev = false;
7043 continue;
7046 /* If this is not a gimple binary expression, there is
7047 nothing for us to do with it. */
7048 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
7049 continue;
7051 lhs = gimple_assign_lhs (stmt);
7052 rhs1 = gimple_assign_rhs1 (stmt);
7053 rhs2 = gimple_assign_rhs2 (stmt);
7055 /* For non-bit or min/max operations we can't associate
7056 all types. Verify that here. */
7057 if ((rhs_code != BIT_IOR_EXPR
7058 && rhs_code != BIT_AND_EXPR
7059 && rhs_code != BIT_XOR_EXPR
7060 && rhs_code != MIN_EXPR
7061 && rhs_code != MAX_EXPR
7062 && !can_reassociate_type_p (TREE_TYPE (lhs)))
7063 || !can_reassociate_op_p (rhs1)
7064 || !can_reassociate_op_p (rhs2))
7065 continue;
7067 if (associative_tree_code (rhs_code))
7069 auto_vec<operand_entry *> ops;
7070 tree powi_result = NULL_TREE;
7071 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
7073 /* There may be no immediate uses left by the time we
7074 get here because we may have eliminated them all. */
7075 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
7076 continue;
7078 gimple_set_visited (stmt, true);
7079 linearize_expr_tree (&ops, stmt, true, true);
7080 ops.qsort (sort_by_operand_rank);
7081 int orig_len = ops.length ();
7082 optimize_ops_list (rhs_code, &ops);
7083 if (undistribute_ops_list (rhs_code, &ops,
7084 loop_containing_stmt (stmt)))
7086 ops.qsort (sort_by_operand_rank);
7087 optimize_ops_list (rhs_code, &ops);
7089 if (undistribute_bitref_for_vector (rhs_code, &ops,
7090 loop_containing_stmt (stmt)))
7092 ops.qsort (sort_by_operand_rank);
7093 optimize_ops_list (rhs_code, &ops);
7095 if (rhs_code == PLUS_EXPR
7096 && transform_add_to_multiply (&ops))
7097 ops.qsort (sort_by_operand_rank);
7099 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
7101 if (is_vector)
7102 optimize_vec_cond_expr (rhs_code, &ops);
7103 else
7104 optimize_range_tests (rhs_code, &ops, NULL);
7107 if (rhs_code == MULT_EXPR && !is_vector)
7109 attempt_builtin_copysign (&ops);
7111 if (reassoc_insert_powi_p
7112 && (flag_unsafe_math_optimizations
7113 || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
7114 powi_result = attempt_builtin_powi (stmt, &ops);
7117 operand_entry *last;
7118 bool negate_result = false;
7119 if (ops.length () > 1
7120 && rhs_code == MULT_EXPR)
7122 last = ops.last ();
7123 if ((integer_minus_onep (last->op)
7124 || real_minus_onep (last->op))
7125 && !HONOR_SNANS (TREE_TYPE (lhs))
7126 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
7127 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
7129 ops.pop ();
7130 negate_result = true;
7134 tree new_lhs = lhs;
7135 /* If the operand vector is now empty, all operands were
7136 consumed by the __builtin_powi optimization. */
7137 if (ops.length () == 0)
7138 transform_stmt_to_copy (&gsi, stmt, powi_result);
7139 else if (ops.length () == 1)
7141 tree last_op = ops.last ()->op;
7143 /* If the stmt that defines operand has to be inserted, insert it
7144 before the use. */
7145 if (ops.last ()->stmt_to_insert)
7146 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
7147 if (powi_result)
7148 transform_stmt_to_multiply (&gsi, stmt, last_op,
7149 powi_result);
7150 else
7151 transform_stmt_to_copy (&gsi, stmt, last_op);
7153 else
7155 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
7156 int ops_num = ops.length ();
7157 int width = 0;
7158 int mult_num = 0;
7160 /* For binary bit operations, if there are at least 3
7161 operands and the last operand in OPS is a constant,
7162 move it to the front. This helps ensure that we generate
7163 (X & Y) & C rather than (X & C) & Y. The former will
7164 often match a canonical bit test when we get to RTL. */
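/* E.g., with hypothetical operands, OPS = {x, y, 0x10} for a
   BIT_AND_EXPR is reordered by the swap below to {0x10, y, x},
   steering the rewrite towards (x & y) & 0x10. */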
7165 if (ops.length () > 2
7166 && (rhs_code == BIT_AND_EXPR
7167 || rhs_code == BIT_IOR_EXPR
7168 || rhs_code == BIT_XOR_EXPR)
7169 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
7170 std::swap (*ops[0], *ops[ops_num - 1]);
7172 optimization_type opt_type = bb_optimization_type (bb);
7174 /* If the target supports FMA, rank_ops_for_fma will detect whether
7175 the chain has FMAs and rearrange the ops if so. */
7176 if (direct_internal_fn_supported_p (IFN_FMA,
7177 TREE_TYPE (lhs),
7178 opt_type)
7179 && (rhs_code == PLUS_EXPR || rhs_code == MINUS_EXPR))
7181 mult_num = rank_ops_for_fma (&ops);
7184 /* Only rewrite the expression tree to parallel in the
7185 last reassoc pass to avoid useless work back-and-forth
7186 with initial linearization. */
7187 bool has_fma = mult_num >= 2 && mult_num != ops_num;
7188 if (!reassoc_insert_powi_p
7189 && ops.length () > 3
7190 && (width = get_reassociation_width (&ops, mult_num, lhs,
7191 rhs_code, mode))
7192 > 1)
7194 if (dump_file && (dump_flags & TDF_DETAILS))
7195 fprintf (dump_file,
7196 "Width = %d was chosen for reassociation\n",
7197 width);
7198 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
7199 width,
7200 has_fma,
7201 ops);
7203 else
7205 /* When there are three operands left, we want
7206 to make sure the ones that get the double
7207 binary op are chosen wisely. */
7208 int len = ops.length ();
7209 if (len >= 3
7210 && (!has_fma
7211 /* width > 1 means ranking ops results in better
7212 parallelism. Check current value to avoid
7213 calling get_reassociation_width again. */
7214 || (width != 1
7215 && get_reassociation_width (
7216 &ops, mult_num, lhs, rhs_code, mode)
7217 > 1)))
7218 swap_ops_for_binary_stmt (ops, len - 3);
7220 new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
7221 powi_result != NULL
7222 || negate_result,
7223 len != orig_len);
7226 /* If we combined some repeated factors into a
7227 __builtin_powi call, multiply that result by the
7228 reassociated operands. */
7229 if (powi_result)
7231 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
7232 tree type = TREE_TYPE (lhs);
7233 tree target_ssa = make_temp_ssa_name (type, NULL,
7234 "reassocpow");
7235 gimple_set_lhs (lhs_stmt, target_ssa);
7236 update_stmt (lhs_stmt);
7237 if (lhs != new_lhs)
7239 target_ssa = new_lhs;
7240 new_lhs = lhs;
7242 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
7243 powi_result, target_ssa);
7244 gimple_set_location (mul_stmt, gimple_location (stmt));
7245 gimple_set_uid (mul_stmt, gimple_uid (stmt));
7246 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
7250 if (negate_result)
7252 stmt = SSA_NAME_DEF_STMT (lhs);
7253 tree tmp = make_ssa_name (TREE_TYPE (lhs));
7254 gimple_set_lhs (stmt, tmp);
7255 if (lhs != new_lhs)
7256 tmp = new_lhs;
7257 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
7258 tmp);
7259 gimple_set_uid (neg_stmt, gimple_uid (stmt));
7260 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
7261 update_stmt (stmt);
7266 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
7267 son;
7268 son = next_dom_son (CDI_POST_DOMINATORS, son))
7269 cfg_cleanup_needed |= reassociate_bb (son);
7271 return cfg_cleanup_needed;
7274 /* Add jumps around shifts for range tests turned into bit tests.
7275 For each SSA_NAME VAR we have code like:
7276 VAR = ...; // final stmt of range comparison
7277 // bit test here...;
7278 OTHERVAR = ...; // final stmt of the bit test sequence
7279 RES = VAR | OTHERVAR;
7280 Turn the above into:
7281 VAR = ...;
7282 if (VAR != 0)
7283 goto <l3>;
7284 else
7285 goto <l2>;
7286 <l2>:
7287 // bit test here...;
7288 OTHERVAR = ...;
7289 <l3>:
7290 # RES = PHI<1(l1), OTHERVAR(l2)>; */
7292 static void
7293 branch_fixup (void)
7295 tree var;
7296 unsigned int i;
7298 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
7300 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
7301 gimple *use_stmt;
7302 use_operand_p use;
7303 bool ok = single_imm_use (var, &use, &use_stmt);
7304 gcc_assert (ok
7305 && is_gimple_assign (use_stmt)
7306 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
7307 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
7309 basic_block cond_bb = gimple_bb (def_stmt);
7310 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
7311 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
7313 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7314 gimple *g = gimple_build_cond (NE_EXPR, var,
7315 build_zero_cst (TREE_TYPE (var)),
7316 NULL_TREE, NULL_TREE);
7317 location_t loc = gimple_location (use_stmt);
7318 gimple_set_location (g, loc);
7319 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
7321 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
7322 etrue->probability = profile_probability::even ();
7323 edge efalse = find_edge (cond_bb, then_bb);
7324 efalse->flags = EDGE_FALSE_VALUE;
7325 efalse->probability -= etrue->probability;
7326 then_bb->count -= etrue->count ();
7328 tree othervar = NULL_TREE;
7329 if (gimple_assign_rhs1 (use_stmt) == var)
7330 othervar = gimple_assign_rhs2 (use_stmt);
7331 else if (gimple_assign_rhs2 (use_stmt) == var)
7332 othervar = gimple_assign_rhs1 (use_stmt);
7333 else
7334 gcc_unreachable ();
7335 tree lhs = gimple_assign_lhs (use_stmt);
7336 gphi *phi = create_phi_node (lhs, merge_bb);
7337 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
7338 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
7339 gsi = gsi_for_stmt (use_stmt);
7340 gsi_remove (&gsi, true);
7342 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
7343 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
7345 reassoc_branch_fixups.release ();
7348 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
7349 void debug_ops_vector (vec<operand_entry *> ops);
7351 /* Dump the operand entry vector OPS to FILE. */
7353 void
7354 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
7356 operand_entry *oe;
7357 unsigned int i;
7359 FOR_EACH_VEC_ELT (ops, i, oe)
7361 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
7362 print_generic_expr (file, oe->op);
7363 fprintf (file, "\n");
7367 /* Dump the operand entry vector OPS to STDERR. */
7369 DEBUG_FUNCTION void
7370 debug_ops_vector (vec<operand_entry *> ops)
7372 dump_ops_vector (stderr, ops);
7375 /* Bubble up return status from reassociate_bb. */
7377 static bool
7378 do_reassoc (void)
7380 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7381 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
7384 /* Initialize the reassociation pass. */
7386 static void
7387 init_reassoc (void)
7389 int i;
7390 int64_t rank = 2;
7391 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
7393 /* Find the loops, so that we can prevent moving calculations in
7394 them. */
7395 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
7397 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
7399 next_operand_entry_id = 0;
7401 /* Reverse RPO (Reverse Post Order) will give us something where
7402 deeper loops come later. */
7403 pre_and_rev_post_order_compute (NULL, bbs, false);
7404 bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
7405 operand_rank = new hash_map<tree, int64_t>;
7407 /* Give each default definition a distinct rank. This includes
7408 parameters and the static chain. Walk backwards over all
7409 SSA names so that we get proper rank ordering according
7410 to tree_swap_operands_p. */
7411 for (i = num_ssa_names - 1; i > 0; --i)
7413 tree name = ssa_name (i);
7414 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
7415 insert_operand_rank (name, ++rank);
7418 /* Set up rank for each BB */
7419 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
7420 bb_rank[bbs[i]] = ++rank << 16;
7422 free (bbs);
7423 calculate_dominance_info (CDI_POST_DOMINATORS);
7424 plus_negates = vNULL;
7425 mark_ssa_maybe_undefs ();
7428 /* Cleanup after the reassociation pass, and print stats if
7429 requested. */
7431 static void
7432 fini_reassoc (void)
7434 statistics_counter_event (cfun, "Linearized",
7435 reassociate_stats.linearized);
7436 statistics_counter_event (cfun, "Constants eliminated",
7437 reassociate_stats.constants_eliminated);
7438 statistics_counter_event (cfun, "Ops eliminated",
7439 reassociate_stats.ops_eliminated);
7440 statistics_counter_event (cfun, "Statements rewritten",
7441 reassociate_stats.rewritten);
7442 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
7443 reassociate_stats.pows_encountered);
7444 statistics_counter_event (cfun, "Built-in powi calls created",
7445 reassociate_stats.pows_created);
7447 delete operand_rank;
7448 bitmap_clear (biased_names);
7449 operand_entry_pool.release ();
7450 free (bb_rank);
7451 plus_negates.release ();
7452 free_dominance_info (CDI_POST_DOMINATORS);
7453 loop_optimizer_finalize ();
7456 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
7457 insertion of __builtin_powi calls.
7459 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
7460 optimization of a gimple conditional. Otherwise returns zero. */
7462 static unsigned int
7463 execute_reassoc (bool insert_powi_p, bool bias_loop_carried_phi_ranks_p)
7465 reassoc_insert_powi_p = insert_powi_p;
7466 reassoc_bias_loop_carried_phi_ranks_p = bias_loop_carried_phi_ranks_p;
7468 init_reassoc ();
7470 bool cfg_cleanup_needed;
7471 cfg_cleanup_needed = do_reassoc ();
7472 repropagate_negates ();
7473 branch_fixup ();
7475 fini_reassoc ();
7476 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
7479 namespace {
7481 const pass_data pass_data_reassoc =
7483 GIMPLE_PASS, /* type */
7484 "reassoc", /* name */
7485 OPTGROUP_NONE, /* optinfo_flags */
7486 TV_TREE_REASSOC, /* tv_id */
7487 ( PROP_cfg | PROP_ssa ), /* properties_required */
7488 0, /* properties_provided */
7489 0, /* properties_destroyed */
7490 0, /* todo_flags_start */
7491 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
7494 class pass_reassoc : public gimple_opt_pass
7496 public:
7497 pass_reassoc (gcc::context *ctxt)
7498 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
7501 /* opt_pass methods: */
7502 opt_pass * clone () final override { return new pass_reassoc (m_ctxt); }
7503 void set_pass_param (unsigned int n, bool param) final override
7505 gcc_assert (n == 0);
7506 insert_powi_p = param;
7507 bias_loop_carried_phi_ranks_p = !param;
7509 bool gate (function *) final override { return flag_tree_reassoc != 0; }
7510 unsigned int execute (function *) final override
7512 return execute_reassoc (insert_powi_p, bias_loop_carried_phi_ranks_p);
7515 private:
7516 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
7517 point 3a in the pass header comment. */
7518 bool insert_powi_p;
7519 bool bias_loop_carried_phi_ranks_p;
7520 }; // class pass_reassoc
7522 } // anon namespace
7524 gimple_opt_pass *
7525 make_pass_reassoc (gcc::context *ctxt)
7527 return new pass_reassoc (ctxt);