gcc/tree-ssa-reassoc.c

   1 /* Reassociation for trees.
   2    Copyright (C) 2005-2016 Free Software Foundation, Inc.
   3    Contributed by Daniel Berlin <dan@dberlin.org>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 3, or (at your option)
  10 any later version.
  11
  12 GCC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "backend.h"
  25 #include "target.h"
  26 #include "rtl.h"
  27 #include "tree.h"
  28 #include "gimple.h"
  29 #include "cfghooks.h"
  30 #include "alloc-pool.h"
  31 #include "tree-pass.h"
  32 #include "tm_p.h"
  33 #include "ssa.h"
  34 #include "optabs-tree.h"
  35 #include "gimple-pretty-print.h"
  36 #include "diagnostic-core.h"
  37 #include "fold-const.h"
  38 #include "stor-layout.h"
  39 #include "cfganal.h"
  40 #include "gimple-fold.h"
  41 #include "tree-eh.h"
  42 #include "gimple-iterator.h"
  43 #include "gimplify-me.h"
  44 #include "tree-cfg.h"
  45 #include "tree-ssa-loop.h"
  46 #include "flags.h"
  47 #include "tree-ssa.h"
  48 #include "langhooks.h"
  49 #include "cfgloop.h"
  50 #include "params.h"
  51 #include "builtins.h"
  52 #include "gimplify.h"
  53 #include "case-cfn-macros.h"
  54
  55 /*  This is a simple global reassociation pass.  It is, in part, based
  56     on the LLVM pass of the same name (They do some things more/less
  57     than we do, in different orders, etc).
  58
  59     It consists of five steps:
  60
  61     1. Breaking up subtract operations into addition + negate, where
  62     it would promote the reassociation of adds.
  63
  64     2. Left linearization of the expression trees, so that (A+B)+(C+D)
  65     becomes (((A+B)+C)+D), which is easier for us to rewrite later.
  66     During linearization, we place the operands of the binary
  67     expressions into a vector of operand_entry_*
  68
  69     3. Optimization of the operand lists, eliminating things like a +
  70     -a, a & a, etc.
  71
  72     3a. Combine repeated factors with the same occurrence counts
  73     into a __builtin_powi call that will later be optimized into
  74     an optimal number of multiplies.
  75
  76     4. Rewrite the expression trees we linearized and optimized so
  77     they are in proper rank order.
  78
  79     5. Repropagate negates, as nothing else will clean it up ATM.
  80
  81     A bit of theory on #4, since nobody seems to write anything down
  82     about why it makes sense to do it the way they do it:
  83
  84     We could do this much nicer theoretically, but don't (for reasons
  85     explained after how to do it theoretically nice :P).
  86
  87     In order to promote the most redundancy elimination, you want
  88     binary expressions whose operands are the same rank (or
  89     preferably, the same value) exposed to the redundancy eliminator,
  90     for possible elimination.
  91
  92     So the way to do this if we really cared, is to build the new op
  93     tree from the leaves to the roots, merging as you go, and putting the
  94     new op on the end of the worklist, until you are left with one
  95     thing on the worklist.
  96
  97     IE if you have to rewrite the following set of operands (listed with
  98     rank in parentheses), with opcode PLUS_EXPR:
  99
 100     a (1),  b (1),  c (1),  d (2), e (2)
 101
 102
 103     We start with our merge worklist empty, and the ops list with all of
 104     those on it.
 105
 106     You want to first merge all leaves of the same rank, as much as
 107     possible.
 108
 109     So first build a binary op of
 110
 111     mergetmp = a + b, and put "mergetmp" on the merge worklist.
 112
 113     Because there is no three operand form of PLUS_EXPR, c is not going to
 114     be exposed to redundancy elimination as a rank 1 operand.
 115
 116     So you might as well throw it on the merge worklist (you could also
 117     consider it to now be a rank two operand, and merge it with d and e,
 118     but in this case, you then have evicted e from a binary op. So at
 119     least in this situation, you can't win.)
 120
 121     Then build a binary op of d + e
 122     mergetmp2 = d + e
 123
 124     and put mergetmp2 on the merge worklist.
 125
 126     so merge worklist = {mergetmp, c, mergetmp2}
 127
 128     Continue building binary ops of these operations until you have only
 129     one operation left on the worklist.
 130
 131     So we have
 132
 133     build binary op
 134     mergetmp3 = mergetmp + c
 135
 136     worklist = {mergetmp2, mergetmp3}
 137
 138     mergetmp4 = mergetmp2 + mergetmp3
 139
 140     worklist = {mergetmp4}
 141
 142     because we have one operation left, we can now just set the original
 143     statement equal to the result of that operation.
 144
 145     This will at least expose a + b  and d + e to redundancy elimination
 146     as binary operations.
 147
 148     For extra points, you can reuse the old statements to build the
 149     mergetmps, since you shouldn't run out.
 150
 151     So why don't we do this?
 152
 153     Because it's expensive, and rarely will help.  Most trees we are
 154     reassociating have 3 or less ops.  If they have 2 ops, they already
 155     will be written into a nice single binary op.  If you have 3 ops, a
 156     single simple check suffices to tell you whether the first two are of the
 157     same rank.  If so, you know to order it
 158
 159     mergetmp = op1 + op2
 160     newstmt = mergetmp + op3
 161
 162     instead of
 163     mergetmp = op2 + op3
 164     newstmt = mergetmp + op1
 165
 166     If all three are of the same rank, you can't expose them all in a
 167     single binary operator anyway, so the above is *still* the best you
 168     can do.
 169
 170     Thus, this is what we do.  When we have three ops left, we check to see
 171     what order to put them in, and call it a day.  As a nod to vector sum
 172     reduction, we check if any of the ops are really a phi node that is a
 173     destructive update for the associating op, and keep the destructive
 174     update together for vector sum reduction recognition.  */
 175
 176 /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
 177    point 3a in the pass header comment.  */
 178 static bool reassoc_insert_powi_p;
 179
 180 /* Statistics */
 181 static struct
 182 {
 183   int linearized;
 184   int constants_eliminated;
 185   int ops_eliminated;
 186   int rewritten;
 187   int pows_encountered;
 188   int pows_created;
 189 } reassociate_stats;
 190
 191 /* Operator, rank pair.  */
 192 struct operand_entry
 193 {
 194   unsigned int rank;
 195   int id;
 196   tree op;
 197   unsigned int count;
 198 };
 199
 200 static object_allocator<operand_entry> operand_entry_pool
 201   ("operand entry pool");
 202
 203 /* This is used to assign a unique ID to each struct operand_entry
 204    so that qsort results are identical on different hosts.  */
 205 static int next_operand_entry_id;
 206
 207 /* Starting rank number for a given basic block, so that we can rank
 208    operations using unmovable instructions in that BB based on the bb
 209    depth.  */
 210 static long *bb_rank;
 211
 212 /* Operand->rank hashtable.  */
 213 static hash_map<tree, long> *operand_rank;
 214
 215 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
 216    all basic blocks the CFG should be adjusted - basic blocks
 217    split right after that SSA_NAME's definition statement and before
 218    the only use, which must be a bit ior.  */
 219 static vec<tree> reassoc_branch_fixups;
 220
 221 /* Forward decls.  */
 222 static long get_rank (tree);
 223 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
 224
 225 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
 226    possibly added by gsi_remove.  */
 227
 228 bool
 229 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
 230 {
 231   gimple *stmt = gsi_stmt (*gsi);
 232
 233   if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
 234     return gsi_remove (gsi, true);
 235
 236   gimple_stmt_iterator prev = *gsi;
 237   gsi_prev (&prev);
 238   unsigned uid = gimple_uid (stmt);
 239   basic_block bb = gimple_bb (stmt);
 240   bool ret = gsi_remove (gsi, true);
 241   if (!gsi_end_p (prev))
 242     gsi_next (&prev);
 243   else
 244     prev = gsi_start_bb (bb);
 245   gimple *end_stmt = gsi_stmt (*gsi);
 246   while ((stmt = gsi_stmt (prev)) != end_stmt)
 247     {
 248       gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
 249       gimple_set_uid (stmt, uid);
 250       gsi_next (&prev);
 251     }
 252   return ret;
 253 }
 254
 255 /* Bias amount for loop-carried phis.  We want this to be larger than
 256    the depth of any reassociation tree we can see, but not larger than
 257    the rank difference between two blocks.  */
 258 #define PHI_LOOP_BIAS (1 << 15)
 259
 260 /* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
 261    an innermost loop, and the phi has only a single use which is inside
 262    the loop, then the rank is the block rank of the loop latch plus an
 263    extra bias for the loop-carried dependence.  This causes expressions
 264    calculated into an accumulator variable to be independent for each
 265    iteration of the loop.  If STMT is some other phi, the rank is the
 266    block rank of its containing block.  */
 267 static long
 268 phi_rank (gimple *stmt)
 269 {
 270   basic_block bb = gimple_bb (stmt);
 271   struct loop *father = bb->loop_father;
 272   tree res;
 273   unsigned i;
 274   use_operand_p use;
 275   gimple *use_stmt;
 276
 277   /* We only care about real loops (those with a latch).  */
 278   if (!father->latch)
 279     return bb_rank[bb->index];
 280
 281   /* Interesting phis must be in headers of innermost loops.  */
 282   if (bb != father->header
 283       || father->inner)
 284     return bb_rank[bb->index];
 285
 286   /* Ignore virtual SSA_NAMEs.  */
 287   res = gimple_phi_result (stmt);
 288   if (virtual_operand_p (res))
 289     return bb_rank[bb->index];
 290
 291   /* The phi definition must have a single use, and that use must be
 292      within the loop.  Otherwise this isn't an accumulator pattern.  */
 293   if (!single_imm_use (res, &use, &use_stmt)
 294       || gimple_bb (use_stmt)->loop_father != father)
 295     return bb_rank[bb->index];
 296
 297   /* Look for phi arguments from within the loop.  If found, bias this phi.  */
 298   for (i = 0; i < gimple_phi_num_args (stmt); i++)
 299     {
 300       tree arg = gimple_phi_arg_def (stmt, i);
 301       if (TREE_CODE (arg) == SSA_NAME
 302           && !SSA_NAME_IS_DEFAULT_DEF (arg))
 303         {
 304           gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
 305           if (gimple_bb (def_stmt)->loop_father == father)
 306             return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
 307         }
 308     }
 309
 310   /* Must be an uninteresting phi.  */
 311   return bb_rank[bb->index];
 312 }
 313
 314 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
 315    loop-carried dependence of an innermost loop, return TRUE; else
 316    return FALSE.  */
 317 static bool
 318 loop_carried_phi (tree exp)
 319 {
 320   gimple *phi_stmt;
 321   long block_rank;
 322
 323   if (TREE_CODE (exp) != SSA_NAME
 324       || SSA_NAME_IS_DEFAULT_DEF (exp))
 325     return false;
 326
 327   phi_stmt = SSA_NAME_DEF_STMT (exp);
 328
 329   if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
 330     return false;
 331
 332   /* Non-loop-carried phis have block rank.  Loop-carried phis have
 333      an additional bias added in.  If this phi doesn't have block rank,
 334      it's biased and should not be propagated.  */
 335   block_rank = bb_rank[gimple_bb (phi_stmt)->index];
 336
 337   if (phi_rank (phi_stmt) != block_rank)
 338     return true;
 339
 340   return false;
 341 }
 342
 343 /* Return the maximum of RANK and the rank that should be propagated
 344    from expression OP.  For most operands, this is just the rank of OP.
 345    For loop-carried phis, the value is zero to avoid undoing the bias
 346    in favor of the phi.  */
 347 static long
 348 propagate_rank (long rank, tree op)
 349 {
 350   long op_rank;
 351
 352   if (loop_carried_phi (op))
 353     return rank;
 354
 355   op_rank = get_rank (op);
 356
 357   return MAX (rank, op_rank);
 358 }
 359
 360 /* Look up the operand rank structure for expression E.  */
 361
 362 static inline long
 363 find_operand_rank (tree e)
 364 {
 365   long *slot = operand_rank->get (e);
 366   return slot ? *slot : -1;
 367 }
 368
 369 /* Insert {E,RANK} into the operand rank hashtable.  */
 370
 371 static inline void
 372 insert_operand_rank (tree e, long rank)
 373 {
 374   gcc_assert (rank > 0);
 375   gcc_assert (!operand_rank->put (e, rank));
 376 }
 377
 378 /* Given an expression E, return the rank of the expression.  */
 379
 380 static long
 381 get_rank (tree e)
 382 {
 383   /* SSA_NAME's have the rank of the expression they are the result
 384      of.
 385      For globals and uninitialized values, the rank is 0.
 386      For function arguments, use the pre-setup rank.
 387      For PHI nodes, stores, asm statements, etc, we use the rank of
 388      the BB.
 389      For simple operations, the rank is the maximum rank of any of
 390      its operands, or the bb_rank, whichever is less.
 391      I make no claims that this is optimal, however, it gives good
 392      results.  */
 393
 394   /* We make an exception to the normal ranking system to break
 395      dependences of accumulator variables in loops.  Suppose we
 396      have a simple one-block loop containing:
 397
 398        x_1 = phi(x_0, x_2)
 399        b = a + x_1
 400        c = b + d
 401        x_2 = c + e
 402
 403      As shown, each iteration of the calculation into x is fully
 404      dependent upon the iteration before it.  We would prefer to
 405      see this in the form:
 406
 407        x_1 = phi(x_0, x_2)
 408        b = a + d
 409        c = b + e
 410        x_2 = c + x_1
 411
 412      If the loop is unrolled, the calculations of b and c from
 413      different iterations can be interleaved.
 414
 415      To obtain this result during reassociation, we bias the rank
 416      of the phi definition x_1 upward, when it is recognized as an
 417      accumulator pattern.  The artificial rank causes it to be
 418      added last, providing the desired independence.  */
 419
 420   if (TREE_CODE (e) == SSA_NAME)
 421     {
 422       ssa_op_iter iter;
 423       gimple *stmt;
 424       long rank;
 425       tree op;
 426
 427       if (SSA_NAME_IS_DEFAULT_DEF (e))
 428         return find_operand_rank (e);
 429
 430       stmt = SSA_NAME_DEF_STMT (e);
 431       if (gimple_code (stmt) == GIMPLE_PHI)
 432         return phi_rank (stmt);
 433
 434       if (!is_gimple_assign (stmt))
 435         return bb_rank[gimple_bb (stmt)->index];
 436
 437       /* If we already have a rank for this expression, use that.  */
 438       rank = find_operand_rank (e);
 439       if (rank != -1)
 440         return rank;
 441
 442       /* Otherwise, find the maximum rank for the operands.  As an
 443          exception, remove the bias from loop-carried phis when propagating
 444          the rank so that dependent operations are not also biased.  */
 445       /* Simply walk over all SSA uses - this takes advatage of the
 446          fact that non-SSA operands are is_gimple_min_invariant and
 447          thus have rank 0.  */
 448       rank = 0;
 449       FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
 450         rank = propagate_rank (rank, op);
 451
 452       if (dump_file && (dump_flags & TDF_DETAILS))
 453         {
 454           fprintf (dump_file, "Rank for ");
 455           print_generic_expr (dump_file, e, 0);
 456           fprintf (dump_file, " is %ld\n", (rank + 1));
 457         }
 458
 459       /* Note the rank in the hashtable so we don't recompute it.  */
 460       insert_operand_rank (e, (rank + 1));
 461       return (rank + 1);
 462     }
 463
 464   /* Constants, globals, etc., are rank 0 */
 465   return 0;
 466 }
 467
 468
 469 /* We want integer ones to end up last no matter what, since they are
 470    the ones we can do the most with.  */
 471 #define INTEGER_CONST_TYPE 1 << 3
 472 #define FLOAT_CONST_TYPE 1 << 2
 473 #define OTHER_CONST_TYPE 1 << 1
 474
 475 /* Classify an invariant tree into integer, float, or other, so that
 476    we can sort them to be near other constants of the same type.  */
 477 static inline int
 478 constant_type (tree t)
 479 {
 480   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
 481     return INTEGER_CONST_TYPE;
 482   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
 483     return FLOAT_CONST_TYPE;
 484   else
 485     return OTHER_CONST_TYPE;
 486 }
 487
 488 /* qsort comparison function to sort operand entries PA and PB by rank
 489    so that the sorted array is ordered by rank in decreasing order.  */
 490 static int
 491 sort_by_operand_rank (const void *pa, const void *pb)
 492 {
 493   const operand_entry *oea = *(const operand_entry *const *)pa;
 494   const operand_entry *oeb = *(const operand_entry *const *)pb;
 495
 496   /* It's nicer for optimize_expression if constants that are likely
 497      to fold when added/multiplied//whatever are put next to each
 498      other.  Since all constants have rank 0, order them by type.  */
 499   if (oeb->rank == 0 && oea->rank == 0)
 500     {
 501       if (constant_type (oeb->op) != constant_type (oea->op))
 502         return constant_type (oeb->op) - constant_type (oea->op);
 503       else
 504         /* To make sorting result stable, we use unique IDs to determine
 505            order.  */
 506         return oeb->id - oea->id;
 507     }
 508
 509   /* Lastly, make sure the versions that are the same go next to each
 510      other.  */
 511   if ((oeb->rank - oea->rank == 0)
 512       && TREE_CODE (oea->op) == SSA_NAME
 513       && TREE_CODE (oeb->op) == SSA_NAME)
 514     {
 515       /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
 516          versions of removed SSA_NAMEs, so if possible, prefer to sort
 517          based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
 518          See PR60418.  */
 519       if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
 520           && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
 521           && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
 522         {
 523           gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
 524           gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
 525           basic_block bba = gimple_bb (stmta);
 526           basic_block bbb = gimple_bb (stmtb);
 527           if (bbb != bba)
 528             {
 529               if (bb_rank[bbb->index] != bb_rank[bba->index])
 530                 return bb_rank[bbb->index] - bb_rank[bba->index];
 531             }
 532           else
 533             {
 534               bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
 535               bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
 536               if (da != db)
 537                 return da ? 1 : -1;
 538             }
 539         }
 540
 541       if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
 542         return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
 543       else
 544         return oeb->id - oea->id;
 545     }
 546
 547   if (oeb->rank != oea->rank)
 548     return oeb->rank - oea->rank;
 549   else
 550     return oeb->id - oea->id;
 551 }
 552
 553 /* Add an operand entry to *OPS for the tree operand OP.  */
 554
 555 static void
 556 add_to_ops_vec (vec<operand_entry *> *ops, tree op)
 557 {
 558   operand_entry *oe = operand_entry_pool.allocate ();
 559
 560   oe->op = op;
 561   oe->rank = get_rank (op);
 562   oe->id = next_operand_entry_id++;
 563   oe->count = 1;
 564   ops->safe_push (oe);
 565 }
 566
 567 /* Add an operand entry to *OPS for the tree operand OP with repeat
 568    count REPEAT.  */
 569
 570 static void
 571 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
 572                        HOST_WIDE_INT repeat)
 573 {
 574   operand_entry *oe = operand_entry_pool.allocate ();
 575
 576   oe->op = op;
 577   oe->rank = get_rank (op);
 578   oe->id = next_operand_entry_id++;
 579   oe->count = repeat;
 580   ops->safe_push (oe);
 581
 582   reassociate_stats.pows_encountered++;
 583 }
 584
 585 /* Return true if STMT is reassociable operation containing a binary
 586    operation with tree code CODE, and is inside LOOP.  */
 587
 588 static bool
 589 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
 590 {
 591   basic_block bb = gimple_bb (stmt);
 592
 593   if (gimple_bb (stmt) == NULL)
 594     return false;
 595
 596   if (!flow_bb_inside_loop_p (loop, bb))
 597     return false;
 598
 599   if (is_gimple_assign (stmt)
 600       && gimple_assign_rhs_code (stmt) == code
 601       && has_single_use (gimple_assign_lhs (stmt)))
 602     return true;
 603
 604   return false;
 605 }
 606
 607
 608 /* Return true if STMT is a nop-conversion.  */
 609
 610 static bool
 611 gimple_nop_conversion_p (gimple *stmt)
 612 {
 613   if (gassign *ass = dyn_cast <gassign *> (stmt))
 614     {
 615       if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
 616           && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
 617                                     TREE_TYPE (gimple_assign_rhs1 (ass))))
 618         return true;
 619     }
 620   return false;
 621 }
 622
 623 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
 624    operand of the negate operation.  Otherwise, return NULL.  */
 625
 626 static tree
 627 get_unary_op (tree name, enum tree_code opcode)
 628 {
 629   gimple *stmt = SSA_NAME_DEF_STMT (name);
 630
 631   /* Look through nop conversions (sign changes).  */
 632   if (gimple_nop_conversion_p (stmt)
 633       && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
 634     stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
 635
 636   if (!is_gimple_assign (stmt))
 637     return NULL_TREE;
 638
 639   if (gimple_assign_rhs_code (stmt) == opcode)
 640     return gimple_assign_rhs1 (stmt);
 641   return NULL_TREE;
 642 }
 643
 644 /* Return true if OP1 and OP2 have the same value if casted to either type.  */
 645
 646 static bool
 647 ops_equal_values_p (tree op1, tree op2)
 648 {
 649   if (op1 == op2)
 650     return true;
 651
 652   tree orig_op1 = op1;
 653   if (TREE_CODE (op1) == SSA_NAME)
 654     {
 655       gimple *stmt = SSA_NAME_DEF_STMT (op1);
 656       if (gimple_nop_conversion_p (stmt))
 657         {
 658           op1 = gimple_assign_rhs1 (stmt);
 659           if (op1 == op2)
 660             return true;
 661         }
 662     }
 663
 664   if (TREE_CODE (op2) == SSA_NAME)
 665     {
 666       gimple *stmt = SSA_NAME_DEF_STMT (op2);
 667       if (gimple_nop_conversion_p (stmt))
 668         {
 669           op2 = gimple_assign_rhs1 (stmt);
 670           if (op1 == op2
 671               || orig_op1 == op2)
 672             return true;
 673         }
 674     }
 675
 676   return false;
 677 }
 678
 679
 680 /* If CURR and LAST are a pair of ops that OPCODE allows us to
 681    eliminate through equivalences, do so, remove them from OPS, and
 682    return true.  Otherwise, return false.  */
 683
 684 static bool
 685 eliminate_duplicate_pair (enum tree_code opcode,
 686                           vec<operand_entry *> *ops,
 687                           bool *all_done,
 688                           unsigned int i,
 689                           operand_entry *curr,
 690                           operand_entry *last)
 691 {
 692
 693   /* If we have two of the same op, and the opcode is & |, min, or max,
 694      we can eliminate one of them.
 695      If we have two of the same op, and the opcode is ^, we can
 696      eliminate both of them.  */
 697
 698   if (last && last->op == curr->op)
 699     {
 700       switch (opcode)
 701         {
 702         case MAX_EXPR:
 703         case MIN_EXPR:
 704         case BIT_IOR_EXPR:
 705         case BIT_AND_EXPR:
 706           if (dump_file && (dump_flags & TDF_DETAILS))
 707             {
 708               fprintf (dump_file, "Equivalence: ");
 709               print_generic_expr (dump_file, curr->op, 0);
 710               fprintf (dump_file, " [&|minmax] ");
 711               print_generic_expr (dump_file, last->op, 0);
 712               fprintf (dump_file, " -> ");
 713               print_generic_stmt (dump_file, last->op, 0);
 714             }
 715
 716           ops->ordered_remove (i);
 717           reassociate_stats.ops_eliminated ++;
 718
 719           return true;
 720
 721         case BIT_XOR_EXPR:
 722           if (dump_file && (dump_flags & TDF_DETAILS))
 723             {
 724               fprintf (dump_file, "Equivalence: ");
 725               print_generic_expr (dump_file, curr->op, 0);
 726               fprintf (dump_file, " ^ ");
 727               print_generic_expr (dump_file, last->op, 0);
 728               fprintf (dump_file, " -> nothing\n");
 729             }
 730
 731           reassociate_stats.ops_eliminated += 2;
 732
 733           if (ops->length () == 2)
 734             {
 735               ops->truncate (0);
 736               add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
 737               *all_done = true;
 738             }
 739           else
 740             {
 741               ops->ordered_remove (i-1);
 742               ops->ordered_remove (i-1);
 743             }
 744
 745           return true;
 746
 747         default:
 748           break;
 749         }
 750     }
 751   return false;
 752 }
 753
 754 static vec<tree> plus_negates;
 755
 756 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
 757    expression, look in OPS for a corresponding positive operation to cancel
 758    it out.  If we find one, remove the other from OPS, replace
 759    OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
 760    return false. */
 761
 762 static bool
 763 eliminate_plus_minus_pair (enum tree_code opcode,
 764                            vec<operand_entry *> *ops,
 765                            unsigned int currindex,
 766                            operand_entry *curr)
 767 {
 768   tree negateop;
 769   tree notop;
 770   unsigned int i;
 771   operand_entry *oe;
 772
 773   if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
 774     return false;
 775
 776   negateop = get_unary_op (curr->op, NEGATE_EXPR);
 777   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 778   if (negateop == NULL_TREE && notop == NULL_TREE)
 779     return false;
 780
 781   /* Any non-negated version will have a rank that is one less than
 782      the current rank.  So once we hit those ranks, if we don't find
 783      one, we can stop.  */
 784
 785   for (i = currindex + 1;
 786        ops->iterate (i, &oe)
 787        && oe->rank >= curr->rank - 1 ;
 788        i++)
 789     {
 790       if (negateop
 791           && ops_equal_values_p (oe->op, negateop))
 792         {
 793           if (dump_file && (dump_flags & TDF_DETAILS))
 794             {
 795               fprintf (dump_file, "Equivalence: ");
 796               print_generic_expr (dump_file, negateop, 0);
 797               fprintf (dump_file, " + -");
 798               print_generic_expr (dump_file, oe->op, 0);
 799               fprintf (dump_file, " -> 0\n");
 800             }
 801
 802           ops->ordered_remove (i);
 803           add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
 804           ops->ordered_remove (currindex);
 805           reassociate_stats.ops_eliminated ++;
 806
 807           return true;
 808         }
 809       else if (notop
 810                && ops_equal_values_p (oe->op, notop))
 811         {
 812           tree op_type = TREE_TYPE (oe->op);
 813
 814           if (dump_file && (dump_flags & TDF_DETAILS))
 815             {
 816               fprintf (dump_file, "Equivalence: ");
 817               print_generic_expr (dump_file, notop, 0);
 818               fprintf (dump_file, " + ~");
 819               print_generic_expr (dump_file, oe->op, 0);
 820               fprintf (dump_file, " -> -1\n");
 821             }
 822
 823           ops->ordered_remove (i);
 824           add_to_ops_vec (ops, build_all_ones_cst (op_type));
 825           ops->ordered_remove (currindex);
 826           reassociate_stats.ops_eliminated ++;
 827
 828           return true;
 829         }
 830     }
 831
 832   /* If CURR->OP is a negate expr without nop conversion in a plus expr:
 833      save it for later inspection in repropagate_negates().  */
 834   if (negateop != NULL_TREE
 835       && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
 836     plus_negates.safe_push (curr->op);
 837
 838   return false;
 839 }
 840
 841 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
 842    bitwise not expression, look in OPS for a corresponding operand to
 843    cancel it out.  If we find one, remove the other from OPS, replace
 844    OPS[CURRINDEX] with 0, and return true.  Otherwise, return
 845    false. */
 846
 847 static bool
 848 eliminate_not_pairs (enum tree_code opcode,
 849                      vec<operand_entry *> *ops,
 850                      unsigned int currindex,
 851                      operand_entry *curr)
 852 {
 853   tree notop;
 854   unsigned int i;
 855   operand_entry *oe;
 856
 857   if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
 858       || TREE_CODE (curr->op) != SSA_NAME)
 859     return false;
 860
 861   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 862   if (notop == NULL_TREE)
 863     return false;
 864
 865   /* Any non-not version will have a rank that is one less than
 866      the current rank.  So once we hit those ranks, if we don't find
 867      one, we can stop.  */
 868
 869   for (i = currindex + 1;
 870        ops->iterate (i, &oe)
 871        && oe->rank >= curr->rank - 1;
 872        i++)
 873     {
 874       if (oe->op == notop)
 875         {
 876           if (dump_file && (dump_flags & TDF_DETAILS))
 877             {
 878               fprintf (dump_file, "Equivalence: ");
 879               print_generic_expr (dump_file, notop, 0);
 880               if (opcode == BIT_AND_EXPR)
 881                 fprintf (dump_file, " & ~");
 882               else if (opcode == BIT_IOR_EXPR)
 883                 fprintf (dump_file, " | ~");
 884               print_generic_expr (dump_file, oe->op, 0);
 885               if (opcode == BIT_AND_EXPR)
 886                 fprintf (dump_file, " -> 0\n");
 887               else if (opcode == BIT_IOR_EXPR)
 888                 fprintf (dump_file, " -> -1\n");
 889             }
 890
 891           if (opcode == BIT_AND_EXPR)
 892             oe->op = build_zero_cst (TREE_TYPE (oe->op));
 893           else if (opcode == BIT_IOR_EXPR)
 894             oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
 895
 896           reassociate_stats.ops_eliminated += ops->length () - 1;
 897           ops->truncate (0);
 898           ops->quick_push (oe);
 899           return true;
 900         }
 901     }
 902
 903   return false;
 904 }
 905
 906 /* Use constant value that may be present in OPS to try to eliminate
 907    operands.  Note that this function is only really used when we've
 908    eliminated ops for other reasons, or merged constants.  Across
 909    single statements, fold already does all of this, plus more.  There
 910    is little point in duplicating logic, so I've only included the
 911    identities that I could ever construct testcases to trigger.  */
 912
 913 static void
 914 eliminate_using_constants (enum tree_code opcode,
 915                            vec<operand_entry *> *ops)
 916 {
 917   operand_entry *oelast = ops->last ();
 918   tree type = TREE_TYPE (oelast->op);
 919
 920   if (oelast->rank == 0
 921       && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
 922     {
 923       switch (opcode)
 924         {
 925         case BIT_AND_EXPR:
 926           if (integer_zerop (oelast->op))
 927             {
 928               if (ops->length () != 1)
 929                 {
 930                   if (dump_file && (dump_flags & TDF_DETAILS))
 931                     fprintf (dump_file, "Found & 0, removing all other ops\n");
 932
 933                   reassociate_stats.ops_eliminated += ops->length () - 1;
 934
 935                   ops->truncate (0);
 936                   ops->quick_push (oelast);
 937                   return;
 938                 }
 939             }
 940           else if (integer_all_onesp (oelast->op))
 941             {
 942               if (ops->length () != 1)
 943                 {
 944                   if (dump_file && (dump_flags & TDF_DETAILS))
 945                     fprintf (dump_file, "Found & -1, removing\n");
 946                   ops->pop ();
 947                   reassociate_stats.ops_eliminated++;
 948                 }
 949             }
 950           break;
 951         case BIT_IOR_EXPR:
 952           if (integer_all_onesp (oelast->op))
 953             {
 954               if (ops->length () != 1)
 955                 {
 956                   if (dump_file && (dump_flags & TDF_DETAILS))
 957                     fprintf (dump_file, "Found | -1, removing all other ops\n");
 958
 959                   reassociate_stats.ops_eliminated += ops->length () - 1;
 960
 961                   ops->truncate (0);
 962                   ops->quick_push (oelast);
 963                   return;
 964                 }
 965             }
 966           else if (integer_zerop (oelast->op))
 967             {
 968               if (ops->length () != 1)
 969                 {
 970                   if (dump_file && (dump_flags & TDF_DETAILS))
 971                     fprintf (dump_file, "Found | 0, removing\n");
 972                   ops->pop ();
 973                   reassociate_stats.ops_eliminated++;
 974                 }
 975             }
 976           break;
 977         case MULT_EXPR:
 978           if (integer_zerop (oelast->op)
 979               || (FLOAT_TYPE_P (type)
 980                   && !HONOR_NANS (type)
 981                   && !HONOR_SIGNED_ZEROS (type)
 982                   && real_zerop (oelast->op)))
 983             {
 984               if (ops->length () != 1)
 985                 {
 986                   if (dump_file && (dump_flags & TDF_DETAILS))
 987                     fprintf (dump_file, "Found * 0, removing all other ops\n");
 988
 989                   reassociate_stats.ops_eliminated += ops->length () - 1;
 990                   ops->truncate (1);
 991                   ops->quick_push (oelast);
 992                   return;
 993                 }
 994             }
 995           else if (integer_onep (oelast->op)
 996                    || (FLOAT_TYPE_P (type)
 997                        && !HONOR_SNANS (type)
 998                        && real_onep (oelast->op)))
 999             {
1000               if (ops->length () != 1)
1001                 {
1002                   if (dump_file && (dump_flags & TDF_DETAILS))
1003                     fprintf (dump_file, "Found * 1, removing\n");
1004                   ops->pop ();
1005                   reassociate_stats.ops_eliminated++;
1006                   return;
1007                 }
1008             }
1009           break;
1010         case BIT_XOR_EXPR:
1011         case PLUS_EXPR:
1012         case MINUS_EXPR:
1013           if (integer_zerop (oelast->op)
1014               || (FLOAT_TYPE_P (type)
1015                   && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1016                   && fold_real_zero_addition_p (type, oelast->op,
1017                                                 opcode == MINUS_EXPR)))
1018             {
1019               if (ops->length () != 1)
1020                 {
1021                   if (dump_file && (dump_flags & TDF_DETAILS))
1022                     fprintf (dump_file, "Found [|^+] 0, removing\n");
1023                   ops->pop ();
1024                   reassociate_stats.ops_eliminated++;
1025                   return;
1026                 }
1027             }
1028           break;
1029         default:
1030           break;
1031         }
1032     }
1033 }
1034
1035
1036 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1037                                  bool, bool);
1038
1039 /* Structure for tracking and counting operands.  */
1040 struct oecount {
1041   int cnt;
1042   int id;
1043   enum tree_code oecode;
1044   tree op;
1045 };
1046
1047
1048 /* The heap for the oecount hashtable and the sorted list of operands.  */
1049 static vec<oecount> cvec;
1050
1051
1052 /* Oecount hashtable helpers.  */
1053
1054 struct oecount_hasher : int_hash <int, 0, 1>
1055 {
1056   static inline hashval_t hash (int);
1057   static inline bool equal (int, int);
1058 };
1059
1060 /* Hash function for oecount.  */
1061
1062 inline hashval_t
1063 oecount_hasher::hash (int p)
1064 {
1065   const oecount *c = &cvec[p - 42];
1066   return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1067 }
1068
1069 /* Comparison function for oecount.  */
1070
1071 inline bool
1072 oecount_hasher::equal (int p1, int p2)
1073 {
1074   const oecount *c1 = &cvec[p1 - 42];
1075   const oecount *c2 = &cvec[p2 - 42];
1076   return (c1->oecode == c2->oecode
1077           && c1->op == c2->op);
1078 }
1079
1080 /* Comparison function for qsort sorting oecount elements by count.  */
1081
1082 static int
1083 oecount_cmp (const void *p1, const void *p2)
1084 {
1085   const oecount *c1 = (const oecount *)p1;
1086   const oecount *c2 = (const oecount *)p2;
1087   if (c1->cnt != c2->cnt)
1088     return c1->cnt - c2->cnt;
1089   else
1090     /* If counts are identical, use unique IDs to stabilize qsort.  */
1091     return c1->id - c2->id;
1092 }
1093
1094 /* Return TRUE iff STMT represents a builtin call that raises OP
1095    to some exponent.  */
1096
1097 static bool
1098 stmt_is_power_of_op (gimple *stmt, tree op)
1099 {
1100   if (!is_gimple_call (stmt))
1101     return false;
1102
1103   switch (gimple_call_combined_fn (stmt))
1104     {
1105     CASE_CFN_POW:
1106     CASE_CFN_POWI:
1107       return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1108
1109     default:
1110       return false;
1111     }
1112 }
1113
1114 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1115    in place and return the result.  Assumes that stmt_is_power_of_op
1116    was previously called for STMT and returned TRUE.  */
1117
1118 static HOST_WIDE_INT
1119 decrement_power (gimple *stmt)
1120 {
1121   REAL_VALUE_TYPE c, cint;
1122   HOST_WIDE_INT power;
1123   tree arg1;
1124
1125   switch (gimple_call_combined_fn (stmt))
1126     {
1127     CASE_CFN_POW:
1128       arg1 = gimple_call_arg (stmt, 1);
1129       c = TREE_REAL_CST (arg1);
1130       power = real_to_integer (&c) - 1;
1131       real_from_integer (&cint, VOIDmode, power, SIGNED);
1132       gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1133       return power;
1134
1135     CASE_CFN_POWI:
1136       arg1 = gimple_call_arg (stmt, 1);
1137       power = TREE_INT_CST_LOW (arg1) - 1;
1138       gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1139       return power;
1140
1141     default:
1142       gcc_unreachable ();
1143     }
1144 }
1145
1146 /* Find the single immediate use of STMT's LHS, and replace it
1147    with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
1148    replace *DEF with OP as well.  */
1149
1150 static void
1151 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1152 {
1153   tree lhs;
1154   gimple *use_stmt;
1155   use_operand_p use;
1156   gimple_stmt_iterator gsi;
1157
1158   if (is_gimple_call (stmt))
1159     lhs = gimple_call_lhs (stmt);
1160   else
1161     lhs = gimple_assign_lhs (stmt);
1162
1163   gcc_assert (has_single_use (lhs));
1164   single_imm_use (lhs, &use, &use_stmt);
1165   if (lhs == *def)
1166     *def = op;
1167   SET_USE (use, op);
1168   if (TREE_CODE (op) != SSA_NAME)
1169     update_stmt (use_stmt);
1170   gsi = gsi_for_stmt (stmt);
1171   unlink_stmt_vdef (stmt);
1172   reassoc_remove_stmt (&gsi);
1173   release_defs (stmt);
1174 }
1175
1176 /* Walks the linear chain with result *DEF searching for an operation
1177    with operand OP and code OPCODE removing that from the chain.  *DEF
1178    is updated if there is only one operand but no operation left.  */
1179
1180 static void
1181 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1182 {
1183   gimple *stmt = SSA_NAME_DEF_STMT (*def);
1184
1185   do
1186     {
1187       tree name;
1188
1189       if (opcode == MULT_EXPR
1190           && stmt_is_power_of_op (stmt, op))
1191         {
1192           if (decrement_power (stmt) == 1)
1193             propagate_op_to_single_use (op, stmt, def);
1194           return;
1195         }
1196
1197       name = gimple_assign_rhs1 (stmt);
1198
1199       /* If this is the operation we look for and one of the operands
1200          is ours simply propagate the other operand into the stmts
1201          single use.  */
1202       if (gimple_assign_rhs_code (stmt) == opcode
1203           && (name == op
1204               || gimple_assign_rhs2 (stmt) == op))
1205         {
1206           if (name == op)
1207             name = gimple_assign_rhs2 (stmt);
1208           propagate_op_to_single_use (name, stmt, def);
1209           return;
1210         }
1211
1212       /* We might have a multiply of two __builtin_pow* calls, and
1213          the operand might be hiding in the rightmost one.  */
1214       if (opcode == MULT_EXPR
1215           && gimple_assign_rhs_code (stmt) == opcode
1216           && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1217           && has_single_use (gimple_assign_rhs2 (stmt)))
1218         {
1219           gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1220           if (stmt_is_power_of_op (stmt2, op))
1221             {
1222               if (decrement_power (stmt2) == 1)
1223                 propagate_op_to_single_use (op, stmt2, def);
1224               return;
1225             }
1226         }
1227
1228       /* Continue walking the chain.  */
1229       gcc_assert (name != op
1230                   && TREE_CODE (name) == SSA_NAME);
1231       stmt = SSA_NAME_DEF_STMT (name);
1232     }
1233   while (1);
1234 }
1235
1236 /* Returns true if statement S1 dominates statement S2.  Like
1237    stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */
1238
1239 static bool
1240 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1241 {
1242   basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1243
1244   /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1245      SSA_NAME.  Assume it lives at the beginning of function and
1246      thus dominates everything.  */
1247   if (!bb1 || s1 == s2)
1248     return true;
1249
1250   /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
1251   if (!bb2)
1252     return false;
1253
1254   if (bb1 == bb2)
1255     {
1256       /* PHIs in the same basic block are assumed to be
1257          executed all in parallel, if only one stmt is a PHI,
1258          it dominates the other stmt in the same basic block.  */
1259       if (gimple_code (s1) == GIMPLE_PHI)
1260         return true;
1261
1262       if (gimple_code (s2) == GIMPLE_PHI)
1263         return false;
1264
1265       gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1266
1267       if (gimple_uid (s1) < gimple_uid (s2))
1268         return true;
1269
1270       if (gimple_uid (s1) > gimple_uid (s2))
1271         return false;
1272
1273       gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1274       unsigned int uid = gimple_uid (s1);
1275       for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1276         {
1277           gimple *s = gsi_stmt (gsi);
1278           if (gimple_uid (s) != uid)
1279             break;
1280           if (s == s2)
1281             return true;
1282         }
1283
1284       return false;
1285     }
1286
1287   return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1288 }
1289
1290 /* Insert STMT after INSERT_POINT.  */
1291
1292 static void
1293 insert_stmt_after (gimple *stmt, gimple *insert_point)
1294 {
1295   gimple_stmt_iterator gsi;
1296   basic_block bb;
1297
1298   if (gimple_code (insert_point) == GIMPLE_PHI)
1299     bb = gimple_bb (insert_point);
1300   else if (!stmt_ends_bb_p (insert_point))
1301     {
1302       gsi = gsi_for_stmt (insert_point);
1303       gimple_set_uid (stmt, gimple_uid (insert_point));
1304       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1305       return;
1306     }
1307   else
1308     /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1309        thus if it must end a basic block, it should be a call that can
1310        throw, or some assignment that can throw.  If it throws, the LHS
1311        of it will not be initialized though, so only valid places using
1312        the SSA_NAME should be dominated by the fallthru edge.  */
1313     bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1314   gsi = gsi_after_labels (bb);
1315   if (gsi_end_p (gsi))
1316     {
1317       gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1318       gimple_set_uid (stmt,
1319                       gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1320     }
1321   else
1322     gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1323   gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1324 }
1325
1326 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1327    the result.  Places the statement after the definition of either
1328    OP1 or OP2.  Returns the new statement.  */
1329
1330 static gimple *
1331 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1332 {
1333   gimple *op1def = NULL, *op2def = NULL;
1334   gimple_stmt_iterator gsi;
1335   tree op;
1336   gassign *sum;
1337
1338   /* Create the addition statement.  */
1339   op = make_ssa_name (type);
1340   sum = gimple_build_assign (op, opcode, op1, op2);
1341
1342   /* Find an insertion place and insert.  */
1343   if (TREE_CODE (op1) == SSA_NAME)
1344     op1def = SSA_NAME_DEF_STMT (op1);
1345   if (TREE_CODE (op2) == SSA_NAME)
1346     op2def = SSA_NAME_DEF_STMT (op2);
1347   if ((!op1def || gimple_nop_p (op1def))
1348       && (!op2def || gimple_nop_p (op2def)))
1349     {
1350       gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1351       if (gsi_end_p (gsi))
1352         {
1353           gimple_stmt_iterator gsi2
1354             = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1355           gimple_set_uid (sum,
1356                           gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1357         }
1358       else
1359         gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1360       gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1361     }
1362   else
1363     {
1364       gimple *insert_point;
1365       if ((!op1def || gimple_nop_p (op1def))
1366            || (op2def && !gimple_nop_p (op2def)
1367                && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1368         insert_point = op2def;
1369       else
1370         insert_point = op1def;
1371       insert_stmt_after (sum, insert_point);
1372     }
1373   update_stmt (sum);
1374
1375   return sum;
1376 }
1377
1378 /* Perform un-distribution of divisions and multiplications.
1379    A * X + B * X is transformed into (A + B) * X and A / X + B / X
1380    to (A + B) / X for real X.
1381
1382    The algorithm is organized as follows.
1383
1384     - First we walk the addition chain *OPS looking for summands that
1385       are defined by a multiplication or a real division.  This results
1386       in the candidates bitmap with relevant indices into *OPS.
1387
1388     - Second we build the chains of multiplications or divisions for
1389       these candidates, counting the number of occurrences of (operand, code)
1390       pairs in all of the candidates chains.
1391
1392     - Third we sort the (operand, code) pairs by number of occurrence and
1393       process them starting with the pair with the most uses.
1394
1395       * For each such pair we walk the candidates again to build a
1396         second candidate bitmap noting all multiplication/division chains
1397         that have at least one occurrence of (operand, code).
1398
1399       * We build an alternate addition chain only covering these
1400         candidates with one (operand, code) operation removed from their
1401         multiplication/division chain.
1402
1403       * The first candidate gets replaced by the alternate addition chain
1404         multiplied/divided by the operand.
1405
1406       * All candidate chains get disabled for further processing and
1407         processing of (operand, code) pairs continues.
1408
1409   The alternate addition chains built are re-processed by the main
1410   reassociation algorithm which allows optimizing a * x * y + b * y * x
1411   to (a + b ) * x * y in one invocation of the reassociation pass.  */
1412
1413 static bool
1414 undistribute_ops_list (enum tree_code opcode,
1415                        vec<operand_entry *> *ops, struct loop *loop)
1416 {
1417   unsigned int length = ops->length ();
1418   operand_entry *oe1;
1419   unsigned i, j;
1420   sbitmap candidates, candidates2;
1421   unsigned nr_candidates, nr_candidates2;
1422   sbitmap_iterator sbi0;
1423   vec<operand_entry *> *subops;
1424   bool changed = false;
1425   int next_oecount_id = 0;
1426
1427   if (length <= 1
1428       || opcode != PLUS_EXPR)
1429     return false;
1430
1431   /* Build a list of candidates to process.  */
1432   candidates = sbitmap_alloc (length);
1433   bitmap_clear (candidates);
1434   nr_candidates = 0;
1435   FOR_EACH_VEC_ELT (*ops, i, oe1)
1436     {
1437       enum tree_code dcode;
1438       gimple *oe1def;
1439
1440       if (TREE_CODE (oe1->op) != SSA_NAME)
1441         continue;
1442       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1443       if (!is_gimple_assign (oe1def))
1444         continue;
1445       dcode = gimple_assign_rhs_code (oe1def);
1446       if ((dcode != MULT_EXPR
1447            && dcode != RDIV_EXPR)
1448           || !is_reassociable_op (oe1def, dcode, loop))
1449         continue;
1450
1451       bitmap_set_bit (candidates, i);
1452       nr_candidates++;
1453     }
1454
1455   if (nr_candidates < 2)
1456     {
1457       sbitmap_free (candidates);
1458       return false;
1459     }
1460
1461   if (dump_file && (dump_flags & TDF_DETAILS))
1462     {
1463       fprintf (dump_file, "searching for un-distribute opportunities ");
1464       print_generic_expr (dump_file,
1465         (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1466       fprintf (dump_file, " %d\n", nr_candidates);
1467     }
1468
1469   /* Build linearized sub-operand lists and the counting table.  */
1470   cvec.create (0);
1471
1472   hash_table<oecount_hasher> ctable (15);
1473
1474   /* ??? Macro arguments cannot have multi-argument template types in
1475      them.  This typedef is needed to workaround that limitation.  */
1476   typedef vec<operand_entry *> vec_operand_entry_t_heap;
1477   subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1478   EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1479     {
1480       gimple *oedef;
1481       enum tree_code oecode;
1482       unsigned j;
1483
1484       oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1485       oecode = gimple_assign_rhs_code (oedef);
1486       linearize_expr_tree (&subops[i], oedef,
1487                            associative_tree_code (oecode), false);
1488
1489       FOR_EACH_VEC_ELT (subops[i], j, oe1)
1490         {
1491           oecount c;
1492           int *slot;
1493           int idx;
1494           c.oecode = oecode;
1495           c.cnt = 1;
1496           c.id = next_oecount_id++;
1497           c.op = oe1->op;
1498           cvec.safe_push (c);
1499           idx = cvec.length () + 41;
1500           slot = ctable.find_slot (idx, INSERT);
1501           if (!*slot)
1502             {
1503               *slot = idx;
1504             }
1505           else
1506             {
1507               cvec.pop ();
1508               cvec[*slot - 42].cnt++;
1509             }
1510         }
1511     }
1512
1513   /* Sort the counting table.  */
1514   cvec.qsort (oecount_cmp);
1515
1516   if (dump_file && (dump_flags & TDF_DETAILS))
1517     {
1518       oecount *c;
1519       fprintf (dump_file, "Candidates:\n");
1520       FOR_EACH_VEC_ELT (cvec, j, c)
1521         {
1522           fprintf (dump_file, "  %u %s: ", c->cnt,
1523                    c->oecode == MULT_EXPR
1524                    ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1525           print_generic_expr (dump_file, c->op, 0);
1526           fprintf (dump_file, "\n");
1527         }
1528     }
1529
1530   /* Process the (operand, code) pairs in order of most occurrence.  */
1531   candidates2 = sbitmap_alloc (length);
1532   while (!cvec.is_empty ())
1533     {
1534       oecount *c = &cvec.last ();
1535       if (c->cnt < 2)
1536         break;
1537
1538       /* Now collect the operands in the outer chain that contain
1539          the common operand in their inner chain.  */
1540       bitmap_clear (candidates2);
1541       nr_candidates2 = 0;
1542       EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1543         {
1544           gimple *oedef;
1545           enum tree_code oecode;
1546           unsigned j;
1547           tree op = (*ops)[i]->op;
1548
1549           /* If we undistributed in this chain already this may be
1550              a constant.  */
1551           if (TREE_CODE (op) != SSA_NAME)
1552             continue;
1553
1554           oedef = SSA_NAME_DEF_STMT (op);
1555           oecode = gimple_assign_rhs_code (oedef);
1556           if (oecode != c->oecode)
1557             continue;
1558
1559           FOR_EACH_VEC_ELT (subops[i], j, oe1)
1560             {
1561               if (oe1->op == c->op)
1562                 {
1563                   bitmap_set_bit (candidates2, i);
1564                   ++nr_candidates2;
1565                   break;
1566                 }
1567             }
1568         }
1569
1570       if (nr_candidates2 >= 2)
1571         {
1572           operand_entry *oe1, *oe2;
1573           gimple *prod;
1574           int first = bitmap_first_set_bit (candidates2);
1575
1576           /* Build the new addition chain.  */
1577           oe1 = (*ops)[first];
1578           if (dump_file && (dump_flags & TDF_DETAILS))
1579             {
1580               fprintf (dump_file, "Building (");
1581               print_generic_expr (dump_file, oe1->op, 0);
1582             }
1583           zero_one_operation (&oe1->op, c->oecode, c->op);
1584           EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1585             {
1586               gimple *sum;
1587               oe2 = (*ops)[i];
1588               if (dump_file && (dump_flags & TDF_DETAILS))
1589                 {
1590                   fprintf (dump_file, " + ");
1591                   print_generic_expr (dump_file, oe2->op, 0);
1592                 }
1593               zero_one_operation (&oe2->op, c->oecode, c->op);
1594               sum = build_and_add_sum (TREE_TYPE (oe1->op),
1595                                        oe1->op, oe2->op, opcode);
1596               oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1597               oe2->rank = 0;
1598               oe1->op = gimple_get_lhs (sum);
1599             }
1600
1601           /* Apply the multiplication/division.  */
1602           prod = build_and_add_sum (TREE_TYPE (oe1->op),
1603                                     oe1->op, c->op, c->oecode);
1604           if (dump_file && (dump_flags & TDF_DETAILS))
1605             {
1606               fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1607               print_generic_expr (dump_file, c->op, 0);
1608               fprintf (dump_file, "\n");
1609             }
1610
1611           /* Record it in the addition chain and disable further
1612              undistribution with this op.  */
1613           oe1->op = gimple_assign_lhs (prod);
1614           oe1->rank = get_rank (oe1->op);
1615           subops[first].release ();
1616
1617           changed = true;
1618         }
1619
1620       cvec.pop ();
1621     }
1622
1623   for (i = 0; i < ops->length (); ++i)
1624     subops[i].release ();
1625   free (subops);
1626   cvec.release ();
1627   sbitmap_free (candidates);
1628   sbitmap_free (candidates2);
1629
1630   return changed;
1631 }
1632
1633 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1634    expression, examine the other OPS to see if any of them are comparisons
1635    of the same values, which we may be able to combine or eliminate.
1636    For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
1637
1638 static bool
1639 eliminate_redundant_comparison (enum tree_code opcode,
1640                                 vec<operand_entry *> *ops,
1641                                 unsigned int currindex,
1642                                 operand_entry *curr)
1643 {
1644   tree op1, op2;
1645   enum tree_code lcode, rcode;
1646   gimple *def1, *def2;
1647   int i;
1648   operand_entry *oe;
1649
1650   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1651     return false;
1652
1653   /* Check that CURR is a comparison.  */
1654   if (TREE_CODE (curr->op) != SSA_NAME)
1655     return false;
1656   def1 = SSA_NAME_DEF_STMT (curr->op);
1657   if (!is_gimple_assign (def1))
1658     return false;
1659   lcode = gimple_assign_rhs_code (def1);
1660   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1661     return false;
1662   op1 = gimple_assign_rhs1 (def1);
1663   op2 = gimple_assign_rhs2 (def1);
1664
1665   /* Now look for a similar comparison in the remaining OPS.  */
1666   for (i = currindex + 1; ops->iterate (i, &oe); i++)
1667     {
1668       tree t;
1669
1670       if (TREE_CODE (oe->op) != SSA_NAME)
1671         continue;
1672       def2 = SSA_NAME_DEF_STMT (oe->op);
1673       if (!is_gimple_assign (def2))
1674         continue;
1675       rcode = gimple_assign_rhs_code (def2);
1676       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1677         continue;
1678
1679       /* If we got here, we have a match.  See if we can combine the
1680          two comparisons.  */
1681       if (opcode == BIT_IOR_EXPR)
1682         t = maybe_fold_or_comparisons (lcode, op1, op2,
1683                                        rcode, gimple_assign_rhs1 (def2),
1684                                        gimple_assign_rhs2 (def2));
1685       else
1686         t = maybe_fold_and_comparisons (lcode, op1, op2,
1687                                         rcode, gimple_assign_rhs1 (def2),
1688                                         gimple_assign_rhs2 (def2));
1689       if (!t)
1690         continue;
1691
1692       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1693          always give us a boolean_type_node value back.  If the original
1694          BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1695          we need to convert.  */
1696       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1697         t = fold_convert (TREE_TYPE (curr->op), t);
1698
1699       if (TREE_CODE (t) != INTEGER_CST
1700           && !operand_equal_p (t, curr->op, 0))
1701         {
1702           enum tree_code subcode;
1703           tree newop1, newop2;
1704           if (!COMPARISON_CLASS_P (t))
1705             continue;
1706           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1707           STRIP_USELESS_TYPE_CONVERSION (newop1);
1708           STRIP_USELESS_TYPE_CONVERSION (newop2);
1709           if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1710             continue;
1711         }
1712
1713       if (dump_file && (dump_flags & TDF_DETAILS))
1714         {
1715           fprintf (dump_file, "Equivalence: ");
1716           print_generic_expr (dump_file, curr->op, 0);
1717           fprintf (dump_file, " %s ", op_symbol_code (opcode));
1718           print_generic_expr (dump_file, oe->op, 0);
1719           fprintf (dump_file, " -> ");
1720           print_generic_expr (dump_file, t, 0);
1721           fprintf (dump_file, "\n");
1722         }
1723
1724       /* Now we can delete oe, as it has been subsumed by the new combined
1725          expression t.  */
1726       ops->ordered_remove (i);
1727       reassociate_stats.ops_eliminated ++;
1728
1729       /* If t is the same as curr->op, we're done.  Otherwise we must
1730          replace curr->op with t.  Special case is if we got a constant
1731          back, in which case we add it to the end instead of in place of
1732          the current entry.  */
1733       if (TREE_CODE (t) == INTEGER_CST)
1734         {
1735           ops->ordered_remove (currindex);
1736           add_to_ops_vec (ops, t);
1737         }
1738       else if (!operand_equal_p (t, curr->op, 0))
1739         {
1740           gimple *sum;
1741           enum tree_code subcode;
1742           tree newop1;
1743           tree newop2;
1744           gcc_assert (COMPARISON_CLASS_P (t));
1745           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1746           STRIP_USELESS_TYPE_CONVERSION (newop1);
1747           STRIP_USELESS_TYPE_CONVERSION (newop2);
1748           gcc_checking_assert (is_gimple_val (newop1)
1749                                && is_gimple_val (newop2));
1750           sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1751           curr->op = gimple_get_lhs (sum);
1752         }
1753       return true;
1754     }
1755
1756   return false;
1757 }
1758
1759 /* Transform repeated addition of same values into multiply with
1760    constant.  */
1761 static bool
1762 transform_add_to_multiply (gimple *stmt, vec<operand_entry *> *ops)
1763 {
1764   operand_entry *oe;
1765   tree op = NULL_TREE;
1766   int j;
1767   int i, start = -1, end = 0, count = 0;
1768   vec<std::pair <int, int> > indxs = vNULL;
1769   bool changed = false;
1770
1771   if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1772       && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1773           || !flag_unsafe_math_optimizations))
1774     return false;
1775
1776   /* Look for repeated operands.  */
1777   FOR_EACH_VEC_ELT (*ops, i, oe)
1778     {
1779       if (start == -1)
1780         {
1781           count = 1;
1782           op = oe->op;
1783           start = i;
1784         }
1785       else if (operand_equal_p (oe->op, op, 0))
1786         {
1787           count++;
1788           end = i;
1789         }
1790       else
1791         {
1792           if (count > 1)
1793             indxs.safe_push (std::make_pair (start, end));
1794           count = 1;
1795           op = oe->op;
1796           start = i;
1797         }
1798     }
1799
1800   if (count > 1)
1801     indxs.safe_push (std::make_pair (start, end));
1802
1803   for (j = indxs.length () - 1; j >= 0; --j)
1804     {
1805       /* Convert repeated operand addition to multiplication.  */
1806       start = indxs[j].first;
1807       end = indxs[j].second;
1808       op = (*ops)[start]->op;
1809       count = end - start + 1;
1810       for (i = end; i >= start; --i)
1811         ops->unordered_remove (i);
1812       tree tmp = make_ssa_name (TREE_TYPE (op));
1813       tree cst = build_int_cst (integer_type_node, count);
1814       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
1815       gassign *mul_stmt
1816         = gimple_build_assign (tmp, MULT_EXPR,
1817                                op, fold_convert (TREE_TYPE (op), cst));
1818       if (gimple_code (def_stmt) == GIMPLE_NOP
1819           || gimple_bb (stmt) != gimple_bb (def_stmt))
1820         {
1821           gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1822           gimple_set_uid (mul_stmt, gimple_uid (stmt));
1823           gsi_insert_before (&gsi, mul_stmt, GSI_NEW_STMT);
1824         }
1825       else
1826         insert_stmt_after (mul_stmt, def_stmt);
1827       gimple_set_visited (mul_stmt, true);
1828       add_to_ops_vec (ops, tmp);
1829       changed = true;
1830     }
1831
1832   return changed;
1833 }
1834
1835
1836 /* Perform various identities and other optimizations on the list of
1837    operand entries, stored in OPS.  The tree code for the binary
1838    operation between all the operands is OPCODE.  */
1839
1840 static void
1841 optimize_ops_list (enum tree_code opcode,
1842                    vec<operand_entry *> *ops)
1843 {
1844   unsigned int length = ops->length ();
1845   unsigned int i;
1846   operand_entry *oe;
1847   operand_entry *oelast = NULL;
1848   bool iterate = false;
1849
1850   if (length == 1)
1851     return;
1852
1853   oelast = ops->last ();
1854
1855   /* If the last two are constants, pop the constants off, merge them
1856      and try the next two.  */
1857   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1858     {
1859       operand_entry *oelm1 = (*ops)[length - 2];
1860
1861       if (oelm1->rank == 0
1862           && is_gimple_min_invariant (oelm1->op)
1863           && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1864                                        TREE_TYPE (oelast->op)))
1865         {
1866           tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1867                                      oelm1->op, oelast->op);
1868
1869           if (folded && is_gimple_min_invariant (folded))
1870             {
1871               if (dump_file && (dump_flags & TDF_DETAILS))
1872                 fprintf (dump_file, "Merging constants\n");
1873
1874               ops->pop ();
1875               ops->pop ();
1876
1877               add_to_ops_vec (ops, folded);
1878               reassociate_stats.constants_eliminated++;
1879
1880               optimize_ops_list (opcode, ops);
1881               return;
1882             }
1883         }
1884     }
1885
1886   eliminate_using_constants (opcode, ops);
1887   oelast = NULL;
1888
1889   for (i = 0; ops->iterate (i, &oe);)
1890     {
1891       bool done = false;
1892
1893       if (eliminate_not_pairs (opcode, ops, i, oe))
1894         return;
1895       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1896           || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1897           || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1898         {
1899           if (done)
1900             return;
1901           iterate = true;
1902           oelast = NULL;
1903           continue;
1904         }
1905       oelast = oe;
1906       i++;
1907     }
1908
1909   length = ops->length ();
1910   oelast = ops->last ();
1911
1912   if (iterate)
1913     optimize_ops_list (opcode, ops);
1914 }
1915
1916 /* The following functions are subroutines to optimize_range_tests and allow
1917    it to try to change a logical combination of comparisons into a range
1918    test.
1919
1920    For example, both
1921         X == 2 || X == 5 || X == 3 || X == 4
1922    and
1923         X >= 2 && X <= 5
1924    are converted to
1925         (unsigned) (X - 2) <= 3
1926
1927    For more information see comments above fold_test_range in fold-const.c,
1928    this implementation is for GIMPLE.  */
1929
1930 struct range_entry
1931 {
1932   tree exp;
1933   tree low;
1934   tree high;
1935   bool in_p;
1936   bool strict_overflow_p;
1937   unsigned int idx, next;
1938 };
1939
1940 /* This is similar to make_range in fold-const.c, but on top of
1941    GIMPLE instead of trees.  If EXP is non-NULL, it should be
1942    an SSA_NAME and STMT argument is ignored, otherwise STMT
1943    argument should be a GIMPLE_COND.  */
1944
1945 static void
1946 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
1947 {
1948   int in_p;
1949   tree low, high;
1950   bool is_bool, strict_overflow_p;
1951
1952   r->exp = NULL_TREE;
1953   r->in_p = false;
1954   r->strict_overflow_p = false;
1955   r->low = NULL_TREE;
1956   r->high = NULL_TREE;
1957   if (exp != NULL_TREE
1958       && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1959     return;
1960
1961   /* Start with simply saying "EXP != 0" and then look at the code of EXP
1962      and see if we can refine the range.  Some of the cases below may not
1963      happen, but it doesn't seem worth worrying about this.  We "continue"
1964      the outer loop when we've changed something; otherwise we "break"
1965      the switch, which will "break" the while.  */
1966   low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1967   high = low;
1968   in_p = 0;
1969   strict_overflow_p = false;
1970   is_bool = false;
1971   if (exp == NULL_TREE)
1972     is_bool = true;
1973   else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1974     {
1975       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1976         is_bool = true;
1977       else
1978         return;
1979     }
1980   else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1981     is_bool = true;
1982
1983   while (1)
1984     {
1985       enum tree_code code;
1986       tree arg0, arg1, exp_type;
1987       tree nexp;
1988       location_t loc;
1989
1990       if (exp != NULL_TREE)
1991         {
1992           if (TREE_CODE (exp) != SSA_NAME
1993               || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1994             break;
1995
1996           stmt = SSA_NAME_DEF_STMT (exp);
1997           if (!is_gimple_assign (stmt))
1998             break;
1999
2000           code = gimple_assign_rhs_code (stmt);
2001           arg0 = gimple_assign_rhs1 (stmt);
2002           arg1 = gimple_assign_rhs2 (stmt);
2003           exp_type = TREE_TYPE (exp);
2004         }
2005       else
2006         {
2007           code = gimple_cond_code (stmt);
2008           arg0 = gimple_cond_lhs (stmt);
2009           arg1 = gimple_cond_rhs (stmt);
2010           exp_type = boolean_type_node;
2011         }
2012
2013       if (TREE_CODE (arg0) != SSA_NAME)
2014         break;
2015       loc = gimple_location (stmt);
2016       switch (code)
2017         {
2018         case BIT_NOT_EXPR:
2019           if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2020               /* Ensure the range is either +[-,0], +[0,0],
2021                  -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2022                  -[1,1].  If it is e.g. +[-,-] or -[-,-]
2023                  or similar expression of unconditional true or
2024                  false, it should not be negated.  */
2025               && ((high && integer_zerop (high))
2026                   || (low && integer_onep (low))))
2027             {
2028               in_p = !in_p;
2029               exp = arg0;
2030               continue;
2031             }
2032           break;
2033         case SSA_NAME:
2034           exp = arg0;
2035           continue;
2036         CASE_CONVERT:
2037           if (is_bool)
2038             goto do_default;
2039           if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2040             {
2041               if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2042                 is_bool = true;
2043               else
2044                 return;
2045             }
2046           else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2047             is_bool = true;
2048           goto do_default;
2049         case EQ_EXPR:
2050         case NE_EXPR:
2051         case LT_EXPR:
2052         case LE_EXPR:
2053         case GE_EXPR:
2054         case GT_EXPR:
2055           is_bool = true;
2056           /* FALLTHRU */
2057         default:
2058           if (!is_bool)
2059             return;
2060         do_default:
2061           nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2062                                   &low, &high, &in_p,
2063                                   &strict_overflow_p);
2064           if (nexp != NULL_TREE)
2065             {
2066               exp = nexp;
2067               gcc_assert (TREE_CODE (exp) == SSA_NAME);
2068               continue;
2069             }
2070           break;
2071         }
2072       break;
2073     }
2074   if (is_bool)
2075     {
2076       r->exp = exp;
2077       r->in_p = in_p;
2078       r->low = low;
2079       r->high = high;
2080       r->strict_overflow_p = strict_overflow_p;
2081     }
2082 }
2083
2084 /* Comparison function for qsort.  Sort entries
2085    without SSA_NAME exp first, then with SSA_NAMEs sorted
2086    by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2087    by increasing ->low and if ->low is the same, by increasing
2088    ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
2089    maximum.  */
2090
2091 static int
2092 range_entry_cmp (const void *a, const void *b)
2093 {
2094   const struct range_entry *p = (const struct range_entry *) a;
2095   const struct range_entry *q = (const struct range_entry *) b;
2096
2097   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2098     {
2099       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2100         {
2101           /* Group range_entries for the same SSA_NAME together.  */
2102           if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2103             return -1;
2104           else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2105             return 1;
2106           /* If ->low is different, NULL low goes first, then by
2107              ascending low.  */
2108           if (p->low != NULL_TREE)
2109             {
2110               if (q->low != NULL_TREE)
2111                 {
2112                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
2113                                           p->low, q->low);
2114                   if (tem && integer_onep (tem))
2115                     return -1;
2116                   tem = fold_binary (GT_EXPR, boolean_type_node,
2117                                      p->low, q->low);
2118                   if (tem && integer_onep (tem))
2119                     return 1;
2120                 }
2121               else
2122                 return 1;
2123             }
2124           else if (q->low != NULL_TREE)
2125             return -1;
2126           /* If ->high is different, NULL high goes last, before that by
2127              ascending high.  */
2128           if (p->high != NULL_TREE)
2129             {
2130               if (q->high != NULL_TREE)
2131                 {
2132                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
2133                                           p->high, q->high);
2134                   if (tem && integer_onep (tem))
2135                     return -1;
2136                   tem = fold_binary (GT_EXPR, boolean_type_node,
2137                                      p->high, q->high);
2138                   if (tem && integer_onep (tem))
2139                     return 1;
2140                 }
2141               else
2142                 return -1;
2143             }
2144           else if (q->high != NULL_TREE)
2145             return 1;
2146           /* If both ranges are the same, sort below by ascending idx.  */
2147         }
2148       else
2149         return 1;
2150     }
2151   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2152     return -1;
2153
2154   if (p->idx < q->idx)
2155     return -1;
2156   else
2157     {
2158       gcc_checking_assert (p->idx > q->idx);
2159       return 1;
2160     }
2161 }
2162
2163 /* Helper routine of optimize_range_test.
2164    [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2165    RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2166    OPCODE and OPS are arguments of optimize_range_tests.  If OTHERRANGE
2167    is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2168    an array of COUNT pointers to other ranges.  Return
2169    true if the range merge has been successful.
2170    If OPCODE is ERROR_MARK, this is called from within
2171    maybe_optimize_range_tests and is performing inter-bb range optimization.
2172    In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2173    oe->rank.  */
2174
2175 static bool
2176 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2177                    struct range_entry **otherrangep,
2178                    unsigned int count, enum tree_code opcode,
2179                    vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2180                    bool in_p, tree low, tree high, bool strict_overflow_p)
2181 {
2182   operand_entry *oe = (*ops)[range->idx];
2183   tree op = oe->op;
2184   gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2185                     : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2186   location_t loc = gimple_location (stmt);
2187   tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2188   tree tem = build_range_check (loc, optype, unshare_expr (exp),
2189                                 in_p, low, high);
2190   enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2191   gimple_stmt_iterator gsi;
2192   unsigned int i, uid;
2193
2194   if (tem == NULL_TREE)
2195     return false;
2196
2197   /* If op is default def SSA_NAME, there is no place to insert the
2198      new comparison.  Give up, unless we can use OP itself as the
2199      range test.  */
2200   if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2201     {
2202       if (op == range->exp
2203           && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2204               || TREE_CODE (optype) == BOOLEAN_TYPE)
2205           && (op == tem
2206               || (TREE_CODE (tem) == EQ_EXPR
2207                   && TREE_OPERAND (tem, 0) == op
2208                   && integer_onep (TREE_OPERAND (tem, 1))))
2209           && opcode != BIT_IOR_EXPR
2210           && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2211         {
2212           stmt = NULL;
2213           tem = op;
2214         }
2215       else
2216         return false;
2217     }
2218
2219   if (strict_overflow_p && issue_strict_overflow_warning (wc))
2220     warning_at (loc, OPT_Wstrict_overflow,
2221                 "assuming signed overflow does not occur "
2222                 "when simplifying range test");
2223
2224   if (dump_file && (dump_flags & TDF_DETAILS))
2225     {
2226       struct range_entry *r;
2227       fprintf (dump_file, "Optimizing range tests ");
2228       print_generic_expr (dump_file, range->exp, 0);
2229       fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2230       print_generic_expr (dump_file, range->low, 0);
2231       fprintf (dump_file, ", ");
2232       print_generic_expr (dump_file, range->high, 0);
2233       fprintf (dump_file, "]");
2234       for (i = 0; i < count; i++)
2235         {
2236           if (otherrange)
2237             r = otherrange + i;
2238           else
2239             r = otherrangep[i];
2240           fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2241           print_generic_expr (dump_file, r->low, 0);
2242           fprintf (dump_file, ", ");
2243           print_generic_expr (dump_file, r->high, 0);
2244           fprintf (dump_file, "]");
2245         }
2246       fprintf (dump_file, "\n into ");
2247       print_generic_expr (dump_file, tem, 0);
2248       fprintf (dump_file, "\n");
2249     }
2250
2251   if (opcode == BIT_IOR_EXPR
2252       || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2253     tem = invert_truthvalue_loc (loc, tem);
2254
2255   tem = fold_convert_loc (loc, optype, tem);
2256   if (stmt)
2257     {
2258       gsi = gsi_for_stmt (stmt);
2259       uid = gimple_uid (stmt);
2260     }
2261   else
2262     {
2263       gsi = gsi_none ();
2264       uid = 0;
2265     }
2266   if (stmt == NULL)
2267     gcc_checking_assert (tem == op);
2268   /* In rare cases range->exp can be equal to lhs of stmt.
2269      In that case we have to insert after the stmt rather then before
2270      it.  If stmt is a PHI, insert it at the start of the basic block.  */
2271   else if (op != range->exp)
2272     {
2273       gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2274       tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2275                                       GSI_SAME_STMT);
2276       gsi_prev (&gsi);
2277     }
2278   else if (gimple_code (stmt) != GIMPLE_PHI)
2279     {
2280       gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2281       tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2282                                       GSI_CONTINUE_LINKING);
2283     }
2284   else
2285     {
2286       gsi = gsi_after_labels (gimple_bb (stmt));
2287       if (!gsi_end_p (gsi))
2288         uid = gimple_uid (gsi_stmt (gsi));
2289       else
2290         {
2291           gsi = gsi_start_bb (gimple_bb (stmt));
2292           uid = 1;
2293           while (!gsi_end_p (gsi))
2294             {
2295               uid = gimple_uid (gsi_stmt (gsi));
2296               gsi_next (&gsi);
2297             }
2298         }
2299       gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2300       tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2301                                       GSI_SAME_STMT);
2302       if (gsi_end_p (gsi))
2303         gsi = gsi_last_bb (gimple_bb (stmt));
2304       else
2305         gsi_prev (&gsi);
2306     }
2307   for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2308     if (gimple_uid (gsi_stmt (gsi)))
2309       break;
2310     else
2311       gimple_set_uid (gsi_stmt (gsi), uid);
2312
2313   oe->op = tem;
2314   range->exp = exp;
2315   range->low = low;
2316   range->high = high;
2317   range->in_p = in_p;
2318   range->strict_overflow_p = false;
2319
2320   for (i = 0; i < count; i++)
2321     {
2322       if (otherrange)
2323         range = otherrange + i;
2324       else
2325         range = otherrangep[i];
2326       oe = (*ops)[range->idx];
2327       /* Now change all the other range test immediate uses, so that
2328          those tests will be optimized away.  */
2329       if (opcode == ERROR_MARK)
2330         {
2331           if (oe->op)
2332             oe->op = build_int_cst (TREE_TYPE (oe->op),
2333                                     oe->rank == BIT_IOR_EXPR ? 0 : 1);
2334           else
2335             oe->op = (oe->rank == BIT_IOR_EXPR
2336                       ? boolean_false_node : boolean_true_node);
2337         }
2338       else
2339         oe->op = error_mark_node;
2340       range->exp = NULL_TREE;
2341     }
2342   return true;
2343 }
2344
2345 /* Optimize X == CST1 || X == CST2
2346    if popcount (CST1 ^ CST2) == 1 into
2347    (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2348    Similarly for ranges.  E.g.
2349    X != 2 && X != 3 && X != 10 && X != 11
2350    will be transformed by the previous optimization into
2351    !((X - 2U) <= 1U || (X - 10U) <= 1U)
2352    and this loop can transform that into
2353    !(((X & ~8) - 2U) <= 1U).  */
2354
2355 static bool
2356 optimize_range_tests_xor (enum tree_code opcode, tree type,
2357                           tree lowi, tree lowj, tree highi, tree highj,
2358                           vec<operand_entry *> *ops,
2359                           struct range_entry *rangei,
2360                           struct range_entry *rangej)
2361 {
2362   tree lowxor, highxor, tem, exp;
2363   /* Check lowi ^ lowj == highi ^ highj and
2364      popcount (lowi ^ lowj) == 1.  */
2365   lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2366   if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2367     return false;
2368   if (!integer_pow2p (lowxor))
2369     return false;
2370   highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2371   if (!tree_int_cst_equal (lowxor, highxor))
2372     return false;
2373
2374   tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2375   exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2376   lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2377   highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2378   if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2379                          NULL, rangei->in_p, lowj, highj,
2380                          rangei->strict_overflow_p
2381                          || rangej->strict_overflow_p))
2382     return true;
2383   return false;
2384 }
2385
2386 /* Optimize X == CST1 || X == CST2
2387    if popcount (CST2 - CST1) == 1 into
2388    ((X - CST1) & ~(CST2 - CST1)) == 0.
2389    Similarly for ranges.  E.g.
2390    X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2391    || X == 75 || X == 45
2392    will be transformed by the previous optimization into
2393    (X - 43U) <= 3U || (X - 75U) <= 3U
2394    and this loop can transform that into
2395    ((X - 43U) & ~(75U - 43U)) <= 3U.  */
2396 static bool
2397 optimize_range_tests_diff (enum tree_code opcode, tree type,
2398                             tree lowi, tree lowj, tree highi, tree highj,
2399                             vec<operand_entry *> *ops,
2400                             struct range_entry *rangei,
2401                             struct range_entry *rangej)
2402 {
2403   tree tem1, tem2, mask;
2404   /* Check highi - lowi == highj - lowj.  */
2405   tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2406   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2407     return false;
2408   tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2409   if (!tree_int_cst_equal (tem1, tem2))
2410     return false;
2411   /* Check popcount (lowj - lowi) == 1.  */
2412   tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2413   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2414     return false;
2415   if (!integer_pow2p (tem1))
2416     return false;
2417
2418   type = unsigned_type_for (type);
2419   tem1 = fold_convert (type, tem1);
2420   tem2 = fold_convert (type, tem2);
2421   lowi = fold_convert (type, lowi);
2422   mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2423   tem1 = fold_binary (MINUS_EXPR, type,
2424                       fold_convert (type, rangei->exp), lowi);
2425   tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2426   lowj = build_int_cst (type, 0);
2427   if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2428                          NULL, rangei->in_p, lowj, tem2,
2429                          rangei->strict_overflow_p
2430                          || rangej->strict_overflow_p))
2431     return true;
2432   return false;
2433 }
2434
2435 /* It does some common checks for function optimize_range_tests_xor and
2436    optimize_range_tests_diff.
2437    If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2438    Else it calls optimize_range_tests_diff.  */
2439
2440 static bool
2441 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2442                         bool optimize_xor, vec<operand_entry *> *ops,
2443                         struct range_entry *ranges)
2444 {
2445   int i, j;
2446   bool any_changes = false;
2447   for (i = first; i < length; i++)
2448     {
2449       tree lowi, highi, lowj, highj, type, tem;
2450
2451       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2452         continue;
2453       type = TREE_TYPE (ranges[i].exp);
2454       if (!INTEGRAL_TYPE_P (type))
2455         continue;
2456       lowi = ranges[i].low;
2457       if (lowi == NULL_TREE)
2458         lowi = TYPE_MIN_VALUE (type);
2459       highi = ranges[i].high;
2460       if (highi == NULL_TREE)
2461         continue;
2462       for (j = i + 1; j < length && j < i + 64; j++)
2463         {
2464           bool changes;
2465           if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2466             continue;
2467           lowj = ranges[j].low;
2468           if (lowj == NULL_TREE)
2469             continue;
2470           highj = ranges[j].high;
2471           if (highj == NULL_TREE)
2472             highj = TYPE_MAX_VALUE (type);
2473           /* Check lowj > highi.  */
2474           tem = fold_binary (GT_EXPR, boolean_type_node,
2475                              lowj, highi);
2476           if (tem == NULL_TREE || !integer_onep (tem))
2477             continue;
2478           if (optimize_xor)
2479             changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2480                                                 highi, highj, ops,
2481                                                 ranges + i, ranges + j);
2482           else
2483             changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2484                                                  highi, highj, ops,
2485                                                  ranges + i, ranges + j);
2486           if (changes)
2487             {
2488               any_changes = true;
2489               break;
2490             }
2491         }
2492     }
2493   return any_changes;
2494 }
2495
2496 /* Helper function of optimize_range_tests_to_bit_test.  Handle a single
2497    range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2498    EXP on success, NULL otherwise.  */
2499
2500 static tree
2501 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2502                        wide_int *mask, tree *totallowp)
2503 {
2504   tree tem = int_const_binop (MINUS_EXPR, high, low);
2505   if (tem == NULL_TREE
2506       || TREE_CODE (tem) != INTEGER_CST
2507       || TREE_OVERFLOW (tem)
2508       || tree_int_cst_sgn (tem) == -1
2509       || compare_tree_int (tem, prec) != -1)
2510     return NULL_TREE;
2511
2512   unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2513   *mask = wi::shifted_mask (0, max, false, prec);
2514   if (TREE_CODE (exp) == BIT_AND_EXPR
2515       && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2516     {
2517       widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2518       msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2519       if (wi::popcount (msk) == 1
2520           && wi::ltu_p (msk, prec - max))
2521         {
2522           *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2523           max += msk.to_uhwi ();
2524           exp = TREE_OPERAND (exp, 0);
2525           if (integer_zerop (low)
2526               && TREE_CODE (exp) == PLUS_EXPR
2527               && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2528             {
2529               tree ret = TREE_OPERAND (exp, 0);
2530               STRIP_NOPS (ret);
2531               widest_int bias
2532                 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2533                                      TYPE_PRECISION (TREE_TYPE (low))));
2534               tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2535               if (totallowp)
2536                 {
2537                   *totallowp = tbias;
2538                   return ret;
2539                 }
2540               else if (!tree_int_cst_lt (totallow, tbias))
2541                 return NULL_TREE;
2542               bias = wi::to_widest (tbias);
2543               bias -= wi::to_widest (totallow);
2544               if (bias >= 0 && bias < prec - max)
2545                 {
2546                   *mask = wi::lshift (*mask, bias);
2547                   return ret;
2548                 }
2549             }
2550         }
2551     }
2552   if (totallowp)
2553     return exp;
2554   if (!tree_int_cst_lt (totallow, low))
2555     return exp;
2556   tem = int_const_binop (MINUS_EXPR, low, totallow);
2557   if (tem == NULL_TREE
2558       || TREE_CODE (tem) != INTEGER_CST
2559       || TREE_OVERFLOW (tem)
2560       || compare_tree_int (tem, prec - max) == 1)
2561     return NULL_TREE;
2562
2563   *mask = wi::lshift (*mask, wi::to_widest (tem));
2564   return exp;
2565 }
2566
2567 /* Attempt to optimize small range tests using bit test.
2568    E.g.
2569    X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2570    && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2571    has been by earlier optimizations optimized into:
2572    ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2573    As all the 43 through 82 range is less than 64 numbers,
2574    for 64-bit word targets optimize that into:
2575    (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0  */
2576
2577 static bool
2578 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2579                                   vec<operand_entry *> *ops,
2580                                   struct range_entry *ranges)
2581 {
2582   int i, j;
2583   bool any_changes = false;
2584   int prec = GET_MODE_BITSIZE (word_mode);
2585   auto_vec<struct range_entry *, 64> candidates;
2586
2587   for (i = first; i < length - 2; i++)
2588     {
2589       tree lowi, highi, lowj, highj, type;
2590
2591       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2592         continue;
2593       type = TREE_TYPE (ranges[i].exp);
2594       if (!INTEGRAL_TYPE_P (type))
2595         continue;
2596       lowi = ranges[i].low;
2597       if (lowi == NULL_TREE)
2598         lowi = TYPE_MIN_VALUE (type);
2599       highi = ranges[i].high;
2600       if (highi == NULL_TREE)
2601         continue;
2602       wide_int mask;
2603       tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2604                                         highi, &mask, &lowi);
2605       if (exp == NULL_TREE)
2606         continue;
2607       bool strict_overflow_p = ranges[i].strict_overflow_p;
2608       candidates.truncate (0);
2609       int end = MIN (i + 64, length);
2610       for (j = i + 1; j < end; j++)
2611         {
2612           tree exp2;
2613           if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2614             continue;
2615           if (ranges[j].exp == exp)
2616             ;
2617           else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2618             {
2619               exp2 = TREE_OPERAND (ranges[j].exp, 0);
2620               if (exp2 == exp)
2621                 ;
2622               else if (TREE_CODE (exp2) == PLUS_EXPR)
2623                 {
2624                   exp2 = TREE_OPERAND (exp2, 0);
2625                   STRIP_NOPS (exp2);
2626                   if (exp2 != exp)
2627                     continue;
2628                 }
2629               else
2630                 continue;
2631             }
2632           else
2633             continue;
2634           lowj = ranges[j].low;
2635           if (lowj == NULL_TREE)
2636             continue;
2637           highj = ranges[j].high;
2638           if (highj == NULL_TREE)
2639             highj = TYPE_MAX_VALUE (type);
2640           wide_int mask2;
2641           exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2642                                         highj, &mask2, NULL);
2643           if (exp2 != exp)
2644             continue;
2645           mask |= mask2;
2646           strict_overflow_p |= ranges[j].strict_overflow_p;
2647           candidates.safe_push (&ranges[j]);
2648         }
2649
2650       /* If we need otherwise 3 or more comparisons, use a bit test.  */
2651       if (candidates.length () >= 2)
2652         {
2653           tree high = wide_int_to_tree (TREE_TYPE (lowi),
2654                                         wi::to_widest (lowi)
2655                                         + prec - 1 - wi::clz (mask));
2656           operand_entry *oe = (*ops)[ranges[i].idx];
2657           tree op = oe->op;
2658           gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2659                             : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2660           location_t loc = gimple_location (stmt);
2661           tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2662
2663           /* See if it isn't cheaper to pretend the minimum value of the
2664              range is 0, if maximum value is small enough.
2665              We can avoid then subtraction of the minimum value, but the
2666              mask constant could be perhaps more expensive.  */
2667           if (compare_tree_int (lowi, 0) > 0
2668               && compare_tree_int (high, prec) < 0)
2669             {
2670               int cost_diff;
2671               HOST_WIDE_INT m = tree_to_uhwi (lowi);
2672               rtx reg = gen_raw_REG (word_mode, 10000);
2673               bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2674               cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2675                                                       GEN_INT (-m)), speed_p);
2676               rtx r = immed_wide_int_const (mask, word_mode);
2677               cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2678                                          word_mode, speed_p);
2679               r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2680               cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2681                                          word_mode, speed_p);
2682               if (cost_diff > 0)
2683                 {
2684                   mask = wi::lshift (mask, m);
2685                   lowi = build_zero_cst (TREE_TYPE (lowi));
2686                 }
2687             }
2688
2689           tree tem = build_range_check (loc, optype, unshare_expr (exp),
2690                                         false, lowi, high);
2691           if (tem == NULL_TREE || is_gimple_val (tem))
2692             continue;
2693           tree etype = unsigned_type_for (TREE_TYPE (exp));
2694           exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2695                                  fold_convert_loc (loc, etype, exp),
2696                                  fold_convert_loc (loc, etype, lowi));
2697           exp = fold_convert_loc (loc, integer_type_node, exp);
2698           tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2699           exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2700                                  build_int_cst (word_type, 1), exp);
2701           exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2702                                  wide_int_to_tree (word_type, mask));
2703           exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2704                                  build_zero_cst (word_type));
2705           if (is_gimple_val (exp))
2706             continue;
2707
2708           /* The shift might have undefined behavior if TEM is true,
2709              but reassociate_bb isn't prepared to have basic blocks
2710              split when it is running.  So, temporarily emit a code
2711              with BIT_IOR_EXPR instead of &&, and fix it up in
2712              branch_fixup.  */
2713           gimple_seq seq;
2714           tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2715           gcc_assert (TREE_CODE (tem) == SSA_NAME);
2716           gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2717           gimple_seq seq2;
2718           exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2719           gimple_seq_add_seq_without_update (&seq, seq2);
2720           gcc_assert (TREE_CODE (exp) == SSA_NAME);
2721           gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2722           gimple *g = gimple_build_assign (make_ssa_name (optype),
2723                                            BIT_IOR_EXPR, tem, exp);
2724           gimple_set_location (g, loc);
2725           gimple_seq_add_stmt_without_update (&seq, g);
2726           exp = gimple_assign_lhs (g);
2727           tree val = build_zero_cst (optype);
2728           if (update_range_test (&ranges[i], NULL, candidates.address (),
2729                                  candidates.length (), opcode, ops, exp,
2730                                  seq, false, val, val, strict_overflow_p))
2731             {
2732               any_changes = true;
2733               reassoc_branch_fixups.safe_push (tem);
2734             }
2735           else
2736             gimple_seq_discard (seq);
2737         }
2738     }
2739   return any_changes;
2740 }
2741
2742 /* Optimize range tests, similarly how fold_range_test optimizes
2743    it on trees.  The tree code for the binary
2744    operation between all the operands is OPCODE.
2745    If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2746    maybe_optimize_range_tests for inter-bb range optimization.
2747    In that case if oe->op is NULL, oe->id is bb->index whose
2748    GIMPLE_COND is && or ||ed into the test, and oe->rank says
2749    the actual opcode.  */
2750
2751 static bool
2752 optimize_range_tests (enum tree_code opcode,
2753                       vec<operand_entry *> *ops)
2754 {
2755   unsigned int length = ops->length (), i, j, first;
2756   operand_entry *oe;
2757   struct range_entry *ranges;
2758   bool any_changes = false;
2759
2760   if (length == 1)
2761     return false;
2762
2763   ranges = XNEWVEC (struct range_entry, length);
2764   for (i = 0; i < length; i++)
2765     {
2766       oe = (*ops)[i];
2767       ranges[i].idx = i;
2768       init_range_entry (ranges + i, oe->op,
2769                         oe->op
2770                         ? NULL
2771                         : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2772       /* For | invert it now, we will invert it again before emitting
2773          the optimized expression.  */
2774       if (opcode == BIT_IOR_EXPR
2775           || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2776         ranges[i].in_p = !ranges[i].in_p;
2777     }
2778
2779   qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2780   for (i = 0; i < length; i++)
2781     if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2782       break;
2783
2784   /* Try to merge ranges.  */
2785   for (first = i; i < length; i++)
2786     {
2787       tree low = ranges[i].low;
2788       tree high = ranges[i].high;
2789       int in_p = ranges[i].in_p;
2790       bool strict_overflow_p = ranges[i].strict_overflow_p;
2791       int update_fail_count = 0;
2792
2793       for (j = i + 1; j < length; j++)
2794         {
2795           if (ranges[i].exp != ranges[j].exp)
2796             break;
2797           if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2798                              ranges[j].in_p, ranges[j].low, ranges[j].high))
2799             break;
2800           strict_overflow_p |= ranges[j].strict_overflow_p;
2801         }
2802
2803       if (j == i + 1)
2804         continue;
2805
2806       if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2807                              opcode, ops, ranges[i].exp, NULL, in_p,
2808                              low, high, strict_overflow_p))
2809         {
2810           i = j - 1;
2811           any_changes = true;
2812         }
2813       /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2814          while update_range_test would fail.  */
2815       else if (update_fail_count == 64)
2816         i = j - 1;
2817       else
2818         ++update_fail_count;
2819     }
2820
2821   any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2822                                          ops, ranges);
2823
2824   if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2825     any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2826                                            ops, ranges);
2827   if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2828     any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2829                                                      ops, ranges);
2830
2831   if (any_changes && opcode != ERROR_MARK)
2832     {
2833       j = 0;
2834       FOR_EACH_VEC_ELT (*ops, i, oe)
2835         {
2836           if (oe->op == error_mark_node)
2837             continue;
2838           else if (i != j)
2839             (*ops)[j] = oe;
2840           j++;
2841         }
2842       ops->truncate (j);
2843     }
2844
2845   XDELETEVEC (ranges);
2846   return any_changes;
2847 }
2848
2849 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
2850    the operands of the VEC_COND_EXPR.  Returns ERROR_MARK on failure,
2851    otherwise the comparison code.  */
2852
2853 static tree_code
2854 ovce_extract_ops (tree var, gassign **rets, bool *reti)
2855 {
2856   if (TREE_CODE (var) != SSA_NAME)
2857     return ERROR_MARK;
2858
2859   gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
2860   if (stmt == NULL)
2861     return ERROR_MARK;
2862
2863   /* ??? If we start creating more COND_EXPR, we could perform
2864      this same optimization with them.  For now, simplify.  */
2865   if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
2866     return ERROR_MARK;
2867
2868   tree cond = gimple_assign_rhs1 (stmt);
2869   tree_code cmp = TREE_CODE (cond);
2870   if (TREE_CODE_CLASS (cmp) != tcc_comparison)
2871     return ERROR_MARK;
2872
2873   /* ??? For now, allow only canonical true and false result vectors.
2874      We could expand this to other constants should the need arise,
2875      but at the moment we don't create them.  */
2876   tree t = gimple_assign_rhs2 (stmt);
2877   tree f = gimple_assign_rhs3 (stmt);
2878   bool inv;
2879   if (integer_all_onesp (t))
2880     inv = false;
2881   else if (integer_all_onesp (f))
2882     {
2883       cmp = invert_tree_comparison (cmp, false);
2884       inv = true;
2885     }
2886   else
2887     return ERROR_MARK;
2888   if (!integer_zerop (f))
2889     return ERROR_MARK;
2890
2891   /* Success!  */
2892   if (rets)
2893     *rets = stmt;
2894   if (reti)
2895     *reti = inv;
2896   return cmp;
2897 }
2898
2899 /* Optimize the condition of VEC_COND_EXPRs which have been combined
2900    with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR).  */
2901
2902 static bool
2903 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
2904 {
2905   unsigned int length = ops->length (), i, j;
2906   bool any_changes = false;
2907
2908   if (length == 1)
2909     return false;
2910
2911   for (i = 0; i < length; ++i)
2912     {
2913       tree elt0 = (*ops)[i]->op;
2914
2915       gassign *stmt0;
2916       bool invert;
2917       tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
2918       if (cmp0 == ERROR_MARK)
2919         continue;
2920
2921       for (j = i + 1; j < length; ++j)
2922         {
2923           tree &elt1 = (*ops)[j]->op;
2924
2925           gassign *stmt1;
2926           tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
2927           if (cmp1 == ERROR_MARK)
2928             continue;
2929
2930           tree cond0 = gimple_assign_rhs1 (stmt0);
2931           tree x0 = TREE_OPERAND (cond0, 0);
2932           tree y0 = TREE_OPERAND (cond0, 1);
2933
2934           tree cond1 = gimple_assign_rhs1 (stmt1);
2935           tree x1 = TREE_OPERAND (cond1, 0);
2936           tree y1 = TREE_OPERAND (cond1, 1);
2937
2938           tree comb;
2939           if (opcode == BIT_AND_EXPR)
2940             comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
2941           else if (opcode == BIT_IOR_EXPR)
2942             comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
2943           else
2944             gcc_unreachable ();
2945           if (comb == NULL)
2946             continue;
2947
2948           /* Success! */
2949           if (dump_file && (dump_flags & TDF_DETAILS))
2950             {
2951               fprintf (dump_file, "Transforming ");
2952               print_generic_expr (dump_file, cond0, 0);
2953               fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
2954               print_generic_expr (dump_file, cond1, 0);
2955               fprintf (dump_file, " into ");
2956               print_generic_expr (dump_file, comb, 0);
2957               fputc ('\n', dump_file);
2958             }
2959
2960           gimple_assign_set_rhs1 (stmt0, comb);
2961           if (invert)
2962             std::swap (*gimple_assign_rhs2_ptr (stmt0),
2963                        *gimple_assign_rhs3_ptr (stmt0));
2964           update_stmt (stmt0);
2965
2966           elt1 = error_mark_node;
2967           any_changes = true;
2968         }
2969     }
2970
2971   if (any_changes)
2972     {
2973       operand_entry *oe;
2974       j = 0;
2975       FOR_EACH_VEC_ELT (*ops, i, oe)
2976         {
2977           if (oe->op == error_mark_node)
2978             continue;
2979           else if (i != j)
2980             (*ops)[j] = oe;
2981           j++;
2982         }
2983       ops->truncate (j);
2984     }
2985
2986   return any_changes;
2987 }
2988
2989 /* Return true if STMT is a cast like:
2990    <bb N>:
2991    ...
2992    _123 = (int) _234;
2993
2994    <bb M>:
2995    # _345 = PHI <_123(N), 1(...), 1(...)>
2996    where _234 has bool type, _123 has single use and
2997    bb N has a single successor M.  This is commonly used in
2998    the last block of a range test.  */
2999
3000 static bool
3001 final_range_test_p (gimple *stmt)
3002 {
3003   basic_block bb, rhs_bb;
3004   edge e;
3005   tree lhs, rhs;
3006   use_operand_p use_p;
3007   gimple *use_stmt;
3008
3009   if (!gimple_assign_cast_p (stmt))
3010     return false;
3011   bb = gimple_bb (stmt);
3012   if (!single_succ_p (bb))
3013     return false;
3014   e = single_succ_edge (bb);
3015   if (e->flags & EDGE_COMPLEX)
3016     return false;
3017
3018   lhs = gimple_assign_lhs (stmt);
3019   rhs = gimple_assign_rhs1 (stmt);
3020   if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3021       || TREE_CODE (rhs) != SSA_NAME
3022       || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
3023     return false;
3024
3025   /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
3026   if (!single_imm_use (lhs, &use_p, &use_stmt))
3027     return false;
3028
3029   if (gimple_code (use_stmt) != GIMPLE_PHI
3030       || gimple_bb (use_stmt) != e->dest)
3031     return false;
3032
3033   /* And that the rhs is defined in the same loop.  */
3034   rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
3035   if (rhs_bb == NULL
3036       || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
3037     return false;
3038
3039   return true;
3040 }
3041
3042 /* Return true if BB is suitable basic block for inter-bb range test
3043    optimization.  If BACKWARD is true, BB should be the only predecessor
3044    of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
3045    or compared with to find a common basic block to which all conditions
3046    branch to if true resp. false.  If BACKWARD is false, TEST_BB should
3047    be the only predecessor of BB.  */
3048
3049 static bool
3050 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
3051                   bool backward)
3052 {
3053   edge_iterator ei, ei2;
3054   edge e, e2;
3055   gimple *stmt;
3056   gphi_iterator gsi;
3057   bool other_edge_seen = false;
3058   bool is_cond;
3059
3060   if (test_bb == bb)
3061     return false;
3062   /* Check last stmt first.  */
3063   stmt = last_stmt (bb);
3064   if (stmt == NULL
3065       || (gimple_code (stmt) != GIMPLE_COND
3066           && (backward || !final_range_test_p (stmt)))
3067       || gimple_visited_p (stmt)
3068       || stmt_could_throw_p (stmt)
3069       || *other_bb == bb)
3070     return false;
3071   is_cond = gimple_code (stmt) == GIMPLE_COND;
3072   if (is_cond)
3073     {
3074       /* If last stmt is GIMPLE_COND, verify that one of the succ edges
3075          goes to the next bb (if BACKWARD, it is TEST_BB), and the other
3076          to *OTHER_BB (if not set yet, try to find it out).  */
3077       if (EDGE_COUNT (bb->succs) != 2)
3078         return false;
3079       FOR_EACH_EDGE (e, ei, bb->succs)
3080         {
3081           if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3082             return false;
3083           if (e->dest == test_bb)
3084             {
3085               if (backward)
3086                 continue;
3087               else
3088                 return false;
3089             }
3090           if (e->dest == bb)
3091             return false;
3092           if (*other_bb == NULL)
3093             {
3094               FOR_EACH_EDGE (e2, ei2, test_bb->succs)
3095                 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3096                   return false;
3097                 else if (e->dest == e2->dest)
3098                   *other_bb = e->dest;
3099               if (*other_bb == NULL)
3100                 return false;
3101             }
3102           if (e->dest == *other_bb)
3103             other_edge_seen = true;
3104           else if (backward)
3105             return false;
3106         }
3107       if (*other_bb == NULL || !other_edge_seen)
3108         return false;
3109     }
3110   else if (single_succ (bb) != *other_bb)
3111     return false;
3112
3113   /* Now check all PHIs of *OTHER_BB.  */
3114   e = find_edge (bb, *other_bb);
3115   e2 = find_edge (test_bb, *other_bb);
3116   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3117     {
3118       gphi *phi = gsi.phi ();
3119       /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3120          corresponding to BB and TEST_BB predecessor must be the same.  */
3121       if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
3122                             gimple_phi_arg_def (phi, e2->dest_idx), 0))
3123         {
3124           /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3125              one of the PHIs should have the lhs of the last stmt in
3126              that block as PHI arg and that PHI should have 0 or 1
3127              corresponding to it in all other range test basic blocks
3128              considered.  */
3129           if (!is_cond)
3130             {
3131               if (gimple_phi_arg_def (phi, e->dest_idx)
3132                   == gimple_assign_lhs (stmt)
3133                   && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
3134                       || integer_onep (gimple_phi_arg_def (phi,
3135                                                            e2->dest_idx))))
3136                 continue;
3137             }
3138           else
3139             {
3140               gimple *test_last = last_stmt (test_bb);
3141               if (gimple_code (test_last) != GIMPLE_COND
3142                   && gimple_phi_arg_def (phi, e2->dest_idx)
3143                      == gimple_assign_lhs (test_last)
3144                   && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
3145                       || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
3146                 continue;
3147             }
3148
3149           return false;
3150         }
3151     }
3152   return true;
3153 }
3154
3155 /* Return true if BB doesn't have side-effects that would disallow
3156    range test optimization, all SSA_NAMEs set in the bb are consumed
3157    in the bb and there are no PHIs.  */
3158
3159 static bool
3160 no_side_effect_bb (basic_block bb)
3161 {
3162   gimple_stmt_iterator gsi;
3163   gimple *last;
3164
3165   if (!gimple_seq_empty_p (phi_nodes (bb)))
3166     return false;
3167   last = last_stmt (bb);
3168   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3169     {
3170       gimple *stmt = gsi_stmt (gsi);
3171       tree lhs;
3172       imm_use_iterator imm_iter;
3173       use_operand_p use_p;
3174
3175       if (is_gimple_debug (stmt))
3176         continue;
3177       if (gimple_has_side_effects (stmt))
3178         return false;
3179       if (stmt == last)
3180         return true;
3181       if (!is_gimple_assign (stmt))
3182         return false;
3183       lhs = gimple_assign_lhs (stmt);
3184       if (TREE_CODE (lhs) != SSA_NAME)
3185         return false;
3186       if (gimple_assign_rhs_could_trap_p (stmt))
3187         return false;
3188       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3189         {
3190           gimple *use_stmt = USE_STMT (use_p);
3191           if (is_gimple_debug (use_stmt))
3192             continue;
3193           if (gimple_bb (use_stmt) != bb)
3194             return false;
3195         }
3196     }
3197   return false;
3198 }
3199
3200 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3201    return true and fill in *OPS recursively.  */
3202
3203 static bool
3204 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3205          struct loop *loop)
3206 {
3207   gimple *stmt = SSA_NAME_DEF_STMT (var);
3208   tree rhs[2];
3209   int i;
3210
3211   if (!is_reassociable_op (stmt, code, loop))
3212     return false;
3213
3214   rhs[0] = gimple_assign_rhs1 (stmt);
3215   rhs[1] = gimple_assign_rhs2 (stmt);
3216   gimple_set_visited (stmt, true);
3217   for (i = 0; i < 2; i++)
3218     if (TREE_CODE (rhs[i]) == SSA_NAME
3219         && !get_ops (rhs[i], code, ops, loop)
3220         && has_single_use (rhs[i]))
3221       {
3222         operand_entry *oe = operand_entry_pool.allocate ();
3223
3224         oe->op = rhs[i];
3225         oe->rank = code;
3226         oe->id = 0;
3227         oe->count = 1;
3228         ops->safe_push (oe);
3229       }
3230   return true;
3231 }
3232
3233 /* Find the ops that were added by get_ops starting from VAR, see if
3234    they were changed during update_range_test and if yes, create new
3235    stmts.  */
3236
3237 static tree
3238 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3239             unsigned int *pidx, struct loop *loop)
3240 {
3241   gimple *stmt = SSA_NAME_DEF_STMT (var);
3242   tree rhs[4];
3243   int i;
3244
3245   if (!is_reassociable_op (stmt, code, loop))
3246     return NULL;
3247
3248   rhs[0] = gimple_assign_rhs1 (stmt);
3249   rhs[1] = gimple_assign_rhs2 (stmt);
3250   rhs[2] = rhs[0];
3251   rhs[3] = rhs[1];
3252   for (i = 0; i < 2; i++)
3253     if (TREE_CODE (rhs[i]) == SSA_NAME)
3254       {
3255         rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3256         if (rhs[2 + i] == NULL_TREE)
3257           {
3258             if (has_single_use (rhs[i]))
3259               rhs[2 + i] = ops[(*pidx)++]->op;
3260             else
3261               rhs[2 + i] = rhs[i];
3262           }
3263       }
3264   if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3265       && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3266     {
3267       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3268       var = make_ssa_name (TREE_TYPE (var));
3269       gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3270                                         rhs[2], rhs[3]);
3271       gimple_set_uid (g, gimple_uid (stmt));
3272       gimple_set_visited (g, true);
3273       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3274     }
3275   return var;
3276 }
3277
3278 /* Structure to track the initial value passed to get_ops and
3279    the range in the ops vector for each basic block.  */
3280
3281 struct inter_bb_range_test_entry
3282 {
3283   tree op;
3284   unsigned int first_idx, last_idx;
3285 };
3286
3287 /* Inter-bb range test optimization.
3288
3289    Returns TRUE if a gimple conditional is optimized to a true/false,
3290    otherwise return FALSE.
3291
3292    This indicates to the caller that it should run a CFG cleanup pass
3293    once reassociation is completed.  */
3294
3295 static bool
3296 maybe_optimize_range_tests (gimple *stmt)
3297 {
3298   basic_block first_bb = gimple_bb (stmt);
3299   basic_block last_bb = first_bb;
3300   basic_block other_bb = NULL;
3301   basic_block bb;
3302   edge_iterator ei;
3303   edge e;
3304   auto_vec<operand_entry *> ops;
3305   auto_vec<inter_bb_range_test_entry> bbinfo;
3306   bool any_changes = false;
3307   bool cfg_cleanup_needed = false;
3308
3309   /* Consider only basic blocks that end with GIMPLE_COND or
3310      a cast statement satisfying final_range_test_p.  All
3311      but the last bb in the first_bb .. last_bb range
3312      should end with GIMPLE_COND.  */
3313   if (gimple_code (stmt) == GIMPLE_COND)
3314     {
3315       if (EDGE_COUNT (first_bb->succs) != 2)
3316         return cfg_cleanup_needed;
3317     }
3318   else if (final_range_test_p (stmt))
3319     other_bb = single_succ (first_bb);
3320   else
3321     return cfg_cleanup_needed;
3322
3323   if (stmt_could_throw_p (stmt))
3324     return cfg_cleanup_needed;
3325
3326   /* As relative ordering of post-dominator sons isn't fixed,
3327      maybe_optimize_range_tests can be called first on any
3328      bb in the range we want to optimize.  So, start searching
3329      backwards, if first_bb can be set to a predecessor.  */
3330   while (single_pred_p (first_bb))
3331     {
3332       basic_block pred_bb = single_pred (first_bb);
3333       if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3334         break;
3335       if (!no_side_effect_bb (first_bb))
3336         break;
3337       first_bb = pred_bb;
3338     }
3339   /* If first_bb is last_bb, other_bb hasn't been computed yet.
3340      Before starting forward search in last_bb successors, find
3341      out the other_bb.  */
3342   if (first_bb == last_bb)
3343     {
3344       other_bb = NULL;
3345       /* As non-GIMPLE_COND last stmt always terminates the range,
3346          if forward search didn't discover anything, just give up.  */
3347       if (gimple_code (stmt) != GIMPLE_COND)
3348         return cfg_cleanup_needed;
3349       /* Look at both successors.  Either it ends with a GIMPLE_COND
3350          and satisfies suitable_cond_bb, or ends with a cast and
3351          other_bb is that cast's successor.  */
3352       FOR_EACH_EDGE (e, ei, first_bb->succs)
3353         if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3354             || e->dest == first_bb)
3355           return cfg_cleanup_needed;
3356         else if (single_pred_p (e->dest))
3357           {
3358             stmt = last_stmt (e->dest);
3359             if (stmt
3360                 && gimple_code (stmt) == GIMPLE_COND
3361                 && EDGE_COUNT (e->dest->succs) == 2)
3362               {
3363                 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3364                   break;
3365                 else
3366                   other_bb = NULL;
3367               }
3368             else if (stmt
3369                      && final_range_test_p (stmt)
3370                      && find_edge (first_bb, single_succ (e->dest)))
3371               {
3372                 other_bb = single_succ (e->dest);
3373                 if (other_bb == first_bb)
3374                   other_bb = NULL;
3375               }
3376           }
3377       if (other_bb == NULL)
3378         return cfg_cleanup_needed;
3379     }
3380   /* Now do the forward search, moving last_bb to successor bbs
3381      that aren't other_bb.  */
3382   while (EDGE_COUNT (last_bb->succs) == 2)
3383     {
3384       FOR_EACH_EDGE (e, ei, last_bb->succs)
3385         if (e->dest != other_bb)
3386           break;
3387       if (e == NULL)
3388         break;
3389       if (!single_pred_p (e->dest))
3390         break;
3391       if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3392         break;
3393       if (!no_side_effect_bb (e->dest))
3394         break;
3395       last_bb = e->dest;
3396     }
3397   if (first_bb == last_bb)
3398     return cfg_cleanup_needed;
3399   /* Here basic blocks first_bb through last_bb's predecessor
3400      end with GIMPLE_COND, all of them have one of the edges to
3401      other_bb and another to another block in the range,
3402      all blocks except first_bb don't have side-effects and
3403      last_bb ends with either GIMPLE_COND, or cast satisfying
3404      final_range_test_p.  */
3405   for (bb = last_bb; ; bb = single_pred (bb))
3406     {
3407       enum tree_code code;
3408       tree lhs, rhs;
3409       inter_bb_range_test_entry bb_ent;
3410
3411       bb_ent.op = NULL_TREE;
3412       bb_ent.first_idx = ops.length ();
3413       bb_ent.last_idx = bb_ent.first_idx;
3414       e = find_edge (bb, other_bb);
3415       stmt = last_stmt (bb);
3416       gimple_set_visited (stmt, true);
3417       if (gimple_code (stmt) != GIMPLE_COND)
3418         {
3419           use_operand_p use_p;
3420           gimple *phi;
3421           edge e2;
3422           unsigned int d;
3423
3424           lhs = gimple_assign_lhs (stmt);
3425           rhs = gimple_assign_rhs1 (stmt);
3426           gcc_assert (bb == last_bb);
3427
3428           /* stmt is
3429              _123 = (int) _234;
3430
3431              followed by:
3432              <bb M>:
3433              # _345 = PHI <_123(N), 1(...), 1(...)>
3434
3435              or 0 instead of 1.  If it is 0, the _234
3436              range test is anded together with all the
3437              other range tests, if it is 1, it is ored with
3438              them.  */
3439           single_imm_use (lhs, &use_p, &phi);
3440           gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3441           e2 = find_edge (first_bb, other_bb);
3442           d = e2->dest_idx;
3443           gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3444           if (integer_zerop (gimple_phi_arg_def (phi, d)))
3445             code = BIT_AND_EXPR;
3446           else
3447             {
3448               gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3449               code = BIT_IOR_EXPR;
3450             }
3451
3452           /* If _234 SSA_NAME_DEF_STMT is
3453              _234 = _567 | _789;
3454              (or &, corresponding to 1/0 in the phi arguments,
3455              push into ops the individual range test arguments
3456              of the bitwise or resp. and, recursively.  */
3457           if (!get_ops (rhs, code, &ops,
3458                         loop_containing_stmt (stmt))
3459               && has_single_use (rhs))
3460             {
3461               /* Otherwise, push the _234 range test itself.  */
3462               operand_entry *oe = operand_entry_pool.allocate ();
3463
3464               oe->op = rhs;
3465               oe->rank = code;
3466               oe->id = 0;
3467               oe->count = 1;
3468               ops.safe_push (oe);
3469               bb_ent.last_idx++;
3470             }
3471           else
3472             bb_ent.last_idx = ops.length ();
3473           bb_ent.op = rhs;
3474           bbinfo.safe_push (bb_ent);
3475           continue;
3476         }
3477       /* Otherwise stmt is GIMPLE_COND.  */
3478       code = gimple_cond_code (stmt);
3479       lhs = gimple_cond_lhs (stmt);
3480       rhs = gimple_cond_rhs (stmt);
3481       if (TREE_CODE (lhs) == SSA_NAME
3482           && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3483           && ((code != EQ_EXPR && code != NE_EXPR)
3484               || rhs != boolean_false_node
3485                  /* Either push into ops the individual bitwise
3486                     or resp. and operands, depending on which
3487                     edge is other_bb.  */
3488               || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3489                                  ^ (code == EQ_EXPR))
3490                                 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3491                            loop_containing_stmt (stmt))))
3492         {
3493           /* Or push the GIMPLE_COND stmt itself.  */
3494           operand_entry *oe = operand_entry_pool.allocate ();
3495
3496           oe->op = NULL;
3497           oe->rank = (e->flags & EDGE_TRUE_VALUE)
3498                      ? BIT_IOR_EXPR : BIT_AND_EXPR;
3499           /* oe->op = NULL signs that there is no SSA_NAME
3500              for the range test, and oe->id instead is the
3501              basic block number, at which's end the GIMPLE_COND
3502              is.  */
3503           oe->id = bb->index;
3504           oe->count = 1;
3505           ops.safe_push (oe);
3506           bb_ent.op = NULL;
3507           bb_ent.last_idx++;
3508         }
3509       else if (ops.length () > bb_ent.first_idx)
3510         {
3511           bb_ent.op = lhs;
3512           bb_ent.last_idx = ops.length ();
3513         }
3514       bbinfo.safe_push (bb_ent);
3515       if (bb == first_bb)
3516         break;
3517     }
3518   if (ops.length () > 1)
3519     any_changes = optimize_range_tests (ERROR_MARK, &ops);
3520   if (any_changes)
3521     {
3522       unsigned int idx, max_idx = 0;
3523       /* update_ops relies on has_single_use predicates returning the
3524          same values as it did during get_ops earlier.  Additionally it
3525          never removes statements, only adds new ones and it should walk
3526          from the single imm use and check the predicate already before
3527          making those changes.
3528          On the other side, the handling of GIMPLE_COND directly can turn
3529          previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3530          it needs to be done in a separate loop afterwards.  */
3531       for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3532         {
3533           if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3534               && bbinfo[idx].op != NULL_TREE)
3535             {
3536               tree new_op;
3537
3538               max_idx = idx;
3539               stmt = last_stmt (bb);
3540               new_op = update_ops (bbinfo[idx].op,
3541                                    (enum tree_code)
3542                                    ops[bbinfo[idx].first_idx]->rank,
3543                                    ops, &bbinfo[idx].first_idx,
3544                                    loop_containing_stmt (stmt));
3545               if (new_op == NULL_TREE)
3546                 {
3547                   gcc_assert (bb == last_bb);
3548                   new_op = ops[bbinfo[idx].first_idx++]->op;
3549                 }
3550               if (bbinfo[idx].op != new_op)
3551                 {
3552                   imm_use_iterator iter;
3553                   use_operand_p use_p;
3554                   gimple *use_stmt, *cast_stmt = NULL;
3555
3556                   FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3557                     if (is_gimple_debug (use_stmt))
3558                       continue;
3559                     else if (gimple_code (use_stmt) == GIMPLE_COND
3560                              || gimple_code (use_stmt) == GIMPLE_PHI)
3561                       FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3562                         SET_USE (use_p, new_op);
3563                     else if (gimple_assign_cast_p (use_stmt))
3564                       cast_stmt = use_stmt;
3565                     else
3566                       gcc_unreachable ();
3567                   if (cast_stmt)
3568                     {
3569                       gcc_assert (bb == last_bb);
3570                       tree lhs = gimple_assign_lhs (cast_stmt);
3571                       tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3572                       enum tree_code rhs_code
3573                         = gimple_assign_rhs_code (cast_stmt);
3574                       gassign *g;
3575                       if (is_gimple_min_invariant (new_op))
3576                         {
3577                           new_op = fold_convert (TREE_TYPE (lhs), new_op);
3578                           g = gimple_build_assign (new_lhs, new_op);
3579                         }
3580                       else
3581                         g = gimple_build_assign (new_lhs, rhs_code, new_op);
3582                       gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3583                       gimple_set_uid (g, gimple_uid (cast_stmt));
3584                       gimple_set_visited (g, true);
3585                       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3586                       FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3587                         if (is_gimple_debug (use_stmt))
3588                           continue;
3589                         else if (gimple_code (use_stmt) == GIMPLE_COND
3590                                  || gimple_code (use_stmt) == GIMPLE_PHI)
3591                           FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3592                             SET_USE (use_p, new_lhs);
3593                         else
3594                           gcc_unreachable ();
3595                     }
3596                 }
3597             }
3598           if (bb == first_bb)
3599             break;
3600         }
3601       for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3602         {
3603           if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3604               && bbinfo[idx].op == NULL_TREE
3605               && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3606             {
3607               gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3608
3609               if (idx > max_idx)
3610                 max_idx = idx;
3611
3612               /* If we collapse the conditional to a true/false
3613                  condition, then bubble that knowledge up to our caller.  */
3614               if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3615                 {
3616                   gimple_cond_make_false (cond_stmt);
3617                   cfg_cleanup_needed = true;
3618                 }
3619               else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3620                 {
3621                   gimple_cond_make_true (cond_stmt);
3622                   cfg_cleanup_needed = true;
3623                 }
3624               else
3625                 {
3626                   gimple_cond_set_code (cond_stmt, NE_EXPR);
3627                   gimple_cond_set_lhs (cond_stmt,
3628                                        ops[bbinfo[idx].first_idx]->op);
3629                   gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3630                 }
3631               update_stmt (cond_stmt);
3632             }
3633           if (bb == first_bb)
3634             break;
3635         }
3636
3637       /* The above changes could result in basic blocks after the first
3638          modified one, up to and including last_bb, to be executed even if
3639          they would not be in the original program.  If the value ranges of
3640          assignment lhs' in those bbs were dependent on the conditions
3641          guarding those basic blocks which now can change, the VRs might
3642          be incorrect.  As no_side_effect_bb should ensure those SSA_NAMEs
3643          are only used within the same bb, it should be not a big deal if
3644          we just reset all the VRs in those bbs.  See PR68671.  */
3645       for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
3646         reset_flow_sensitive_info_in_bb (bb);
3647     }
3648   return cfg_cleanup_needed;
3649 }
3650
3651 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3652    of STMT in it's operands.  This is also known as a "destructive
3653    update" operation.  */
3654
3655 static bool
3656 is_phi_for_stmt (gimple *stmt, tree operand)
3657 {
3658   gimple *def_stmt;
3659   gphi *def_phi;
3660   tree lhs;
3661   use_operand_p arg_p;
3662   ssa_op_iter i;
3663
3664   if (TREE_CODE (operand) != SSA_NAME)
3665     return false;
3666
3667   lhs = gimple_assign_lhs (stmt);
3668
3669   def_stmt = SSA_NAME_DEF_STMT (operand);
3670   def_phi = dyn_cast <gphi *> (def_stmt);
3671   if (!def_phi)
3672     return false;
3673
3674   FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3675     if (lhs == USE_FROM_PTR (arg_p))
3676       return true;
3677   return false;
3678 }
3679
3680 /* Remove def stmt of VAR if VAR has zero uses and recurse
3681    on rhs1 operand if so.  */
3682
3683 static void
3684 remove_visited_stmt_chain (tree var)
3685 {
3686   gimple *stmt;
3687   gimple_stmt_iterator gsi;
3688
3689   while (1)
3690     {
3691       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3692         return;
3693       stmt = SSA_NAME_DEF_STMT (var);
3694       if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3695         {
3696           var = gimple_assign_rhs1 (stmt);
3697           gsi = gsi_for_stmt (stmt);
3698           reassoc_remove_stmt (&gsi);
3699           release_defs (stmt);
3700         }
3701       else
3702         return;
3703     }
3704 }
3705
3706 /* This function checks three consequtive operands in
3707    passed operands vector OPS starting from OPINDEX and
3708    swaps two operands if it is profitable for binary operation
3709    consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
3710
3711    We pair ops with the same rank if possible.
3712
3713    The alternative we try is to see if STMT is a destructive
3714    update style statement, which is like:
3715    b = phi (a, ...)
3716    a = c + b;
3717    In that case, we want to use the destructive update form to
3718    expose the possible vectorizer sum reduction opportunity.
3719    In that case, the third operand will be the phi node. This
3720    check is not performed if STMT is null.
3721
3722    We could, of course, try to be better as noted above, and do a
3723    lot of work to try to find these opportunities in >3 operand
3724    cases, but it is unlikely to be worth it.  */
3725
3726 static void
3727 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
3728                           unsigned int opindex, gimple *stmt)
3729 {
3730   operand_entry *oe1, *oe2, *oe3;
3731
3732   oe1 = ops[opindex];
3733   oe2 = ops[opindex + 1];
3734   oe3 = ops[opindex + 2];
3735
3736   if ((oe1->rank == oe2->rank
3737        && oe2->rank != oe3->rank)
3738       || (stmt && is_phi_for_stmt (stmt, oe3->op)
3739           && !is_phi_for_stmt (stmt, oe1->op)
3740           && !is_phi_for_stmt (stmt, oe2->op)))
3741     {
3742       operand_entry temp = *oe3;
3743       oe3->op = oe1->op;
3744       oe3->rank = oe1->rank;
3745       oe1->op = temp.op;
3746       oe1->rank= temp.rank;
3747     }
3748   else if ((oe1->rank == oe3->rank
3749             && oe2->rank != oe3->rank)
3750            || (stmt && is_phi_for_stmt (stmt, oe2->op)
3751                && !is_phi_for_stmt (stmt, oe1->op)
3752                && !is_phi_for_stmt (stmt, oe3->op)))
3753     {
3754       operand_entry temp = *oe2;
3755       oe2->op = oe1->op;
3756       oe2->rank = oe1->rank;
3757       oe1->op = temp.op;
3758       oe1->rank = temp.rank;
3759     }
3760 }
3761
3762 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3763    two definitions, otherwise return STMT.  */
3764
3765 static inline gimple *
3766 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
3767 {
3768   if (TREE_CODE (rhs1) == SSA_NAME
3769       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3770     stmt = SSA_NAME_DEF_STMT (rhs1);
3771   if (TREE_CODE (rhs2) == SSA_NAME
3772       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3773     stmt = SSA_NAME_DEF_STMT (rhs2);
3774   return stmt;
3775 }
3776
3777 /* Recursively rewrite our linearized statements so that the operators
3778    match those in OPS[OPINDEX], putting the computation in rank
3779    order.  Return new lhs.  */
3780
3781 static tree
3782 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
3783                    vec<operand_entry *> ops, bool changed)
3784 {
3785   tree rhs1 = gimple_assign_rhs1 (stmt);
3786   tree rhs2 = gimple_assign_rhs2 (stmt);
3787   tree lhs = gimple_assign_lhs (stmt);
3788   operand_entry *oe;
3789
3790   /* The final recursion case for this function is that you have
3791      exactly two operations left.
3792      If we had exactly one op in the entire list to start with, we
3793      would have never called this function, and the tail recursion
3794      rewrites them one at a time.  */
3795   if (opindex + 2 == ops.length ())
3796     {
3797       operand_entry *oe1, *oe2;
3798
3799       oe1 = ops[opindex];
3800       oe2 = ops[opindex + 1];
3801
3802       if (rhs1 != oe1->op || rhs2 != oe2->op)
3803         {
3804           gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3805           unsigned int uid = gimple_uid (stmt);
3806
3807           if (dump_file && (dump_flags & TDF_DETAILS))
3808             {
3809               fprintf (dump_file, "Transforming ");
3810               print_gimple_stmt (dump_file, stmt, 0, 0);
3811             }
3812
3813           /* Even when changed is false, reassociation could have e.g. removed
3814              some redundant operations, so unless we are just swapping the
3815              arguments or unless there is no change at all (then we just
3816              return lhs), force creation of a new SSA_NAME.  */
3817           if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
3818             {
3819               gimple *insert_point
3820                 = find_insert_point (stmt, oe1->op, oe2->op);
3821               lhs = make_ssa_name (TREE_TYPE (lhs));
3822               stmt
3823                 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3824                                        oe1->op, oe2->op);
3825               gimple_set_uid (stmt, uid);
3826               gimple_set_visited (stmt, true);
3827               if (insert_point == gsi_stmt (gsi))
3828                 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3829               else
3830                 insert_stmt_after (stmt, insert_point);
3831             }
3832           else
3833             {
3834               gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3835                                    == stmt);
3836               gimple_assign_set_rhs1 (stmt, oe1->op);
3837               gimple_assign_set_rhs2 (stmt, oe2->op);
3838               update_stmt (stmt);
3839             }
3840
3841           if (rhs1 != oe1->op && rhs1 != oe2->op)
3842             remove_visited_stmt_chain (rhs1);
3843
3844           if (dump_file && (dump_flags & TDF_DETAILS))
3845             {
3846               fprintf (dump_file, " into ");
3847               print_gimple_stmt (dump_file, stmt, 0, 0);
3848             }
3849         }
3850       return lhs;
3851     }
3852
3853   /* If we hit here, we should have 3 or more ops left.  */
3854   gcc_assert (opindex + 2 < ops.length ());
3855
3856   /* Rewrite the next operator.  */
3857   oe = ops[opindex];
3858
3859   /* Recurse on the LHS of the binary operator, which is guaranteed to
3860      be the non-leaf side.  */
3861   tree new_rhs1
3862     = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3863                          changed || oe->op != rhs2);
3864
3865   if (oe->op != rhs2 || new_rhs1 != rhs1)
3866     {
3867       if (dump_file && (dump_flags & TDF_DETAILS))
3868         {
3869           fprintf (dump_file, "Transforming ");
3870           print_gimple_stmt (dump_file, stmt, 0, 0);
3871         }
3872
3873       /* If changed is false, this is either opindex == 0
3874          or all outer rhs2's were equal to corresponding oe->op,
3875          and powi_result is NULL.
3876          That means lhs is equivalent before and after reassociation.
3877          Otherwise ensure the old lhs SSA_NAME is not reused and
3878          create a new stmt as well, so that any debug stmts will be
3879          properly adjusted.  */
3880       if (changed)
3881         {
3882           gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3883           unsigned int uid = gimple_uid (stmt);
3884           gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3885
3886           lhs = make_ssa_name (TREE_TYPE (lhs));
3887           stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3888                                       new_rhs1, oe->op);
3889           gimple_set_uid (stmt, uid);
3890           gimple_set_visited (stmt, true);
3891           if (insert_point == gsi_stmt (gsi))
3892             gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3893           else
3894             insert_stmt_after (stmt, insert_point);
3895         }
3896       else
3897         {
3898           gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3899                                == stmt);
3900           gimple_assign_set_rhs1 (stmt, new_rhs1);
3901           gimple_assign_set_rhs2 (stmt, oe->op);
3902           update_stmt (stmt);
3903         }
3904
3905       if (dump_file && (dump_flags & TDF_DETAILS))
3906         {
3907           fprintf (dump_file, " into ");
3908           print_gimple_stmt (dump_file, stmt, 0, 0);
3909         }
3910     }
3911   return lhs;
3912 }
3913
3914 /* Find out how many cycles we need to compute statements chain.
3915    OPS_NUM holds number os statements in a chain.  CPU_WIDTH is a
3916    maximum number of independent statements we may execute per cycle.  */
3917
3918 static int
3919 get_required_cycles (int ops_num, int cpu_width)
3920 {
3921   int res;
3922   int elog;
3923   unsigned int rest;
3924
3925   /* While we have more than 2 * cpu_width operands
3926      we may reduce number of operands by cpu_width
3927      per cycle.  */
3928   res = ops_num / (2 * cpu_width);
3929
3930   /* Remained operands count may be reduced twice per cycle
3931      until we have only one operand.  */
3932   rest = (unsigned)(ops_num - res * cpu_width);
3933   elog = exact_log2 (rest);
3934   if (elog >= 0)
3935     res += elog;
3936   else
3937     res += floor_log2 (rest) + 1;
3938
3939   return res;
3940 }
3941
3942 /* Returns an optimal number of registers to use for computation of
3943    given statements.  */
3944
3945 static int
3946 get_reassociation_width (int ops_num, enum tree_code opc,
3947                          machine_mode mode)
3948 {
3949   int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3950   int width;
3951   int width_min;
3952   int cycles_best;
3953
3954   if (param_width > 0)
3955     width = param_width;
3956   else
3957     width = targetm.sched.reassociation_width (opc, mode);
3958
3959   if (width == 1)
3960     return width;
3961
3962   /* Get the minimal time required for sequence computation.  */
3963   cycles_best = get_required_cycles (ops_num, width);
3964
3965   /* Check if we may use less width and still compute sequence for
3966      the same time.  It will allow us to reduce registers usage.
3967      get_required_cycles is monotonically increasing with lower width
3968      so we can perform a binary search for the minimal width that still
3969      results in the optimal cycle count.  */
3970   width_min = 1;
3971   while (width > width_min)
3972     {
3973       int width_mid = (width + width_min) / 2;
3974
3975       if (get_required_cycles (ops_num, width_mid) == cycles_best)
3976         width = width_mid;
3977       else if (width_min < width_mid)
3978         width_min = width_mid;
3979       else
3980         break;
3981     }
3982
3983   return width;
3984 }
3985
3986 /* Recursively rewrite our linearized statements so that the operators
3987    match those in OPS[OPINDEX], putting the computation in rank
3988    order and trying to allow operations to be executed in
3989    parallel.  */
3990
3991 static void
3992 rewrite_expr_tree_parallel (gassign *stmt, int width,
3993                             vec<operand_entry *> ops)
3994 {
3995   enum tree_code opcode = gimple_assign_rhs_code (stmt);
3996   int op_num = ops.length ();
3997   int stmt_num = op_num - 1;
3998   gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
3999   int op_index = op_num - 1;
4000   int stmt_index = 0;
4001   int ready_stmts_end = 0;
4002   int i = 0;
4003   tree last_rhs1 = gimple_assign_rhs1 (stmt);
4004
4005   /* We start expression rewriting from the top statements.
4006      So, in this loop we create a full list of statements
4007      we will work with.  */
4008   stmts[stmt_num - 1] = stmt;
4009   for (i = stmt_num - 2; i >= 0; i--)
4010     stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
4011
4012   for (i = 0; i < stmt_num; i++)
4013     {
4014       tree op1, op2;
4015
4016       /* Determine whether we should use results of
4017          already handled statements or not.  */
4018       if (ready_stmts_end == 0
4019           && (i - stmt_index >= width || op_index < 1))
4020         ready_stmts_end = i;
4021
4022       /* Now we choose operands for the next statement.  Non zero
4023          value in ready_stmts_end means here that we should use
4024          the result of already generated statements as new operand.  */
4025       if (ready_stmts_end > 0)
4026         {
4027           op1 = gimple_assign_lhs (stmts[stmt_index++]);
4028           if (ready_stmts_end > stmt_index)
4029             op2 = gimple_assign_lhs (stmts[stmt_index++]);
4030           else if (op_index >= 0)
4031             op2 = ops[op_index--]->op;
4032           else
4033             {
4034               gcc_assert (stmt_index < i);
4035               op2 = gimple_assign_lhs (stmts[stmt_index++]);
4036             }
4037
4038           if (stmt_index >= ready_stmts_end)
4039             ready_stmts_end = 0;
4040         }
4041       else
4042         {
4043           if (op_index > 1)
4044             swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
4045           op2 = ops[op_index--]->op;
4046           op1 = ops[op_index--]->op;
4047         }
4048
4049       /* If we emit the last statement then we should put
4050          operands into the last statement.  It will also
4051          break the loop.  */
4052       if (op_index < 0 && stmt_index == i)
4053         i = stmt_num - 1;
4054
4055       if (dump_file && (dump_flags & TDF_DETAILS))
4056         {
4057           fprintf (dump_file, "Transforming ");
4058           print_gimple_stmt (dump_file, stmts[i], 0, 0);
4059         }
4060
4061       /* We keep original statement only for the last one.  All
4062          others are recreated.  */
4063       if (i == stmt_num - 1)
4064         {
4065           gimple_assign_set_rhs1 (stmts[i], op1);
4066           gimple_assign_set_rhs2 (stmts[i], op2);
4067           update_stmt (stmts[i]);
4068         }
4069       else
4070         stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
4071
4072       if (dump_file && (dump_flags & TDF_DETAILS))
4073         {
4074           fprintf (dump_file, " into ");
4075           print_gimple_stmt (dump_file, stmts[i], 0, 0);
4076         }
4077     }
4078
4079   remove_visited_stmt_chain (last_rhs1);
4080 }
4081
4082 /* Transform STMT, which is really (A +B) + (C + D) into the left
4083    linear form, ((A+B)+C)+D.
4084    Recurse on D if necessary.  */
4085
4086 static void
4087 linearize_expr (gimple *stmt)
4088 {
4089   gimple_stmt_iterator gsi;
4090   gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
4091   gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4092   gimple *oldbinrhs = binrhs;
4093   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4094   gimple *newbinrhs = NULL;
4095   struct loop *loop = loop_containing_stmt (stmt);
4096   tree lhs = gimple_assign_lhs (stmt);
4097
4098   gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
4099               && is_reassociable_op (binrhs, rhscode, loop));
4100
4101   gsi = gsi_for_stmt (stmt);
4102
4103   gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
4104   binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
4105                                 gimple_assign_rhs_code (binrhs),
4106                                 gimple_assign_lhs (binlhs),
4107                                 gimple_assign_rhs2 (binrhs));
4108   gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
4109   gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
4110   gimple_set_uid (binrhs, gimple_uid (stmt));
4111
4112   if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
4113     newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4114
4115   if (dump_file && (dump_flags & TDF_DETAILS))
4116     {
4117       fprintf (dump_file, "Linearized: ");
4118       print_gimple_stmt (dump_file, stmt, 0, 0);
4119     }
4120
4121   reassociate_stats.linearized++;
4122   update_stmt (stmt);
4123
4124   gsi = gsi_for_stmt (oldbinrhs);
4125   reassoc_remove_stmt (&gsi);
4126   release_defs (oldbinrhs);
4127
4128   gimple_set_visited (stmt, true);
4129   gimple_set_visited (binlhs, true);
4130   gimple_set_visited (binrhs, true);
4131
4132   /* Tail recurse on the new rhs if it still needs reassociation.  */
4133   if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
4134     /* ??? This should probably be linearize_expr (newbinrhs) but I don't
4135            want to change the algorithm while converting to tuples.  */
4136     linearize_expr (stmt);
4137 }
4138
4139 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4140    it.  Otherwise, return NULL.  */
4141
4142 static gimple *
4143 get_single_immediate_use (tree lhs)
4144 {
4145   use_operand_p immuse;
4146   gimple *immusestmt;
4147
4148   if (TREE_CODE (lhs) == SSA_NAME
4149       && single_imm_use (lhs, &immuse, &immusestmt)
4150       && is_gimple_assign (immusestmt))
4151     return immusestmt;
4152
4153   return NULL;
4154 }
4155
4156 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
4157    representing the negated value.  Insertions of any necessary
4158    instructions go before GSI.
4159    This function is recursive in that, if you hand it "a_5" as the
4160    value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
4161    transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
4162
4163 static tree
4164 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
4165 {
4166   gimple *negatedefstmt = NULL;
4167   tree resultofnegate;
4168   gimple_stmt_iterator gsi;
4169   unsigned int uid;
4170
4171   /* If we are trying to negate a name, defined by an add, negate the
4172      add operands instead.  */
4173   if (TREE_CODE (tonegate) == SSA_NAME)
4174     negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
4175   if (TREE_CODE (tonegate) == SSA_NAME
4176       && is_gimple_assign (negatedefstmt)
4177       && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
4178       && has_single_use (gimple_assign_lhs (negatedefstmt))
4179       && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
4180     {
4181       tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
4182       tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
4183       tree lhs = gimple_assign_lhs (negatedefstmt);
4184       gimple *g;
4185
4186       gsi = gsi_for_stmt (negatedefstmt);
4187       rhs1 = negate_value (rhs1, &gsi);
4188
4189       gsi = gsi_for_stmt (negatedefstmt);
4190       rhs2 = negate_value (rhs2, &gsi);
4191
4192       gsi = gsi_for_stmt (negatedefstmt);
4193       lhs = make_ssa_name (TREE_TYPE (lhs));
4194       gimple_set_visited (negatedefstmt, true);
4195       g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
4196       gimple_set_uid (g, gimple_uid (negatedefstmt));
4197       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4198       return lhs;
4199     }
4200
4201   tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
4202   resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
4203                                              NULL_TREE, true, GSI_SAME_STMT);
4204   gsi = *gsip;
4205   uid = gimple_uid (gsi_stmt (gsi));
4206   for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
4207     {
4208       gimple *stmt = gsi_stmt (gsi);
4209       if (gimple_uid (stmt) != 0)
4210         break;
4211       gimple_set_uid (stmt, uid);
4212     }
4213   return resultofnegate;
4214 }
4215
4216 /* Return true if we should break up the subtract in STMT into an add
4217    with negate.  This is true when we the subtract operands are really
4218    adds, or the subtract itself is used in an add expression.  In
4219    either case, breaking up the subtract into an add with negate
4220    exposes the adds to reassociation.  */
4221
4222 static bool
4223 should_break_up_subtract (gimple *stmt)
4224 {
4225   tree lhs = gimple_assign_lhs (stmt);
4226   tree binlhs = gimple_assign_rhs1 (stmt);
4227   tree binrhs = gimple_assign_rhs2 (stmt);
4228   gimple *immusestmt;
4229   struct loop *loop = loop_containing_stmt (stmt);
4230
4231   if (TREE_CODE (binlhs) == SSA_NAME
4232       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4233     return true;
4234
4235   if (TREE_CODE (binrhs) == SSA_NAME
4236       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4237     return true;
4238
4239   if (TREE_CODE (lhs) == SSA_NAME
4240       && (immusestmt = get_single_immediate_use (lhs))
4241       && is_gimple_assign (immusestmt)
4242       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4243           ||  gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4244     return true;
4245   return false;
4246 }
4247
4248 /* Transform STMT from A - B into A + -B.  */
4249
4250 static void
4251 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4252 {
4253   tree rhs1 = gimple_assign_rhs1 (stmt);
4254   tree rhs2 = gimple_assign_rhs2 (stmt);
4255
4256   if (dump_file && (dump_flags & TDF_DETAILS))
4257     {
4258       fprintf (dump_file, "Breaking up subtract ");
4259       print_gimple_stmt (dump_file, stmt, 0, 0);
4260     }
4261
4262   rhs2 = negate_value (rhs2, gsip);
4263   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4264   update_stmt (stmt);
4265 }
4266
4267 /* Determine whether STMT is a builtin call that raises an SSA name
4268    to an integer power and has only one use.  If so, and this is early
4269    reassociation and unsafe math optimizations are permitted, place
4270    the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4271    If any of these conditions does not hold, return FALSE.  */
4272
4273 static bool
4274 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
4275 {
4276   tree arg1;
4277   REAL_VALUE_TYPE c, cint;
4278
4279   switch (gimple_call_combined_fn (stmt))
4280     {
4281     CASE_CFN_POW:
4282       if (flag_errno_math)
4283         return false;
4284
4285       *base = gimple_call_arg (stmt, 0);
4286       arg1 = gimple_call_arg (stmt, 1);
4287
4288       if (TREE_CODE (arg1) != REAL_CST)
4289         return false;
4290
4291       c = TREE_REAL_CST (arg1);
4292
4293       if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4294         return false;
4295
4296       *exponent = real_to_integer (&c);
4297       real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4298       if (!real_identical (&c, &cint))
4299         return false;
4300
4301       break;
4302
4303     CASE_CFN_POWI:
4304       *base = gimple_call_arg (stmt, 0);
4305       arg1 = gimple_call_arg (stmt, 1);
4306
4307       if (!tree_fits_shwi_p (arg1))
4308         return false;
4309
4310       *exponent = tree_to_shwi (arg1);
4311       break;
4312
4313     default:
4314       return false;
4315     }
4316
4317   /* Expanding negative exponents is generally unproductive, so we don't
4318      complicate matters with those.  Exponents of zero and one should
4319      have been handled by expression folding.  */
4320   if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4321     return false;
4322
4323   return true;
4324 }
4325
4326 /* Try to derive and add operand entry for OP to *OPS.  Return false if
4327    unsuccessful.  */
4328
4329 static bool
4330 try_special_add_to_ops (vec<operand_entry *> *ops,
4331                         enum tree_code code,
4332                         tree op, gimple* def_stmt)
4333 {
4334   tree base = NULL_TREE;
4335   HOST_WIDE_INT exponent = 0;
4336
4337   if (TREE_CODE (op) != SSA_NAME
4338       || ! has_single_use (op))
4339     return false;
4340
4341   if (code == MULT_EXPR
4342       && reassoc_insert_powi_p
4343       && flag_unsafe_math_optimizations
4344       && is_gimple_call (def_stmt)
4345       && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
4346     {
4347       add_repeat_to_ops_vec (ops, base, exponent);
4348       gimple_set_visited (def_stmt, true);
4349       return true;
4350     }
4351   else if (code == MULT_EXPR
4352            && is_gimple_assign (def_stmt)
4353            && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
4354            && !HONOR_SNANS (TREE_TYPE (op))
4355            && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
4356                || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
4357     {
4358       tree rhs1 = gimple_assign_rhs1 (def_stmt);
4359       tree cst = build_minus_one_cst (TREE_TYPE (op));
4360       add_to_ops_vec (ops, rhs1);
4361       add_to_ops_vec (ops, cst);
4362       gimple_set_visited (def_stmt, true);
4363       return true;
4364     }
4365
4366   return false;
4367 }
4368
4369 /* Recursively linearize a binary expression that is the RHS of STMT.
4370    Place the operands of the expression tree in the vector named OPS.  */
4371
4372 static void
4373 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
4374                      bool is_associative, bool set_visited)
4375 {
4376   tree binlhs = gimple_assign_rhs1 (stmt);
4377   tree binrhs = gimple_assign_rhs2 (stmt);
4378   gimple *binlhsdef = NULL, *binrhsdef = NULL;
4379   bool binlhsisreassoc = false;
4380   bool binrhsisreassoc = false;
4381   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4382   struct loop *loop = loop_containing_stmt (stmt);
4383
4384   if (set_visited)
4385     gimple_set_visited (stmt, true);
4386
4387   if (TREE_CODE (binlhs) == SSA_NAME)
4388     {
4389       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4390       binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4391                          && !stmt_could_throw_p (binlhsdef));
4392     }
4393
4394   if (TREE_CODE (binrhs) == SSA_NAME)
4395     {
4396       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4397       binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4398                          && !stmt_could_throw_p (binrhsdef));
4399     }
4400
4401   /* If the LHS is not reassociable, but the RHS is, we need to swap
4402      them.  If neither is reassociable, there is nothing we can do, so
4403      just put them in the ops vector.  If the LHS is reassociable,
4404      linearize it.  If both are reassociable, then linearize the RHS
4405      and the LHS.  */
4406
4407   if (!binlhsisreassoc)
4408     {
4409       /* If this is not a associative operation like division, give up.  */
4410       if (!is_associative)
4411         {
4412           add_to_ops_vec (ops, binrhs);
4413           return;
4414         }
4415
4416       if (!binrhsisreassoc)
4417         {
4418           if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4419             add_to_ops_vec (ops, binrhs);
4420
4421           if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
4422             add_to_ops_vec (ops, binlhs);
4423
4424           return;
4425         }
4426
4427       if (dump_file && (dump_flags & TDF_DETAILS))
4428         {
4429           fprintf (dump_file, "swapping operands of ");
4430           print_gimple_stmt (dump_file, stmt, 0, 0);
4431         }
4432
4433       swap_ssa_operands (stmt,
4434                          gimple_assign_rhs1_ptr (stmt),
4435                          gimple_assign_rhs2_ptr (stmt));
4436       update_stmt (stmt);
4437
4438       if (dump_file && (dump_flags & TDF_DETAILS))
4439         {
4440           fprintf (dump_file, " is now ");
4441           print_gimple_stmt (dump_file, stmt, 0, 0);
4442         }
4443
4444       /* We want to make it so the lhs is always the reassociative op,
4445          so swap.  */
4446       std::swap (binlhs, binrhs);
4447     }
4448   else if (binrhsisreassoc)
4449     {
4450       linearize_expr (stmt);
4451       binlhs = gimple_assign_rhs1 (stmt);
4452       binrhs = gimple_assign_rhs2 (stmt);
4453     }
4454
4455   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4456               || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4457                                       rhscode, loop));
4458   linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4459                        is_associative, set_visited);
4460
4461   if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4462     add_to_ops_vec (ops, binrhs);
4463 }
4464
4465 /* Repropagate the negates back into subtracts, since no other pass
4466    currently does it.  */
4467
4468 static void
4469 repropagate_negates (void)
4470 {
4471   unsigned int i = 0;
4472   tree negate;
4473
4474   FOR_EACH_VEC_ELT (plus_negates, i, negate)
4475     {
4476       gimple *user = get_single_immediate_use (negate);
4477
4478       if (!user || !is_gimple_assign (user))
4479         continue;
4480
4481       /* The negate operand can be either operand of a PLUS_EXPR
4482          (it can be the LHS if the RHS is a constant for example).
4483
4484          Force the negate operand to the RHS of the PLUS_EXPR, then
4485          transform the PLUS_EXPR into a MINUS_EXPR.  */
4486       if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4487         {
4488           /* If the negated operand appears on the LHS of the
4489              PLUS_EXPR, exchange the operands of the PLUS_EXPR
4490              to force the negated operand to the RHS of the PLUS_EXPR.  */
4491           if (gimple_assign_rhs1 (user) == negate)
4492             {
4493               swap_ssa_operands (user,
4494                                  gimple_assign_rhs1_ptr (user),
4495                                  gimple_assign_rhs2_ptr (user));
4496             }
4497
4498           /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4499              the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
4500           if (gimple_assign_rhs2 (user) == negate)
4501             {
4502               tree rhs1 = gimple_assign_rhs1 (user);
4503               tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
4504               gimple_stmt_iterator gsi = gsi_for_stmt (user);
4505               gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4506               update_stmt (user);
4507             }
4508         }
4509       else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4510         {
4511           if (gimple_assign_rhs1 (user) == negate)
4512             {
4513               /* We have
4514                    x = -a
4515                    y = x - b
4516                  which we transform into
4517                    x = a + b
4518                    y = -x .
4519                  This pushes down the negate which we possibly can merge
4520                  into some other operation, hence insert it into the
4521                  plus_negates vector.  */
4522               gimple *feed = SSA_NAME_DEF_STMT (negate);
4523               tree a = gimple_assign_rhs1 (feed);
4524               tree b = gimple_assign_rhs2 (user);
4525               gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4526               gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4527               tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4528               gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
4529               gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4530               gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4531               user = gsi_stmt (gsi2);
4532               update_stmt (user);
4533               reassoc_remove_stmt (&gsi);
4534               release_defs (feed);
4535               plus_negates.safe_push (gimple_assign_lhs (user));
4536             }
4537           else
4538             {
4539               /* Transform "x = -a; y = b - x" into "y = b + a", getting
4540                  rid of one operation.  */
4541               gimple *feed = SSA_NAME_DEF_STMT (negate);
4542               tree a = gimple_assign_rhs1 (feed);
4543               tree rhs1 = gimple_assign_rhs1 (user);
4544               gimple_stmt_iterator gsi = gsi_for_stmt (user);
4545               gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4546               update_stmt (gsi_stmt (gsi));
4547             }
4548         }
4549     }
4550 }
4551
4552 /* Returns true if OP is of a type for which we can do reassociation.
4553    That is for integral or non-saturating fixed-point types, and for
4554    floating point type when associative-math is enabled.  */
4555
4556 static bool
4557 can_reassociate_p (tree op)
4558 {
4559   tree type = TREE_TYPE (op);
4560   if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4561       || NON_SAT_FIXED_POINT_TYPE_P (type)
4562       || (flag_associative_math && FLOAT_TYPE_P (type)))
4563     return true;
4564   return false;
4565 }
4566
4567 /* Break up subtract operations in block BB.
4568
4569    We do this top down because we don't know whether the subtract is
4570    part of a possible chain of reassociation except at the top.
4571
4572    IE given
4573    d = f + g
4574    c = a + e
4575    b = c - d
4576    q = b - r
4577    k = t - q
4578
4579    we want to break up k = t - q, but we won't until we've transformed q
4580    = b - r, which won't be broken up until we transform b = c - d.
4581
4582    En passant, clear the GIMPLE visited flag on every statement
4583    and set UIDs within each basic block.  */
4584
4585 static void
4586 break_up_subtract_bb (basic_block bb)
4587 {
4588   gimple_stmt_iterator gsi;
4589   basic_block son;
4590   unsigned int uid = 1;
4591
4592   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4593     {
4594       gimple *stmt = gsi_stmt (gsi);
4595       gimple_set_visited (stmt, false);
4596       gimple_set_uid (stmt, uid++);
4597
4598       if (!is_gimple_assign (stmt)
4599           || !can_reassociate_p (gimple_assign_lhs (stmt)))
4600         continue;
4601
4602       /* Look for simple gimple subtract operations.  */
4603       if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4604         {
4605           if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4606               || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4607             continue;
4608
4609           /* Check for a subtract used only in an addition.  If this
4610              is the case, transform it into add of a negate for better
4611              reassociation.  IE transform C = A-B into C = A + -B if C
4612              is only used in an addition.  */
4613           if (should_break_up_subtract (stmt))
4614             break_up_subtract (stmt, &gsi);
4615         }
4616       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4617                && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4618         plus_negates.safe_push (gimple_assign_lhs (stmt));
4619     }
4620   for (son = first_dom_son (CDI_DOMINATORS, bb);
4621        son;
4622        son = next_dom_son (CDI_DOMINATORS, son))
4623     break_up_subtract_bb (son);
4624 }
4625
4626 /* Used for repeated factor analysis.  */
4627 struct repeat_factor
4628 {
4629   /* An SSA name that occurs in a multiply chain.  */
4630   tree factor;
4631
4632   /* Cached rank of the factor.  */
4633   unsigned rank;
4634
4635   /* Number of occurrences of the factor in the chain.  */
4636   HOST_WIDE_INT count;
4637
4638   /* An SSA name representing the product of this factor and
4639      all factors appearing later in the repeated factor vector.  */
4640   tree repr;
4641 };
4642
4643
4644 static vec<repeat_factor> repeat_factor_vec;
4645
4646 /* Used for sorting the repeat factor vector.  Sort primarily by
4647    ascending occurrence count, secondarily by descending rank.  */
4648
4649 static int
4650 compare_repeat_factors (const void *x1, const void *x2)
4651 {
4652   const repeat_factor *rf1 = (const repeat_factor *) x1;
4653   const repeat_factor *rf2 = (const repeat_factor *) x2;
4654
4655   if (rf1->count != rf2->count)
4656     return rf1->count - rf2->count;
4657
4658   return rf2->rank - rf1->rank;
4659 }
4660
4661 /* Look for repeated operands in OPS in the multiply tree rooted at
4662    STMT.  Replace them with an optimal sequence of multiplies and powi
4663    builtin calls, and remove the used operands from OPS.  Return an
4664    SSA name representing the value of the replacement sequence.  */
4665
4666 static tree
4667 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
4668 {
4669   unsigned i, j, vec_len;
4670   int ii;
4671   operand_entry *oe;
4672   repeat_factor *rf1, *rf2;
4673   repeat_factor rfnew;
4674   tree result = NULL_TREE;
4675   tree target_ssa, iter_result;
4676   tree type = TREE_TYPE (gimple_get_lhs (stmt));
4677   tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4678   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4679   gimple *mul_stmt, *pow_stmt;
4680
4681   /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4682      target.  */
4683   if (!powi_fndecl)
4684     return NULL_TREE;
4685
4686   /* Allocate the repeated factor vector.  */
4687   repeat_factor_vec.create (10);
4688
4689   /* Scan the OPS vector for all SSA names in the product and build
4690      up a vector of occurrence counts for each factor.  */
4691   FOR_EACH_VEC_ELT (*ops, i, oe)
4692     {
4693       if (TREE_CODE (oe->op) == SSA_NAME)
4694         {
4695           FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4696             {
4697               if (rf1->factor == oe->op)
4698                 {
4699                   rf1->count += oe->count;
4700                   break;
4701                 }
4702             }
4703
4704           if (j >= repeat_factor_vec.length ())
4705             {
4706               rfnew.factor = oe->op;
4707               rfnew.rank = oe->rank;
4708               rfnew.count = oe->count;
4709               rfnew.repr = NULL_TREE;
4710               repeat_factor_vec.safe_push (rfnew);
4711             }
4712         }
4713     }
4714
4715   /* Sort the repeated factor vector by (a) increasing occurrence count,
4716      and (b) decreasing rank.  */
4717   repeat_factor_vec.qsort (compare_repeat_factors);
4718
4719   /* It is generally best to combine as many base factors as possible
4720      into a product before applying __builtin_powi to the result.
4721      However, the sort order chosen for the repeated factor vector
4722      allows us to cache partial results for the product of the base
4723      factors for subsequent use.  When we already have a cached partial
4724      result from a previous iteration, it is best to make use of it
4725      before looking for another __builtin_pow opportunity.
4726
4727      As an example, consider x * x * y * y * y * z * z * z * z.
4728      We want to first compose the product x * y * z, raise it to the
4729      second power, then multiply this by y * z, and finally multiply
4730      by z.  This can be done in 5 multiplies provided we cache y * z
4731      for use in both expressions:
4732
4733         t1 = y * z
4734         t2 = t1 * x
4735         t3 = t2 * t2
4736         t4 = t1 * t3
4737         result = t4 * z
4738
4739      If we instead ignored the cached y * z and first multiplied by
4740      the __builtin_pow opportunity z * z, we would get the inferior:
4741
4742         t1 = y * z
4743         t2 = t1 * x
4744         t3 = t2 * t2
4745         t4 = z * z
4746         t5 = t3 * t4
4747         result = t5 * y  */
4748
4749   vec_len = repeat_factor_vec.length ();
4750
4751   /* Repeatedly look for opportunities to create a builtin_powi call.  */
4752   while (true)
4753     {
4754       HOST_WIDE_INT power;
4755
4756       /* First look for the largest cached product of factors from
4757          preceding iterations.  If found, create a builtin_powi for
4758          it if the minimum occurrence count for its factors is at
4759          least 2, or just use this cached product as our next
4760          multiplicand if the minimum occurrence count is 1.  */
4761       FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4762         {
4763           if (rf1->repr && rf1->count > 0)
4764             break;
4765         }
4766
4767       if (j < vec_len)
4768         {
4769           power = rf1->count;
4770
4771           if (power == 1)
4772             {
4773               iter_result = rf1->repr;
4774
4775               if (dump_file && (dump_flags & TDF_DETAILS))
4776                 {
4777                   unsigned elt;
4778                   repeat_factor *rf;
4779                   fputs ("Multiplying by cached product ", dump_file);
4780                   for (elt = j; elt < vec_len; elt++)
4781                     {
4782                       rf = &repeat_factor_vec[elt];
4783                       print_generic_expr (dump_file, rf->factor, 0);
4784                       if (elt < vec_len - 1)
4785                         fputs (" * ", dump_file);
4786                     }
4787                   fputs ("\n", dump_file);
4788                 }
4789             }
4790           else
4791             {
4792               iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4793               pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4794                                             build_int_cst (integer_type_node,
4795                                                            power));
4796               gimple_call_set_lhs (pow_stmt, iter_result);
4797               gimple_set_location (pow_stmt, gimple_location (stmt));
4798               gimple_set_uid (pow_stmt, gimple_uid (stmt));
4799               gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4800
4801               if (dump_file && (dump_flags & TDF_DETAILS))
4802                 {
4803                   unsigned elt;
4804                   repeat_factor *rf;
4805                   fputs ("Building __builtin_pow call for cached product (",
4806                          dump_file);
4807                   for (elt = j; elt < vec_len; elt++)
4808                     {
4809                       rf = &repeat_factor_vec[elt];
4810                       print_generic_expr (dump_file, rf->factor, 0);
4811                       if (elt < vec_len - 1)
4812                         fputs (" * ", dump_file);
4813                     }
4814                   fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
4815                            power);
4816                 }
4817             }
4818         }
4819       else
4820         {
4821           /* Otherwise, find the first factor in the repeated factor
4822              vector whose occurrence count is at least 2.  If no such
4823              factor exists, there are no builtin_powi opportunities
4824              remaining.  */
4825           FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4826             {
4827               if (rf1->count >= 2)
4828                 break;
4829             }
4830
4831           if (j >= vec_len)
4832             break;
4833
4834           power = rf1->count;
4835
4836           if (dump_file && (dump_flags & TDF_DETAILS))
4837             {
4838               unsigned elt;
4839               repeat_factor *rf;
4840               fputs ("Building __builtin_pow call for (", dump_file);
4841               for (elt = j; elt < vec_len; elt++)
4842                 {
4843                   rf = &repeat_factor_vec[elt];
4844                   print_generic_expr (dump_file, rf->factor, 0);
4845                   if (elt < vec_len - 1)
4846                     fputs (" * ", dump_file);
4847                 }
4848               fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
4849             }
4850
4851           reassociate_stats.pows_created++;
4852
4853           /* Visit each element of the vector in reverse order (so that
4854              high-occurrence elements are visited first, and within the
4855              same occurrence count, lower-ranked elements are visited
4856              first).  Form a linear product of all elements in this order
4857              whose occurrencce count is at least that of element J.
4858              Record the SSA name representing the product of each element
4859              with all subsequent elements in the vector.  */
4860           if (j == vec_len - 1)
4861             rf1->repr = rf1->factor;
4862           else
4863             {
4864               for (ii = vec_len - 2; ii >= (int)j; ii--)
4865                 {
4866                   tree op1, op2;
4867
4868                   rf1 = &repeat_factor_vec[ii];
4869                   rf2 = &repeat_factor_vec[ii + 1];
4870
4871                   /* Init the last factor's representative to be itself.  */
4872                   if (!rf2->repr)
4873                     rf2->repr = rf2->factor;
4874
4875                   op1 = rf1->factor;
4876                   op2 = rf2->repr;
4877
4878                   target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4879                   mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
4880                                                   op1, op2);
4881                   gimple_set_location (mul_stmt, gimple_location (stmt));
4882                   gimple_set_uid (mul_stmt, gimple_uid (stmt));
4883                   gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4884                   rf1->repr = target_ssa;
4885
4886                   /* Don't reprocess the multiply we just introduced.  */
4887                   gimple_set_visited (mul_stmt, true);
4888                 }
4889             }
4890
4891           /* Form a call to __builtin_powi for the maximum product
4892              just formed, raised to the power obtained earlier.  */
4893           rf1 = &repeat_factor_vec[j];
4894           iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4895           pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4896                                         build_int_cst (integer_type_node,
4897                                                        power));
4898           gimple_call_set_lhs (pow_stmt, iter_result);
4899           gimple_set_location (pow_stmt, gimple_location (stmt));
4900           gimple_set_uid (pow_stmt, gimple_uid (stmt));
4901           gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4902         }
4903
4904       /* If we previously formed at least one other builtin_powi call,
4905          form the product of this one and those others.  */
4906       if (result)
4907         {
4908           tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4909           mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
4910                                           result, iter_result);
4911           gimple_set_location (mul_stmt, gimple_location (stmt));
4912           gimple_set_uid (mul_stmt, gimple_uid (stmt));
4913           gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4914           gimple_set_visited (mul_stmt, true);
4915           result = new_result;
4916         }
4917       else
4918         result = iter_result;
4919
4920       /* Decrement the occurrence count of each element in the product
4921          by the count found above, and remove this many copies of each
4922          factor from OPS.  */
4923       for (i = j; i < vec_len; i++)
4924         {
4925           unsigned k = power;
4926           unsigned n;
4927
4928           rf1 = &repeat_factor_vec[i];
4929           rf1->count -= power;
4930
4931           FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4932             {
4933               if (oe->op == rf1->factor)
4934                 {
4935                   if (oe->count <= k)
4936                     {
4937                       ops->ordered_remove (n);
4938                       k -= oe->count;
4939
4940                       if (k == 0)
4941                         break;
4942                     }
4943                   else
4944                     {
4945                       oe->count -= k;
4946                       break;
4947                     }
4948                 }
4949             }
4950         }
4951     }
4952
4953   /* At this point all elements in the repeated factor vector have a
4954      remaining occurrence count of 0 or 1, and those with a count of 1
4955      don't have cached representatives.  Re-sort the ops vector and
4956      clean up.  */
4957   ops->qsort (sort_by_operand_rank);
4958   repeat_factor_vec.release ();
4959
4960   /* Return the final product computed herein.  Note that there may
4961      still be some elements with single occurrence count left in OPS;
4962      those will be handled by the normal reassociation logic.  */
4963   return result;
4964 }
4965
4966 /* Attempt to optimize
4967    CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
4968    CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */
4969
4970 static void
4971 attempt_builtin_copysign (vec<operand_entry *> *ops)
4972 {
4973   operand_entry *oe;
4974   unsigned int i;
4975   unsigned int length = ops->length ();
4976   tree cst = ops->last ()->op;
4977
4978   if (length == 1 || TREE_CODE (cst) != REAL_CST)
4979     return;
4980
4981   FOR_EACH_VEC_ELT (*ops, i, oe)
4982     {
4983       if (TREE_CODE (oe->op) == SSA_NAME
4984           && has_single_use (oe->op))
4985         {
4986           gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
4987           if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
4988             {
4989               tree arg0, arg1;
4990               switch (gimple_call_combined_fn (old_call))
4991                 {
4992                 CASE_CFN_COPYSIGN:
4993                   arg0 = gimple_call_arg (old_call, 0);
4994                   arg1 = gimple_call_arg (old_call, 1);
4995                   /* The first argument of copysign must be a constant,
4996                      otherwise there's nothing to do.  */
4997                   if (TREE_CODE (arg0) == REAL_CST)
4998                     {
4999                       tree type = TREE_TYPE (arg0);
5000                       tree mul = const_binop (MULT_EXPR, type, cst, arg0);
5001                       /* If we couldn't fold to a single constant, skip it.
5002                          That happens e.g. for inexact multiplication when
5003                          -frounding-math.  */
5004                       if (mul == NULL_TREE)
5005                         break;
5006                       /* Instead of adjusting OLD_CALL, let's build a new
5007                          call to not leak the LHS and prevent keeping bogus
5008                          debug statements.  DCE will clean up the old call.  */
5009                       gcall *new_call;
5010                       if (gimple_call_internal_p (old_call))
5011                         new_call = gimple_build_call_internal
5012                           (IFN_COPYSIGN, 2, mul, arg1);
5013                       else
5014                         new_call = gimple_build_call
5015                           (gimple_call_fndecl (old_call), 2, mul, arg1);
5016                       tree lhs = make_ssa_name (type);
5017                       gimple_call_set_lhs (new_call, lhs);
5018                       gimple_set_location (new_call,
5019                                            gimple_location (old_call));
5020                       insert_stmt_after (new_call, old_call);
5021                       /* We've used the constant, get rid of it.  */
5022                       ops->pop ();
5023                       bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
5024                       /* Handle the CST1 < 0 case by negating the result.  */
5025                       if (cst1_neg)
5026                         {
5027                           tree negrhs = make_ssa_name (TREE_TYPE (lhs));
5028                           gimple *negate_stmt
5029                             = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
5030                           insert_stmt_after (negate_stmt, new_call);
5031                           oe->op = negrhs;
5032                         }
5033                       else
5034                         oe->op = lhs;
5035                       if (dump_file && (dump_flags & TDF_DETAILS))
5036                         {
5037                           fprintf (dump_file, "Optimizing copysign: ");
5038                           print_generic_expr (dump_file, cst, 0);
5039                           fprintf (dump_file, " * COPYSIGN (");
5040                           print_generic_expr (dump_file, arg0, 0);
5041                           fprintf (dump_file, ", ");
5042                           print_generic_expr (dump_file, arg1, 0);
5043                           fprintf (dump_file, ") into %sCOPYSIGN (",
5044                                    cst1_neg ? "-" : "");
5045                           print_generic_expr (dump_file, mul, 0);
5046                           fprintf (dump_file, ", ");
5047                           print_generic_expr (dump_file, arg1, 0);
5048                           fprintf (dump_file, "\n");
5049                         }
5050                       return;
5051                     }
5052                   break;
5053                 default:
5054                   break;
5055                 }
5056             }
5057         }
5058     }
5059 }
5060
5061 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
5062
5063 static void
5064 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
5065 {
5066   tree rhs1;
5067
5068   if (dump_file && (dump_flags & TDF_DETAILS))
5069     {
5070       fprintf (dump_file, "Transforming ");
5071       print_gimple_stmt (dump_file, stmt, 0, 0);
5072     }
5073
5074   rhs1 = gimple_assign_rhs1 (stmt);
5075   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
5076   update_stmt (stmt);
5077   remove_visited_stmt_chain (rhs1);
5078
5079   if (dump_file && (dump_flags & TDF_DETAILS))
5080     {
5081       fprintf (dump_file, " into ");
5082       print_gimple_stmt (dump_file, stmt, 0, 0);
5083     }
5084 }
5085
5086 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */
5087
5088 static void
5089 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
5090                             tree rhs1, tree rhs2)
5091 {
5092   if (dump_file && (dump_flags & TDF_DETAILS))
5093     {
5094       fprintf (dump_file, "Transforming ");
5095       print_gimple_stmt (dump_file, stmt, 0, 0);
5096     }
5097
5098   gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
5099   update_stmt (gsi_stmt (*gsi));
5100   remove_visited_stmt_chain (rhs1);
5101
5102   if (dump_file && (dump_flags & TDF_DETAILS))
5103     {
5104       fprintf (dump_file, " into ");
5105       print_gimple_stmt (dump_file, stmt, 0, 0);
5106     }
5107 }
5108
5109 /* Reassociate expressions in basic block BB and its post-dominator as
5110    children.
5111
5112    Bubble up return status from maybe_optimize_range_tests.  */
5113
5114 static bool
5115 reassociate_bb (basic_block bb)
5116 {
5117   gimple_stmt_iterator gsi;
5118   basic_block son;
5119   gimple *stmt = last_stmt (bb);
5120   bool cfg_cleanup_needed = false;
5121
5122   if (stmt && !gimple_visited_p (stmt))
5123     cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
5124
5125   for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
5126     {
5127       stmt = gsi_stmt (gsi);
5128
5129       if (is_gimple_assign (stmt)
5130           && !stmt_could_throw_p (stmt))
5131         {
5132           tree lhs, rhs1, rhs2;
5133           enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
5134
5135           /* If this is not a gimple binary expression, there is
5136              nothing for us to do with it.  */
5137           if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
5138             continue;
5139
5140           /* If this was part of an already processed statement,
5141              we don't need to touch it again. */
5142           if (gimple_visited_p (stmt))
5143             {
5144               /* This statement might have become dead because of previous
5145                  reassociations.  */
5146               if (has_zero_uses (gimple_get_lhs (stmt)))
5147                 {
5148                   reassoc_remove_stmt (&gsi);
5149                   release_defs (stmt);
5150                   /* We might end up removing the last stmt above which
5151                      places the iterator to the end of the sequence.
5152                      Reset it to the last stmt in this case which might
5153                      be the end of the sequence as well if we removed
5154                      the last statement of the sequence.  In which case
5155                      we need to bail out.  */
5156                   if (gsi_end_p (gsi))
5157                     {
5158                       gsi = gsi_last_bb (bb);
5159                       if (gsi_end_p (gsi))
5160                         break;
5161                     }
5162                 }
5163               continue;
5164             }
5165
5166           lhs = gimple_assign_lhs (stmt);
5167           rhs1 = gimple_assign_rhs1 (stmt);
5168           rhs2 = gimple_assign_rhs2 (stmt);
5169
5170           /* For non-bit or min/max operations we can't associate
5171              all types.  Verify that here.  */
5172           if (rhs_code != BIT_IOR_EXPR
5173               && rhs_code != BIT_AND_EXPR
5174               && rhs_code != BIT_XOR_EXPR
5175               && rhs_code != MIN_EXPR
5176               && rhs_code != MAX_EXPR
5177               && (!can_reassociate_p (lhs)
5178                   || !can_reassociate_p (rhs1)
5179                   || !can_reassociate_p (rhs2)))
5180             continue;
5181
5182           if (associative_tree_code (rhs_code))
5183             {
5184               auto_vec<operand_entry *> ops;
5185               tree powi_result = NULL_TREE;
5186               bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
5187
5188               /* There may be no immediate uses left by the time we
5189                  get here because we may have eliminated them all.  */
5190               if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
5191                 continue;
5192
5193               gimple_set_visited (stmt, true);
5194               linearize_expr_tree (&ops, stmt, true, true);
5195               ops.qsort (sort_by_operand_rank);
5196               optimize_ops_list (rhs_code, &ops);
5197               if (undistribute_ops_list (rhs_code, &ops,
5198                                          loop_containing_stmt (stmt)))
5199                 {
5200                   ops.qsort (sort_by_operand_rank);
5201                   optimize_ops_list (rhs_code, &ops);
5202                 }
5203
5204               if (rhs_code == PLUS_EXPR
5205                   && transform_add_to_multiply (stmt, &ops))
5206                 ops.qsort (sort_by_operand_rank);
5207
5208               if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
5209                 {
5210                   if (is_vector)
5211                     optimize_vec_cond_expr (rhs_code, &ops);
5212                   else
5213                     optimize_range_tests (rhs_code, &ops);
5214                 }
5215
5216               if (rhs_code == MULT_EXPR && !is_vector)
5217                 {
5218                   attempt_builtin_copysign (&ops);
5219
5220                   if (reassoc_insert_powi_p
5221                       && flag_unsafe_math_optimizations)
5222                     powi_result = attempt_builtin_powi (stmt, &ops);
5223                 }
5224
5225               operand_entry *last;
5226               bool negate_result = false;
5227               if (ops.length () > 1
5228                   && rhs_code == MULT_EXPR)
5229                 {
5230                   last = ops.last ();
5231                   if (((TREE_CODE (last->op) == INTEGER_CST
5232                         && integer_minus_onep (last->op))
5233                        || real_minus_onep (last->op))
5234                       && !HONOR_SNANS (TREE_TYPE (lhs))
5235                       && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
5236                           || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
5237                     {
5238                       ops.pop ();
5239                       negate_result = true;
5240                     }
5241                 }
5242
5243               /* If the operand vector is now empty, all operands were
5244                  consumed by the __builtin_powi optimization.  */
5245               if (ops.length () == 0)
5246                 transform_stmt_to_copy (&gsi, stmt, powi_result);
5247               else if (ops.length () == 1)
5248                 {
5249                   tree last_op = ops.last ()->op;
5250
5251                   if (powi_result)
5252                     transform_stmt_to_multiply (&gsi, stmt, last_op,
5253                                                 powi_result);
5254                   else
5255                     transform_stmt_to_copy (&gsi, stmt, last_op);
5256                 }
5257               else
5258                 {
5259                   machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
5260                   int ops_num = ops.length ();
5261                   int width = get_reassociation_width (ops_num, rhs_code, mode);
5262                   tree new_lhs = lhs;
5263
5264                   if (dump_file && (dump_flags & TDF_DETAILS))
5265                     fprintf (dump_file,
5266                              "Width = %d was chosen for reassociation\n", width);
5267
5268                   if (width > 1
5269                       && ops.length () > 3)
5270                     rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
5271                                                 width, ops);
5272                   else
5273                     {
5274                       /* When there are three operands left, we want
5275                          to make sure the ones that get the double
5276                          binary op are chosen wisely.  */
5277                       int len = ops.length ();
5278                       if (len >= 3)
5279                         swap_ops_for_binary_stmt (ops, len - 3, stmt);
5280
5281                       new_lhs = rewrite_expr_tree (stmt, 0, ops,
5282                                                    powi_result != NULL);
5283                     }
5284
5285                   /* If we combined some repeated factors into a
5286                      __builtin_powi call, multiply that result by the
5287                      reassociated operands.  */
5288                   if (powi_result)
5289                     {
5290                       gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
5291                       tree type = TREE_TYPE (lhs);
5292                       tree target_ssa = make_temp_ssa_name (type, NULL,
5293                                                             "reassocpow");
5294                       gimple_set_lhs (lhs_stmt, target_ssa);
5295                       update_stmt (lhs_stmt);
5296                       if (lhs != new_lhs)
5297                         target_ssa = new_lhs;
5298                       mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
5299                                                       powi_result, target_ssa);
5300                       gimple_set_location (mul_stmt, gimple_location (stmt));
5301                       gimple_set_uid (mul_stmt, gimple_uid (stmt));
5302                       gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
5303                     }
5304                 }
5305
5306               if (negate_result)
5307                 {
5308                   stmt = SSA_NAME_DEF_STMT (lhs);
5309                   tree tmp = make_ssa_name (TREE_TYPE (lhs));
5310                   gimple_set_lhs (stmt, tmp);
5311                   gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
5312                                                            tmp);
5313                   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5314                   gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
5315                   update_stmt (stmt);
5316                 }
5317             }
5318         }
5319     }
5320   for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
5321        son;
5322        son = next_dom_son (CDI_POST_DOMINATORS, son))
5323     cfg_cleanup_needed |= reassociate_bb (son);
5324
5325   return cfg_cleanup_needed;
5326 }
5327
5328 /* Add jumps around shifts for range tests turned into bit tests.
5329    For each SSA_NAME VAR we have code like:
5330    VAR = ...; // final stmt of range comparison
5331    // bit test here...;
5332    OTHERVAR = ...; // final stmt of the bit test sequence
5333    RES = VAR | OTHERVAR;
5334    Turn the above into:
5335    VAR = ...;
5336    if (VAR != 0)
5337      goto <l3>;
5338    else
5339      goto <l2>;
5340    <l2>:
5341    // bit test here...;
5342    OTHERVAR = ...;
5343    <l3>:
5344    # RES = PHI<1(l1), OTHERVAR(l2)>;  */
5345
5346 static void
5347 branch_fixup (void)
5348 {
5349   tree var;
5350   unsigned int i;
5351
5352   FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
5353     {
5354       gimple *def_stmt = SSA_NAME_DEF_STMT (var);
5355       gimple *use_stmt;
5356       use_operand_p use;
5357       bool ok = single_imm_use (var, &use, &use_stmt);
5358       gcc_assert (ok
5359                   && is_gimple_assign (use_stmt)
5360                   && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
5361                   && gimple_bb (def_stmt) == gimple_bb (use_stmt));
5362
5363       basic_block cond_bb = gimple_bb (def_stmt);
5364       basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
5365       basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
5366
5367       gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5368       gimple *g = gimple_build_cond (NE_EXPR, var,
5369                                      build_zero_cst (TREE_TYPE (var)),
5370                                      NULL_TREE, NULL_TREE);
5371       location_t loc = gimple_location (use_stmt);
5372       gimple_set_location (g, loc);
5373       gsi_insert_after (&gsi, g, GSI_NEW_STMT);
5374
5375       edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
5376       etrue->probability = REG_BR_PROB_BASE / 2;
5377       etrue->count = cond_bb->count / 2;
5378       edge efalse = find_edge (cond_bb, then_bb);
5379       efalse->flags = EDGE_FALSE_VALUE;
5380       efalse->probability -= etrue->probability;
5381       efalse->count -= etrue->count;
5382       then_bb->count -= etrue->count;
5383
5384       tree othervar = NULL_TREE;
5385       if (gimple_assign_rhs1 (use_stmt) == var)
5386         othervar = gimple_assign_rhs2 (use_stmt);
5387       else if (gimple_assign_rhs2 (use_stmt) == var)
5388         othervar = gimple_assign_rhs1 (use_stmt);
5389       else
5390         gcc_unreachable ();
5391       tree lhs = gimple_assign_lhs (use_stmt);
5392       gphi *phi = create_phi_node (lhs, merge_bb);
5393       add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
5394       add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
5395       gsi = gsi_for_stmt (use_stmt);
5396       gsi_remove (&gsi, true);
5397
5398       set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
5399       set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
5400     }
5401   reassoc_branch_fixups.release ();
5402 }
5403
5404 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
5405 void debug_ops_vector (vec<operand_entry *> ops);
5406
5407 /* Dump the operand entry vector OPS to FILE.  */
5408
5409 void
5410 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
5411 {
5412   operand_entry *oe;
5413   unsigned int i;
5414
5415   FOR_EACH_VEC_ELT (ops, i, oe)
5416     {
5417       fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
5418       print_generic_expr (file, oe->op, 0);
5419       fprintf (file, "\n");
5420     }
5421 }
5422
5423 /* Dump the operand entry vector OPS to STDERR.  */
5424
5425 DEBUG_FUNCTION void
5426 debug_ops_vector (vec<operand_entry *> ops)
5427 {
5428   dump_ops_vector (stderr, ops);
5429 }
5430
5431 /* Bubble up return status from reassociate_bb.  */
5432
5433 static bool
5434 do_reassoc (void)
5435 {
5436   break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5437   return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
5438 }
5439
5440 /* Initialize the reassociation pass.  */
5441
5442 static void
5443 init_reassoc (void)
5444 {
5445   int i;
5446   long rank = 2;
5447   int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
5448
5449   /* Find the loops, so that we can prevent moving calculations in
5450      them.  */
5451   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
5452
5453   memset (&reassociate_stats, 0, sizeof (reassociate_stats));
5454
5455   next_operand_entry_id = 0;
5456
5457   /* Reverse RPO (Reverse Post Order) will give us something where
5458      deeper loops come later.  */
5459   pre_and_rev_post_order_compute (NULL, bbs, false);
5460   bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
5461   operand_rank = new hash_map<tree, long>;
5462
5463   /* Give each default definition a distinct rank.  This includes
5464      parameters and the static chain.  Walk backwards over all
5465      SSA names so that we get proper rank ordering according
5466      to tree_swap_operands_p.  */
5467   for (i = num_ssa_names - 1; i > 0; --i)
5468     {
5469       tree name = ssa_name (i);
5470       if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5471         insert_operand_rank (name, ++rank);
5472     }
5473
5474   /* Set up rank for each BB  */
5475   for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5476     bb_rank[bbs[i]] = ++rank  << 16;
5477
5478   free (bbs);
5479   calculate_dominance_info (CDI_POST_DOMINATORS);
5480   plus_negates = vNULL;
5481 }
5482
5483 /* Cleanup after the reassociation pass, and print stats if
5484    requested.  */
5485
5486 static void
5487 fini_reassoc (void)
5488 {
5489   statistics_counter_event (cfun, "Linearized",
5490                             reassociate_stats.linearized);
5491   statistics_counter_event (cfun, "Constants eliminated",
5492                             reassociate_stats.constants_eliminated);
5493   statistics_counter_event (cfun, "Ops eliminated",
5494                             reassociate_stats.ops_eliminated);
5495   statistics_counter_event (cfun, "Statements rewritten",
5496                             reassociate_stats.rewritten);
5497   statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5498                             reassociate_stats.pows_encountered);
5499   statistics_counter_event (cfun, "Built-in powi calls created",
5500                             reassociate_stats.pows_created);
5501
5502   delete operand_rank;
5503   operand_entry_pool.release ();
5504   free (bb_rank);
5505   plus_negates.release ();
5506   free_dominance_info (CDI_POST_DOMINATORS);
5507   loop_optimizer_finalize ();
5508 }
5509
5510 /* Gate and execute functions for Reassociation.  If INSERT_POWI_P, enable
5511    insertion of __builtin_powi calls.
5512
5513    Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
5514    optimization of a gimple conditional.  Otherwise returns zero.  */
5515
5516 static unsigned int
5517 execute_reassoc (bool insert_powi_p)
5518 {
5519   reassoc_insert_powi_p = insert_powi_p;
5520
5521   init_reassoc ();
5522
5523   bool cfg_cleanup_needed;
5524   cfg_cleanup_needed = do_reassoc ();
5525   repropagate_negates ();
5526   branch_fixup ();
5527
5528   fini_reassoc ();
5529   return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
5530 }
5531
5532 namespace {
5533
5534 const pass_data pass_data_reassoc =
5535 {
5536   GIMPLE_PASS, /* type */
5537   "reassoc", /* name */
5538   OPTGROUP_NONE, /* optinfo_flags */
5539   TV_TREE_REASSOC, /* tv_id */
5540   ( PROP_cfg | PROP_ssa ), /* properties_required */
5541   0, /* properties_provided */
5542   0, /* properties_destroyed */
5543   0, /* todo_flags_start */
5544   TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5545 };
5546
5547 class pass_reassoc : public gimple_opt_pass
5548 {
5549 public:
5550   pass_reassoc (gcc::context *ctxt)
5551     : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
5552   {}
5553
5554   /* opt_pass methods: */
5555   opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5556   void set_pass_param (unsigned int n, bool param)
5557     {
5558       gcc_assert (n == 0);
5559       insert_powi_p = param;
5560     }
5561   virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5562   virtual unsigned int execute (function *)
5563     { return execute_reassoc (insert_powi_p); }
5564
5565  private:
5566   /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
5567      point 3a in the pass header comment.  */
5568   bool insert_powi_p;
5569 }; // class pass_reassoc
5570
5571 } // anon namespace
5572
5573 gimple_opt_pass *
5574 make_pass_reassoc (gcc::context *ctxt)
5575 {
5576   return new pass_reassoc (ctxt);
5577 }