gcc/tree-ssa-reassoc.c

   1 /* Reassociation for trees.
   2    Copyright (C) 2005-2016 Free Software Foundation, Inc.
   3    Contributed by Daniel Berlin <dan@dberlin.org>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 3, or (at your option)
  10 any later version.
  11
  12 GCC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "backend.h"
  25 #include "target.h"
  26 #include "rtl.h"
  27 #include "tree.h"
  28 #include "gimple.h"
  29 #include "cfghooks.h"
  30 #include "alloc-pool.h"
  31 #include "tree-pass.h"
  32 #include "tm_p.h"
  33 #include "ssa.h"
  34 #include "optabs-tree.h"
  35 #include "gimple-pretty-print.h"
  36 #include "diagnostic-core.h"
  37 #include "fold-const.h"
  38 #include "stor-layout.h"
  39 #include "cfganal.h"
  40 #include "gimple-fold.h"
  41 #include "tree-eh.h"
  42 #include "gimple-iterator.h"
  43 #include "gimplify-me.h"
  44 #include "tree-cfg.h"
  45 #include "tree-ssa-loop.h"
  46 #include "flags.h"
  47 #include "tree-ssa.h"
  48 #include "langhooks.h"
  49 #include "cfgloop.h"
  50 #include "params.h"
  51 #include "builtins.h"
  52 #include "gimplify.h"
  53 #include "case-cfn-macros.h"
  54
  55 /*  This is a simple global reassociation pass.  It is, in part, based
  56     on the LLVM pass of the same name (They do some things more/less
  57     than we do, in different orders, etc).
  58
  59     It consists of five steps:
  60
  61     1. Breaking up subtract operations into addition + negate, where
  62     it would promote the reassociation of adds.
  63
  64     2. Left linearization of the expression trees, so that (A+B)+(C+D)
  65     becomes (((A+B)+C)+D), which is easier for us to rewrite later.
  66     During linearization, we place the operands of the binary
  67     expressions into a vector of operand_entry_*
  68
  69     3. Optimization of the operand lists, eliminating things like a +
  70     -a, a & a, etc.
  71
  72     3a. Combine repeated factors with the same occurrence counts
  73     into a __builtin_powi call that will later be optimized into
  74     an optimal number of multiplies.
  75
  76     4. Rewrite the expression trees we linearized and optimized so
  77     they are in proper rank order.
  78
  79     5. Repropagate negates, as nothing else will clean it up ATM.
  80
  81     A bit of theory on #4, since nobody seems to write anything down
  82     about why it makes sense to do it the way they do it:
  83
  84     We could do this much nicer theoretically, but don't (for reasons
  85     explained after how to do it theoretically nice :P).
  86
  87     In order to promote the most redundancy elimination, you want
  88     binary expressions whose operands are the same rank (or
  89     preferably, the same value) exposed to the redundancy eliminator,
  90     for possible elimination.
  91
  92     So the way to do this if we really cared, is to build the new op
  93     tree from the leaves to the roots, merging as you go, and putting the
  94     new op on the end of the worklist, until you are left with one
  95     thing on the worklist.
  96
  97     IE if you have to rewrite the following set of operands (listed with
  98     rank in parentheses), with opcode PLUS_EXPR:
  99
 100     a (1),  b (1),  c (1),  d (2), e (2)
 101
 102
 103     We start with our merge worklist empty, and the ops list with all of
 104     those on it.
 105
 106     You want to first merge all leaves of the same rank, as much as
 107     possible.
 108
 109     So first build a binary op of
 110
 111     mergetmp = a + b, and put "mergetmp" on the merge worklist.
 112
 113     Because there is no three operand form of PLUS_EXPR, c is not going to
 114     be exposed to redundancy elimination as a rank 1 operand.
 115
 116     So you might as well throw it on the merge worklist (you could also
 117     consider it to now be a rank two operand, and merge it with d and e,
 118     but in this case, you then have evicted e from a binary op. So at
 119     least in this situation, you can't win.)
 120
 121     Then build a binary op of d + e
 122     mergetmp2 = d + e
 123
 124     and put mergetmp2 on the merge worklist.
 125
 126     so merge worklist = {mergetmp, c, mergetmp2}
 127
 128     Continue building binary ops of these operations until you have only
 129     one operation left on the worklist.
 130
 131     So we have
 132
 133     build binary op
 134     mergetmp3 = mergetmp + c
 135
 136     worklist = {mergetmp2, mergetmp3}
 137
 138     mergetmp4 = mergetmp2 + mergetmp3
 139
 140     worklist = {mergetmp4}
 141
 142     because we have one operation left, we can now just set the original
 143     statement equal to the result of that operation.
 144
 145     This will at least expose a + b  and d + e to redundancy elimination
 146     as binary operations.
 147
 148     For extra points, you can reuse the old statements to build the
 149     mergetmps, since you shouldn't run out.
 150
 151     So why don't we do this?
 152
 153     Because it's expensive, and rarely will help.  Most trees we are
 154     reassociating have 3 or less ops.  If they have 2 ops, they already
 155     will be written into a nice single binary op.  If you have 3 ops, a
 156     single simple check suffices to tell you whether the first two are of the
 157     same rank.  If so, you know to order it
 158
 159     mergetmp = op1 + op2
 160     newstmt = mergetmp + op3
 161
 162     instead of
 163     mergetmp = op2 + op3
 164     newstmt = mergetmp + op1
 165
 166     If all three are of the same rank, you can't expose them all in a
 167     single binary operator anyway, so the above is *still* the best you
 168     can do.
 169
 170     Thus, this is what we do.  When we have three ops left, we check to see
 171     what order to put them in, and call it a day.  As a nod to vector sum
 172     reduction, we check if any of the ops are really a phi node that is a
 173     destructive update for the associating op, and keep the destructive
 174     update together for vector sum reduction recognition.  */
 175
 176 /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
 177    point 3a in the pass header comment.  */
 178 static bool reassoc_insert_powi_p;
 179
 180 /* Statistics */
 181 static struct
 182 {
 183   int linearized;
 184   int constants_eliminated;
 185   int ops_eliminated;
 186   int rewritten;
 187   int pows_encountered;
 188   int pows_created;
 189 } reassociate_stats;
 190
 191 /* Operator, rank pair.  */
 192 struct operand_entry
 193 {
 194   unsigned int rank;
 195   int id;
 196   tree op;
 197   unsigned int count;
 198 };
 199
 200 static object_allocator<operand_entry> operand_entry_pool
 201   ("operand entry pool");
 202
 203 /* This is used to assign a unique ID to each struct operand_entry
 204    so that qsort results are identical on different hosts.  */
 205 static int next_operand_entry_id;
 206
 207 /* Starting rank number for a given basic block, so that we can rank
 208    operations using unmovable instructions in that BB based on the bb
 209    depth.  */
 210 static long *bb_rank;
 211
 212 /* Operand->rank hashtable.  */
 213 static hash_map<tree, long> *operand_rank;
 214
 215 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
 216    all basic blocks the CFG should be adjusted - basic blocks
 217    split right after that SSA_NAME's definition statement and before
 218    the only use, which must be a bit ior.  */
 219 static vec<tree> reassoc_branch_fixups;
 220
 221 /* Forward decls.  */
 222 static long get_rank (tree);
 223 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
 224
 225 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
 226    possibly added by gsi_remove.  */
 227
 228 bool
 229 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
 230 {
 231   gimple *stmt = gsi_stmt (*gsi);
 232
 233   if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
 234     return gsi_remove (gsi, true);
 235
 236   gimple_stmt_iterator prev = *gsi;
 237   gsi_prev (&prev);
 238   unsigned uid = gimple_uid (stmt);
 239   basic_block bb = gimple_bb (stmt);
 240   bool ret = gsi_remove (gsi, true);
 241   if (!gsi_end_p (prev))
 242     gsi_next (&prev);
 243   else
 244     prev = gsi_start_bb (bb);
 245   gimple *end_stmt = gsi_stmt (*gsi);
 246   while ((stmt = gsi_stmt (prev)) != end_stmt)
 247     {
 248       gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
 249       gimple_set_uid (stmt, uid);
 250       gsi_next (&prev);
 251     }
 252   return ret;
 253 }
 254
 255 /* Bias amount for loop-carried phis.  We want this to be larger than
 256    the depth of any reassociation tree we can see, but not larger than
 257    the rank difference between two blocks.  */
 258 #define PHI_LOOP_BIAS (1 << 15)
 259
 260 /* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
 261    an innermost loop, and the phi has only a single use which is inside
 262    the loop, then the rank is the block rank of the loop latch plus an
 263    extra bias for the loop-carried dependence.  This causes expressions
 264    calculated into an accumulator variable to be independent for each
 265    iteration of the loop.  If STMT is some other phi, the rank is the
 266    block rank of its containing block.  */
 267 static long
 268 phi_rank (gimple *stmt)
 269 {
 270   basic_block bb = gimple_bb (stmt);
 271   struct loop *father = bb->loop_father;
 272   tree res;
 273   unsigned i;
 274   use_operand_p use;
 275   gimple *use_stmt;
 276
 277   /* We only care about real loops (those with a latch).  */
 278   if (!father->latch)
 279     return bb_rank[bb->index];
 280
 281   /* Interesting phis must be in headers of innermost loops.  */
 282   if (bb != father->header
 283       || father->inner)
 284     return bb_rank[bb->index];
 285
 286   /* Ignore virtual SSA_NAMEs.  */
 287   res = gimple_phi_result (stmt);
 288   if (virtual_operand_p (res))
 289     return bb_rank[bb->index];
 290
 291   /* The phi definition must have a single use, and that use must be
 292      within the loop.  Otherwise this isn't an accumulator pattern.  */
 293   if (!single_imm_use (res, &use, &use_stmt)
 294       || gimple_bb (use_stmt)->loop_father != father)
 295     return bb_rank[bb->index];
 296
 297   /* Look for phi arguments from within the loop.  If found, bias this phi.  */
 298   for (i = 0; i < gimple_phi_num_args (stmt); i++)
 299     {
 300       tree arg = gimple_phi_arg_def (stmt, i);
 301       if (TREE_CODE (arg) == SSA_NAME
 302           && !SSA_NAME_IS_DEFAULT_DEF (arg))
 303         {
 304           gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
 305           if (gimple_bb (def_stmt)->loop_father == father)
 306             return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
 307         }
 308     }
 309
 310   /* Must be an uninteresting phi.  */
 311   return bb_rank[bb->index];
 312 }
 313
 314 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
 315    loop-carried dependence of an innermost loop, return TRUE; else
 316    return FALSE.  */
 317 static bool
 318 loop_carried_phi (tree exp)
 319 {
 320   gimple *phi_stmt;
 321   long block_rank;
 322
 323   if (TREE_CODE (exp) != SSA_NAME
 324       || SSA_NAME_IS_DEFAULT_DEF (exp))
 325     return false;
 326
 327   phi_stmt = SSA_NAME_DEF_STMT (exp);
 328
 329   if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
 330     return false;
 331
 332   /* Non-loop-carried phis have block rank.  Loop-carried phis have
 333      an additional bias added in.  If this phi doesn't have block rank,
 334      it's biased and should not be propagated.  */
 335   block_rank = bb_rank[gimple_bb (phi_stmt)->index];
 336
 337   if (phi_rank (phi_stmt) != block_rank)
 338     return true;
 339
 340   return false;
 341 }
 342
 343 /* Return the maximum of RANK and the rank that should be propagated
 344    from expression OP.  For most operands, this is just the rank of OP.
 345    For loop-carried phis, the value is zero to avoid undoing the bias
 346    in favor of the phi.  */
 347 static long
 348 propagate_rank (long rank, tree op)
 349 {
 350   long op_rank;
 351
 352   if (loop_carried_phi (op))
 353     return rank;
 354
 355   op_rank = get_rank (op);
 356
 357   return MAX (rank, op_rank);
 358 }
 359
 360 /* Look up the operand rank structure for expression E.  */
 361
 362 static inline long
 363 find_operand_rank (tree e)
 364 {
 365   long *slot = operand_rank->get (e);
 366   return slot ? *slot : -1;
 367 }
 368
 369 /* Insert {E,RANK} into the operand rank hashtable.  */
 370
 371 static inline void
 372 insert_operand_rank (tree e, long rank)
 373 {
 374   gcc_assert (rank > 0);
 375   gcc_assert (!operand_rank->put (e, rank));
 376 }
 377
 378 /* Given an expression E, return the rank of the expression.  */
 379
 380 static long
 381 get_rank (tree e)
 382 {
 383   /* SSA_NAME's have the rank of the expression they are the result
 384      of.
 385      For globals and uninitialized values, the rank is 0.
 386      For function arguments, use the pre-setup rank.
 387      For PHI nodes, stores, asm statements, etc, we use the rank of
 388      the BB.
 389      For simple operations, the rank is the maximum rank of any of
 390      its operands, or the bb_rank, whichever is less.
 391      I make no claims that this is optimal, however, it gives good
 392      results.  */
 393
 394   /* We make an exception to the normal ranking system to break
 395      dependences of accumulator variables in loops.  Suppose we
 396      have a simple one-block loop containing:
 397
 398        x_1 = phi(x_0, x_2)
 399        b = a + x_1
 400        c = b + d
 401        x_2 = c + e
 402
 403      As shown, each iteration of the calculation into x is fully
 404      dependent upon the iteration before it.  We would prefer to
 405      see this in the form:
 406
 407        x_1 = phi(x_0, x_2)
 408        b = a + d
 409        c = b + e
 410        x_2 = c + x_1
 411
 412      If the loop is unrolled, the calculations of b and c from
 413      different iterations can be interleaved.
 414
 415      To obtain this result during reassociation, we bias the rank
 416      of the phi definition x_1 upward, when it is recognized as an
 417      accumulator pattern.  The artificial rank causes it to be
 418      added last, providing the desired independence.  */
 419
 420   if (TREE_CODE (e) == SSA_NAME)
 421     {
 422       ssa_op_iter iter;
 423       gimple *stmt;
 424       long rank;
 425       tree op;
 426
 427       if (SSA_NAME_IS_DEFAULT_DEF (e))
 428         return find_operand_rank (e);
 429
 430       stmt = SSA_NAME_DEF_STMT (e);
 431       if (gimple_code (stmt) == GIMPLE_PHI)
 432         return phi_rank (stmt);
 433
 434       if (!is_gimple_assign (stmt))
 435         return bb_rank[gimple_bb (stmt)->index];
 436
 437       /* If we already have a rank for this expression, use that.  */
 438       rank = find_operand_rank (e);
 439       if (rank != -1)
 440         return rank;
 441
 442       /* Otherwise, find the maximum rank for the operands.  As an
 443          exception, remove the bias from loop-carried phis when propagating
 444          the rank so that dependent operations are not also biased.  */
 445       /* Simply walk over all SSA uses - this takes advatage of the
 446          fact that non-SSA operands are is_gimple_min_invariant and
 447          thus have rank 0.  */
 448       rank = 0;
 449       FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
 450         rank = propagate_rank (rank, op);
 451
 452       if (dump_file && (dump_flags & TDF_DETAILS))
 453         {
 454           fprintf (dump_file, "Rank for ");
 455           print_generic_expr (dump_file, e, 0);
 456           fprintf (dump_file, " is %ld\n", (rank + 1));
 457         }
 458
 459       /* Note the rank in the hashtable so we don't recompute it.  */
 460       insert_operand_rank (e, (rank + 1));
 461       return (rank + 1);
 462     }
 463
 464   /* Constants, globals, etc., are rank 0 */
 465   return 0;
 466 }
 467
 468
 469 /* We want integer ones to end up last no matter what, since they are
 470    the ones we can do the most with.  */
 471 #define INTEGER_CONST_TYPE 1 << 3
 472 #define FLOAT_CONST_TYPE 1 << 2
 473 #define OTHER_CONST_TYPE 1 << 1
 474
 475 /* Classify an invariant tree into integer, float, or other, so that
 476    we can sort them to be near other constants of the same type.  */
 477 static inline int
 478 constant_type (tree t)
 479 {
 480   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
 481     return INTEGER_CONST_TYPE;
 482   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
 483     return FLOAT_CONST_TYPE;
 484   else
 485     return OTHER_CONST_TYPE;
 486 }
 487
 488 /* qsort comparison function to sort operand entries PA and PB by rank
 489    so that the sorted array is ordered by rank in decreasing order.  */
 490 static int
 491 sort_by_operand_rank (const void *pa, const void *pb)
 492 {
 493   const operand_entry *oea = *(const operand_entry *const *)pa;
 494   const operand_entry *oeb = *(const operand_entry *const *)pb;
 495
 496   /* It's nicer for optimize_expression if constants that are likely
 497      to fold when added/multiplied//whatever are put next to each
 498      other.  Since all constants have rank 0, order them by type.  */
 499   if (oeb->rank == 0 && oea->rank == 0)
 500     {
 501       if (constant_type (oeb->op) != constant_type (oea->op))
 502         return constant_type (oeb->op) - constant_type (oea->op);
 503       else
 504         /* To make sorting result stable, we use unique IDs to determine
 505            order.  */
 506         return oeb->id - oea->id;
 507     }
 508
 509   /* Lastly, make sure the versions that are the same go next to each
 510      other.  */
 511   if ((oeb->rank - oea->rank == 0)
 512       && TREE_CODE (oea->op) == SSA_NAME
 513       && TREE_CODE (oeb->op) == SSA_NAME)
 514     {
 515       /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
 516          versions of removed SSA_NAMEs, so if possible, prefer to sort
 517          based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
 518          See PR60418.  */
 519       if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
 520           && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
 521           && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
 522         {
 523           gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
 524           gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
 525           basic_block bba = gimple_bb (stmta);
 526           basic_block bbb = gimple_bb (stmtb);
 527           if (bbb != bba)
 528             {
 529               if (bb_rank[bbb->index] != bb_rank[bba->index])
 530                 return bb_rank[bbb->index] - bb_rank[bba->index];
 531             }
 532           else
 533             {
 534               bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
 535               bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
 536               if (da != db)
 537                 return da ? 1 : -1;
 538             }
 539         }
 540
 541       if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
 542         return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
 543       else
 544         return oeb->id - oea->id;
 545     }
 546
 547   if (oeb->rank != oea->rank)
 548     return oeb->rank - oea->rank;
 549   else
 550     return oeb->id - oea->id;
 551 }
 552
 553 /* Add an operand entry to *OPS for the tree operand OP.  */
 554
 555 static void
 556 add_to_ops_vec (vec<operand_entry *> *ops, tree op)
 557 {
 558   operand_entry *oe = operand_entry_pool.allocate ();
 559
 560   oe->op = op;
 561   oe->rank = get_rank (op);
 562   oe->id = next_operand_entry_id++;
 563   oe->count = 1;
 564   ops->safe_push (oe);
 565 }
 566
 567 /* Add an operand entry to *OPS for the tree operand OP with repeat
 568    count REPEAT.  */
 569
 570 static void
 571 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
 572                        HOST_WIDE_INT repeat)
 573 {
 574   operand_entry *oe = operand_entry_pool.allocate ();
 575
 576   oe->op = op;
 577   oe->rank = get_rank (op);
 578   oe->id = next_operand_entry_id++;
 579   oe->count = repeat;
 580   ops->safe_push (oe);
 581
 582   reassociate_stats.pows_encountered++;
 583 }
 584
 585 /* Return true if STMT is reassociable operation containing a binary
 586    operation with tree code CODE, and is inside LOOP.  */
 587
 588 static bool
 589 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
 590 {
 591   basic_block bb = gimple_bb (stmt);
 592
 593   if (gimple_bb (stmt) == NULL)
 594     return false;
 595
 596   if (!flow_bb_inside_loop_p (loop, bb))
 597     return false;
 598
 599   if (is_gimple_assign (stmt)
 600       && gimple_assign_rhs_code (stmt) == code
 601       && has_single_use (gimple_assign_lhs (stmt)))
 602     return true;
 603
 604   return false;
 605 }
 606
 607
 608 /* Return true if STMT is a nop-conversion.  */
 609
 610 static bool
 611 gimple_nop_conversion_p (gimple *stmt)
 612 {
 613   if (gassign *ass = dyn_cast <gassign *> (stmt))
 614     {
 615       if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
 616           && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
 617                                     TREE_TYPE (gimple_assign_rhs1 (ass))))
 618         return true;
 619     }
 620   return false;
 621 }
 622
 623 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
 624    operand of the negate operation.  Otherwise, return NULL.  */
 625
 626 static tree
 627 get_unary_op (tree name, enum tree_code opcode)
 628 {
 629   gimple *stmt = SSA_NAME_DEF_STMT (name);
 630
 631   /* Look through nop conversions (sign changes).  */
 632   if (gimple_nop_conversion_p (stmt)
 633       && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
 634     stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
 635
 636   if (!is_gimple_assign (stmt))
 637     return NULL_TREE;
 638
 639   if (gimple_assign_rhs_code (stmt) == opcode)
 640     return gimple_assign_rhs1 (stmt);
 641   return NULL_TREE;
 642 }
 643
 644 /* Return true if OP1 and OP2 have the same value if casted to either type.  */
 645
 646 static bool
 647 ops_equal_values_p (tree op1, tree op2)
 648 {
 649   if (op1 == op2)
 650     return true;
 651
 652   tree orig_op1 = op1;
 653   if (TREE_CODE (op1) == SSA_NAME)
 654     {
 655       gimple *stmt = SSA_NAME_DEF_STMT (op1);
 656       if (gimple_nop_conversion_p (stmt))
 657         {
 658           op1 = gimple_assign_rhs1 (stmt);
 659           if (op1 == op2)
 660             return true;
 661         }
 662     }
 663
 664   if (TREE_CODE (op2) == SSA_NAME)
 665     {
 666       gimple *stmt = SSA_NAME_DEF_STMT (op2);
 667       if (gimple_nop_conversion_p (stmt))
 668         {
 669           op2 = gimple_assign_rhs1 (stmt);
 670           if (op1 == op2
 671               || orig_op1 == op2)
 672             return true;
 673         }
 674     }
 675
 676   return false;
 677 }
 678
 679
 680 /* If CURR and LAST are a pair of ops that OPCODE allows us to
 681    eliminate through equivalences, do so, remove them from OPS, and
 682    return true.  Otherwise, return false.  */
 683
 684 static bool
 685 eliminate_duplicate_pair (enum tree_code opcode,
 686                           vec<operand_entry *> *ops,
 687                           bool *all_done,
 688                           unsigned int i,
 689                           operand_entry *curr,
 690                           operand_entry *last)
 691 {
 692
 693   /* If we have two of the same op, and the opcode is & |, min, or max,
 694      we can eliminate one of them.
 695      If we have two of the same op, and the opcode is ^, we can
 696      eliminate both of them.  */
 697
 698   if (last && last->op == curr->op)
 699     {
 700       switch (opcode)
 701         {
 702         case MAX_EXPR:
 703         case MIN_EXPR:
 704         case BIT_IOR_EXPR:
 705         case BIT_AND_EXPR:
 706           if (dump_file && (dump_flags & TDF_DETAILS))
 707             {
 708               fprintf (dump_file, "Equivalence: ");
 709               print_generic_expr (dump_file, curr->op, 0);
 710               fprintf (dump_file, " [&|minmax] ");
 711               print_generic_expr (dump_file, last->op, 0);
 712               fprintf (dump_file, " -> ");
 713               print_generic_stmt (dump_file, last->op, 0);
 714             }
 715
 716           ops->ordered_remove (i);
 717           reassociate_stats.ops_eliminated ++;
 718
 719           return true;
 720
 721         case BIT_XOR_EXPR:
 722           if (dump_file && (dump_flags & TDF_DETAILS))
 723             {
 724               fprintf (dump_file, "Equivalence: ");
 725               print_generic_expr (dump_file, curr->op, 0);
 726               fprintf (dump_file, " ^ ");
 727               print_generic_expr (dump_file, last->op, 0);
 728               fprintf (dump_file, " -> nothing\n");
 729             }
 730
 731           reassociate_stats.ops_eliminated += 2;
 732
 733           if (ops->length () == 2)
 734             {
 735               ops->create (0);
 736               add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
 737               *all_done = true;
 738             }
 739           else
 740             {
 741               ops->ordered_remove (i-1);
 742               ops->ordered_remove (i-1);
 743             }
 744
 745           return true;
 746
 747         default:
 748           break;
 749         }
 750     }
 751   return false;
 752 }
 753
 754 static vec<tree> plus_negates;
 755
 756 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
 757    expression, look in OPS for a corresponding positive operation to cancel
 758    it out.  If we find one, remove the other from OPS, replace
 759    OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
 760    return false. */
 761
 762 static bool
 763 eliminate_plus_minus_pair (enum tree_code opcode,
 764                            vec<operand_entry *> *ops,
 765                            unsigned int currindex,
 766                            operand_entry *curr)
 767 {
 768   tree negateop;
 769   tree notop;
 770   unsigned int i;
 771   operand_entry *oe;
 772
 773   if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
 774     return false;
 775
 776   negateop = get_unary_op (curr->op, NEGATE_EXPR);
 777   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 778   if (negateop == NULL_TREE && notop == NULL_TREE)
 779     return false;
 780
 781   /* Any non-negated version will have a rank that is one less than
 782      the current rank.  So once we hit those ranks, if we don't find
 783      one, we can stop.  */
 784
 785   for (i = currindex + 1;
 786        ops->iterate (i, &oe)
 787        && oe->rank >= curr->rank - 1 ;
 788        i++)
 789     {
 790       if (negateop
 791           && ops_equal_values_p (oe->op, negateop))
 792         {
 793           if (dump_file && (dump_flags & TDF_DETAILS))
 794             {
 795               fprintf (dump_file, "Equivalence: ");
 796               print_generic_expr (dump_file, negateop, 0);
 797               fprintf (dump_file, " + -");
 798               print_generic_expr (dump_file, oe->op, 0);
 799               fprintf (dump_file, " -> 0\n");
 800             }
 801
 802           ops->ordered_remove (i);
 803           add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
 804           ops->ordered_remove (currindex);
 805           reassociate_stats.ops_eliminated ++;
 806
 807           return true;
 808         }
 809       else if (notop
 810                && ops_equal_values_p (oe->op, notop))
 811         {
 812           tree op_type = TREE_TYPE (oe->op);
 813
 814           if (dump_file && (dump_flags & TDF_DETAILS))
 815             {
 816               fprintf (dump_file, "Equivalence: ");
 817               print_generic_expr (dump_file, notop, 0);
 818               fprintf (dump_file, " + ~");
 819               print_generic_expr (dump_file, oe->op, 0);
 820               fprintf (dump_file, " -> -1\n");
 821             }
 822
 823           ops->ordered_remove (i);
 824           add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
 825           ops->ordered_remove (currindex);
 826           reassociate_stats.ops_eliminated ++;
 827
 828           return true;
 829         }
 830     }
 831
 832   /* If CURR->OP is a negate expr without nop conversion in a plus expr:
 833      save it for later inspection in repropagate_negates().  */
 834   if (negateop != NULL_TREE
 835       && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
 836     plus_negates.safe_push (curr->op);
 837
 838   return false;
 839 }
 840
 841 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
 842    bitwise not expression, look in OPS for a corresponding operand to
 843    cancel it out.  If we find one, remove the other from OPS, replace
 844    OPS[CURRINDEX] with 0, and return true.  Otherwise, return
 845    false. */
 846
 847 static bool
 848 eliminate_not_pairs (enum tree_code opcode,
 849                      vec<operand_entry *> *ops,
 850                      unsigned int currindex,
 851                      operand_entry *curr)
 852 {
 853   tree notop;
 854   unsigned int i;
 855   operand_entry *oe;
 856
 857   if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
 858       || TREE_CODE (curr->op) != SSA_NAME)
 859     return false;
 860
 861   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
 862   if (notop == NULL_TREE)
 863     return false;
 864
 865   /* Any non-not version will have a rank that is one less than
 866      the current rank.  So once we hit those ranks, if we don't find
 867      one, we can stop.  */
 868
 869   for (i = currindex + 1;
 870        ops->iterate (i, &oe)
 871        && oe->rank >= curr->rank - 1;
 872        i++)
 873     {
 874       if (oe->op == notop)
 875         {
 876           if (dump_file && (dump_flags & TDF_DETAILS))
 877             {
 878               fprintf (dump_file, "Equivalence: ");
 879               print_generic_expr (dump_file, notop, 0);
 880               if (opcode == BIT_AND_EXPR)
 881                 fprintf (dump_file, " & ~");
 882               else if (opcode == BIT_IOR_EXPR)
 883                 fprintf (dump_file, " | ~");
 884               print_generic_expr (dump_file, oe->op, 0);
 885               if (opcode == BIT_AND_EXPR)
 886                 fprintf (dump_file, " -> 0\n");
 887               else if (opcode == BIT_IOR_EXPR)
 888                 fprintf (dump_file, " -> -1\n");
 889             }
 890
 891           if (opcode == BIT_AND_EXPR)
 892             oe->op = build_zero_cst (TREE_TYPE (oe->op));
 893           else if (opcode == BIT_IOR_EXPR)
 894             oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
 895
 896           reassociate_stats.ops_eliminated += ops->length () - 1;
 897           ops->truncate (0);
 898           ops->quick_push (oe);
 899           return true;
 900         }
 901     }
 902
 903   return false;
 904 }
 905
 906 /* Use constant value that may be present in OPS to try to eliminate
 907    operands.  Note that this function is only really used when we've
 908    eliminated ops for other reasons, or merged constants.  Across
 909    single statements, fold already does all of this, plus more.  There
 910    is little point in duplicating logic, so I've only included the
 911    identities that I could ever construct testcases to trigger.  */
 912
 913 static void
 914 eliminate_using_constants (enum tree_code opcode,
 915                            vec<operand_entry *> *ops)
 916 {
 917   operand_entry *oelast = ops->last ();
 918   tree type = TREE_TYPE (oelast->op);
 919
 920   if (oelast->rank == 0
 921       && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
 922     {
 923       switch (opcode)
 924         {
 925         case BIT_AND_EXPR:
 926           if (integer_zerop (oelast->op))
 927             {
 928               if (ops->length () != 1)
 929                 {
 930                   if (dump_file && (dump_flags & TDF_DETAILS))
 931                     fprintf (dump_file, "Found & 0, removing all other ops\n");
 932
 933                   reassociate_stats.ops_eliminated += ops->length () - 1;
 934
 935                   ops->truncate (0);
 936                   ops->quick_push (oelast);
 937                   return;
 938                 }
 939             }
 940           else if (integer_all_onesp (oelast->op))
 941             {
 942               if (ops->length () != 1)
 943                 {
 944                   if (dump_file && (dump_flags & TDF_DETAILS))
 945                     fprintf (dump_file, "Found & -1, removing\n");
 946                   ops->pop ();
 947                   reassociate_stats.ops_eliminated++;
 948                 }
 949             }
 950           break;
 951         case BIT_IOR_EXPR:
 952           if (integer_all_onesp (oelast->op))
 953             {
 954               if (ops->length () != 1)
 955                 {
 956                   if (dump_file && (dump_flags & TDF_DETAILS))
 957                     fprintf (dump_file, "Found | -1, removing all other ops\n");
 958
 959                   reassociate_stats.ops_eliminated += ops->length () - 1;
 960
 961                   ops->truncate (0);
 962                   ops->quick_push (oelast);
 963                   return;
 964                 }
 965             }
 966           else if (integer_zerop (oelast->op))
 967             {
 968               if (ops->length () != 1)
 969                 {
 970                   if (dump_file && (dump_flags & TDF_DETAILS))
 971                     fprintf (dump_file, "Found | 0, removing\n");
 972                   ops->pop ();
 973                   reassociate_stats.ops_eliminated++;
 974                 }
 975             }
 976           break;
 977         case MULT_EXPR:
 978           if (integer_zerop (oelast->op)
 979               || (FLOAT_TYPE_P (type)
 980                   && !HONOR_NANS (type)
 981                   && !HONOR_SIGNED_ZEROS (type)
 982                   && real_zerop (oelast->op)))
 983             {
 984               if (ops->length () != 1)
 985                 {
 986                   if (dump_file && (dump_flags & TDF_DETAILS))
 987                     fprintf (dump_file, "Found * 0, removing all other ops\n");
 988
 989                   reassociate_stats.ops_eliminated += ops->length () - 1;
 990                   ops->truncate (1);
 991                   ops->quick_push (oelast);
 992                   return;
 993                 }
 994             }
 995           else if (integer_onep (oelast->op)
 996                    || (FLOAT_TYPE_P (type)
 997                        && !HONOR_SNANS (type)
 998                        && real_onep (oelast->op)))
 999             {
1000               if (ops->length () != 1)
1001                 {
1002                   if (dump_file && (dump_flags & TDF_DETAILS))
1003                     fprintf (dump_file, "Found * 1, removing\n");
1004                   ops->pop ();
1005                   reassociate_stats.ops_eliminated++;
1006                   return;
1007                 }
1008             }
1009           break;
1010         case BIT_XOR_EXPR:
1011         case PLUS_EXPR:
1012         case MINUS_EXPR:
1013           if (integer_zerop (oelast->op)
1014               || (FLOAT_TYPE_P (type)
1015                   && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1016                   && fold_real_zero_addition_p (type, oelast->op,
1017                                                 opcode == MINUS_EXPR)))
1018             {
1019               if (ops->length () != 1)
1020                 {
1021                   if (dump_file && (dump_flags & TDF_DETAILS))
1022                     fprintf (dump_file, "Found [|^+] 0, removing\n");
1023                   ops->pop ();
1024                   reassociate_stats.ops_eliminated++;
1025                   return;
1026                 }
1027             }
1028           break;
1029         default:
1030           break;
1031         }
1032     }
1033 }
1034
1035
1036 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1037                                  bool, bool);
1038
1039 /* Structure for tracking and counting operands.  */
1040 struct oecount {
1041   int cnt;
1042   int id;
1043   enum tree_code oecode;
1044   tree op;
1045 };
1046
1047
1048 /* The heap for the oecount hashtable and the sorted list of operands.  */
1049 static vec<oecount> cvec;
1050
1051
1052 /* Oecount hashtable helpers.  */
1053
1054 struct oecount_hasher : int_hash <int, 0, 1>
1055 {
1056   static inline hashval_t hash (int);
1057   static inline bool equal (int, int);
1058 };
1059
1060 /* Hash function for oecount.  */
1061
1062 inline hashval_t
1063 oecount_hasher::hash (int p)
1064 {
1065   const oecount *c = &cvec[p - 42];
1066   return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1067 }
1068
1069 /* Comparison function for oecount.  */
1070
1071 inline bool
1072 oecount_hasher::equal (int p1, int p2)
1073 {
1074   const oecount *c1 = &cvec[p1 - 42];
1075   const oecount *c2 = &cvec[p2 - 42];
1076   return (c1->oecode == c2->oecode
1077           && c1->op == c2->op);
1078 }
1079
1080 /* Comparison function for qsort sorting oecount elements by count.  */
1081
1082 static int
1083 oecount_cmp (const void *p1, const void *p2)
1084 {
1085   const oecount *c1 = (const oecount *)p1;
1086   const oecount *c2 = (const oecount *)p2;
1087   if (c1->cnt != c2->cnt)
1088     return c1->cnt - c2->cnt;
1089   else
1090     /* If counts are identical, use unique IDs to stabilize qsort.  */
1091     return c1->id - c2->id;
1092 }
1093
1094 /* Return TRUE iff STMT represents a builtin call that raises OP
1095    to some exponent.  */
1096
1097 static bool
1098 stmt_is_power_of_op (gimple *stmt, tree op)
1099 {
1100   if (!is_gimple_call (stmt))
1101     return false;
1102
1103   switch (gimple_call_combined_fn (stmt))
1104     {
1105     CASE_CFN_POW:
1106     CASE_CFN_POWI:
1107       return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1108
1109     default:
1110       return false;
1111     }
1112 }
1113
1114 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1115    in place and return the result.  Assumes that stmt_is_power_of_op
1116    was previously called for STMT and returned TRUE.  */
1117
1118 static HOST_WIDE_INT
1119 decrement_power (gimple *stmt)
1120 {
1121   REAL_VALUE_TYPE c, cint;
1122   HOST_WIDE_INT power;
1123   tree arg1;
1124
1125   switch (gimple_call_combined_fn (stmt))
1126     {
1127     CASE_CFN_POW:
1128       arg1 = gimple_call_arg (stmt, 1);
1129       c = TREE_REAL_CST (arg1);
1130       power = real_to_integer (&c) - 1;
1131       real_from_integer (&cint, VOIDmode, power, SIGNED);
1132       gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1133       return power;
1134
1135     CASE_CFN_POWI:
1136       arg1 = gimple_call_arg (stmt, 1);
1137       power = TREE_INT_CST_LOW (arg1) - 1;
1138       gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1139       return power;
1140
1141     default:
1142       gcc_unreachable ();
1143     }
1144 }
1145
1146 /* Find the single immediate use of STMT's LHS, and replace it
1147    with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
1148    replace *DEF with OP as well.  */
1149
1150 static void
1151 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1152 {
1153   tree lhs;
1154   gimple *use_stmt;
1155   use_operand_p use;
1156   gimple_stmt_iterator gsi;
1157
1158   if (is_gimple_call (stmt))
1159     lhs = gimple_call_lhs (stmt);
1160   else
1161     lhs = gimple_assign_lhs (stmt);
1162
1163   gcc_assert (has_single_use (lhs));
1164   single_imm_use (lhs, &use, &use_stmt);
1165   if (lhs == *def)
1166     *def = op;
1167   SET_USE (use, op);
1168   if (TREE_CODE (op) != SSA_NAME)
1169     update_stmt (use_stmt);
1170   gsi = gsi_for_stmt (stmt);
1171   unlink_stmt_vdef (stmt);
1172   reassoc_remove_stmt (&gsi);
1173   release_defs (stmt);
1174 }
1175
1176 /* Walks the linear chain with result *DEF searching for an operation
1177    with operand OP and code OPCODE removing that from the chain.  *DEF
1178    is updated if there is only one operand but no operation left.  */
1179
1180 static void
1181 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1182 {
1183   gimple *stmt = SSA_NAME_DEF_STMT (*def);
1184
1185   do
1186     {
1187       tree name;
1188
1189       if (opcode == MULT_EXPR
1190           && stmt_is_power_of_op (stmt, op))
1191         {
1192           if (decrement_power (stmt) == 1)
1193             propagate_op_to_single_use (op, stmt, def);
1194           return;
1195         }
1196
1197       name = gimple_assign_rhs1 (stmt);
1198
1199       /* If this is the operation we look for and one of the operands
1200          is ours simply propagate the other operand into the stmts
1201          single use.  */
1202       if (gimple_assign_rhs_code (stmt) == opcode
1203           && (name == op
1204               || gimple_assign_rhs2 (stmt) == op))
1205         {
1206           if (name == op)
1207             name = gimple_assign_rhs2 (stmt);
1208           propagate_op_to_single_use (name, stmt, def);
1209           return;
1210         }
1211
1212       /* We might have a multiply of two __builtin_pow* calls, and
1213          the operand might be hiding in the rightmost one.  */
1214       if (opcode == MULT_EXPR
1215           && gimple_assign_rhs_code (stmt) == opcode
1216           && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1217           && has_single_use (gimple_assign_rhs2 (stmt)))
1218         {
1219           gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1220           if (stmt_is_power_of_op (stmt2, op))
1221             {
1222               if (decrement_power (stmt2) == 1)
1223                 propagate_op_to_single_use (op, stmt2, def);
1224               return;
1225             }
1226         }
1227
1228       /* Continue walking the chain.  */
1229       gcc_assert (name != op
1230                   && TREE_CODE (name) == SSA_NAME);
1231       stmt = SSA_NAME_DEF_STMT (name);
1232     }
1233   while (1);
1234 }
1235
1236 /* Returns true if statement S1 dominates statement S2.  Like
1237    stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */
1238
1239 static bool
1240 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1241 {
1242   basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1243
1244   /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1245      SSA_NAME.  Assume it lives at the beginning of function and
1246      thus dominates everything.  */
1247   if (!bb1 || s1 == s2)
1248     return true;
1249
1250   /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
1251   if (!bb2)
1252     return false;
1253
1254   if (bb1 == bb2)
1255     {
1256       /* PHIs in the same basic block are assumed to be
1257          executed all in parallel, if only one stmt is a PHI,
1258          it dominates the other stmt in the same basic block.  */
1259       if (gimple_code (s1) == GIMPLE_PHI)
1260         return true;
1261
1262       if (gimple_code (s2) == GIMPLE_PHI)
1263         return false;
1264
1265       gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1266
1267       if (gimple_uid (s1) < gimple_uid (s2))
1268         return true;
1269
1270       if (gimple_uid (s1) > gimple_uid (s2))
1271         return false;
1272
1273       gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1274       unsigned int uid = gimple_uid (s1);
1275       for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1276         {
1277           gimple *s = gsi_stmt (gsi);
1278           if (gimple_uid (s) != uid)
1279             break;
1280           if (s == s2)
1281             return true;
1282         }
1283
1284       return false;
1285     }
1286
1287   return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1288 }
1289
1290 /* Insert STMT after INSERT_POINT.  */
1291
1292 static void
1293 insert_stmt_after (gimple *stmt, gimple *insert_point)
1294 {
1295   gimple_stmt_iterator gsi;
1296   basic_block bb;
1297
1298   if (gimple_code (insert_point) == GIMPLE_PHI)
1299     bb = gimple_bb (insert_point);
1300   else if (!stmt_ends_bb_p (insert_point))
1301     {
1302       gsi = gsi_for_stmt (insert_point);
1303       gimple_set_uid (stmt, gimple_uid (insert_point));
1304       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1305       return;
1306     }
1307   else
1308     /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1309        thus if it must end a basic block, it should be a call that can
1310        throw, or some assignment that can throw.  If it throws, the LHS
1311        of it will not be initialized though, so only valid places using
1312        the SSA_NAME should be dominated by the fallthru edge.  */
1313     bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1314   gsi = gsi_after_labels (bb);
1315   if (gsi_end_p (gsi))
1316     {
1317       gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1318       gimple_set_uid (stmt,
1319                       gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1320     }
1321   else
1322     gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1323   gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1324 }
1325
1326 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1327    the result.  Places the statement after the definition of either
1328    OP1 or OP2.  Returns the new statement.  */
1329
1330 static gimple *
1331 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1332 {
1333   gimple *op1def = NULL, *op2def = NULL;
1334   gimple_stmt_iterator gsi;
1335   tree op;
1336   gassign *sum;
1337
1338   /* Create the addition statement.  */
1339   op = make_ssa_name (type);
1340   sum = gimple_build_assign (op, opcode, op1, op2);
1341
1342   /* Find an insertion place and insert.  */
1343   if (TREE_CODE (op1) == SSA_NAME)
1344     op1def = SSA_NAME_DEF_STMT (op1);
1345   if (TREE_CODE (op2) == SSA_NAME)
1346     op2def = SSA_NAME_DEF_STMT (op2);
1347   if ((!op1def || gimple_nop_p (op1def))
1348       && (!op2def || gimple_nop_p (op2def)))
1349     {
1350       gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1351       if (gsi_end_p (gsi))
1352         {
1353           gimple_stmt_iterator gsi2
1354             = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1355           gimple_set_uid (sum,
1356                           gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1357         }
1358       else
1359         gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1360       gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1361     }
1362   else
1363     {
1364       gimple *insert_point;
1365       if ((!op1def || gimple_nop_p (op1def))
1366            || (op2def && !gimple_nop_p (op2def)
1367                && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1368         insert_point = op2def;
1369       else
1370         insert_point = op1def;
1371       insert_stmt_after (sum, insert_point);
1372     }
1373   update_stmt (sum);
1374
1375   return sum;
1376 }
1377
1378 /* Perform un-distribution of divisions and multiplications.
1379    A * X + B * X is transformed into (A + B) * X and A / X + B / X
1380    to (A + B) / X for real X.
1381
1382    The algorithm is organized as follows.
1383
1384     - First we walk the addition chain *OPS looking for summands that
1385       are defined by a multiplication or a real division.  This results
1386       in the candidates bitmap with relevant indices into *OPS.
1387
1388     - Second we build the chains of multiplications or divisions for
1389       these candidates, counting the number of occurrences of (operand, code)
1390       pairs in all of the candidates chains.
1391
1392     - Third we sort the (operand, code) pairs by number of occurrence and
1393       process them starting with the pair with the most uses.
1394
1395       * For each such pair we walk the candidates again to build a
1396         second candidate bitmap noting all multiplication/division chains
1397         that have at least one occurrence of (operand, code).
1398
1399       * We build an alternate addition chain only covering these
1400         candidates with one (operand, code) operation removed from their
1401         multiplication/division chain.
1402
1403       * The first candidate gets replaced by the alternate addition chain
1404         multiplied/divided by the operand.
1405
1406       * All candidate chains get disabled for further processing and
1407         processing of (operand, code) pairs continues.
1408
1409   The alternate addition chains built are re-processed by the main
1410   reassociation algorithm which allows optimizing a * x * y + b * y * x
1411   to (a + b ) * x * y in one invocation of the reassociation pass.  */
1412
1413 static bool
1414 undistribute_ops_list (enum tree_code opcode,
1415                        vec<operand_entry *> *ops, struct loop *loop)
1416 {
1417   unsigned int length = ops->length ();
1418   operand_entry *oe1;
1419   unsigned i, j;
1420   sbitmap candidates, candidates2;
1421   unsigned nr_candidates, nr_candidates2;
1422   sbitmap_iterator sbi0;
1423   vec<operand_entry *> *subops;
1424   bool changed = false;
1425   int next_oecount_id = 0;
1426
1427   if (length <= 1
1428       || opcode != PLUS_EXPR)
1429     return false;
1430
1431   /* Build a list of candidates to process.  */
1432   candidates = sbitmap_alloc (length);
1433   bitmap_clear (candidates);
1434   nr_candidates = 0;
1435   FOR_EACH_VEC_ELT (*ops, i, oe1)
1436     {
1437       enum tree_code dcode;
1438       gimple *oe1def;
1439
1440       if (TREE_CODE (oe1->op) != SSA_NAME)
1441         continue;
1442       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1443       if (!is_gimple_assign (oe1def))
1444         continue;
1445       dcode = gimple_assign_rhs_code (oe1def);
1446       if ((dcode != MULT_EXPR
1447            && dcode != RDIV_EXPR)
1448           || !is_reassociable_op (oe1def, dcode, loop))
1449         continue;
1450
1451       bitmap_set_bit (candidates, i);
1452       nr_candidates++;
1453     }
1454
1455   if (nr_candidates < 2)
1456     {
1457       sbitmap_free (candidates);
1458       return false;
1459     }
1460
1461   if (dump_file && (dump_flags & TDF_DETAILS))
1462     {
1463       fprintf (dump_file, "searching for un-distribute opportunities ");
1464       print_generic_expr (dump_file,
1465         (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1466       fprintf (dump_file, " %d\n", nr_candidates);
1467     }
1468
1469   /* Build linearized sub-operand lists and the counting table.  */
1470   cvec.create (0);
1471
1472   hash_table<oecount_hasher> ctable (15);
1473
1474   /* ??? Macro arguments cannot have multi-argument template types in
1475      them.  This typedef is needed to workaround that limitation.  */
1476   typedef vec<operand_entry *> vec_operand_entry_t_heap;
1477   subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1478   EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1479     {
1480       gimple *oedef;
1481       enum tree_code oecode;
1482       unsigned j;
1483
1484       oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1485       oecode = gimple_assign_rhs_code (oedef);
1486       linearize_expr_tree (&subops[i], oedef,
1487                            associative_tree_code (oecode), false);
1488
1489       FOR_EACH_VEC_ELT (subops[i], j, oe1)
1490         {
1491           oecount c;
1492           int *slot;
1493           int idx;
1494           c.oecode = oecode;
1495           c.cnt = 1;
1496           c.id = next_oecount_id++;
1497           c.op = oe1->op;
1498           cvec.safe_push (c);
1499           idx = cvec.length () + 41;
1500           slot = ctable.find_slot (idx, INSERT);
1501           if (!*slot)
1502             {
1503               *slot = idx;
1504             }
1505           else
1506             {
1507               cvec.pop ();
1508               cvec[*slot - 42].cnt++;
1509             }
1510         }
1511     }
1512
1513   /* Sort the counting table.  */
1514   cvec.qsort (oecount_cmp);
1515
1516   if (dump_file && (dump_flags & TDF_DETAILS))
1517     {
1518       oecount *c;
1519       fprintf (dump_file, "Candidates:\n");
1520       FOR_EACH_VEC_ELT (cvec, j, c)
1521         {
1522           fprintf (dump_file, "  %u %s: ", c->cnt,
1523                    c->oecode == MULT_EXPR
1524                    ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1525           print_generic_expr (dump_file, c->op, 0);
1526           fprintf (dump_file, "\n");
1527         }
1528     }
1529
1530   /* Process the (operand, code) pairs in order of most occurrence.  */
1531   candidates2 = sbitmap_alloc (length);
1532   while (!cvec.is_empty ())
1533     {
1534       oecount *c = &cvec.last ();
1535       if (c->cnt < 2)
1536         break;
1537
1538       /* Now collect the operands in the outer chain that contain
1539          the common operand in their inner chain.  */
1540       bitmap_clear (candidates2);
1541       nr_candidates2 = 0;
1542       EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1543         {
1544           gimple *oedef;
1545           enum tree_code oecode;
1546           unsigned j;
1547           tree op = (*ops)[i]->op;
1548
1549           /* If we undistributed in this chain already this may be
1550              a constant.  */
1551           if (TREE_CODE (op) != SSA_NAME)
1552             continue;
1553
1554           oedef = SSA_NAME_DEF_STMT (op);
1555           oecode = gimple_assign_rhs_code (oedef);
1556           if (oecode != c->oecode)
1557             continue;
1558
1559           FOR_EACH_VEC_ELT (subops[i], j, oe1)
1560             {
1561               if (oe1->op == c->op)
1562                 {
1563                   bitmap_set_bit (candidates2, i);
1564                   ++nr_candidates2;
1565                   break;
1566                 }
1567             }
1568         }
1569
1570       if (nr_candidates2 >= 2)
1571         {
1572           operand_entry *oe1, *oe2;
1573           gimple *prod;
1574           int first = bitmap_first_set_bit (candidates2);
1575
1576           /* Build the new addition chain.  */
1577           oe1 = (*ops)[first];
1578           if (dump_file && (dump_flags & TDF_DETAILS))
1579             {
1580               fprintf (dump_file, "Building (");
1581               print_generic_expr (dump_file, oe1->op, 0);
1582             }
1583           zero_one_operation (&oe1->op, c->oecode, c->op);
1584           EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1585             {
1586               gimple *sum;
1587               oe2 = (*ops)[i];
1588               if (dump_file && (dump_flags & TDF_DETAILS))
1589                 {
1590                   fprintf (dump_file, " + ");
1591                   print_generic_expr (dump_file, oe2->op, 0);
1592                 }
1593               zero_one_operation (&oe2->op, c->oecode, c->op);
1594               sum = build_and_add_sum (TREE_TYPE (oe1->op),
1595                                        oe1->op, oe2->op, opcode);
1596               oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1597               oe2->rank = 0;
1598               oe1->op = gimple_get_lhs (sum);
1599             }
1600
1601           /* Apply the multiplication/division.  */
1602           prod = build_and_add_sum (TREE_TYPE (oe1->op),
1603                                     oe1->op, c->op, c->oecode);
1604           if (dump_file && (dump_flags & TDF_DETAILS))
1605             {
1606               fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1607               print_generic_expr (dump_file, c->op, 0);
1608               fprintf (dump_file, "\n");
1609             }
1610
1611           /* Record it in the addition chain and disable further
1612              undistribution with this op.  */
1613           oe1->op = gimple_assign_lhs (prod);
1614           oe1->rank = get_rank (oe1->op);
1615           subops[first].release ();
1616
1617           changed = true;
1618         }
1619
1620       cvec.pop ();
1621     }
1622
1623   for (i = 0; i < ops->length (); ++i)
1624     subops[i].release ();
1625   free (subops);
1626   cvec.release ();
1627   sbitmap_free (candidates);
1628   sbitmap_free (candidates2);
1629
1630   return changed;
1631 }
1632
1633 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1634    expression, examine the other OPS to see if any of them are comparisons
1635    of the same values, which we may be able to combine or eliminate.
1636    For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
1637
1638 static bool
1639 eliminate_redundant_comparison (enum tree_code opcode,
1640                                 vec<operand_entry *> *ops,
1641                                 unsigned int currindex,
1642                                 operand_entry *curr)
1643 {
1644   tree op1, op2;
1645   enum tree_code lcode, rcode;
1646   gimple *def1, *def2;
1647   int i;
1648   operand_entry *oe;
1649
1650   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1651     return false;
1652
1653   /* Check that CURR is a comparison.  */
1654   if (TREE_CODE (curr->op) != SSA_NAME)
1655     return false;
1656   def1 = SSA_NAME_DEF_STMT (curr->op);
1657   if (!is_gimple_assign (def1))
1658     return false;
1659   lcode = gimple_assign_rhs_code (def1);
1660   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1661     return false;
1662   op1 = gimple_assign_rhs1 (def1);
1663   op2 = gimple_assign_rhs2 (def1);
1664
1665   /* Now look for a similar comparison in the remaining OPS.  */
1666   for (i = currindex + 1; ops->iterate (i, &oe); i++)
1667     {
1668       tree t;
1669
1670       if (TREE_CODE (oe->op) != SSA_NAME)
1671         continue;
1672       def2 = SSA_NAME_DEF_STMT (oe->op);
1673       if (!is_gimple_assign (def2))
1674         continue;
1675       rcode = gimple_assign_rhs_code (def2);
1676       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1677         continue;
1678
1679       /* If we got here, we have a match.  See if we can combine the
1680          two comparisons.  */
1681       if (opcode == BIT_IOR_EXPR)
1682         t = maybe_fold_or_comparisons (lcode, op1, op2,
1683                                        rcode, gimple_assign_rhs1 (def2),
1684                                        gimple_assign_rhs2 (def2));
1685       else
1686         t = maybe_fold_and_comparisons (lcode, op1, op2,
1687                                         rcode, gimple_assign_rhs1 (def2),
1688                                         gimple_assign_rhs2 (def2));
1689       if (!t)
1690         continue;
1691
1692       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1693          always give us a boolean_type_node value back.  If the original
1694          BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1695          we need to convert.  */
1696       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1697         t = fold_convert (TREE_TYPE (curr->op), t);
1698
1699       if (TREE_CODE (t) != INTEGER_CST
1700           && !operand_equal_p (t, curr->op, 0))
1701         {
1702           enum tree_code subcode;
1703           tree newop1, newop2;
1704           if (!COMPARISON_CLASS_P (t))
1705             continue;
1706           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1707           STRIP_USELESS_TYPE_CONVERSION (newop1);
1708           STRIP_USELESS_TYPE_CONVERSION (newop2);
1709           if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1710             continue;
1711         }
1712
1713       if (dump_file && (dump_flags & TDF_DETAILS))
1714         {
1715           fprintf (dump_file, "Equivalence: ");
1716           print_generic_expr (dump_file, curr->op, 0);
1717           fprintf (dump_file, " %s ", op_symbol_code (opcode));
1718           print_generic_expr (dump_file, oe->op, 0);
1719           fprintf (dump_file, " -> ");
1720           print_generic_expr (dump_file, t, 0);
1721           fprintf (dump_file, "\n");
1722         }
1723
1724       /* Now we can delete oe, as it has been subsumed by the new combined
1725          expression t.  */
1726       ops->ordered_remove (i);
1727       reassociate_stats.ops_eliminated ++;
1728
1729       /* If t is the same as curr->op, we're done.  Otherwise we must
1730          replace curr->op with t.  Special case is if we got a constant
1731          back, in which case we add it to the end instead of in place of
1732          the current entry.  */
1733       if (TREE_CODE (t) == INTEGER_CST)
1734         {
1735           ops->ordered_remove (currindex);
1736           add_to_ops_vec (ops, t);
1737         }
1738       else if (!operand_equal_p (t, curr->op, 0))
1739         {
1740           gimple *sum;
1741           enum tree_code subcode;
1742           tree newop1;
1743           tree newop2;
1744           gcc_assert (COMPARISON_CLASS_P (t));
1745           extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1746           STRIP_USELESS_TYPE_CONVERSION (newop1);
1747           STRIP_USELESS_TYPE_CONVERSION (newop2);
1748           gcc_checking_assert (is_gimple_val (newop1)
1749                                && is_gimple_val (newop2));
1750           sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1751           curr->op = gimple_get_lhs (sum);
1752         }
1753       return true;
1754     }
1755
1756   return false;
1757 }
1758
1759 /* Perform various identities and other optimizations on the list of
1760    operand entries, stored in OPS.  The tree code for the binary
1761    operation between all the operands is OPCODE.  */
1762
1763 static void
1764 optimize_ops_list (enum tree_code opcode,
1765                    vec<operand_entry *> *ops)
1766 {
1767   unsigned int length = ops->length ();
1768   unsigned int i;
1769   operand_entry *oe;
1770   operand_entry *oelast = NULL;
1771   bool iterate = false;
1772
1773   if (length == 1)
1774     return;
1775
1776   oelast = ops->last ();
1777
1778   /* If the last two are constants, pop the constants off, merge them
1779      and try the next two.  */
1780   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1781     {
1782       operand_entry *oelm1 = (*ops)[length - 2];
1783
1784       if (oelm1->rank == 0
1785           && is_gimple_min_invariant (oelm1->op)
1786           && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1787                                        TREE_TYPE (oelast->op)))
1788         {
1789           tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1790                                      oelm1->op, oelast->op);
1791
1792           if (folded && is_gimple_min_invariant (folded))
1793             {
1794               if (dump_file && (dump_flags & TDF_DETAILS))
1795                 fprintf (dump_file, "Merging constants\n");
1796
1797               ops->pop ();
1798               ops->pop ();
1799
1800               add_to_ops_vec (ops, folded);
1801               reassociate_stats.constants_eliminated++;
1802
1803               optimize_ops_list (opcode, ops);
1804               return;
1805             }
1806         }
1807     }
1808
1809   eliminate_using_constants (opcode, ops);
1810   oelast = NULL;
1811
1812   for (i = 0; ops->iterate (i, &oe);)
1813     {
1814       bool done = false;
1815
1816       if (eliminate_not_pairs (opcode, ops, i, oe))
1817         return;
1818       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1819           || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1820           || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1821         {
1822           if (done)
1823             return;
1824           iterate = true;
1825           oelast = NULL;
1826           continue;
1827         }
1828       oelast = oe;
1829       i++;
1830     }
1831
1832   length = ops->length ();
1833   oelast = ops->last ();
1834
1835   if (iterate)
1836     optimize_ops_list (opcode, ops);
1837 }
1838
1839 /* The following functions are subroutines to optimize_range_tests and allow
1840    it to try to change a logical combination of comparisons into a range
1841    test.
1842
1843    For example, both
1844         X == 2 || X == 5 || X == 3 || X == 4
1845    and
1846         X >= 2 && X <= 5
1847    are converted to
1848         (unsigned) (X - 2) <= 3
1849
1850    For more information see comments above fold_test_range in fold-const.c,
1851    this implementation is for GIMPLE.  */
1852
1853 struct range_entry
1854 {
1855   tree exp;
1856   tree low;
1857   tree high;
1858   bool in_p;
1859   bool strict_overflow_p;
1860   unsigned int idx, next;
1861 };
1862
1863 /* This is similar to make_range in fold-const.c, but on top of
1864    GIMPLE instead of trees.  If EXP is non-NULL, it should be
1865    an SSA_NAME and STMT argument is ignored, otherwise STMT
1866    argument should be a GIMPLE_COND.  */
1867
1868 static void
1869 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
1870 {
1871   int in_p;
1872   tree low, high;
1873   bool is_bool, strict_overflow_p;
1874
1875   r->exp = NULL_TREE;
1876   r->in_p = false;
1877   r->strict_overflow_p = false;
1878   r->low = NULL_TREE;
1879   r->high = NULL_TREE;
1880   if (exp != NULL_TREE
1881       && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1882     return;
1883
1884   /* Start with simply saying "EXP != 0" and then look at the code of EXP
1885      and see if we can refine the range.  Some of the cases below may not
1886      happen, but it doesn't seem worth worrying about this.  We "continue"
1887      the outer loop when we've changed something; otherwise we "break"
1888      the switch, which will "break" the while.  */
1889   low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1890   high = low;
1891   in_p = 0;
1892   strict_overflow_p = false;
1893   is_bool = false;
1894   if (exp == NULL_TREE)
1895     is_bool = true;
1896   else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1897     {
1898       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1899         is_bool = true;
1900       else
1901         return;
1902     }
1903   else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1904     is_bool = true;
1905
1906   while (1)
1907     {
1908       enum tree_code code;
1909       tree arg0, arg1, exp_type;
1910       tree nexp;
1911       location_t loc;
1912
1913       if (exp != NULL_TREE)
1914         {
1915           if (TREE_CODE (exp) != SSA_NAME
1916               || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1917             break;
1918
1919           stmt = SSA_NAME_DEF_STMT (exp);
1920           if (!is_gimple_assign (stmt))
1921             break;
1922
1923           code = gimple_assign_rhs_code (stmt);
1924           arg0 = gimple_assign_rhs1 (stmt);
1925           arg1 = gimple_assign_rhs2 (stmt);
1926           exp_type = TREE_TYPE (exp);
1927         }
1928       else
1929         {
1930           code = gimple_cond_code (stmt);
1931           arg0 = gimple_cond_lhs (stmt);
1932           arg1 = gimple_cond_rhs (stmt);
1933           exp_type = boolean_type_node;
1934         }
1935
1936       if (TREE_CODE (arg0) != SSA_NAME)
1937         break;
1938       loc = gimple_location (stmt);
1939       switch (code)
1940         {
1941         case BIT_NOT_EXPR:
1942           if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1943               /* Ensure the range is either +[-,0], +[0,0],
1944                  -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1945                  -[1,1].  If it is e.g. +[-,-] or -[-,-]
1946                  or similar expression of unconditional true or
1947                  false, it should not be negated.  */
1948               && ((high && integer_zerop (high))
1949                   || (low && integer_onep (low))))
1950             {
1951               in_p = !in_p;
1952               exp = arg0;
1953               continue;
1954             }
1955           break;
1956         case SSA_NAME:
1957           exp = arg0;
1958           continue;
1959         CASE_CONVERT:
1960           if (is_bool)
1961             goto do_default;
1962           if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1963             {
1964               if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1965                 is_bool = true;
1966               else
1967                 return;
1968             }
1969           else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1970             is_bool = true;
1971           goto do_default;
1972         case EQ_EXPR:
1973         case NE_EXPR:
1974         case LT_EXPR:
1975         case LE_EXPR:
1976         case GE_EXPR:
1977         case GT_EXPR:
1978           is_bool = true;
1979           /* FALLTHRU */
1980         default:
1981           if (!is_bool)
1982             return;
1983         do_default:
1984           nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1985                                   &low, &high, &in_p,
1986                                   &strict_overflow_p);
1987           if (nexp != NULL_TREE)
1988             {
1989               exp = nexp;
1990               gcc_assert (TREE_CODE (exp) == SSA_NAME);
1991               continue;
1992             }
1993           break;
1994         }
1995       break;
1996     }
1997   if (is_bool)
1998     {
1999       r->exp = exp;
2000       r->in_p = in_p;
2001       r->low = low;
2002       r->high = high;
2003       r->strict_overflow_p = strict_overflow_p;
2004     }
2005 }
2006
2007 /* Comparison function for qsort.  Sort entries
2008    without SSA_NAME exp first, then with SSA_NAMEs sorted
2009    by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2010    by increasing ->low and if ->low is the same, by increasing
2011    ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
2012    maximum.  */
2013
2014 static int
2015 range_entry_cmp (const void *a, const void *b)
2016 {
2017   const struct range_entry *p = (const struct range_entry *) a;
2018   const struct range_entry *q = (const struct range_entry *) b;
2019
2020   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2021     {
2022       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2023         {
2024           /* Group range_entries for the same SSA_NAME together.  */
2025           if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2026             return -1;
2027           else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2028             return 1;
2029           /* If ->low is different, NULL low goes first, then by
2030              ascending low.  */
2031           if (p->low != NULL_TREE)
2032             {
2033               if (q->low != NULL_TREE)
2034                 {
2035                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
2036                                           p->low, q->low);
2037                   if (tem && integer_onep (tem))
2038                     return -1;
2039                   tem = fold_binary (GT_EXPR, boolean_type_node,
2040                                      p->low, q->low);
2041                   if (tem && integer_onep (tem))
2042                     return 1;
2043                 }
2044               else
2045                 return 1;
2046             }
2047           else if (q->low != NULL_TREE)
2048             return -1;
2049           /* If ->high is different, NULL high goes last, before that by
2050              ascending high.  */
2051           if (p->high != NULL_TREE)
2052             {
2053               if (q->high != NULL_TREE)
2054                 {
2055                   tree tem = fold_binary (LT_EXPR, boolean_type_node,
2056                                           p->high, q->high);
2057                   if (tem && integer_onep (tem))
2058                     return -1;
2059                   tem = fold_binary (GT_EXPR, boolean_type_node,
2060                                      p->high, q->high);
2061                   if (tem && integer_onep (tem))
2062                     return 1;
2063                 }
2064               else
2065                 return -1;
2066             }
2067           else if (q->high != NULL_TREE)
2068             return 1;
2069           /* If both ranges are the same, sort below by ascending idx.  */
2070         }
2071       else
2072         return 1;
2073     }
2074   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2075     return -1;
2076
2077   if (p->idx < q->idx)
2078     return -1;
2079   else
2080     {
2081       gcc_checking_assert (p->idx > q->idx);
2082       return 1;
2083     }
2084 }
2085
2086 /* Helper routine of optimize_range_test.
2087    [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2088    RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2089    OPCODE and OPS are arguments of optimize_range_tests.  If OTHERRANGE
2090    is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2091    an array of COUNT pointers to other ranges.  Return
2092    true if the range merge has been successful.
2093    If OPCODE is ERROR_MARK, this is called from within
2094    maybe_optimize_range_tests and is performing inter-bb range optimization.
2095    In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2096    oe->rank.  */
2097
2098 static bool
2099 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2100                    struct range_entry **otherrangep,
2101                    unsigned int count, enum tree_code opcode,
2102                    vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2103                    bool in_p, tree low, tree high, bool strict_overflow_p)
2104 {
2105   operand_entry *oe = (*ops)[range->idx];
2106   tree op = oe->op;
2107   gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2108                     : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2109   location_t loc = gimple_location (stmt);
2110   tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2111   tree tem = build_range_check (loc, optype, unshare_expr (exp),
2112                                 in_p, low, high);
2113   enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2114   gimple_stmt_iterator gsi;
2115   unsigned int i, uid;
2116
2117   if (tem == NULL_TREE)
2118     return false;
2119
2120   /* If op is default def SSA_NAME, there is no place to insert the
2121      new comparison.  Give up, unless we can use OP itself as the
2122      range test.  */
2123   if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2124     {
2125       if (op == range->exp
2126           && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2127               || TREE_CODE (optype) == BOOLEAN_TYPE)
2128           && (op == tem
2129               || (TREE_CODE (tem) == EQ_EXPR
2130                   && TREE_OPERAND (tem, 0) == op
2131                   && integer_onep (TREE_OPERAND (tem, 1))))
2132           && opcode != BIT_IOR_EXPR
2133           && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2134         {
2135           stmt = NULL;
2136           tem = op;
2137         }
2138       else
2139         return false;
2140     }
2141
2142   if (strict_overflow_p && issue_strict_overflow_warning (wc))
2143     warning_at (loc, OPT_Wstrict_overflow,
2144                 "assuming signed overflow does not occur "
2145                 "when simplifying range test");
2146
2147   if (dump_file && (dump_flags & TDF_DETAILS))
2148     {
2149       struct range_entry *r;
2150       fprintf (dump_file, "Optimizing range tests ");
2151       print_generic_expr (dump_file, range->exp, 0);
2152       fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2153       print_generic_expr (dump_file, range->low, 0);
2154       fprintf (dump_file, ", ");
2155       print_generic_expr (dump_file, range->high, 0);
2156       fprintf (dump_file, "]");
2157       for (i = 0; i < count; i++)
2158         {
2159           if (otherrange)
2160             r = otherrange + i;
2161           else
2162             r = otherrangep[i];
2163           fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2164           print_generic_expr (dump_file, r->low, 0);
2165           fprintf (dump_file, ", ");
2166           print_generic_expr (dump_file, r->high, 0);
2167           fprintf (dump_file, "]");
2168         }
2169       fprintf (dump_file, "\n into ");
2170       print_generic_expr (dump_file, tem, 0);
2171       fprintf (dump_file, "\n");
2172     }
2173
2174   if (opcode == BIT_IOR_EXPR
2175       || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2176     tem = invert_truthvalue_loc (loc, tem);
2177
2178   tem = fold_convert_loc (loc, optype, tem);
2179   if (stmt)
2180     {
2181       gsi = gsi_for_stmt (stmt);
2182       uid = gimple_uid (stmt);
2183     }
2184   else
2185     {
2186       gsi = gsi_none ();
2187       uid = 0;
2188     }
2189   if (stmt == NULL)
2190     gcc_checking_assert (tem == op);
2191   /* In rare cases range->exp can be equal to lhs of stmt.
2192      In that case we have to insert after the stmt rather then before
2193      it.  If stmt is a PHI, insert it at the start of the basic block.  */
2194   else if (op != range->exp)
2195     {
2196       gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2197       tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2198                                       GSI_SAME_STMT);
2199       gsi_prev (&gsi);
2200     }
2201   else if (gimple_code (stmt) != GIMPLE_PHI)
2202     {
2203       gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2204       tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2205                                       GSI_CONTINUE_LINKING);
2206     }
2207   else
2208     {
2209       gsi = gsi_after_labels (gimple_bb (stmt));
2210       if (!gsi_end_p (gsi))
2211         uid = gimple_uid (gsi_stmt (gsi));
2212       else
2213         {
2214           gsi = gsi_start_bb (gimple_bb (stmt));
2215           uid = 1;
2216           while (!gsi_end_p (gsi))
2217             {
2218               uid = gimple_uid (gsi_stmt (gsi));
2219               gsi_next (&gsi);
2220             }
2221         }
2222       gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2223       tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2224                                       GSI_SAME_STMT);
2225       if (gsi_end_p (gsi))
2226         gsi = gsi_last_bb (gimple_bb (stmt));
2227       else
2228         gsi_prev (&gsi);
2229     }
2230   for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2231     if (gimple_uid (gsi_stmt (gsi)))
2232       break;
2233     else
2234       gimple_set_uid (gsi_stmt (gsi), uid);
2235
2236   oe->op = tem;
2237   range->exp = exp;
2238   range->low = low;
2239   range->high = high;
2240   range->in_p = in_p;
2241   range->strict_overflow_p = false;
2242
2243   for (i = 0; i < count; i++)
2244     {
2245       if (otherrange)
2246         range = otherrange + i;
2247       else
2248         range = otherrangep[i];
2249       oe = (*ops)[range->idx];
2250       /* Now change all the other range test immediate uses, so that
2251          those tests will be optimized away.  */
2252       if (opcode == ERROR_MARK)
2253         {
2254           if (oe->op)
2255             oe->op = build_int_cst (TREE_TYPE (oe->op),
2256                                     oe->rank == BIT_IOR_EXPR ? 0 : 1);
2257           else
2258             oe->op = (oe->rank == BIT_IOR_EXPR
2259                       ? boolean_false_node : boolean_true_node);
2260         }
2261       else
2262         oe->op = error_mark_node;
2263       range->exp = NULL_TREE;
2264     }
2265   return true;
2266 }
2267
2268 /* Optimize X == CST1 || X == CST2
2269    if popcount (CST1 ^ CST2) == 1 into
2270    (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2271    Similarly for ranges.  E.g.
2272    X != 2 && X != 3 && X != 10 && X != 11
2273    will be transformed by the previous optimization into
2274    !((X - 2U) <= 1U || (X - 10U) <= 1U)
2275    and this loop can transform that into
2276    !(((X & ~8) - 2U) <= 1U).  */
2277
2278 static bool
2279 optimize_range_tests_xor (enum tree_code opcode, tree type,
2280                           tree lowi, tree lowj, tree highi, tree highj,
2281                           vec<operand_entry *> *ops,
2282                           struct range_entry *rangei,
2283                           struct range_entry *rangej)
2284 {
2285   tree lowxor, highxor, tem, exp;
2286   /* Check lowi ^ lowj == highi ^ highj and
2287      popcount (lowi ^ lowj) == 1.  */
2288   lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2289   if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2290     return false;
2291   if (!integer_pow2p (lowxor))
2292     return false;
2293   highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2294   if (!tree_int_cst_equal (lowxor, highxor))
2295     return false;
2296
2297   tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2298   exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2299   lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2300   highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2301   if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2302                          NULL, rangei->in_p, lowj, highj,
2303                          rangei->strict_overflow_p
2304                          || rangej->strict_overflow_p))
2305     return true;
2306   return false;
2307 }
2308
2309 /* Optimize X == CST1 || X == CST2
2310    if popcount (CST2 - CST1) == 1 into
2311    ((X - CST1) & ~(CST2 - CST1)) == 0.
2312    Similarly for ranges.  E.g.
2313    X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2314    || X == 75 || X == 45
2315    will be transformed by the previous optimization into
2316    (X - 43U) <= 3U || (X - 75U) <= 3U
2317    and this loop can transform that into
2318    ((X - 43U) & ~(75U - 43U)) <= 3U.  */
2319 static bool
2320 optimize_range_tests_diff (enum tree_code opcode, tree type,
2321                             tree lowi, tree lowj, tree highi, tree highj,
2322                             vec<operand_entry *> *ops,
2323                             struct range_entry *rangei,
2324                             struct range_entry *rangej)
2325 {
2326   tree tem1, tem2, mask;
2327   /* Check highi - lowi == highj - lowj.  */
2328   tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2329   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2330     return false;
2331   tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2332   if (!tree_int_cst_equal (tem1, tem2))
2333     return false;
2334   /* Check popcount (lowj - lowi) == 1.  */
2335   tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2336   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2337     return false;
2338   if (!integer_pow2p (tem1))
2339     return false;
2340
2341   type = unsigned_type_for (type);
2342   tem1 = fold_convert (type, tem1);
2343   tem2 = fold_convert (type, tem2);
2344   lowi = fold_convert (type, lowi);
2345   mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2346   tem1 = fold_binary (MINUS_EXPR, type,
2347                       fold_convert (type, rangei->exp), lowi);
2348   tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2349   lowj = build_int_cst (type, 0);
2350   if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2351                          NULL, rangei->in_p, lowj, tem2,
2352                          rangei->strict_overflow_p
2353                          || rangej->strict_overflow_p))
2354     return true;
2355   return false;
2356 }
2357
2358 /* It does some common checks for function optimize_range_tests_xor and
2359    optimize_range_tests_diff.
2360    If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2361    Else it calls optimize_range_tests_diff.  */
2362
2363 static bool
2364 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2365                         bool optimize_xor, vec<operand_entry *> *ops,
2366                         struct range_entry *ranges)
2367 {
2368   int i, j;
2369   bool any_changes = false;
2370   for (i = first; i < length; i++)
2371     {
2372       tree lowi, highi, lowj, highj, type, tem;
2373
2374       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2375         continue;
2376       type = TREE_TYPE (ranges[i].exp);
2377       if (!INTEGRAL_TYPE_P (type))
2378         continue;
2379       lowi = ranges[i].low;
2380       if (lowi == NULL_TREE)
2381         lowi = TYPE_MIN_VALUE (type);
2382       highi = ranges[i].high;
2383       if (highi == NULL_TREE)
2384         continue;
2385       for (j = i + 1; j < length && j < i + 64; j++)
2386         {
2387           bool changes;
2388           if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2389             continue;
2390           lowj = ranges[j].low;
2391           if (lowj == NULL_TREE)
2392             continue;
2393           highj = ranges[j].high;
2394           if (highj == NULL_TREE)
2395             highj = TYPE_MAX_VALUE (type);
2396           /* Check lowj > highi.  */
2397           tem = fold_binary (GT_EXPR, boolean_type_node,
2398                              lowj, highi);
2399           if (tem == NULL_TREE || !integer_onep (tem))
2400             continue;
2401           if (optimize_xor)
2402             changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2403                                                 highi, highj, ops,
2404                                                 ranges + i, ranges + j);
2405           else
2406             changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2407                                                  highi, highj, ops,
2408                                                  ranges + i, ranges + j);
2409           if (changes)
2410             {
2411               any_changes = true;
2412               break;
2413             }
2414         }
2415     }
2416   return any_changes;
2417 }
2418
2419 /* Helper function of optimize_range_tests_to_bit_test.  Handle a single
2420    range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2421    EXP on success, NULL otherwise.  */
2422
2423 static tree
2424 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2425                        wide_int *mask, tree *totallowp)
2426 {
2427   tree tem = int_const_binop (MINUS_EXPR, high, low);
2428   if (tem == NULL_TREE
2429       || TREE_CODE (tem) != INTEGER_CST
2430       || TREE_OVERFLOW (tem)
2431       || tree_int_cst_sgn (tem) == -1
2432       || compare_tree_int (tem, prec) != -1)
2433     return NULL_TREE;
2434
2435   unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2436   *mask = wi::shifted_mask (0, max, false, prec);
2437   if (TREE_CODE (exp) == BIT_AND_EXPR
2438       && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2439     {
2440       widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2441       msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2442       if (wi::popcount (msk) == 1
2443           && wi::ltu_p (msk, prec - max))
2444         {
2445           *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2446           max += msk.to_uhwi ();
2447           exp = TREE_OPERAND (exp, 0);
2448           if (integer_zerop (low)
2449               && TREE_CODE (exp) == PLUS_EXPR
2450               && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2451             {
2452               tree ret = TREE_OPERAND (exp, 0);
2453               STRIP_NOPS (ret);
2454               widest_int bias
2455                 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2456                                      TYPE_PRECISION (TREE_TYPE (low))));
2457               tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2458               if (totallowp)
2459                 {
2460                   *totallowp = tbias;
2461                   return ret;
2462                 }
2463               else if (!tree_int_cst_lt (totallow, tbias))
2464                 return NULL_TREE;
2465               bias = wi::to_widest (tbias);
2466               bias -= wi::to_widest (totallow);
2467               if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
2468                 {
2469                   *mask = wi::lshift (*mask, bias);
2470                   return ret;
2471                 }
2472             }
2473         }
2474     }
2475   if (totallowp)
2476     return exp;
2477   if (!tree_int_cst_lt (totallow, low))
2478     return exp;
2479   tem = int_const_binop (MINUS_EXPR, low, totallow);
2480   if (tem == NULL_TREE
2481       || TREE_CODE (tem) != INTEGER_CST
2482       || TREE_OVERFLOW (tem)
2483       || compare_tree_int (tem, prec - max) == 1)
2484     return NULL_TREE;
2485
2486   *mask = wi::lshift (*mask, wi::to_widest (tem));
2487   return exp;
2488 }
2489
2490 /* Attempt to optimize small range tests using bit test.
2491    E.g.
2492    X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2493    && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2494    has been by earlier optimizations optimized into:
2495    ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2496    As all the 43 through 82 range is less than 64 numbers,
2497    for 64-bit word targets optimize that into:
2498    (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0  */
2499
2500 static bool
2501 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2502                                   vec<operand_entry *> *ops,
2503                                   struct range_entry *ranges)
2504 {
2505   int i, j;
2506   bool any_changes = false;
2507   int prec = GET_MODE_BITSIZE (word_mode);
2508   auto_vec<struct range_entry *, 64> candidates;
2509
2510   for (i = first; i < length - 2; i++)
2511     {
2512       tree lowi, highi, lowj, highj, type;
2513
2514       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2515         continue;
2516       type = TREE_TYPE (ranges[i].exp);
2517       if (!INTEGRAL_TYPE_P (type))
2518         continue;
2519       lowi = ranges[i].low;
2520       if (lowi == NULL_TREE)
2521         lowi = TYPE_MIN_VALUE (type);
2522       highi = ranges[i].high;
2523       if (highi == NULL_TREE)
2524         continue;
2525       wide_int mask;
2526       tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2527                                         highi, &mask, &lowi);
2528       if (exp == NULL_TREE)
2529         continue;
2530       bool strict_overflow_p = ranges[i].strict_overflow_p;
2531       candidates.truncate (0);
2532       int end = MIN (i + 64, length);
2533       for (j = i + 1; j < end; j++)
2534         {
2535           tree exp2;
2536           if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2537             continue;
2538           if (ranges[j].exp == exp)
2539             ;
2540           else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2541             {
2542               exp2 = TREE_OPERAND (ranges[j].exp, 0);
2543               if (exp2 == exp)
2544                 ;
2545               else if (TREE_CODE (exp2) == PLUS_EXPR)
2546                 {
2547                   exp2 = TREE_OPERAND (exp2, 0);
2548                   STRIP_NOPS (exp2);
2549                   if (exp2 != exp)
2550                     continue;
2551                 }
2552               else
2553                 continue;
2554             }
2555           else
2556             continue;
2557           lowj = ranges[j].low;
2558           if (lowj == NULL_TREE)
2559             continue;
2560           highj = ranges[j].high;
2561           if (highj == NULL_TREE)
2562             highj = TYPE_MAX_VALUE (type);
2563           wide_int mask2;
2564           exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2565                                         highj, &mask2, NULL);
2566           if (exp2 != exp)
2567             continue;
2568           mask |= mask2;
2569           strict_overflow_p |= ranges[j].strict_overflow_p;
2570           candidates.safe_push (&ranges[j]);
2571         }
2572
2573       /* If we need otherwise 3 or more comparisons, use a bit test.  */
2574       if (candidates.length () >= 2)
2575         {
2576           tree high = wide_int_to_tree (TREE_TYPE (lowi),
2577                                         wi::to_widest (lowi)
2578                                         + prec - 1 - wi::clz (mask));
2579           operand_entry *oe = (*ops)[ranges[i].idx];
2580           tree op = oe->op;
2581           gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2582                             : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2583           location_t loc = gimple_location (stmt);
2584           tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2585
2586           /* See if it isn't cheaper to pretend the minimum value of the
2587              range is 0, if maximum value is small enough.
2588              We can avoid then subtraction of the minimum value, but the
2589              mask constant could be perhaps more expensive.  */
2590           if (compare_tree_int (lowi, 0) > 0
2591               && compare_tree_int (high, prec) < 0)
2592             {
2593               int cost_diff;
2594               HOST_WIDE_INT m = tree_to_uhwi (lowi);
2595               rtx reg = gen_raw_REG (word_mode, 10000);
2596               bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2597               cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2598                                                       GEN_INT (-m)), speed_p);
2599               rtx r = immed_wide_int_const (mask, word_mode);
2600               cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2601                                          word_mode, speed_p);
2602               r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2603               cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2604                                          word_mode, speed_p);
2605               if (cost_diff > 0)
2606                 {
2607                   mask = wi::lshift (mask, m);
2608                   lowi = build_zero_cst (TREE_TYPE (lowi));
2609                 }
2610             }
2611
2612           tree tem = build_range_check (loc, optype, unshare_expr (exp),
2613                                         false, lowi, high);
2614           if (tem == NULL_TREE || is_gimple_val (tem))
2615             continue;
2616           tree etype = unsigned_type_for (TREE_TYPE (exp));
2617           exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2618                                  fold_convert_loc (loc, etype, exp),
2619                                  fold_convert_loc (loc, etype, lowi));
2620           exp = fold_convert_loc (loc, integer_type_node, exp);
2621           tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2622           exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2623                                  build_int_cst (word_type, 1), exp);
2624           exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2625                                  wide_int_to_tree (word_type, mask));
2626           exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2627                                  build_zero_cst (word_type));
2628           if (is_gimple_val (exp))
2629             continue;
2630
2631           /* The shift might have undefined behavior if TEM is true,
2632              but reassociate_bb isn't prepared to have basic blocks
2633              split when it is running.  So, temporarily emit a code
2634              with BIT_IOR_EXPR instead of &&, and fix it up in
2635              branch_fixup.  */
2636           gimple_seq seq;
2637           tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2638           gcc_assert (TREE_CODE (tem) == SSA_NAME);
2639           gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2640           gimple_seq seq2;
2641           exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2642           gimple_seq_add_seq_without_update (&seq, seq2);
2643           gcc_assert (TREE_CODE (exp) == SSA_NAME);
2644           gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2645           gimple *g = gimple_build_assign (make_ssa_name (optype),
2646                                            BIT_IOR_EXPR, tem, exp);
2647           gimple_set_location (g, loc);
2648           gimple_seq_add_stmt_without_update (&seq, g);
2649           exp = gimple_assign_lhs (g);
2650           tree val = build_zero_cst (optype);
2651           if (update_range_test (&ranges[i], NULL, candidates.address (),
2652                                  candidates.length (), opcode, ops, exp,
2653                                  seq, false, val, val, strict_overflow_p))
2654             {
2655               any_changes = true;
2656               reassoc_branch_fixups.safe_push (tem);
2657             }
2658           else
2659             gimple_seq_discard (seq);
2660         }
2661     }
2662   return any_changes;
2663 }
2664
2665 /* Optimize range tests, similarly how fold_range_test optimizes
2666    it on trees.  The tree code for the binary
2667    operation between all the operands is OPCODE.
2668    If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2669    maybe_optimize_range_tests for inter-bb range optimization.
2670    In that case if oe->op is NULL, oe->id is bb->index whose
2671    GIMPLE_COND is && or ||ed into the test, and oe->rank says
2672    the actual opcode.  */
2673
2674 static bool
2675 optimize_range_tests (enum tree_code opcode,
2676                       vec<operand_entry *> *ops)
2677 {
2678   unsigned int length = ops->length (), i, j, first;
2679   operand_entry *oe;
2680   struct range_entry *ranges;
2681   bool any_changes = false;
2682
2683   if (length == 1)
2684     return false;
2685
2686   ranges = XNEWVEC (struct range_entry, length);
2687   for (i = 0; i < length; i++)
2688     {
2689       oe = (*ops)[i];
2690       ranges[i].idx = i;
2691       init_range_entry (ranges + i, oe->op,
2692                         oe->op
2693                         ? NULL
2694                         : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2695       /* For | invert it now, we will invert it again before emitting
2696          the optimized expression.  */
2697       if (opcode == BIT_IOR_EXPR
2698           || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2699         ranges[i].in_p = !ranges[i].in_p;
2700     }
2701
2702   qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2703   for (i = 0; i < length; i++)
2704     if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2705       break;
2706
2707   /* Try to merge ranges.  */
2708   for (first = i; i < length; i++)
2709     {
2710       tree low = ranges[i].low;
2711       tree high = ranges[i].high;
2712       int in_p = ranges[i].in_p;
2713       bool strict_overflow_p = ranges[i].strict_overflow_p;
2714       int update_fail_count = 0;
2715
2716       for (j = i + 1; j < length; j++)
2717         {
2718           if (ranges[i].exp != ranges[j].exp)
2719             break;
2720           if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2721                              ranges[j].in_p, ranges[j].low, ranges[j].high))
2722             break;
2723           strict_overflow_p |= ranges[j].strict_overflow_p;
2724         }
2725
2726       if (j == i + 1)
2727         continue;
2728
2729       if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2730                              opcode, ops, ranges[i].exp, NULL, in_p,
2731                              low, high, strict_overflow_p))
2732         {
2733           i = j - 1;
2734           any_changes = true;
2735         }
2736       /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2737          while update_range_test would fail.  */
2738       else if (update_fail_count == 64)
2739         i = j - 1;
2740       else
2741         ++update_fail_count;
2742     }
2743
2744   any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2745                                          ops, ranges);
2746
2747   if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2748     any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2749                                            ops, ranges);
2750   if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2751     any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2752                                                      ops, ranges);
2753
2754   if (any_changes && opcode != ERROR_MARK)
2755     {
2756       j = 0;
2757       FOR_EACH_VEC_ELT (*ops, i, oe)
2758         {
2759           if (oe->op == error_mark_node)
2760             continue;
2761           else if (i != j)
2762             (*ops)[j] = oe;
2763           j++;
2764         }
2765       ops->truncate (j);
2766     }
2767
2768   XDELETEVEC (ranges);
2769   return any_changes;
2770 }
2771
2772 /* Return true if STMT is a cast like:
2773    <bb N>:
2774    ...
2775    _123 = (int) _234;
2776
2777    <bb M>:
2778    # _345 = PHI <_123(N), 1(...), 1(...)>
2779    where _234 has bool type, _123 has single use and
2780    bb N has a single successor M.  This is commonly used in
2781    the last block of a range test.  */
2782
2783 static bool
2784 final_range_test_p (gimple *stmt)
2785 {
2786   basic_block bb, rhs_bb;
2787   edge e;
2788   tree lhs, rhs;
2789   use_operand_p use_p;
2790   gimple *use_stmt;
2791
2792   if (!gimple_assign_cast_p (stmt))
2793     return false;
2794   bb = gimple_bb (stmt);
2795   if (!single_succ_p (bb))
2796     return false;
2797   e = single_succ_edge (bb);
2798   if (e->flags & EDGE_COMPLEX)
2799     return false;
2800
2801   lhs = gimple_assign_lhs (stmt);
2802   rhs = gimple_assign_rhs1 (stmt);
2803   if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2804       || TREE_CODE (rhs) != SSA_NAME
2805       || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2806     return false;
2807
2808   /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
2809   if (!single_imm_use (lhs, &use_p, &use_stmt))
2810     return false;
2811
2812   if (gimple_code (use_stmt) != GIMPLE_PHI
2813       || gimple_bb (use_stmt) != e->dest)
2814     return false;
2815
2816   /* And that the rhs is defined in the same loop.  */
2817   rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2818   if (rhs_bb == NULL
2819       || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2820     return false;
2821
2822   return true;
2823 }
2824
2825 /* Return true if BB is suitable basic block for inter-bb range test
2826    optimization.  If BACKWARD is true, BB should be the only predecessor
2827    of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2828    or compared with to find a common basic block to which all conditions
2829    branch to if true resp. false.  If BACKWARD is false, TEST_BB should
2830    be the only predecessor of BB.  */
2831
2832 static bool
2833 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2834                   bool backward)
2835 {
2836   edge_iterator ei, ei2;
2837   edge e, e2;
2838   gimple *stmt;
2839   gphi_iterator gsi;
2840   bool other_edge_seen = false;
2841   bool is_cond;
2842
2843   if (test_bb == bb)
2844     return false;
2845   /* Check last stmt first.  */
2846   stmt = last_stmt (bb);
2847   if (stmt == NULL
2848       || (gimple_code (stmt) != GIMPLE_COND
2849           && (backward || !final_range_test_p (stmt)))
2850       || gimple_visited_p (stmt)
2851       || stmt_could_throw_p (stmt)
2852       || *other_bb == bb)
2853     return false;
2854   is_cond = gimple_code (stmt) == GIMPLE_COND;
2855   if (is_cond)
2856     {
2857       /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2858          goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2859          to *OTHER_BB (if not set yet, try to find it out).  */
2860       if (EDGE_COUNT (bb->succs) != 2)
2861         return false;
2862       FOR_EACH_EDGE (e, ei, bb->succs)
2863         {
2864           if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2865             return false;
2866           if (e->dest == test_bb)
2867             {
2868               if (backward)
2869                 continue;
2870               else
2871                 return false;
2872             }
2873           if (e->dest == bb)
2874             return false;
2875           if (*other_bb == NULL)
2876             {
2877               FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2878                 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2879                   return false;
2880                 else if (e->dest == e2->dest)
2881                   *other_bb = e->dest;
2882               if (*other_bb == NULL)
2883                 return false;
2884             }
2885           if (e->dest == *other_bb)
2886             other_edge_seen = true;
2887           else if (backward)
2888             return false;
2889         }
2890       if (*other_bb == NULL || !other_edge_seen)
2891         return false;
2892     }
2893   else if (single_succ (bb) != *other_bb)
2894     return false;
2895
2896   /* Now check all PHIs of *OTHER_BB.  */
2897   e = find_edge (bb, *other_bb);
2898   e2 = find_edge (test_bb, *other_bb);
2899   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2900     {
2901       gphi *phi = gsi.phi ();
2902       /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2903          corresponding to BB and TEST_BB predecessor must be the same.  */
2904       if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2905                             gimple_phi_arg_def (phi, e2->dest_idx), 0))
2906         {
2907           /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2908              one of the PHIs should have the lhs of the last stmt in
2909              that block as PHI arg and that PHI should have 0 or 1
2910              corresponding to it in all other range test basic blocks
2911              considered.  */
2912           if (!is_cond)
2913             {
2914               if (gimple_phi_arg_def (phi, e->dest_idx)
2915                   == gimple_assign_lhs (stmt)
2916                   && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2917                       || integer_onep (gimple_phi_arg_def (phi,
2918                                                            e2->dest_idx))))
2919                 continue;
2920             }
2921           else
2922             {
2923               gimple *test_last = last_stmt (test_bb);
2924               if (gimple_code (test_last) != GIMPLE_COND
2925                   && gimple_phi_arg_def (phi, e2->dest_idx)
2926                      == gimple_assign_lhs (test_last)
2927                   && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2928                       || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2929                 continue;
2930             }
2931
2932           return false;
2933         }
2934     }
2935   return true;
2936 }
2937
2938 /* Return true if BB doesn't have side-effects that would disallow
2939    range test optimization, all SSA_NAMEs set in the bb are consumed
2940    in the bb and there are no PHIs.  */
2941
2942 static bool
2943 no_side_effect_bb (basic_block bb)
2944 {
2945   gimple_stmt_iterator gsi;
2946   gimple *last;
2947
2948   if (!gimple_seq_empty_p (phi_nodes (bb)))
2949     return false;
2950   last = last_stmt (bb);
2951   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2952     {
2953       gimple *stmt = gsi_stmt (gsi);
2954       tree lhs;
2955       imm_use_iterator imm_iter;
2956       use_operand_p use_p;
2957
2958       if (is_gimple_debug (stmt))
2959         continue;
2960       if (gimple_has_side_effects (stmt))
2961         return false;
2962       if (stmt == last)
2963         return true;
2964       if (!is_gimple_assign (stmt))
2965         return false;
2966       lhs = gimple_assign_lhs (stmt);
2967       if (TREE_CODE (lhs) != SSA_NAME)
2968         return false;
2969       if (gimple_assign_rhs_could_trap_p (stmt))
2970         return false;
2971       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2972         {
2973           gimple *use_stmt = USE_STMT (use_p);
2974           if (is_gimple_debug (use_stmt))
2975             continue;
2976           if (gimple_bb (use_stmt) != bb)
2977             return false;
2978         }
2979     }
2980   return false;
2981 }
2982
2983 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2984    return true and fill in *OPS recursively.  */
2985
2986 static bool
2987 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
2988          struct loop *loop)
2989 {
2990   gimple *stmt = SSA_NAME_DEF_STMT (var);
2991   tree rhs[2];
2992   int i;
2993
2994   if (!is_reassociable_op (stmt, code, loop))
2995     return false;
2996
2997   rhs[0] = gimple_assign_rhs1 (stmt);
2998   rhs[1] = gimple_assign_rhs2 (stmt);
2999   gimple_set_visited (stmt, true);
3000   for (i = 0; i < 2; i++)
3001     if (TREE_CODE (rhs[i]) == SSA_NAME
3002         && !get_ops (rhs[i], code, ops, loop)
3003         && has_single_use (rhs[i]))
3004       {
3005         operand_entry *oe = operand_entry_pool.allocate ();
3006
3007         oe->op = rhs[i];
3008         oe->rank = code;
3009         oe->id = 0;
3010         oe->count = 1;
3011         ops->safe_push (oe);
3012       }
3013   return true;
3014 }
3015
3016 /* Find the ops that were added by get_ops starting from VAR, see if
3017    they were changed during update_range_test and if yes, create new
3018    stmts.  */
3019
3020 static tree
3021 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3022             unsigned int *pidx, struct loop *loop)
3023 {
3024   gimple *stmt = SSA_NAME_DEF_STMT (var);
3025   tree rhs[4];
3026   int i;
3027
3028   if (!is_reassociable_op (stmt, code, loop))
3029     return NULL;
3030
3031   rhs[0] = gimple_assign_rhs1 (stmt);
3032   rhs[1] = gimple_assign_rhs2 (stmt);
3033   rhs[2] = rhs[0];
3034   rhs[3] = rhs[1];
3035   for (i = 0; i < 2; i++)
3036     if (TREE_CODE (rhs[i]) == SSA_NAME)
3037       {
3038         rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3039         if (rhs[2 + i] == NULL_TREE)
3040           {
3041             if (has_single_use (rhs[i]))
3042               rhs[2 + i] = ops[(*pidx)++]->op;
3043             else
3044               rhs[2 + i] = rhs[i];
3045           }
3046       }
3047   if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3048       && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3049     {
3050       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3051       var = make_ssa_name (TREE_TYPE (var));
3052       gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3053                                         rhs[2], rhs[3]);
3054       gimple_set_uid (g, gimple_uid (stmt));
3055       gimple_set_visited (g, true);
3056       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3057     }
3058   return var;
3059 }
3060
3061 /* Structure to track the initial value passed to get_ops and
3062    the range in the ops vector for each basic block.  */
3063
3064 struct inter_bb_range_test_entry
3065 {
3066   tree op;
3067   unsigned int first_idx, last_idx;
3068 };
3069
3070 /* Inter-bb range test optimization.
3071
3072    Returns TRUE if a gimple conditional is optimized to a true/false,
3073    otherwise return FALSE.
3074
3075    This indicates to the caller that it should run a CFG cleanup pass
3076    once reassociation is completed.  */
3077
3078 static bool
3079 maybe_optimize_range_tests (gimple *stmt)
3080 {
3081   basic_block first_bb = gimple_bb (stmt);
3082   basic_block last_bb = first_bb;
3083   basic_block other_bb = NULL;
3084   basic_block bb;
3085   edge_iterator ei;
3086   edge e;
3087   auto_vec<operand_entry *> ops;
3088   auto_vec<inter_bb_range_test_entry> bbinfo;
3089   bool any_changes = false;
3090   bool cfg_cleanup_needed = false;
3091
3092   /* Consider only basic blocks that end with GIMPLE_COND or
3093      a cast statement satisfying final_range_test_p.  All
3094      but the last bb in the first_bb .. last_bb range
3095      should end with GIMPLE_COND.  */
3096   if (gimple_code (stmt) == GIMPLE_COND)
3097     {
3098       if (EDGE_COUNT (first_bb->succs) != 2)
3099         return cfg_cleanup_needed;
3100     }
3101   else if (final_range_test_p (stmt))
3102     other_bb = single_succ (first_bb);
3103   else
3104     return cfg_cleanup_needed;
3105
3106   if (stmt_could_throw_p (stmt))
3107     return cfg_cleanup_needed;
3108
3109   /* As relative ordering of post-dominator sons isn't fixed,
3110      maybe_optimize_range_tests can be called first on any
3111      bb in the range we want to optimize.  So, start searching
3112      backwards, if first_bb can be set to a predecessor.  */
3113   while (single_pred_p (first_bb))
3114     {
3115       basic_block pred_bb = single_pred (first_bb);
3116       if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3117         break;
3118       if (!no_side_effect_bb (first_bb))
3119         break;
3120       first_bb = pred_bb;
3121     }
3122   /* If first_bb is last_bb, other_bb hasn't been computed yet.
3123      Before starting forward search in last_bb successors, find
3124      out the other_bb.  */
3125   if (first_bb == last_bb)
3126     {
3127       other_bb = NULL;
3128       /* As non-GIMPLE_COND last stmt always terminates the range,
3129          if forward search didn't discover anything, just give up.  */
3130       if (gimple_code (stmt) != GIMPLE_COND)
3131         return cfg_cleanup_needed;
3132       /* Look at both successors.  Either it ends with a GIMPLE_COND
3133          and satisfies suitable_cond_bb, or ends with a cast and
3134          other_bb is that cast's successor.  */
3135       FOR_EACH_EDGE (e, ei, first_bb->succs)
3136         if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3137             || e->dest == first_bb)
3138           return cfg_cleanup_needed;
3139         else if (single_pred_p (e->dest))
3140           {
3141             stmt = last_stmt (e->dest);
3142             if (stmt
3143                 && gimple_code (stmt) == GIMPLE_COND
3144                 && EDGE_COUNT (e->dest->succs) == 2)
3145               {
3146                 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3147                   break;
3148                 else
3149                   other_bb = NULL;
3150               }
3151             else if (stmt
3152                      && final_range_test_p (stmt)
3153                      && find_edge (first_bb, single_succ (e->dest)))
3154               {
3155                 other_bb = single_succ (e->dest);
3156                 if (other_bb == first_bb)
3157                   other_bb = NULL;
3158               }
3159           }
3160       if (other_bb == NULL)
3161         return cfg_cleanup_needed;
3162     }
3163   /* Now do the forward search, moving last_bb to successor bbs
3164      that aren't other_bb.  */
3165   while (EDGE_COUNT (last_bb->succs) == 2)
3166     {
3167       FOR_EACH_EDGE (e, ei, last_bb->succs)
3168         if (e->dest != other_bb)
3169           break;
3170       if (e == NULL)
3171         break;
3172       if (!single_pred_p (e->dest))
3173         break;
3174       if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3175         break;
3176       if (!no_side_effect_bb (e->dest))
3177         break;
3178       last_bb = e->dest;
3179     }
3180   if (first_bb == last_bb)
3181     return cfg_cleanup_needed;
3182   /* Here basic blocks first_bb through last_bb's predecessor
3183      end with GIMPLE_COND, all of them have one of the edges to
3184      other_bb and another to another block in the range,
3185      all blocks except first_bb don't have side-effects and
3186      last_bb ends with either GIMPLE_COND, or cast satisfying
3187      final_range_test_p.  */
3188   for (bb = last_bb; ; bb = single_pred (bb))
3189     {
3190       enum tree_code code;
3191       tree lhs, rhs;
3192       inter_bb_range_test_entry bb_ent;
3193
3194       bb_ent.op = NULL_TREE;
3195       bb_ent.first_idx = ops.length ();
3196       bb_ent.last_idx = bb_ent.first_idx;
3197       e = find_edge (bb, other_bb);
3198       stmt = last_stmt (bb);
3199       gimple_set_visited (stmt, true);
3200       if (gimple_code (stmt) != GIMPLE_COND)
3201         {
3202           use_operand_p use_p;
3203           gimple *phi;
3204           edge e2;
3205           unsigned int d;
3206
3207           lhs = gimple_assign_lhs (stmt);
3208           rhs = gimple_assign_rhs1 (stmt);
3209           gcc_assert (bb == last_bb);
3210
3211           /* stmt is
3212              _123 = (int) _234;
3213
3214              followed by:
3215              <bb M>:
3216              # _345 = PHI <_123(N), 1(...), 1(...)>
3217
3218              or 0 instead of 1.  If it is 0, the _234
3219              range test is anded together with all the
3220              other range tests, if it is 1, it is ored with
3221              them.  */
3222           single_imm_use (lhs, &use_p, &phi);
3223           gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3224           e2 = find_edge (first_bb, other_bb);
3225           d = e2->dest_idx;
3226           gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3227           if (integer_zerop (gimple_phi_arg_def (phi, d)))
3228             code = BIT_AND_EXPR;
3229           else
3230             {
3231               gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3232               code = BIT_IOR_EXPR;
3233             }
3234
3235           /* If _234 SSA_NAME_DEF_STMT is
3236              _234 = _567 | _789;
3237              (or &, corresponding to 1/0 in the phi arguments,
3238              push into ops the individual range test arguments
3239              of the bitwise or resp. and, recursively.  */
3240           if (!get_ops (rhs, code, &ops,
3241                         loop_containing_stmt (stmt))
3242               && has_single_use (rhs))
3243             {
3244               /* Otherwise, push the _234 range test itself.  */
3245               operand_entry *oe = operand_entry_pool.allocate ();
3246
3247               oe->op = rhs;
3248               oe->rank = code;
3249               oe->id = 0;
3250               oe->count = 1;
3251               ops.safe_push (oe);
3252               bb_ent.last_idx++;
3253             }
3254           else
3255             bb_ent.last_idx = ops.length ();
3256           bb_ent.op = rhs;
3257           bbinfo.safe_push (bb_ent);
3258           continue;
3259         }
3260       /* Otherwise stmt is GIMPLE_COND.  */
3261       code = gimple_cond_code (stmt);
3262       lhs = gimple_cond_lhs (stmt);
3263       rhs = gimple_cond_rhs (stmt);
3264       if (TREE_CODE (lhs) == SSA_NAME
3265           && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3266           && ((code != EQ_EXPR && code != NE_EXPR)
3267               || rhs != boolean_false_node
3268                  /* Either push into ops the individual bitwise
3269                     or resp. and operands, depending on which
3270                     edge is other_bb.  */
3271               || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3272                                  ^ (code == EQ_EXPR))
3273                                 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3274                            loop_containing_stmt (stmt))))
3275         {
3276           /* Or push the GIMPLE_COND stmt itself.  */
3277           operand_entry *oe = operand_entry_pool.allocate ();
3278
3279           oe->op = NULL;
3280           oe->rank = (e->flags & EDGE_TRUE_VALUE)
3281                      ? BIT_IOR_EXPR : BIT_AND_EXPR;
3282           /* oe->op = NULL signs that there is no SSA_NAME
3283              for the range test, and oe->id instead is the
3284              basic block number, at which's end the GIMPLE_COND
3285              is.  */
3286           oe->id = bb->index;
3287           oe->count = 1;
3288           ops.safe_push (oe);
3289           bb_ent.op = NULL;
3290           bb_ent.last_idx++;
3291         }
3292       else if (ops.length () > bb_ent.first_idx)
3293         {
3294           bb_ent.op = lhs;
3295           bb_ent.last_idx = ops.length ();
3296         }
3297       bbinfo.safe_push (bb_ent);
3298       if (bb == first_bb)
3299         break;
3300     }
3301   if (ops.length () > 1)
3302     any_changes = optimize_range_tests (ERROR_MARK, &ops);
3303   if (any_changes)
3304     {
3305       unsigned int idx, max_idx = 0;
3306       /* update_ops relies on has_single_use predicates returning the
3307          same values as it did during get_ops earlier.  Additionally it
3308          never removes statements, only adds new ones and it should walk
3309          from the single imm use and check the predicate already before
3310          making those changes.
3311          On the other side, the handling of GIMPLE_COND directly can turn
3312          previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3313          it needs to be done in a separate loop afterwards.  */
3314       for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3315         {
3316           if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3317               && bbinfo[idx].op != NULL_TREE)
3318             {
3319               tree new_op;
3320
3321               max_idx = idx;
3322               stmt = last_stmt (bb);
3323               new_op = update_ops (bbinfo[idx].op,
3324                                    (enum tree_code)
3325                                    ops[bbinfo[idx].first_idx]->rank,
3326                                    ops, &bbinfo[idx].first_idx,
3327                                    loop_containing_stmt (stmt));
3328               if (new_op == NULL_TREE)
3329                 {
3330                   gcc_assert (bb == last_bb);
3331                   new_op = ops[bbinfo[idx].first_idx++]->op;
3332                 }
3333               if (bbinfo[idx].op != new_op)
3334                 {
3335                   imm_use_iterator iter;
3336                   use_operand_p use_p;
3337                   gimple *use_stmt, *cast_stmt = NULL;
3338
3339                   FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3340                     if (is_gimple_debug (use_stmt))
3341                       continue;
3342                     else if (gimple_code (use_stmt) == GIMPLE_COND
3343                              || gimple_code (use_stmt) == GIMPLE_PHI)
3344                       FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3345                         SET_USE (use_p, new_op);
3346                     else if (gimple_assign_cast_p (use_stmt))
3347                       cast_stmt = use_stmt;
3348                     else
3349                       gcc_unreachable ();
3350                   if (cast_stmt)
3351                     {
3352                       gcc_assert (bb == last_bb);
3353                       tree lhs = gimple_assign_lhs (cast_stmt);
3354                       tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3355                       enum tree_code rhs_code
3356                         = gimple_assign_rhs_code (cast_stmt);
3357                       gassign *g;
3358                       if (is_gimple_min_invariant (new_op))
3359                         {
3360                           new_op = fold_convert (TREE_TYPE (lhs), new_op);
3361                           g = gimple_build_assign (new_lhs, new_op);
3362                         }
3363                       else
3364                         g = gimple_build_assign (new_lhs, rhs_code, new_op);
3365                       gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3366                       gimple_set_uid (g, gimple_uid (cast_stmt));
3367                       gimple_set_visited (g, true);
3368                       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3369                       FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3370                         if (is_gimple_debug (use_stmt))
3371                           continue;
3372                         else if (gimple_code (use_stmt) == GIMPLE_COND
3373                                  || gimple_code (use_stmt) == GIMPLE_PHI)
3374                           FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3375                             SET_USE (use_p, new_lhs);
3376                         else
3377                           gcc_unreachable ();
3378                     }
3379                 }
3380             }
3381           if (bb == first_bb)
3382             break;
3383         }
3384       for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3385         {
3386           if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3387               && bbinfo[idx].op == NULL_TREE
3388               && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3389             {
3390               gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3391
3392               if (idx > max_idx)
3393                 max_idx = idx;
3394
3395               /* If we collapse the conditional to a true/false
3396                  condition, then bubble that knowledge up to our caller.  */
3397               if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3398                 {
3399                   gimple_cond_make_false (cond_stmt);
3400                   cfg_cleanup_needed = true;
3401                 }
3402               else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3403                 {
3404                   gimple_cond_make_true (cond_stmt);
3405                   cfg_cleanup_needed = true;
3406                 }
3407               else
3408                 {
3409                   gimple_cond_set_code (cond_stmt, NE_EXPR);
3410                   gimple_cond_set_lhs (cond_stmt,
3411                                        ops[bbinfo[idx].first_idx]->op);
3412                   gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3413                 }
3414               update_stmt (cond_stmt);
3415             }
3416           if (bb == first_bb)
3417             break;
3418         }
3419
3420       /* The above changes could result in basic blocks after the first
3421          modified one, up to and including last_bb, to be executed even if
3422          they would not be in the original program.  If the value ranges of
3423          assignment lhs' in those bbs were dependent on the conditions
3424          guarding those basic blocks which now can change, the VRs might
3425          be incorrect.  As no_side_effect_bb should ensure those SSA_NAMEs
3426          are only used within the same bb, it should be not a big deal if
3427          we just reset all the VRs in those bbs.  See PR68671.  */
3428       for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
3429         reset_flow_sensitive_info_in_bb (bb);
3430     }
3431   return cfg_cleanup_needed;
3432 }
3433
3434 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3435    of STMT in it's operands.  This is also known as a "destructive
3436    update" operation.  */
3437
3438 static bool
3439 is_phi_for_stmt (gimple *stmt, tree operand)
3440 {
3441   gimple *def_stmt;
3442   gphi *def_phi;
3443   tree lhs;
3444   use_operand_p arg_p;
3445   ssa_op_iter i;
3446
3447   if (TREE_CODE (operand) != SSA_NAME)
3448     return false;
3449
3450   lhs = gimple_assign_lhs (stmt);
3451
3452   def_stmt = SSA_NAME_DEF_STMT (operand);
3453   def_phi = dyn_cast <gphi *> (def_stmt);
3454   if (!def_phi)
3455     return false;
3456
3457   FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3458     if (lhs == USE_FROM_PTR (arg_p))
3459       return true;
3460   return false;
3461 }
3462
3463 /* Remove def stmt of VAR if VAR has zero uses and recurse
3464    on rhs1 operand if so.  */
3465
3466 static void
3467 remove_visited_stmt_chain (tree var)
3468 {
3469   gimple *stmt;
3470   gimple_stmt_iterator gsi;
3471
3472   while (1)
3473     {
3474       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3475         return;
3476       stmt = SSA_NAME_DEF_STMT (var);
3477       if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3478         {
3479           var = gimple_assign_rhs1 (stmt);
3480           gsi = gsi_for_stmt (stmt);
3481           reassoc_remove_stmt (&gsi);
3482           release_defs (stmt);
3483         }
3484       else
3485         return;
3486     }
3487 }
3488
3489 /* This function checks three consequtive operands in
3490    passed operands vector OPS starting from OPINDEX and
3491    swaps two operands if it is profitable for binary operation
3492    consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
3493
3494    We pair ops with the same rank if possible.
3495
3496    The alternative we try is to see if STMT is a destructive
3497    update style statement, which is like:
3498    b = phi (a, ...)
3499    a = c + b;
3500    In that case, we want to use the destructive update form to
3501    expose the possible vectorizer sum reduction opportunity.
3502    In that case, the third operand will be the phi node. This
3503    check is not performed if STMT is null.
3504
3505    We could, of course, try to be better as noted above, and do a
3506    lot of work to try to find these opportunities in >3 operand
3507    cases, but it is unlikely to be worth it.  */
3508
3509 static void
3510 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
3511                           unsigned int opindex, gimple *stmt)
3512 {
3513   operand_entry *oe1, *oe2, *oe3;
3514
3515   oe1 = ops[opindex];
3516   oe2 = ops[opindex + 1];
3517   oe3 = ops[opindex + 2];
3518
3519   if ((oe1->rank == oe2->rank
3520        && oe2->rank != oe3->rank)
3521       || (stmt && is_phi_for_stmt (stmt, oe3->op)
3522           && !is_phi_for_stmt (stmt, oe1->op)
3523           && !is_phi_for_stmt (stmt, oe2->op)))
3524     {
3525       operand_entry temp = *oe3;
3526       oe3->op = oe1->op;
3527       oe3->rank = oe1->rank;
3528       oe1->op = temp.op;
3529       oe1->rank= temp.rank;
3530     }
3531   else if ((oe1->rank == oe3->rank
3532             && oe2->rank != oe3->rank)
3533            || (stmt && is_phi_for_stmt (stmt, oe2->op)
3534                && !is_phi_for_stmt (stmt, oe1->op)
3535                && !is_phi_for_stmt (stmt, oe3->op)))
3536     {
3537       operand_entry temp = *oe2;
3538       oe2->op = oe1->op;
3539       oe2->rank = oe1->rank;
3540       oe1->op = temp.op;
3541       oe1->rank = temp.rank;
3542     }
3543 }
3544
3545 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3546    two definitions, otherwise return STMT.  */
3547
3548 static inline gimple *
3549 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
3550 {
3551   if (TREE_CODE (rhs1) == SSA_NAME
3552       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3553     stmt = SSA_NAME_DEF_STMT (rhs1);
3554   if (TREE_CODE (rhs2) == SSA_NAME
3555       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3556     stmt = SSA_NAME_DEF_STMT (rhs2);
3557   return stmt;
3558 }
3559
3560 /* Recursively rewrite our linearized statements so that the operators
3561    match those in OPS[OPINDEX], putting the computation in rank
3562    order.  Return new lhs.  */
3563
3564 static tree
3565 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
3566                    vec<operand_entry *> ops, bool changed)
3567 {
3568   tree rhs1 = gimple_assign_rhs1 (stmt);
3569   tree rhs2 = gimple_assign_rhs2 (stmt);
3570   tree lhs = gimple_assign_lhs (stmt);
3571   operand_entry *oe;
3572
3573   /* The final recursion case for this function is that you have
3574      exactly two operations left.
3575      If we had exactly one op in the entire list to start with, we
3576      would have never called this function, and the tail recursion
3577      rewrites them one at a time.  */
3578   if (opindex + 2 == ops.length ())
3579     {
3580       operand_entry *oe1, *oe2;
3581
3582       oe1 = ops[opindex];
3583       oe2 = ops[opindex + 1];
3584
3585       if (rhs1 != oe1->op || rhs2 != oe2->op)
3586         {
3587           gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3588           unsigned int uid = gimple_uid (stmt);
3589
3590           if (dump_file && (dump_flags & TDF_DETAILS))
3591             {
3592               fprintf (dump_file, "Transforming ");
3593               print_gimple_stmt (dump_file, stmt, 0, 0);
3594             }
3595
3596           /* Even when changed is false, reassociation could have e.g. removed
3597              some redundant operations, so unless we are just swapping the
3598              arguments or unless there is no change at all (then we just
3599              return lhs), force creation of a new SSA_NAME.  */
3600           if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
3601             {
3602               gimple *insert_point
3603                 = find_insert_point (stmt, oe1->op, oe2->op);
3604               lhs = make_ssa_name (TREE_TYPE (lhs));
3605               stmt
3606                 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3607                                        oe1->op, oe2->op);
3608               gimple_set_uid (stmt, uid);
3609               gimple_set_visited (stmt, true);
3610               if (insert_point == gsi_stmt (gsi))
3611                 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3612               else
3613                 insert_stmt_after (stmt, insert_point);
3614             }
3615           else
3616             {
3617               gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3618                                    == stmt);
3619               gimple_assign_set_rhs1 (stmt, oe1->op);
3620               gimple_assign_set_rhs2 (stmt, oe2->op);
3621               update_stmt (stmt);
3622             }
3623
3624           if (rhs1 != oe1->op && rhs1 != oe2->op)
3625             remove_visited_stmt_chain (rhs1);
3626
3627           if (dump_file && (dump_flags & TDF_DETAILS))
3628             {
3629               fprintf (dump_file, " into ");
3630               print_gimple_stmt (dump_file, stmt, 0, 0);
3631             }
3632         }
3633       return lhs;
3634     }
3635
3636   /* If we hit here, we should have 3 or more ops left.  */
3637   gcc_assert (opindex + 2 < ops.length ());
3638
3639   /* Rewrite the next operator.  */
3640   oe = ops[opindex];
3641
3642   /* Recurse on the LHS of the binary operator, which is guaranteed to
3643      be the non-leaf side.  */
3644   tree new_rhs1
3645     = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3646                          changed || oe->op != rhs2);
3647
3648   if (oe->op != rhs2 || new_rhs1 != rhs1)
3649     {
3650       if (dump_file && (dump_flags & TDF_DETAILS))
3651         {
3652           fprintf (dump_file, "Transforming ");
3653           print_gimple_stmt (dump_file, stmt, 0, 0);
3654         }
3655
3656       /* If changed is false, this is either opindex == 0
3657          or all outer rhs2's were equal to corresponding oe->op,
3658          and powi_result is NULL.
3659          That means lhs is equivalent before and after reassociation.
3660          Otherwise ensure the old lhs SSA_NAME is not reused and
3661          create a new stmt as well, so that any debug stmts will be
3662          properly adjusted.  */
3663       if (changed)
3664         {
3665           gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3666           unsigned int uid = gimple_uid (stmt);
3667           gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3668
3669           lhs = make_ssa_name (TREE_TYPE (lhs));
3670           stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3671                                       new_rhs1, oe->op);
3672           gimple_set_uid (stmt, uid);
3673           gimple_set_visited (stmt, true);
3674           if (insert_point == gsi_stmt (gsi))
3675             gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3676           else
3677             insert_stmt_after (stmt, insert_point);
3678         }
3679       else
3680         {
3681           gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3682                                == stmt);
3683           gimple_assign_set_rhs1 (stmt, new_rhs1);
3684           gimple_assign_set_rhs2 (stmt, oe->op);
3685           update_stmt (stmt);
3686         }
3687
3688       if (dump_file && (dump_flags & TDF_DETAILS))
3689         {
3690           fprintf (dump_file, " into ");
3691           print_gimple_stmt (dump_file, stmt, 0, 0);
3692         }
3693     }
3694   return lhs;
3695 }
3696
3697 /* Find out how many cycles we need to compute statements chain.
3698    OPS_NUM holds number os statements in a chain.  CPU_WIDTH is a
3699    maximum number of independent statements we may execute per cycle.  */
3700
3701 static int
3702 get_required_cycles (int ops_num, int cpu_width)
3703 {
3704   int res;
3705   int elog;
3706   unsigned int rest;
3707
3708   /* While we have more than 2 * cpu_width operands
3709      we may reduce number of operands by cpu_width
3710      per cycle.  */
3711   res = ops_num / (2 * cpu_width);
3712
3713   /* Remained operands count may be reduced twice per cycle
3714      until we have only one operand.  */
3715   rest = (unsigned)(ops_num - res * cpu_width);
3716   elog = exact_log2 (rest);
3717   if (elog >= 0)
3718     res += elog;
3719   else
3720     res += floor_log2 (rest) + 1;
3721
3722   return res;
3723 }
3724
3725 /* Returns an optimal number of registers to use for computation of
3726    given statements.  */
3727
3728 static int
3729 get_reassociation_width (int ops_num, enum tree_code opc,
3730                          machine_mode mode)
3731 {
3732   int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3733   int width;
3734   int width_min;
3735   int cycles_best;
3736
3737   if (param_width > 0)
3738     width = param_width;
3739   else
3740     width = targetm.sched.reassociation_width (opc, mode);
3741
3742   if (width == 1)
3743     return width;
3744
3745   /* Get the minimal time required for sequence computation.  */
3746   cycles_best = get_required_cycles (ops_num, width);
3747
3748   /* Check if we may use less width and still compute sequence for
3749      the same time.  It will allow us to reduce registers usage.
3750      get_required_cycles is monotonically increasing with lower width
3751      so we can perform a binary search for the minimal width that still
3752      results in the optimal cycle count.  */
3753   width_min = 1;
3754   while (width > width_min)
3755     {
3756       int width_mid = (width + width_min) / 2;
3757
3758       if (get_required_cycles (ops_num, width_mid) == cycles_best)
3759         width = width_mid;
3760       else if (width_min < width_mid)
3761         width_min = width_mid;
3762       else
3763         break;
3764     }
3765
3766   return width;
3767 }
3768
3769 /* Recursively rewrite our linearized statements so that the operators
3770    match those in OPS[OPINDEX], putting the computation in rank
3771    order and trying to allow operations to be executed in
3772    parallel.  */
3773
3774 static void
3775 rewrite_expr_tree_parallel (gassign *stmt, int width,
3776                             vec<operand_entry *> ops)
3777 {
3778   enum tree_code opcode = gimple_assign_rhs_code (stmt);
3779   int op_num = ops.length ();
3780   int stmt_num = op_num - 1;
3781   gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
3782   int op_index = op_num - 1;
3783   int stmt_index = 0;
3784   int ready_stmts_end = 0;
3785   int i = 0;
3786   tree last_rhs1 = gimple_assign_rhs1 (stmt);
3787
3788   /* We start expression rewriting from the top statements.
3789      So, in this loop we create a full list of statements
3790      we will work with.  */
3791   stmts[stmt_num - 1] = stmt;
3792   for (i = stmt_num - 2; i >= 0; i--)
3793     stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3794
3795   for (i = 0; i < stmt_num; i++)
3796     {
3797       tree op1, op2;
3798
3799       /* Determine whether we should use results of
3800          already handled statements or not.  */
3801       if (ready_stmts_end == 0
3802           && (i - stmt_index >= width || op_index < 1))
3803         ready_stmts_end = i;
3804
3805       /* Now we choose operands for the next statement.  Non zero
3806          value in ready_stmts_end means here that we should use
3807          the result of already generated statements as new operand.  */
3808       if (ready_stmts_end > 0)
3809         {
3810           op1 = gimple_assign_lhs (stmts[stmt_index++]);
3811           if (ready_stmts_end > stmt_index)
3812             op2 = gimple_assign_lhs (stmts[stmt_index++]);
3813           else if (op_index >= 0)
3814             op2 = ops[op_index--]->op;
3815           else
3816             {
3817               gcc_assert (stmt_index < i);
3818               op2 = gimple_assign_lhs (stmts[stmt_index++]);
3819             }
3820
3821           if (stmt_index >= ready_stmts_end)
3822             ready_stmts_end = 0;
3823         }
3824       else
3825         {
3826           if (op_index > 1)
3827             swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3828           op2 = ops[op_index--]->op;
3829           op1 = ops[op_index--]->op;
3830         }
3831
3832       /* If we emit the last statement then we should put
3833          operands into the last statement.  It will also
3834          break the loop.  */
3835       if (op_index < 0 && stmt_index == i)
3836         i = stmt_num - 1;
3837
3838       if (dump_file && (dump_flags & TDF_DETAILS))
3839         {
3840           fprintf (dump_file, "Transforming ");
3841           print_gimple_stmt (dump_file, stmts[i], 0, 0);
3842         }
3843
3844       /* We keep original statement only for the last one.  All
3845          others are recreated.  */
3846       if (i == stmt_num - 1)
3847         {
3848           gimple_assign_set_rhs1 (stmts[i], op1);
3849           gimple_assign_set_rhs2 (stmts[i], op2);
3850           update_stmt (stmts[i]);
3851         }
3852       else
3853         stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3854
3855       if (dump_file && (dump_flags & TDF_DETAILS))
3856         {
3857           fprintf (dump_file, " into ");
3858           print_gimple_stmt (dump_file, stmts[i], 0, 0);
3859         }
3860     }
3861
3862   remove_visited_stmt_chain (last_rhs1);
3863 }
3864
3865 /* Transform STMT, which is really (A +B) + (C + D) into the left
3866    linear form, ((A+B)+C)+D.
3867    Recurse on D if necessary.  */
3868
3869 static void
3870 linearize_expr (gimple *stmt)
3871 {
3872   gimple_stmt_iterator gsi;
3873   gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3874   gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3875   gimple *oldbinrhs = binrhs;
3876   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3877   gimple *newbinrhs = NULL;
3878   struct loop *loop = loop_containing_stmt (stmt);
3879   tree lhs = gimple_assign_lhs (stmt);
3880
3881   gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3882               && is_reassociable_op (binrhs, rhscode, loop));
3883
3884   gsi = gsi_for_stmt (stmt);
3885
3886   gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3887   binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
3888                                 gimple_assign_rhs_code (binrhs),
3889                                 gimple_assign_lhs (binlhs),
3890                                 gimple_assign_rhs2 (binrhs));
3891   gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3892   gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3893   gimple_set_uid (binrhs, gimple_uid (stmt));
3894
3895   if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3896     newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3897
3898   if (dump_file && (dump_flags & TDF_DETAILS))
3899     {
3900       fprintf (dump_file, "Linearized: ");
3901       print_gimple_stmt (dump_file, stmt, 0, 0);
3902     }
3903
3904   reassociate_stats.linearized++;
3905   update_stmt (stmt);
3906
3907   gsi = gsi_for_stmt (oldbinrhs);
3908   reassoc_remove_stmt (&gsi);
3909   release_defs (oldbinrhs);
3910
3911   gimple_set_visited (stmt, true);
3912   gimple_set_visited (binlhs, true);
3913   gimple_set_visited (binrhs, true);
3914
3915   /* Tail recurse on the new rhs if it still needs reassociation.  */
3916   if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3917     /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3918            want to change the algorithm while converting to tuples.  */
3919     linearize_expr (stmt);
3920 }
3921
3922 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3923    it.  Otherwise, return NULL.  */
3924
3925 static gimple *
3926 get_single_immediate_use (tree lhs)
3927 {
3928   use_operand_p immuse;
3929   gimple *immusestmt;
3930
3931   if (TREE_CODE (lhs) == SSA_NAME
3932       && single_imm_use (lhs, &immuse, &immusestmt)
3933       && is_gimple_assign (immusestmt))
3934     return immusestmt;
3935
3936   return NULL;
3937 }
3938
3939 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3940    representing the negated value.  Insertions of any necessary
3941    instructions go before GSI.
3942    This function is recursive in that, if you hand it "a_5" as the
3943    value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3944    transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
3945
3946 static tree
3947 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3948 {
3949   gimple *negatedefstmt = NULL;
3950   tree resultofnegate;
3951   gimple_stmt_iterator gsi;
3952   unsigned int uid;
3953
3954   /* If we are trying to negate a name, defined by an add, negate the
3955      add operands instead.  */
3956   if (TREE_CODE (tonegate) == SSA_NAME)
3957     negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3958   if (TREE_CODE (tonegate) == SSA_NAME
3959       && is_gimple_assign (negatedefstmt)
3960       && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3961       && has_single_use (gimple_assign_lhs (negatedefstmt))
3962       && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3963     {
3964       tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3965       tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3966       tree lhs = gimple_assign_lhs (negatedefstmt);
3967       gimple *g;
3968
3969       gsi = gsi_for_stmt (negatedefstmt);
3970       rhs1 = negate_value (rhs1, &gsi);
3971
3972       gsi = gsi_for_stmt (negatedefstmt);
3973       rhs2 = negate_value (rhs2, &gsi);
3974
3975       gsi = gsi_for_stmt (negatedefstmt);
3976       lhs = make_ssa_name (TREE_TYPE (lhs));
3977       gimple_set_visited (negatedefstmt, true);
3978       g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
3979       gimple_set_uid (g, gimple_uid (negatedefstmt));
3980       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3981       return lhs;
3982     }
3983
3984   tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3985   resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3986                                              NULL_TREE, true, GSI_SAME_STMT);
3987   gsi = *gsip;
3988   uid = gimple_uid (gsi_stmt (gsi));
3989   for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3990     {
3991       gimple *stmt = gsi_stmt (gsi);
3992       if (gimple_uid (stmt) != 0)
3993         break;
3994       gimple_set_uid (stmt, uid);
3995     }
3996   return resultofnegate;
3997 }
3998
3999 /* Return true if we should break up the subtract in STMT into an add
4000    with negate.  This is true when we the subtract operands are really
4001    adds, or the subtract itself is used in an add expression.  In
4002    either case, breaking up the subtract into an add with negate
4003    exposes the adds to reassociation.  */
4004
4005 static bool
4006 should_break_up_subtract (gimple *stmt)
4007 {
4008   tree lhs = gimple_assign_lhs (stmt);
4009   tree binlhs = gimple_assign_rhs1 (stmt);
4010   tree binrhs = gimple_assign_rhs2 (stmt);
4011   gimple *immusestmt;
4012   struct loop *loop = loop_containing_stmt (stmt);
4013
4014   if (TREE_CODE (binlhs) == SSA_NAME
4015       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4016     return true;
4017
4018   if (TREE_CODE (binrhs) == SSA_NAME
4019       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4020     return true;
4021
4022   if (TREE_CODE (lhs) == SSA_NAME
4023       && (immusestmt = get_single_immediate_use (lhs))
4024       && is_gimple_assign (immusestmt)
4025       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4026           ||  gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4027     return true;
4028   return false;
4029 }
4030
4031 /* Transform STMT from A - B into A + -B.  */
4032
4033 static void
4034 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4035 {
4036   tree rhs1 = gimple_assign_rhs1 (stmt);
4037   tree rhs2 = gimple_assign_rhs2 (stmt);
4038
4039   if (dump_file && (dump_flags & TDF_DETAILS))
4040     {
4041       fprintf (dump_file, "Breaking up subtract ");
4042       print_gimple_stmt (dump_file, stmt, 0, 0);
4043     }
4044
4045   rhs2 = negate_value (rhs2, gsip);
4046   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4047   update_stmt (stmt);
4048 }
4049
4050 /* Determine whether STMT is a builtin call that raises an SSA name
4051    to an integer power and has only one use.  If so, and this is early
4052    reassociation and unsafe math optimizations are permitted, place
4053    the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4054    If any of these conditions does not hold, return FALSE.  */
4055
4056 static bool
4057 acceptable_pow_call (gimple *stmt, tree *base, HOST_WIDE_INT *exponent)
4058 {
4059   tree arg1;
4060   REAL_VALUE_TYPE c, cint;
4061
4062   if (!reassoc_insert_powi_p
4063       || !flag_unsafe_math_optimizations
4064       || !is_gimple_call (stmt)
4065       || !has_single_use (gimple_call_lhs (stmt)))
4066     return false;
4067
4068   switch (gimple_call_combined_fn (stmt))
4069     {
4070     CASE_CFN_POW:
4071       if (flag_errno_math)
4072         return false;
4073
4074       *base = gimple_call_arg (stmt, 0);
4075       arg1 = gimple_call_arg (stmt, 1);
4076
4077       if (TREE_CODE (arg1) != REAL_CST)
4078         return false;
4079
4080       c = TREE_REAL_CST (arg1);
4081
4082       if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4083         return false;
4084
4085       *exponent = real_to_integer (&c);
4086       real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4087       if (!real_identical (&c, &cint))
4088         return false;
4089
4090       break;
4091
4092     CASE_CFN_POWI:
4093       *base = gimple_call_arg (stmt, 0);
4094       arg1 = gimple_call_arg (stmt, 1);
4095
4096       if (!tree_fits_shwi_p (arg1))
4097         return false;
4098
4099       *exponent = tree_to_shwi (arg1);
4100       break;
4101
4102     default:
4103       return false;
4104     }
4105
4106   /* Expanding negative exponents is generally unproductive, so we don't
4107      complicate matters with those.  Exponents of zero and one should
4108      have been handled by expression folding.  */
4109   if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4110     return false;
4111
4112   return true;
4113 }
4114
4115 /* Recursively linearize a binary expression that is the RHS of STMT.
4116    Place the operands of the expression tree in the vector named OPS.  */
4117
4118 static void
4119 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
4120                      bool is_associative, bool set_visited)
4121 {
4122   tree binlhs = gimple_assign_rhs1 (stmt);
4123   tree binrhs = gimple_assign_rhs2 (stmt);
4124   gimple *binlhsdef = NULL, *binrhsdef = NULL;
4125   bool binlhsisreassoc = false;
4126   bool binrhsisreassoc = false;
4127   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4128   struct loop *loop = loop_containing_stmt (stmt);
4129   tree base = NULL_TREE;
4130   HOST_WIDE_INT exponent = 0;
4131
4132   if (set_visited)
4133     gimple_set_visited (stmt, true);
4134
4135   if (TREE_CODE (binlhs) == SSA_NAME)
4136     {
4137       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4138       binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4139                          && !stmt_could_throw_p (binlhsdef));
4140     }
4141
4142   if (TREE_CODE (binrhs) == SSA_NAME)
4143     {
4144       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4145       binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4146                          && !stmt_could_throw_p (binrhsdef));
4147     }
4148
4149   /* If the LHS is not reassociable, but the RHS is, we need to swap
4150      them.  If neither is reassociable, there is nothing we can do, so
4151      just put them in the ops vector.  If the LHS is reassociable,
4152      linearize it.  If both are reassociable, then linearize the RHS
4153      and the LHS.  */
4154
4155   if (!binlhsisreassoc)
4156     {
4157       /* If this is not a associative operation like division, give up.  */
4158       if (!is_associative)
4159         {
4160           add_to_ops_vec (ops, binrhs);
4161           return;
4162         }
4163
4164       if (!binrhsisreassoc)
4165         {
4166           if (rhscode == MULT_EXPR
4167               && TREE_CODE (binrhs) == SSA_NAME
4168               && acceptable_pow_call (binrhsdef, &base, &exponent))
4169             {
4170               add_repeat_to_ops_vec (ops, base, exponent);
4171               gimple_set_visited (binrhsdef, true);
4172             }
4173           else
4174             add_to_ops_vec (ops, binrhs);
4175
4176           if (rhscode == MULT_EXPR
4177               && TREE_CODE (binlhs) == SSA_NAME
4178               && acceptable_pow_call (binlhsdef, &base, &exponent))
4179             {
4180               add_repeat_to_ops_vec (ops, base, exponent);
4181               gimple_set_visited (binlhsdef, true);
4182             }
4183           else
4184             add_to_ops_vec (ops, binlhs);
4185
4186           return;
4187         }
4188
4189       if (dump_file && (dump_flags & TDF_DETAILS))
4190         {
4191           fprintf (dump_file, "swapping operands of ");
4192           print_gimple_stmt (dump_file, stmt, 0, 0);
4193         }
4194
4195       swap_ssa_operands (stmt,
4196                          gimple_assign_rhs1_ptr (stmt),
4197                          gimple_assign_rhs2_ptr (stmt));
4198       update_stmt (stmt);
4199
4200       if (dump_file && (dump_flags & TDF_DETAILS))
4201         {
4202           fprintf (dump_file, " is now ");
4203           print_gimple_stmt (dump_file, stmt, 0, 0);
4204         }
4205
4206       /* We want to make it so the lhs is always the reassociative op,
4207          so swap.  */
4208       std::swap (binlhs, binrhs);
4209     }
4210   else if (binrhsisreassoc)
4211     {
4212       linearize_expr (stmt);
4213       binlhs = gimple_assign_rhs1 (stmt);
4214       binrhs = gimple_assign_rhs2 (stmt);
4215     }
4216
4217   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4218               || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4219                                       rhscode, loop));
4220   linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4221                        is_associative, set_visited);
4222
4223   if (rhscode == MULT_EXPR
4224       && TREE_CODE (binrhs) == SSA_NAME
4225       && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
4226     {
4227       add_repeat_to_ops_vec (ops, base, exponent);
4228       gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
4229     }
4230   else
4231     add_to_ops_vec (ops, binrhs);
4232 }
4233
4234 /* Repropagate the negates back into subtracts, since no other pass
4235    currently does it.  */
4236
4237 static void
4238 repropagate_negates (void)
4239 {
4240   unsigned int i = 0;
4241   tree negate;
4242
4243   FOR_EACH_VEC_ELT (plus_negates, i, negate)
4244     {
4245       gimple *user = get_single_immediate_use (negate);
4246
4247       if (!user || !is_gimple_assign (user))
4248         continue;
4249
4250       /* The negate operand can be either operand of a PLUS_EXPR
4251          (it can be the LHS if the RHS is a constant for example).
4252
4253          Force the negate operand to the RHS of the PLUS_EXPR, then
4254          transform the PLUS_EXPR into a MINUS_EXPR.  */
4255       if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4256         {
4257           /* If the negated operand appears on the LHS of the
4258              PLUS_EXPR, exchange the operands of the PLUS_EXPR
4259              to force the negated operand to the RHS of the PLUS_EXPR.  */
4260           if (gimple_assign_rhs1 (user) == negate)
4261             {
4262               swap_ssa_operands (user,
4263                                  gimple_assign_rhs1_ptr (user),
4264                                  gimple_assign_rhs2_ptr (user));
4265             }
4266
4267           /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4268              the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
4269           if (gimple_assign_rhs2 (user) == negate)
4270             {
4271               tree rhs1 = gimple_assign_rhs1 (user);
4272               tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
4273               gimple_stmt_iterator gsi = gsi_for_stmt (user);
4274               gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4275               update_stmt (user);
4276             }
4277         }
4278       else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4279         {
4280           if (gimple_assign_rhs1 (user) == negate)
4281             {
4282               /* We have
4283                    x = -a
4284                    y = x - b
4285                  which we transform into
4286                    x = a + b
4287                    y = -x .
4288                  This pushes down the negate which we possibly can merge
4289                  into some other operation, hence insert it into the
4290                  plus_negates vector.  */
4291               gimple *feed = SSA_NAME_DEF_STMT (negate);
4292               tree a = gimple_assign_rhs1 (feed);
4293               tree b = gimple_assign_rhs2 (user);
4294               gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4295               gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4296               tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4297               gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
4298               gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4299               gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4300               user = gsi_stmt (gsi2);
4301               update_stmt (user);
4302               reassoc_remove_stmt (&gsi);
4303               release_defs (feed);
4304               plus_negates.safe_push (gimple_assign_lhs (user));
4305             }
4306           else
4307             {
4308               /* Transform "x = -a; y = b - x" into "y = b + a", getting
4309                  rid of one operation.  */
4310               gimple *feed = SSA_NAME_DEF_STMT (negate);
4311               tree a = gimple_assign_rhs1 (feed);
4312               tree rhs1 = gimple_assign_rhs1 (user);
4313               gimple_stmt_iterator gsi = gsi_for_stmt (user);
4314               gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4315               update_stmt (gsi_stmt (gsi));
4316             }
4317         }
4318     }
4319 }
4320
4321 /* Returns true if OP is of a type for which we can do reassociation.
4322    That is for integral or non-saturating fixed-point types, and for
4323    floating point type when associative-math is enabled.  */
4324
4325 static bool
4326 can_reassociate_p (tree op)
4327 {
4328   tree type = TREE_TYPE (op);
4329   if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4330       || NON_SAT_FIXED_POINT_TYPE_P (type)
4331       || (flag_associative_math && FLOAT_TYPE_P (type)))
4332     return true;
4333   return false;
4334 }
4335
4336 /* Break up subtract operations in block BB.
4337
4338    We do this top down because we don't know whether the subtract is
4339    part of a possible chain of reassociation except at the top.
4340
4341    IE given
4342    d = f + g
4343    c = a + e
4344    b = c - d
4345    q = b - r
4346    k = t - q
4347
4348    we want to break up k = t - q, but we won't until we've transformed q
4349    = b - r, which won't be broken up until we transform b = c - d.
4350
4351    En passant, clear the GIMPLE visited flag on every statement
4352    and set UIDs within each basic block.  */
4353
4354 static void
4355 break_up_subtract_bb (basic_block bb)
4356 {
4357   gimple_stmt_iterator gsi;
4358   basic_block son;
4359   unsigned int uid = 1;
4360
4361   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4362     {
4363       gimple *stmt = gsi_stmt (gsi);
4364       gimple_set_visited (stmt, false);
4365       gimple_set_uid (stmt, uid++);
4366
4367       if (!is_gimple_assign (stmt)
4368           || !can_reassociate_p (gimple_assign_lhs (stmt)))
4369         continue;
4370
4371       /* Look for simple gimple subtract operations.  */
4372       if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4373         {
4374           if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4375               || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4376             continue;
4377
4378           /* Check for a subtract used only in an addition.  If this
4379              is the case, transform it into add of a negate for better
4380              reassociation.  IE transform C = A-B into C = A + -B if C
4381              is only used in an addition.  */
4382           if (should_break_up_subtract (stmt))
4383             break_up_subtract (stmt, &gsi);
4384         }
4385       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4386                && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4387         plus_negates.safe_push (gimple_assign_lhs (stmt));
4388     }
4389   for (son = first_dom_son (CDI_DOMINATORS, bb);
4390        son;
4391        son = next_dom_son (CDI_DOMINATORS, son))
4392     break_up_subtract_bb (son);
4393 }
4394
4395 /* Used for repeated factor analysis.  */
4396 struct repeat_factor
4397 {
4398   /* An SSA name that occurs in a multiply chain.  */
4399   tree factor;
4400
4401   /* Cached rank of the factor.  */
4402   unsigned rank;
4403
4404   /* Number of occurrences of the factor in the chain.  */
4405   HOST_WIDE_INT count;
4406
4407   /* An SSA name representing the product of this factor and
4408      all factors appearing later in the repeated factor vector.  */
4409   tree repr;
4410 };
4411
4412
4413 static vec<repeat_factor> repeat_factor_vec;
4414
4415 /* Used for sorting the repeat factor vector.  Sort primarily by
4416    ascending occurrence count, secondarily by descending rank.  */
4417
4418 static int
4419 compare_repeat_factors (const void *x1, const void *x2)
4420 {
4421   const repeat_factor *rf1 = (const repeat_factor *) x1;
4422   const repeat_factor *rf2 = (const repeat_factor *) x2;
4423
4424   if (rf1->count != rf2->count)
4425     return rf1->count - rf2->count;
4426
4427   return rf2->rank - rf1->rank;
4428 }
4429
4430 /* Look for repeated operands in OPS in the multiply tree rooted at
4431    STMT.  Replace them with an optimal sequence of multiplies and powi
4432    builtin calls, and remove the used operands from OPS.  Return an
4433    SSA name representing the value of the replacement sequence.  */
4434
4435 static tree
4436 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
4437 {
4438   unsigned i, j, vec_len;
4439   int ii;
4440   operand_entry *oe;
4441   repeat_factor *rf1, *rf2;
4442   repeat_factor rfnew;
4443   tree result = NULL_TREE;
4444   tree target_ssa, iter_result;
4445   tree type = TREE_TYPE (gimple_get_lhs (stmt));
4446   tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4447   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4448   gimple *mul_stmt, *pow_stmt;
4449
4450   /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4451      target.  */
4452   if (!powi_fndecl)
4453     return NULL_TREE;
4454
4455   /* Allocate the repeated factor vector.  */
4456   repeat_factor_vec.create (10);
4457
4458   /* Scan the OPS vector for all SSA names in the product and build
4459      up a vector of occurrence counts for each factor.  */
4460   FOR_EACH_VEC_ELT (*ops, i, oe)
4461     {
4462       if (TREE_CODE (oe->op) == SSA_NAME)
4463         {
4464           FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4465             {
4466               if (rf1->factor == oe->op)
4467                 {
4468                   rf1->count += oe->count;
4469                   break;
4470                 }
4471             }
4472
4473           if (j >= repeat_factor_vec.length ())
4474             {
4475               rfnew.factor = oe->op;
4476               rfnew.rank = oe->rank;
4477               rfnew.count = oe->count;
4478               rfnew.repr = NULL_TREE;
4479               repeat_factor_vec.safe_push (rfnew);
4480             }
4481         }
4482     }
4483
4484   /* Sort the repeated factor vector by (a) increasing occurrence count,
4485      and (b) decreasing rank.  */
4486   repeat_factor_vec.qsort (compare_repeat_factors);
4487
4488   /* It is generally best to combine as many base factors as possible
4489      into a product before applying __builtin_powi to the result.
4490      However, the sort order chosen for the repeated factor vector
4491      allows us to cache partial results for the product of the base
4492      factors for subsequent use.  When we already have a cached partial
4493      result from a previous iteration, it is best to make use of it
4494      before looking for another __builtin_pow opportunity.
4495
4496      As an example, consider x * x * y * y * y * z * z * z * z.
4497      We want to first compose the product x * y * z, raise it to the
4498      second power, then multiply this by y * z, and finally multiply
4499      by z.  This can be done in 5 multiplies provided we cache y * z
4500      for use in both expressions:
4501
4502         t1 = y * z
4503         t2 = t1 * x
4504         t3 = t2 * t2
4505         t4 = t1 * t3
4506         result = t4 * z
4507
4508      If we instead ignored the cached y * z and first multiplied by
4509      the __builtin_pow opportunity z * z, we would get the inferior:
4510
4511         t1 = y * z
4512         t2 = t1 * x
4513         t3 = t2 * t2
4514         t4 = z * z
4515         t5 = t3 * t4
4516         result = t5 * y  */
4517
4518   vec_len = repeat_factor_vec.length ();
4519
4520   /* Repeatedly look for opportunities to create a builtin_powi call.  */
4521   while (true)
4522     {
4523       HOST_WIDE_INT power;
4524
4525       /* First look for the largest cached product of factors from
4526          preceding iterations.  If found, create a builtin_powi for
4527          it if the minimum occurrence count for its factors is at
4528          least 2, or just use this cached product as our next
4529          multiplicand if the minimum occurrence count is 1.  */
4530       FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4531         {
4532           if (rf1->repr && rf1->count > 0)
4533             break;
4534         }
4535
4536       if (j < vec_len)
4537         {
4538           power = rf1->count;
4539
4540           if (power == 1)
4541             {
4542               iter_result = rf1->repr;
4543
4544               if (dump_file && (dump_flags & TDF_DETAILS))
4545                 {
4546                   unsigned elt;
4547                   repeat_factor *rf;
4548                   fputs ("Multiplying by cached product ", dump_file);
4549                   for (elt = j; elt < vec_len; elt++)
4550                     {
4551                       rf = &repeat_factor_vec[elt];
4552                       print_generic_expr (dump_file, rf->factor, 0);
4553                       if (elt < vec_len - 1)
4554                         fputs (" * ", dump_file);
4555                     }
4556                   fputs ("\n", dump_file);
4557                 }
4558             }
4559           else
4560             {
4561               iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4562               pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4563                                             build_int_cst (integer_type_node,
4564                                                            power));
4565               gimple_call_set_lhs (pow_stmt, iter_result);
4566               gimple_set_location (pow_stmt, gimple_location (stmt));
4567               gimple_set_uid (pow_stmt, gimple_uid (stmt));
4568               gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4569
4570               if (dump_file && (dump_flags & TDF_DETAILS))
4571                 {
4572                   unsigned elt;
4573                   repeat_factor *rf;
4574                   fputs ("Building __builtin_pow call for cached product (",
4575                          dump_file);
4576                   for (elt = j; elt < vec_len; elt++)
4577                     {
4578                       rf = &repeat_factor_vec[elt];
4579                       print_generic_expr (dump_file, rf->factor, 0);
4580                       if (elt < vec_len - 1)
4581                         fputs (" * ", dump_file);
4582                     }
4583                   fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
4584                            power);
4585                 }
4586             }
4587         }
4588       else
4589         {
4590           /* Otherwise, find the first factor in the repeated factor
4591              vector whose occurrence count is at least 2.  If no such
4592              factor exists, there are no builtin_powi opportunities
4593              remaining.  */
4594           FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4595             {
4596               if (rf1->count >= 2)
4597                 break;
4598             }
4599
4600           if (j >= vec_len)
4601             break;
4602
4603           power = rf1->count;
4604
4605           if (dump_file && (dump_flags & TDF_DETAILS))
4606             {
4607               unsigned elt;
4608               repeat_factor *rf;
4609               fputs ("Building __builtin_pow call for (", dump_file);
4610               for (elt = j; elt < vec_len; elt++)
4611                 {
4612                   rf = &repeat_factor_vec[elt];
4613                   print_generic_expr (dump_file, rf->factor, 0);
4614                   if (elt < vec_len - 1)
4615                     fputs (" * ", dump_file);
4616                 }
4617               fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
4618             }
4619
4620           reassociate_stats.pows_created++;
4621
4622           /* Visit each element of the vector in reverse order (so that
4623              high-occurrence elements are visited first, and within the
4624              same occurrence count, lower-ranked elements are visited
4625              first).  Form a linear product of all elements in this order
4626              whose occurrencce count is at least that of element J.
4627              Record the SSA name representing the product of each element
4628              with all subsequent elements in the vector.  */
4629           if (j == vec_len - 1)
4630             rf1->repr = rf1->factor;
4631           else
4632             {
4633               for (ii = vec_len - 2; ii >= (int)j; ii--)
4634                 {
4635                   tree op1, op2;
4636
4637                   rf1 = &repeat_factor_vec[ii];
4638                   rf2 = &repeat_factor_vec[ii + 1];
4639
4640                   /* Init the last factor's representative to be itself.  */
4641                   if (!rf2->repr)
4642                     rf2->repr = rf2->factor;
4643
4644                   op1 = rf1->factor;
4645                   op2 = rf2->repr;
4646
4647                   target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4648                   mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
4649                                                   op1, op2);
4650                   gimple_set_location (mul_stmt, gimple_location (stmt));
4651                   gimple_set_uid (mul_stmt, gimple_uid (stmt));
4652                   gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4653                   rf1->repr = target_ssa;
4654
4655                   /* Don't reprocess the multiply we just introduced.  */
4656                   gimple_set_visited (mul_stmt, true);
4657                 }
4658             }
4659
4660           /* Form a call to __builtin_powi for the maximum product
4661              just formed, raised to the power obtained earlier.  */
4662           rf1 = &repeat_factor_vec[j];
4663           iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4664           pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4665                                         build_int_cst (integer_type_node,
4666                                                        power));
4667           gimple_call_set_lhs (pow_stmt, iter_result);
4668           gimple_set_location (pow_stmt, gimple_location (stmt));
4669           gimple_set_uid (pow_stmt, gimple_uid (stmt));
4670           gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4671         }
4672
4673       /* If we previously formed at least one other builtin_powi call,
4674          form the product of this one and those others.  */
4675       if (result)
4676         {
4677           tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4678           mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
4679                                           result, iter_result);
4680           gimple_set_location (mul_stmt, gimple_location (stmt));
4681           gimple_set_uid (mul_stmt, gimple_uid (stmt));
4682           gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4683           gimple_set_visited (mul_stmt, true);
4684           result = new_result;
4685         }
4686       else
4687         result = iter_result;
4688
4689       /* Decrement the occurrence count of each element in the product
4690          by the count found above, and remove this many copies of each
4691          factor from OPS.  */
4692       for (i = j; i < vec_len; i++)
4693         {
4694           unsigned k = power;
4695           unsigned n;
4696
4697           rf1 = &repeat_factor_vec[i];
4698           rf1->count -= power;
4699
4700           FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4701             {
4702               if (oe->op == rf1->factor)
4703                 {
4704                   if (oe->count <= k)
4705                     {
4706                       ops->ordered_remove (n);
4707                       k -= oe->count;
4708
4709                       if (k == 0)
4710                         break;
4711                     }
4712                   else
4713                     {
4714                       oe->count -= k;
4715                       break;
4716                     }
4717                 }
4718             }
4719         }
4720     }
4721
4722   /* At this point all elements in the repeated factor vector have a
4723      remaining occurrence count of 0 or 1, and those with a count of 1
4724      don't have cached representatives.  Re-sort the ops vector and
4725      clean up.  */
4726   ops->qsort (sort_by_operand_rank);
4727   repeat_factor_vec.release ();
4728
4729   /* Return the final product computed herein.  Note that there may
4730      still be some elements with single occurrence count left in OPS;
4731      those will be handled by the normal reassociation logic.  */
4732   return result;
4733 }
4734
4735 /* Attempt to optimize
4736    CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
4737    CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */
4738
4739 static void
4740 attempt_builtin_copysign (vec<operand_entry *> *ops)
4741 {
4742   operand_entry *oe;
4743   unsigned int i;
4744   unsigned int length = ops->length ();
4745   tree cst = ops->last ()->op;
4746
4747   if (length == 1 || TREE_CODE (cst) != REAL_CST)
4748     return;
4749
4750   FOR_EACH_VEC_ELT (*ops, i, oe)
4751     {
4752       if (TREE_CODE (oe->op) == SSA_NAME
4753           && has_single_use (oe->op))
4754         {
4755           gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
4756           if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
4757             {
4758               tree arg0, arg1;
4759               switch (gimple_call_combined_fn (old_call))
4760                 {
4761                 CASE_CFN_COPYSIGN:
4762                   arg0 = gimple_call_arg (old_call, 0);
4763                   arg1 = gimple_call_arg (old_call, 1);
4764                   /* The first argument of copysign must be a constant,
4765                      otherwise there's nothing to do.  */
4766                   if (TREE_CODE (arg0) == REAL_CST)
4767                     {
4768                       tree type = TREE_TYPE (arg0);
4769                       tree mul = const_binop (MULT_EXPR, type, cst, arg0);
4770                       /* If we couldn't fold to a single constant, skip it.
4771                          That happens e.g. for inexact multiplication when
4772                          -frounding-math.  */
4773                       if (mul == NULL_TREE)
4774                         break;
4775                       /* Instead of adjusting OLD_CALL, let's build a new
4776                          call to not leak the LHS and prevent keeping bogus
4777                          debug statements.  DCE will clean up the old call.  */
4778                       gcall *new_call;
4779                       if (gimple_call_internal_p (old_call))
4780                         new_call = gimple_build_call_internal
4781                           (IFN_COPYSIGN, 2, mul, arg1);
4782                       else
4783                         new_call = gimple_build_call
4784                           (gimple_call_fndecl (old_call), 2, mul, arg1);
4785                       tree lhs = make_ssa_name (type);
4786                       gimple_call_set_lhs (new_call, lhs);
4787                       gimple_set_location (new_call,
4788                                            gimple_location (old_call));
4789                       insert_stmt_after (new_call, old_call);
4790                       /* We've used the constant, get rid of it.  */
4791                       ops->pop ();
4792                       bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
4793                       /* Handle the CST1 < 0 case by negating the result.  */
4794                       if (cst1_neg)
4795                         {
4796                           tree negrhs = make_ssa_name (TREE_TYPE (lhs));
4797                           gimple *negate_stmt
4798                             = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
4799                           insert_stmt_after (negate_stmt, new_call);
4800                           oe->op = negrhs;
4801                         }
4802                       else
4803                         oe->op = lhs;
4804                       if (dump_file && (dump_flags & TDF_DETAILS))
4805                         {
4806                           fprintf (dump_file, "Optimizing copysign: ");
4807                           print_generic_expr (dump_file, cst, 0);
4808                           fprintf (dump_file, " * COPYSIGN (");
4809                           print_generic_expr (dump_file, arg0, 0);
4810                           fprintf (dump_file, ", ");
4811                           print_generic_expr (dump_file, arg1, 0);
4812                           fprintf (dump_file, ") into %sCOPYSIGN (",
4813                                    cst1_neg ? "-" : "");
4814                           print_generic_expr (dump_file, mul, 0);
4815                           fprintf (dump_file, ", ");
4816                           print_generic_expr (dump_file, arg1, 0);
4817                           fprintf (dump_file, "\n");
4818                         }
4819                       return;
4820                     }
4821                   break;
4822                 default:
4823                   break;
4824                 }
4825             }
4826         }
4827     }
4828 }
4829
4830 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
4831
4832 static void
4833 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
4834 {
4835   tree rhs1;
4836
4837   if (dump_file && (dump_flags & TDF_DETAILS))
4838     {
4839       fprintf (dump_file, "Transforming ");
4840       print_gimple_stmt (dump_file, stmt, 0, 0);
4841     }
4842
4843   rhs1 = gimple_assign_rhs1 (stmt);
4844   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4845   update_stmt (stmt);
4846   remove_visited_stmt_chain (rhs1);
4847
4848   if (dump_file && (dump_flags & TDF_DETAILS))
4849     {
4850       fprintf (dump_file, " into ");
4851       print_gimple_stmt (dump_file, stmt, 0, 0);
4852     }
4853 }
4854
4855 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */
4856
4857 static void
4858 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
4859                             tree rhs1, tree rhs2)
4860 {
4861   if (dump_file && (dump_flags & TDF_DETAILS))
4862     {
4863       fprintf (dump_file, "Transforming ");
4864       print_gimple_stmt (dump_file, stmt, 0, 0);
4865     }
4866
4867   gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4868   update_stmt (gsi_stmt (*gsi));
4869   remove_visited_stmt_chain (rhs1);
4870
4871   if (dump_file && (dump_flags & TDF_DETAILS))
4872     {
4873       fprintf (dump_file, " into ");
4874       print_gimple_stmt (dump_file, stmt, 0, 0);
4875     }
4876 }
4877
4878 /* Reassociate expressions in basic block BB and its post-dominator as
4879    children.
4880
4881    Bubble up return status from maybe_optimize_range_tests.  */
4882
4883 static bool
4884 reassociate_bb (basic_block bb)
4885 {
4886   gimple_stmt_iterator gsi;
4887   basic_block son;
4888   gimple *stmt = last_stmt (bb);
4889   bool cfg_cleanup_needed = false;
4890
4891   if (stmt && !gimple_visited_p (stmt))
4892     cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
4893
4894   for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4895     {
4896       stmt = gsi_stmt (gsi);
4897
4898       if (is_gimple_assign (stmt)
4899           && !stmt_could_throw_p (stmt))
4900         {
4901           tree lhs, rhs1, rhs2;
4902           enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4903
4904           /* If this is not a gimple binary expression, there is
4905              nothing for us to do with it.  */
4906           if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4907             continue;
4908
4909           /* If this was part of an already processed statement,
4910              we don't need to touch it again. */
4911           if (gimple_visited_p (stmt))
4912             {
4913               /* This statement might have become dead because of previous
4914                  reassociations.  */
4915               if (has_zero_uses (gimple_get_lhs (stmt)))
4916                 {
4917                   reassoc_remove_stmt (&gsi);
4918                   release_defs (stmt);
4919                   /* We might end up removing the last stmt above which
4920                      places the iterator to the end of the sequence.
4921                      Reset it to the last stmt in this case which might
4922                      be the end of the sequence as well if we removed
4923                      the last statement of the sequence.  In which case
4924                      we need to bail out.  */
4925                   if (gsi_end_p (gsi))
4926                     {
4927                       gsi = gsi_last_bb (bb);
4928                       if (gsi_end_p (gsi))
4929                         break;
4930                     }
4931                 }
4932               continue;
4933             }
4934
4935           lhs = gimple_assign_lhs (stmt);
4936           rhs1 = gimple_assign_rhs1 (stmt);
4937           rhs2 = gimple_assign_rhs2 (stmt);
4938
4939           /* For non-bit or min/max operations we can't associate
4940              all types.  Verify that here.  */
4941           if (rhs_code != BIT_IOR_EXPR
4942               && rhs_code != BIT_AND_EXPR
4943               && rhs_code != BIT_XOR_EXPR
4944               && rhs_code != MIN_EXPR
4945               && rhs_code != MAX_EXPR
4946               && (!can_reassociate_p (lhs)
4947                   || !can_reassociate_p (rhs1)
4948                   || !can_reassociate_p (rhs2)))
4949             continue;
4950
4951           if (associative_tree_code (rhs_code))
4952             {
4953               auto_vec<operand_entry *> ops;
4954               tree powi_result = NULL_TREE;
4955
4956               /* There may be no immediate uses left by the time we
4957                  get here because we may have eliminated them all.  */
4958               if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4959                 continue;
4960
4961               gimple_set_visited (stmt, true);
4962               linearize_expr_tree (&ops, stmt, true, true);
4963               ops.qsort (sort_by_operand_rank);
4964               optimize_ops_list (rhs_code, &ops);
4965               if (undistribute_ops_list (rhs_code, &ops,
4966                                          loop_containing_stmt (stmt)))
4967                 {
4968                   ops.qsort (sort_by_operand_rank);
4969                   optimize_ops_list (rhs_code, &ops);
4970                 }
4971
4972               if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4973                 optimize_range_tests (rhs_code, &ops);
4974
4975               if (rhs_code == MULT_EXPR)
4976                 attempt_builtin_copysign (&ops);
4977
4978               if (reassoc_insert_powi_p
4979                   && rhs_code == MULT_EXPR
4980                   && flag_unsafe_math_optimizations)
4981                 powi_result = attempt_builtin_powi (stmt, &ops);
4982
4983               /* If the operand vector is now empty, all operands were
4984                  consumed by the __builtin_powi optimization.  */
4985               if (ops.length () == 0)
4986                 transform_stmt_to_copy (&gsi, stmt, powi_result);
4987               else if (ops.length () == 1)
4988                 {
4989                   tree last_op = ops.last ()->op;
4990
4991                   if (powi_result)
4992                     transform_stmt_to_multiply (&gsi, stmt, last_op,
4993                                                 powi_result);
4994                   else
4995                     transform_stmt_to_copy (&gsi, stmt, last_op);
4996                 }
4997               else
4998                 {
4999                   machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
5000                   int ops_num = ops.length ();
5001                   int width = get_reassociation_width (ops_num, rhs_code, mode);
5002                   tree new_lhs = lhs;
5003
5004                   if (dump_file && (dump_flags & TDF_DETAILS))
5005                     fprintf (dump_file,
5006                              "Width = %d was chosen for reassociation\n", width);
5007
5008                   if (width > 1
5009                       && ops.length () > 3)
5010                     rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
5011                                                 width, ops);
5012                   else
5013                     {
5014                       /* When there are three operands left, we want
5015                          to make sure the ones that get the double
5016                          binary op are chosen wisely.  */
5017                       int len = ops.length ();
5018                       if (len >= 3)
5019                         swap_ops_for_binary_stmt (ops, len - 3, stmt);
5020
5021                       new_lhs = rewrite_expr_tree (stmt, 0, ops,
5022                                                    powi_result != NULL);
5023                     }
5024
5025                   /* If we combined some repeated factors into a
5026                      __builtin_powi call, multiply that result by the
5027                      reassociated operands.  */
5028                   if (powi_result)
5029                     {
5030                       gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
5031                       tree type = TREE_TYPE (lhs);
5032                       tree target_ssa = make_temp_ssa_name (type, NULL,
5033                                                             "reassocpow");
5034                       gimple_set_lhs (lhs_stmt, target_ssa);
5035                       update_stmt (lhs_stmt);
5036                       if (lhs != new_lhs)
5037                         target_ssa = new_lhs;
5038                       mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
5039                                                       powi_result, target_ssa);
5040                       gimple_set_location (mul_stmt, gimple_location (stmt));
5041                       gimple_set_uid (mul_stmt, gimple_uid (stmt));
5042                       gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
5043                     }
5044                 }
5045             }
5046         }
5047     }
5048   for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
5049        son;
5050        son = next_dom_son (CDI_POST_DOMINATORS, son))
5051     cfg_cleanup_needed |= reassociate_bb (son);
5052
5053   return cfg_cleanup_needed;
5054 }
5055
5056 /* Add jumps around shifts for range tests turned into bit tests.
5057    For each SSA_NAME VAR we have code like:
5058    VAR = ...; // final stmt of range comparison
5059    // bit test here...;
5060    OTHERVAR = ...; // final stmt of the bit test sequence
5061    RES = VAR | OTHERVAR;
5062    Turn the above into:
5063    VAR = ...;
5064    if (VAR != 0)
5065      goto <l3>;
5066    else
5067      goto <l2>;
5068    <l2>:
5069    // bit test here...;
5070    OTHERVAR = ...;
5071    <l3>:
5072    # RES = PHI<1(l1), OTHERVAR(l2)>;  */
5073
5074 static void
5075 branch_fixup (void)
5076 {
5077   tree var;
5078   unsigned int i;
5079
5080   FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
5081     {
5082       gimple *def_stmt = SSA_NAME_DEF_STMT (var);
5083       gimple *use_stmt;
5084       use_operand_p use;
5085       bool ok = single_imm_use (var, &use, &use_stmt);
5086       gcc_assert (ok
5087                   && is_gimple_assign (use_stmt)
5088                   && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
5089                   && gimple_bb (def_stmt) == gimple_bb (use_stmt));
5090
5091       basic_block cond_bb = gimple_bb (def_stmt);
5092       basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
5093       basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
5094
5095       gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5096       gimple *g = gimple_build_cond (NE_EXPR, var,
5097                                      build_zero_cst (TREE_TYPE (var)),
5098                                      NULL_TREE, NULL_TREE);
5099       location_t loc = gimple_location (use_stmt);
5100       gimple_set_location (g, loc);
5101       gsi_insert_after (&gsi, g, GSI_NEW_STMT);
5102
5103       edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
5104       etrue->probability = REG_BR_PROB_BASE / 2;
5105       etrue->count = cond_bb->count / 2;
5106       edge efalse = find_edge (cond_bb, then_bb);
5107       efalse->flags = EDGE_FALSE_VALUE;
5108       efalse->probability -= etrue->probability;
5109       efalse->count -= etrue->count;
5110       then_bb->count -= etrue->count;
5111
5112       tree othervar = NULL_TREE;
5113       if (gimple_assign_rhs1 (use_stmt) == var)
5114         othervar = gimple_assign_rhs2 (use_stmt);
5115       else if (gimple_assign_rhs2 (use_stmt) == var)
5116         othervar = gimple_assign_rhs1 (use_stmt);
5117       else
5118         gcc_unreachable ();
5119       tree lhs = gimple_assign_lhs (use_stmt);
5120       gphi *phi = create_phi_node (lhs, merge_bb);
5121       add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
5122       add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
5123       gsi = gsi_for_stmt (use_stmt);
5124       gsi_remove (&gsi, true);
5125
5126       set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
5127       set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
5128     }
5129   reassoc_branch_fixups.release ();
5130 }
5131
5132 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
5133 void debug_ops_vector (vec<operand_entry *> ops);
5134
5135 /* Dump the operand entry vector OPS to FILE.  */
5136
5137 void
5138 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
5139 {
5140   operand_entry *oe;
5141   unsigned int i;
5142
5143   FOR_EACH_VEC_ELT (ops, i, oe)
5144     {
5145       fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
5146       print_generic_expr (file, oe->op, 0);
5147       fprintf (file, "\n");
5148     }
5149 }
5150
5151 /* Dump the operand entry vector OPS to STDERR.  */
5152
5153 DEBUG_FUNCTION void
5154 debug_ops_vector (vec<operand_entry *> ops)
5155 {
5156   dump_ops_vector (stderr, ops);
5157 }
5158
5159 /* Bubble up return status from reassociate_bb.  */
5160
5161 static bool
5162 do_reassoc (void)
5163 {
5164   break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5165   return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
5166 }
5167
5168 /* Initialize the reassociation pass.  */
5169
5170 static void
5171 init_reassoc (void)
5172 {
5173   int i;
5174   long rank = 2;
5175   int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
5176
5177   /* Find the loops, so that we can prevent moving calculations in
5178      them.  */
5179   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
5180
5181   memset (&reassociate_stats, 0, sizeof (reassociate_stats));
5182
5183   next_operand_entry_id = 0;
5184
5185   /* Reverse RPO (Reverse Post Order) will give us something where
5186      deeper loops come later.  */
5187   pre_and_rev_post_order_compute (NULL, bbs, false);
5188   bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
5189   operand_rank = new hash_map<tree, long>;
5190
5191   /* Give each default definition a distinct rank.  This includes
5192      parameters and the static chain.  Walk backwards over all
5193      SSA names so that we get proper rank ordering according
5194      to tree_swap_operands_p.  */
5195   for (i = num_ssa_names - 1; i > 0; --i)
5196     {
5197       tree name = ssa_name (i);
5198       if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5199         insert_operand_rank (name, ++rank);
5200     }
5201
5202   /* Set up rank for each BB  */
5203   for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5204     bb_rank[bbs[i]] = ++rank  << 16;
5205
5206   free (bbs);
5207   calculate_dominance_info (CDI_POST_DOMINATORS);
5208   plus_negates = vNULL;
5209 }
5210
5211 /* Cleanup after the reassociation pass, and print stats if
5212    requested.  */
5213
5214 static void
5215 fini_reassoc (void)
5216 {
5217   statistics_counter_event (cfun, "Linearized",
5218                             reassociate_stats.linearized);
5219   statistics_counter_event (cfun, "Constants eliminated",
5220                             reassociate_stats.constants_eliminated);
5221   statistics_counter_event (cfun, "Ops eliminated",
5222                             reassociate_stats.ops_eliminated);
5223   statistics_counter_event (cfun, "Statements rewritten",
5224                             reassociate_stats.rewritten);
5225   statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5226                             reassociate_stats.pows_encountered);
5227   statistics_counter_event (cfun, "Built-in powi calls created",
5228                             reassociate_stats.pows_created);
5229
5230   delete operand_rank;
5231   operand_entry_pool.release ();
5232   free (bb_rank);
5233   plus_negates.release ();
5234   free_dominance_info (CDI_POST_DOMINATORS);
5235   loop_optimizer_finalize ();
5236 }
5237
5238 /* Gate and execute functions for Reassociation.  If INSERT_POWI_P, enable
5239    insertion of __builtin_powi calls.
5240
5241    Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
5242    optimization of a gimple conditional.  Otherwise returns zero.  */
5243
5244 static unsigned int
5245 execute_reassoc (bool insert_powi_p)
5246 {
5247   reassoc_insert_powi_p = insert_powi_p;
5248
5249   init_reassoc ();
5250
5251   bool cfg_cleanup_needed;
5252   cfg_cleanup_needed = do_reassoc ();
5253   repropagate_negates ();
5254   branch_fixup ();
5255
5256   fini_reassoc ();
5257   return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
5258 }
5259
5260 namespace {
5261
5262 const pass_data pass_data_reassoc =
5263 {
5264   GIMPLE_PASS, /* type */
5265   "reassoc", /* name */
5266   OPTGROUP_NONE, /* optinfo_flags */
5267   TV_TREE_REASSOC, /* tv_id */
5268   ( PROP_cfg | PROP_ssa ), /* properties_required */
5269   0, /* properties_provided */
5270   0, /* properties_destroyed */
5271   0, /* todo_flags_start */
5272   TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5273 };
5274
5275 class pass_reassoc : public gimple_opt_pass
5276 {
5277 public:
5278   pass_reassoc (gcc::context *ctxt)
5279     : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
5280   {}
5281
5282   /* opt_pass methods: */
5283   opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5284   void set_pass_param (unsigned int n, bool param)
5285     {
5286       gcc_assert (n == 0);
5287       insert_powi_p = param;
5288     }
5289   virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5290   virtual unsigned int execute (function *)
5291     { return execute_reassoc (insert_powi_p); }
5292
5293  private:
5294   /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
5295      point 3a in the pass header comment.  */
5296   bool insert_powi_p;
5297 }; // class pass_reassoc
5298
5299 } // anon namespace
5300
5301 gimple_opt_pass *
5302 make_pass_reassoc (gcc::context *ctxt)
5303 {
5304   return new pass_reassoc (ctxt);
5305 }