/* Induction variable canonicalization and loop peeling.
   Copyright (C) 2004-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass detects the loops that iterate a constant number of times,
   adds a canonical induction variable (step -1, tested against 0)
   and replaces the exit test.  This enables the less powerful rtl
   level analysis to use this information.

   This might spoil the code in some cases (by increasing register pressure).
   Note that in the case the new variable is not needed, ivopts will get rid
   of it, so it might only be a problem when there are no other linear
   induction variables.  In that case the created optimization possibilities
   are likely to pay off.

   We also perform
     - complete unrolling (or peeling) when the loop rolls few enough
       times
     - simple peeling (i.e. copying a few initial iterations prior to the
       loop) when the number-of-iterations estimate is known (typically
       from the profile info).  */
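
/* As a hypothetical illustration (not part of the pass sources), a loop

     for (i = 0; i < n; i++)
       a[i] = 0;

   whose latch runs n - 1 times gets a fresh counter seeded with the latch
   count plus one; the counter is decremented by 1 each iteration and the
   exit test is redirected to compare it against zero, roughly

     ivtmp = n;
     do
       {
         a[i] = 0;
         i++;
         ivtmp--;
       }
     while (ivtmp != 0);

   which the weaker RTL-level analyses can read the trip count from.  */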
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "alias.h"
#include "symtab.h"
#include "tree.h"
#include "fold-const.h"
#include "tm_p.h"
#include "profile.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "flags.h"
#include "tree-inline.h"
#include "target.h"
#include "tree-cfgcleanup.h"
#include "builtins.h"
/* Specifies types of loops that may be unrolled.  */

enum unroll_level
{
  UL_SINGLE_ITER,	/* Only loops that exit immediately in the first
			   iteration.  */
  UL_NO_GROWTH,		/* Only loops whose unrolling will not cause increase
			   of code size.  */
  UL_ALL		/* All suitable loops.  */
};
/* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
   is the exit edge whose condition is replaced.  */

static void
create_canonical_iv (struct loop *loop, edge exit, tree niter)
{
  edge in;
  tree type, var;
  gcond *cond;
  gimple_stmt_iterator incr_at;
  enum tree_code cmp;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
      print_generic_expr (dump_file, niter, TDF_SLIM);
      fprintf (dump_file, " iterations.\n");
    }

  cond = as_a <gcond *> (last_stmt (exit->src));
  in = EDGE_SUCC (exit->src, 0);
  if (in == exit)
    in = EDGE_SUCC (exit->src, 1);

  /* Note that we do not need to worry about overflows, since
     the type of niter is always unsigned and all comparisons are
     just for equality/nonequality -- i.e. everything works
     in modulo arithmetic.  */

  type = TREE_TYPE (niter);
  niter = fold_build2 (PLUS_EXPR, type,
		       niter,
		       build_int_cst (type, 1));
  incr_at = gsi_last_bb (in->src);
  create_iv (niter,
	     build_int_cst (type, -1),
	     NULL_TREE, loop,
	     &incr_at, false, NULL, &var);

  cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
  gimple_cond_set_code (cond, cmp);
  gimple_cond_set_lhs (cond, var);
  gimple_cond_set_rhs (cond, build_int_cst (type, 0));
  update_stmt (cond);
}
/* Describe size of loop as detected by tree_estimate_loop_size.  */
struct loop_size
{
  /* Number of instructions in the loop.  */
  int overall;

  /* Number of instructions that will be likely optimized out in
     peeled iterations of loop (i.e. computation based on induction
     variable where induction variable starts at known constant).  */
  int eliminated_by_peeling;

  /* Same statistics for the last iteration of loop: it is smaller because
     instructions after the exit are not executed.  */
  int last_iteration;
  int last_iteration_eliminated_by_peeling;

  /* If some IV computation will become constant.  */
  bool constant_iv;

  /* Number of call stmts that are not a builtin and are pure or const
     present on the hot path.  */
  int num_pure_calls_on_hot_path;
  /* Number of call stmts that are not a builtin and are not pure nor const
     present on the hot path.  */
  int num_non_pure_calls_on_hot_path;
  /* Number of statements other than calls on the hot path.  */
  int non_call_stmts_on_hot_path;
  /* Number of branches seen on the hot path.  */
  int num_branches_on_hot_path;
};
/* Return true if OP in STMT will be constant after peeling LOOP.  */

static bool
constant_after_peeling (tree op, gimple stmt, struct loop *loop)
{
  affine_iv iv;

  if (is_gimple_min_invariant (op))
    return true;

  /* We can still fold accesses to constant arrays when the index is known.  */
  if (TREE_CODE (op) != SSA_NAME)
    {
      tree base = op;

      /* First take a quick look to see whether there is a constant array
	 inside.  */
      while (handled_component_p (base))
	base = TREE_OPERAND (base, 0);
      if ((DECL_P (base)
	   && ctor_for_folding (base) != error_mark_node)
	  || CONSTANT_CLASS_P (base))
	{
	  /* If so, see if we understand all the indices.  */
	  base = op;
	  while (handled_component_p (base))
	    {
	      if (TREE_CODE (base) == ARRAY_REF
		  && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
		return false;
	      base = TREE_OPERAND (base, 0);
	    }
	  return true;
	}
      return false;
    }

  /* Induction variables are constants.  */
  if (!simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false))
    return false;
  if (!is_gimple_min_invariant (iv.base))
    return false;
  if (!is_gimple_min_invariant (iv.step))
    return false;
  return true;
}
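
/* For example, in a hypothetical loop over a constant table

     static const int t[4] = { 1, 2, 4, 8 };
     for (i = 0; i < 4; i++)
       sum += t[i];

   the predicate holds both for the IV `i' (known constant base and step)
   and for the access `t[i]' (constant array with all indices understood),
   so each peeled copy of `sum += t[i]' is expected to fold to a constant
   addition.  */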
/* Computes an estimated number of insns in LOOP.
   EXIT (if non-NULL) is an exit edge that will be eliminated in all but the
   last iteration of the loop.
   EDGE_TO_CANCEL (if non-NULL) is a non-exit edge eliminated in the last
   iteration of the loop.
   Return results in SIZE, estimating the benefits of complete unrolling
   exiting by EXIT.
   Stop estimating after UPPER_BOUND is met.  Return true in this case.  */

static bool
tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel,
			 struct loop_size *size, int upper_bound)
{
  basic_block *body = get_loop_body (loop);
  gimple_stmt_iterator gsi;
  unsigned int i;
  bool after_exit;
  vec<basic_block> path = get_loop_hot_path (loop);

  size->overall = 0;
  size->eliminated_by_peeling = 0;
  size->last_iteration = 0;
  size->last_iteration_eliminated_by_peeling = 0;
  size->num_pure_calls_on_hot_path = 0;
  size->num_non_pure_calls_on_hot_path = 0;
  size->non_call_stmts_on_hot_path = 0;
  size->num_branches_on_hot_path = 0;
  size->constant_iv = 0;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
  for (i = 0; i < loop->num_nodes; i++)
    {
      if (edge_to_cancel && body[i] != edge_to_cancel->src
	  && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
	after_exit = true;
      else
	after_exit = false;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
		 after_exit);

      for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  int num = estimate_num_insns (stmt, &eni_size_weights);
	  bool likely_eliminated = false;
	  bool likely_eliminated_last = false;
	  bool likely_eliminated_peeled = false;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  size: %3i ", num);
	      print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, 0);
	    }

	  /* Look for reasons why we might optimize this stmt away.  */

	  if (gimple_has_side_effects (stmt))
	    ;
	  /* Exit conditional.  */
	  else if (exit && body[i] == exit->src
		   && stmt == last_stmt (exit->src))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "   Exit condition will be eliminated "
			 "in peeled copies.\n");
	      likely_eliminated_peeled = true;
	    }
	  else if (edge_to_cancel && body[i] == edge_to_cancel->src
		   && stmt == last_stmt (edge_to_cancel->src))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "   Exit condition will be eliminated "
			 "in last copy.\n");
	      likely_eliminated_last = true;
	    }
	  /* Sets of IV variables.  */
	  else if (gimple_code (stmt) == GIMPLE_ASSIGN
		   && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "   Induction variable computation will"
			 " be folded away.\n");
	      likely_eliminated = true;
	    }
	  /* Assignments of IV variables.  */
	  else if (gimple_code (stmt) == GIMPLE_ASSIGN
		   && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
		   && constant_after_peeling (gimple_assign_rhs1 (stmt), stmt, loop)
		   && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
		       || constant_after_peeling (gimple_assign_rhs2 (stmt),
						  stmt, loop)))
	    {
	      size->constant_iv = true;
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "   Constant expression will be folded away.\n");
	      likely_eliminated = true;
	    }
	  /* Conditionals.  */
	  else if ((gimple_code (stmt) == GIMPLE_COND
		    && constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
		    && constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop))
		   || (gimple_code (stmt) == GIMPLE_SWITCH
		       && constant_after_peeling (gimple_switch_index (
						    as_a <gswitch *> (stmt)),
						  stmt, loop)))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "   Constant conditional.\n");
	      likely_eliminated = true;
	    }

	  size->overall += num;
	  if (likely_eliminated || likely_eliminated_peeled)
	    size->eliminated_by_peeling += num;
	  if (!after_exit)
	    {
	      size->last_iteration += num;
	      if (likely_eliminated || likely_eliminated_last)
		size->last_iteration_eliminated_by_peeling += num;
	    }
	  if ((size->overall * 3 / 2 - size->eliminated_by_peeling
	       - size->last_iteration_eliminated_by_peeling) > upper_bound)
	    {
	      free (body);
	      path.release ();
	      return true;
	    }
	}
    }
  while (path.length ())
    {
      basic_block bb = path.pop ();
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  if (gimple_code (stmt) == GIMPLE_CALL)
	    {
	      int flags = gimple_call_flags (stmt);
	      tree decl = gimple_call_fndecl (stmt);

	      if (decl && DECL_IS_BUILTIN (decl)
		  && is_inexpensive_builtin (decl))
		;
	      else if (flags & (ECF_PURE | ECF_CONST))
		size->num_pure_calls_on_hot_path++;
	      else
		size->num_non_pure_calls_on_hot_path++;
	      size->num_branches_on_hot_path++;
	    }
	  else if (gimple_code (stmt) != GIMPLE_CALL
		   && gimple_code (stmt) != GIMPLE_DEBUG)
	    size->non_call_stmts_on_hot_path++;
	  if (((gimple_code (stmt) == GIMPLE_COND
		&& (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
		    || constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop)))
	       || (gimple_code (stmt) == GIMPLE_SWITCH
		   && !constant_after_peeling (gimple_switch_index (
						 as_a <gswitch *> (stmt)),
					       stmt, loop)))
	      && (!exit || bb != exit->src))
	    size->num_branches_on_hot_path++;
	}
    }
  path.release ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
	     size->eliminated_by_peeling, size->last_iteration,
	     size->last_iteration_eliminated_by_peeling);

  free (body);
  return false;
}
/* Estimate number of insns of a completely unrolled loop.
   It is (NUNROLL + 1) * size of loop body, taking into account
   the fact that in the last copy everything after the exit conditional
   is dead and that some instructions will be eliminated after
   peeling.

   The loop body is likely going to simplify further; this is difficult
   to guess, so we just decrease the result by 1/3.  */

static unsigned HOST_WIDE_INT
estimated_unrolled_size (struct loop_size *size,
			 unsigned HOST_WIDE_INT nunroll)
{
  HOST_WIDE_INT unr_insns = ((nunroll)
			     * (HOST_WIDE_INT) (size->overall
						- size->eliminated_by_peeling));
  if (!nunroll)
    unr_insns = 0;
  unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;

  unr_insns = unr_insns * 2 / 3;
  if (unr_insns <= 0)
    unr_insns = 1;

  return unr_insns;
}
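
/* A worked example with made-up numbers: for SIZE->overall == 30,
   SIZE->eliminated_by_peeling == 10, SIZE->last_iteration == 15,
   SIZE->last_iteration_eliminated_by_peeling == 10 and NUNROLL == 4,
   the estimate is 4 * (30 - 10) + (15 - 10) = 85 insns, scaled by 2/3
   to 56.  */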
/* Loop LOOP is known to not loop.  See if there is an edge in the loop
   body that can be removed to make the loop always exit, without making
   any code that is potentially executed during the last iteration dead.

   After complete unrolling we still may get rid of the conditional
   on the exit in the last copy even if we have no idea what it does.
   This is quite a common case for loops of the form

     int a[5];
     for (i=0;i<b;i++)
       a[i]=0;

   Here we prove the loop iterates 5 times but we do not know
   it from the induction variable.

   For now we handle only the simple case where there is an exit condition
   just before the latch block and the latch block contains no statements
   with side effects that may otherwise terminate the execution of the loop
   (such as by EH or by terminating the program or longjmp).

   In the general case we may want to cancel the paths leading to statements
   loop-niter identified as having undefined effect in the last iteration.
   The other cases are hopefully rare and will be cleaned up later.  */

static edge
loop_edge_to_cancel (struct loop *loop)
{
  vec<edge> exits;
  unsigned i;
  edge edge_to_cancel;
  gimple_stmt_iterator gsi;

  /* We want only one predecessor of the loop.  */
  if (EDGE_COUNT (loop->latch->preds) > 1)
    return NULL;

  exits = get_loop_exit_edges (loop);

  FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
    {
      /* Find the other edge than the loop exit
	 leaving the conditional.  */
      if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
	continue;
      if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
	edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
      else
	edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);

      /* We can only handle conditionals.  */
      if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
	continue;

      /* We should never have conditionals in the loop latch.  */
      gcc_assert (edge_to_cancel->dest != loop->header);

      /* Check that it leads to the loop latch.  */
      if (edge_to_cancel->dest != loop->latch)
	continue;

      exits.release ();

      /* Verify that the code in the loop latch does nothing that may end
	 program execution without really reaching the exit.  This may
	 include non-pure/const function calls, EH statements, volatile
	 ASMs etc.  */
      for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
	if (gimple_has_side_effects (gsi_stmt (gsi)))
	  return NULL;
      return edge_to_cancel;
    }
  exits.release ();
  return NULL;
}
/* Remove all tests for exits that are known to be taken after LOOP was
   peeled NPEELED times.  Put __builtin_unreachable () before every statement
   known to not be executed.  */

static bool
remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
{
  struct nb_iter_bound *elt;
  bool changed = false;

  for (elt = loop->bounds; elt; elt = elt->next)
    {
      /* If the statement is known to be undefined after peeling, turn it
	 into unreachable (or trap when debugging experience is supposed
	 to be good).  */
      if (!elt->is_exit
	  && wi::ltu_p (elt->bound, npeeled))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
	  gcall *stmt = gimple_build_call
	      (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
	  gimple_set_location (stmt, gimple_location (elt->stmt));
	  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
	  split_block (gimple_bb (stmt), stmt);
	  changed = true;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Forced statement unreachable: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0, 0);
	    }
	}
      /* If we know the exit will be taken after peeling, update.  */
      else if (elt->is_exit
	       && wi::leu_p (elt->bound, npeeled))
	{
	  basic_block bb = gimple_bb (elt->stmt);
	  edge exit_edge = EDGE_SUCC (bb, 0);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Forced exit to be taken: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0, 0);
	    }
	  if (!loop_exit_edge_p (loop, exit_edge))
	    exit_edge = EDGE_SUCC (bb, 1);
	  gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
	  gcond *cond_stmt = as_a <gcond *> (elt->stmt);
	  if (exit_edge->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_true (cond_stmt);
	  else
	    gimple_cond_make_false (cond_stmt);
	  update_stmt (cond_stmt);
	  changed = true;
	}
    }
  return changed;
}
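
/* For instance, when a hypothetical loop storing to a 3-element array is
   peeled NPEELED == 3 times, the recorded bound on the store proves the
   copy left in the loop body can never execute, so a
   __builtin_unreachable () call is placed in front of it; an exit whose
   recorded bound shows it must be taken within the peeled iterations is
   folded to a constant condition instead.  */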
/* Remove all exits that are known to be never taken because of the
   discovered loop bound.  */

static bool
remove_redundant_iv_tests (struct loop *loop)
{
  struct nb_iter_bound *elt;
  bool changed = false;

  if (!loop->any_upper_bound)
    return false;
  for (elt = loop->bounds; elt; elt = elt->next)
    {
      /* Exit is pointless if it won't be taken before the loop reaches
	 its upper bound.  */
      if (elt->is_exit && loop->any_upper_bound
	  && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
	{
	  basic_block bb = gimple_bb (elt->stmt);
	  edge exit_edge = EDGE_SUCC (bb, 0);
	  struct tree_niter_desc niter;

	  if (!loop_exit_edge_p (loop, exit_edge))
	    exit_edge = EDGE_SUCC (bb, 1);

	  /* Only when we know the actual number of iterations, not
	     just a bound, can we remove the exit.  */
	  if (!number_of_iterations_exit (loop, exit_edge,
					  &niter, false, false)
	      || !integer_onep (niter.assumptions)
	      || !integer_zerop (niter.may_be_zero)
	      || !niter.niter
	      || TREE_CODE (niter.niter) != INTEGER_CST
	      || !wi::ltu_p (loop->nb_iterations_upper_bound,
			     wi::to_widest (niter.niter)))
	    continue;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Removed pointless exit: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0, 0);
	    }
	  gcond *cond_stmt = as_a <gcond *> (elt->stmt);
	  if (exit_edge->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_false (cond_stmt);
	  else
	    gimple_cond_make_true (cond_stmt);
	  update_stmt (cond_stmt);
	  changed = true;
	}
    }
  return changed;
}
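
/* For example, in a hypothetical loop

     for (i = 0; i < 100; i++)
       {
	 if (i == 200)
	   break;
	 ...
       }

   the discovered upper bound of 100 iterations proves the inner exit can
   never be taken, so its condition is folded to keep the loop going.  */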
/* Stores loops that will be unlooped after we process the whole loop
   tree.  */
static vec<loop_p> loops_to_unloop;
static vec<int> loops_to_unloop_nunroll;

/* Cancel all fully unrolled loops by putting __builtin_unreachable
   on the latch edge.
   We do it after all unrolling since unlooping moves basic blocks
   across loop boundaries, trashing the loop closed SSA form as well
   as the SCEV info, which need to stay intact during unrolling.

   IRRED_INVALIDATED is used to bookkeep if information about
   irreducible regions may become invalid as a result
   of the transformation.
   LOOP_CLOSED_SSA_INVALIDATED is used to bookkeep the case
   when we need to go into loop closed SSA form.  */

static void
unloop_loops (bitmap loop_closed_ssa_invalidated,
	      bool *irred_invalidated)
{
  while (loops_to_unloop.length ())
    {
      struct loop *loop = loops_to_unloop.pop ();
      int n_unroll = loops_to_unloop_nunroll.pop ();
      basic_block latch = loop->latch;
      edge latch_edge = loop_latch_edge (loop);
      int flags = latch_edge->flags;
      location_t locus = latch_edge->goto_locus;
      gcall *stmt;
      gimple_stmt_iterator gsi;

      remove_exits_and_undefined_stmts (loop, n_unroll);

      /* Unloop destroys the latch edge.  */
      unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);

      /* Create new basic block for the latch edge destination and wire
	 it in.  */
      stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
      latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch),
			      flags);
      latch_edge->probability = 0;
      latch_edge->count = 0;
      latch_edge->flags |= flags;
      latch_edge->goto_locus = locus;

      latch_edge->dest->loop_father = current_loops->tree_root;
      latch_edge->dest->count = 0;
      latch_edge->dest->frequency = 0;
      set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest,
			       latch_edge->src);

      gsi = gsi_start_bb (latch_edge->dest);
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
    }
  loops_to_unloop.release ();
  loops_to_unloop_nunroll.release ();
}
/* Tries to unroll LOOP completely, i.e. NITER times.
   UL determines which loops we are allowed to unroll.
   EXIT is the exit of the loop that should be eliminated.
   MAXITER specifies the bound on the number of iterations, or -1 if it is
   not known or too large for HOST_WIDE_INT.  The location
   LOCUS corresponding to the loop is used when emitting
   a summary of the unroll to the dump file.  */

static bool
try_unroll_loop_completely (struct loop *loop,
			    edge exit, tree niter,
			    enum unroll_level ul,
			    HOST_WIDE_INT maxiter,
			    location_t locus)
{
  unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns;
  struct loop_size size;
  bool n_unroll_found = false;
  edge edge_to_cancel = NULL;
  int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;

  /* See if we proved the number of iterations to be a low constant.

     EXIT is an edge that will be removed in all but the last iteration of
     the loop.

     EDGE_TO_CANCEL is an edge that will be removed from the last iteration
     of the unrolled sequence and is expected to make the final loop not
     rolling.

     If the number of executions of the loop is determined by a standard
     induction variable test, then EXIT and EDGE_TO_CANCEL are the two edges
     leaving the IV test.  */
  if (tree_fits_uhwi_p (niter))
    {
      n_unroll = tree_to_uhwi (niter);
      n_unroll_found = true;
      edge_to_cancel = EDGE_SUCC (exit->src, 0);
      if (edge_to_cancel == exit)
	edge_to_cancel = EDGE_SUCC (exit->src, 1);
    }
  /* We do not know the number of iterations and thus we cannot eliminate
     the EXIT edge.  */
  else
    exit = NULL;

  /* See if we can improve our estimate by using recorded loop bounds.  */
  if (maxiter >= 0
      && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
    {
      n_unroll = maxiter;
      n_unroll_found = true;
      /* Loop terminates before the IV variable test, so we cannot
	 remove it in the last iteration.  */
      edge_to_cancel = NULL;
    }

  if (!n_unroll_found)
    return false;

  if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Not unrolling loop %d "
		 "(--param max-completely-peeled-times limit reached).\n",
		 loop->num);
      return false;
    }

  if (!edge_to_cancel)
    edge_to_cancel = loop_edge_to_cancel (loop);
  if (n_unroll)
    {
      sbitmap wont_exit;
      edge e;
      unsigned i;
      bool large;
      vec<edge> to_remove = vNULL;
      if (ul == UL_SINGLE_ITER)
	return false;

      large = tree_estimate_loop_size
		 (loop, exit, edge_to_cancel, &size,
		  PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
      ninsns = size.overall;
      if (large)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
		     loop->num);
	  return false;
	}

      unr_insns = estimated_unrolled_size (&size, n_unroll);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
	  fprintf (dump_file, "  Estimated size after unrolling: %d\n",
		   (int) unr_insns);
	}

      /* If the code is going to shrink, we don't need to be extra cautious
	 on guessing if the unrolling is going to be profitable.  */
      if (unr_insns
	  /* If there is an IV variable that will become constant, we save
	     one instruction in the loop prologue we do not account for
	     otherwise.  */
	  <= ninsns + (size.constant_iv != false))
	;
      /* We unroll only inner loops, because we do not consider it profitable
	 otherwise.  We still can cancel the loopback edge of a not rolling
	 loop; this is always a good idea.  */
      else if (ul == UL_NO_GROWTH)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
		     loop->num);
	  return false;
	}
      /* Outer loops tend to be less interesting candidates for complete
	 unrolling unless we can do a lot of propagation into the inner loop
	 body.  For now we disable outer loop unrolling when the code would
	 grow.  */
      else if (loop->inner)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "it is not innermost and code would grow.\n",
		     loop->num);
	  return false;
	}
      /* If there is a call on the hot path through the loop, then
	 there is most probably not much to optimize.  */
      else if (size.num_non_pure_calls_on_hot_path)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "contains call and code would grow.\n",
		     loop->num);
	  return false;
	}
      /* If there is a pure/const call in the loop, then we can still
	 optimize the unrolled loop body if it contains some other
	 interesting code than the calls and the code storing or
	 accumulating the return value.  */
      else if (size.num_pure_calls_on_hot_path
	       /* One IV increment, one test, one ivtmp store and
		  one useful stmt.  That is about the minimal loop
		  doing a pure call.  */
	       && (size.non_call_stmts_on_hot_path
		   <= 3 + size.num_pure_calls_on_hot_path))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "contains just pure calls and code would grow.\n",
		     loop->num);
	  return false;
	}
      /* Complete unrolling is a major win when control flow is removed and
	 one big basic block is created.  If the loop contains control flow
	 the optimization may still be a win because of eliminating the loop
	 overhead, but it also may blow up the branch predictor tables.
	 Limit the number of branches on the hot path through the peeled
	 sequence.  */
      else if (size.num_branches_on_hot_path * (int)n_unroll
	       > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "number of branches on the hot path in the unrolled"
		     " sequence reaches --param max-peel-branches limit.\n",
		     loop->num);
	  return false;
	}
      else if (unr_insns
	       > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "(--param max-completely-peeled-insns limit reached).\n",
		     loop->num);
	  return false;
	}
      dump_printf_loc (report_flags, locus,
		       "loop turned into non-loop; it never loops.\n");

      initialize_original_copy_tables ();
      wont_exit = sbitmap_alloc (n_unroll + 1);
      bitmap_ones (wont_exit);
      bitmap_clear_bit (wont_exit, 0);

      if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
						 n_unroll, wont_exit,
						 exit, &to_remove,
						 DLTHE_FLAG_UPDATE_FREQ
						 | DLTHE_FLAG_COMPLETTE_PEEL))
	{
	  free_original_copy_tables ();
	  free (wont_exit);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Failed to duplicate the loop\n");
	  return false;
	}

      FOR_EACH_VEC_ELT (to_remove, i, e)
	{
	  bool ok = remove_path (e);
	  gcc_assert (ok);
	}

      to_remove.release ();
      free (wont_exit);
      free_original_copy_tables ();
    }
  /* Remove the conditional from the last copy of the loop.  */
  if (edge_to_cancel)
    {
      gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src));
      if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
	gimple_cond_make_false (cond);
      else
	gimple_cond_make_true (cond);
      update_stmt (cond);
      /* Do not remove the path.  Doing so may remove the outer loop
	 and confuse bookkeeping code in tree_unroll_loops_completely.  */
    }

  /* Store the loop for later unlooping and exit removal.  */
  loops_to_unloop.safe_push (loop);
  loops_to_unloop_nunroll.safe_push (n_unroll);

  if (dump_enabled_p ())
    {
      if (!n_unroll)
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
			 "loop turned into non-loop; it never loops\n");
      else
	{
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
			   "loop with %d iterations completely unrolled",
			   (int) (n_unroll + 1));
	  if (profile_info)
	    dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
			 " (header execution count %d)",
			 (int)loop->header->count);
	  dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (exit)
	fprintf (dump_file, "Exit condition of peeled iterations was "
		 "eliminated.\n");
      if (edge_to_cancel)
	fprintf (dump_file, "Last iteration exit edge was proved true.\n");
      else
	fprintf (dump_file, "Latch of last iteration was marked by "
		 "__builtin_unreachable ().\n");
    }

  return true;
}
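
/* An end-to-end sketch on a hypothetical candidate: for

     for (i = 0; i < 4; i++)
       sum += a[i];

   NITER counts latch executions, so n_unroll == 3; three copies of the
   body are peeled in front of the last one, the EXIT tests disappear from
   the peeled copies and EDGE_TO_CANCEL folds the remaining exit test,
   leaving straight-line code equivalent to

     sum += a[0]; sum += a[1]; sum += a[2]; sum += a[3];  */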
/* Return number of instructions after peeling.  */
static unsigned HOST_WIDE_INT
estimated_peeled_sequence_size (struct loop_size *size,
				unsigned HOST_WIDE_INT npeel)
{
  return MAX (npeel * (HOST_WIDE_INT) (size->overall
				       - size->eliminated_by_peeling), 1);
}
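
/* E.g. with made-up numbers SIZE->overall == 20,
   SIZE->eliminated_by_peeling == 12 and NPEEL == 4, the peeled sequence is
   estimated at 4 * (20 - 12) = 32 insns; the MAX with 1 merely guards the
   degenerate case where peeling is expected to eliminate everything.  */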
/* If the loop is expected to iterate N times and is
   small enough, duplicate the loop body N+1 times before
   the loop itself.  This way the hot path will never
   enter the loop.
   Parameters are the same as for try_unroll_loop_completely.  */

static bool
try_peel_loop (struct loop *loop,
	       edge exit, tree niter,
	       HOST_WIDE_INT maxiter)
{
  int npeel;
  struct loop_size size;
  int peeled_size;
  sbitmap wont_exit;
  unsigned i;
  vec<edge> to_remove = vNULL;
  edge e;

  /* If the iteration bound is known and large, then we can safely eliminate
     the check in peeled copies.  */
  if (TREE_CODE (niter) != INTEGER_CST)
    exit = NULL;

  if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0)
    return false;

  /* Peel only innermost loops.  */
  if (loop->inner)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: outer loop\n");
      return false;
    }

  if (!optimize_loop_for_speed_p (loop))
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: cold loop\n");
      return false;
    }

  /* Check if there is an estimate on the number of iterations.  */
  npeel = estimated_loop_iterations_int (loop);
  if (npeel < 0)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: number of iterations is not "
		 "estimated\n");
      return false;
    }
  if (maxiter >= 0 && maxiter <= npeel)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: upper bound is known so can "
		 "unroll completely\n");
      return false;
    }

  /* We want to peel estimated number of iterations + 1 (so we never
     enter the loop on quick path).  Check against PARAM_MAX_PEEL_TIMES
     and be sure to avoid overflows.  */
  if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: rolls too much "
		 "(%i + 1 > --param max-peel-times)\n", npeel);
      return false;
    }
  npeel++;

  /* Check peeled loop size.  */
  tree_estimate_loop_size (loop, exit, NULL, &size,
			   PARAM_VALUE (PARAM_MAX_PEELED_INSNS));
  if ((peeled_size = estimated_peeled_sequence_size (&size, npeel))
      > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
    {
      if (dump_file)
	fprintf (dump_file, "Not peeling: peeled sequence size is too large "
		 "(%i insns > --param max-peel-insns)", peeled_size);
      return false;
    }

  /* Duplicate, possibly eliminating the exits.  */
  initialize_original_copy_tables ();
  wont_exit = sbitmap_alloc (npeel + 1);
  bitmap_ones (wont_exit);
  bitmap_clear_bit (wont_exit, 0);
  if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					     npeel, wont_exit,
					     exit, &to_remove,
					     DLTHE_FLAG_UPDATE_FREQ
					     | DLTHE_FLAG_COMPLETTE_PEEL))
    {
      free_original_copy_tables ();
      free (wont_exit);
      return false;
    }
  FOR_EACH_VEC_ELT (to_remove, i, e)
    {
      bool ok = remove_path (e);
      gcc_assert (ok);
    }
  free (wont_exit);
  free_original_copy_tables ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Peeled loop %d, %i times.\n",
	       loop->num, npeel);
    }
  if (loop->any_upper_bound)
    loop->nb_iterations_upper_bound -= npeel;
  loop->nb_iterations_estimate = 0;
  /* Make sure to mark the loop cold so we do not try to peel it more.  */
  scale_loop_profile (loop, 1, 0);
  loop->header->count = 0;
  return true;
}
/* Adds a canonical induction variable to LOOP if suitable.
   CREATE_IV is true if we may create a new iv.  UL determines
   which loops we are allowed to completely unroll.  If TRY_EVAL is true,
   we try to determine the number of iterations of a loop by direct
   evaluation.  Returns true if cfg is changed.  */

static bool
canonicalize_loop_induction_variables (struct loop *loop,
				       bool create_iv, enum unroll_level ul,
				       bool try_eval)
{
  edge exit = NULL;
  tree niter;
  HOST_WIDE_INT maxiter;
  bool modified = false;
  location_t locus = UNKNOWN_LOCATION;

  niter = number_of_latch_executions (loop);
  exit = single_exit (loop);
  if (TREE_CODE (niter) == INTEGER_CST)
    locus = gimple_location (last_stmt (exit->src));
  else
    {
      /* If the loop has more than one exit, try checking all of them
	 for # of iterations determinable through scev.  */
      if (!exit)
	niter = find_loop_niter (loop, &exit);

      /* Finally if everything else fails, try brute force evaluation.  */
      if (try_eval
	  && (chrec_contains_undetermined (niter)
	      || TREE_CODE (niter) != INTEGER_CST))
	niter = find_loop_niter_by_eval (loop, &exit);

      if (exit)
	locus = gimple_location (last_stmt (exit->src));

      if (TREE_CODE (niter) != INTEGER_CST)
	exit = NULL;
    }

  /* We work exceptionally hard here to estimate the bound
     by find_loop_niter_by_eval.  Be sure to keep it for the future.  */
  if (niter && TREE_CODE (niter) == INTEGER_CST)
    {
      record_niter_bound (loop, wi::to_widest (niter),
			  exit == single_likely_exit (loop), true);
    }

  /* Force re-computation of loop bounds so we can remove redundant exits.  */
  maxiter = max_loop_iterations_int (loop);

  if (dump_file && (dump_flags & TDF_DETAILS)
      && TREE_CODE (niter) == INTEGER_CST)
    {
      fprintf (dump_file, "Loop %d iterates ", loop->num);
      print_generic_expr (dump_file, niter, TDF_SLIM);
      fprintf (dump_file, " times.\n");
    }
  if (dump_file && (dump_flags & TDF_DETAILS)
      && maxiter >= 0)
    {
      fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
	       (int)maxiter);
    }

  /* Remove exits that are known to be never taken based on the loop bound.
     Needs to be called after the computation of max_loop_iterations_int,
     which populates the loop bounds.  */
  modified |= remove_redundant_iv_tests (loop);

  if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus))
    return true;

  if (create_iv
      && niter && !chrec_contains_undetermined (niter)
      && exit && just_once_each_iteration_p (loop, exit->src))
    create_canonical_iv (loop, exit, niter);

  if (ul == UL_ALL)
    modified |= try_peel_loop (loop, exit, niter, maxiter);

  return modified;
}
/* The main entry point of the pass.  Adds canonical induction variables
   to the suitable loops.  */

unsigned int
canonicalize_induction_variables (void)
{
  struct loop *loop;
  bool changed = false;
  bool irred_invalidated = false;
  bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);

  free_numbers_of_iterations_estimates ();
  estimate_numbers_of_iterations ();

  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      changed |= canonicalize_loop_induction_variables (loop,
							true, UL_SINGLE_ITER,
							true);
    }
  gcc_assert (!need_ssa_update_p (cfun));

  unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
  if (irred_invalidated
      && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
    mark_irreducible_loops ();

  /* Clean up the information about numbers of iterations, since brute force
     evaluation could reveal new information.  */
  scev_reset ();

  if (!bitmap_empty_p (loop_closed_ssa_invalidated))
    {
      gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }
  BITMAP_FREE (loop_closed_ssa_invalidated);

  if (changed)
    return TODO_cleanup_cfg;
  return 0;
}
/* Propagate VAL into all uses of SSA_NAME.  */

static void
propagate_into_all_uses (tree ssa_name, tree val)
{
  imm_use_iterator iter;
  gimple use_stmt;

  FOR_EACH_IMM_USE_STMT (use_stmt, iter, ssa_name)
    {
      gimple_stmt_iterator use_stmt_gsi = gsi_for_stmt (use_stmt);
      use_operand_p use;

      FOR_EACH_IMM_USE_ON_STMT (use, iter)
	SET_USE (use, val);

      if (is_gimple_assign (use_stmt)
	  && get_gimple_rhs_class (gimple_assign_rhs_code (use_stmt))
	     == GIMPLE_SINGLE_RHS)
	{
	  tree rhs = gimple_assign_rhs1 (use_stmt);

	  if (TREE_CODE (rhs) == ADDR_EXPR)
	    recompute_tree_invariant_for_addr_expr (rhs);
	}

      fold_stmt_inplace (&use_stmt_gsi);
      update_stmt (use_stmt);
      maybe_clean_or_replace_eh_stmt (use_stmt, use_stmt);
    }
}
/* Propagate constant SSA_NAMEs defined in basic block BB.  */

static void
propagate_constants_for_unrolling (basic_block bb)
{
  /* Look for degenerate PHI nodes with constant argument.  */
  for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); )
    {
      gphi *phi = gsi.phi ();
      tree result = gimple_phi_result (phi);
      tree arg = gimple_phi_arg_def (phi, 0);

      if (gimple_phi_num_args (phi) == 1 && TREE_CODE (arg) == INTEGER_CST)
	{
	  propagate_into_all_uses (result, arg);
	  gsi_remove (&gsi, true);
	  release_ssa_name (result);
	}
      else
	gsi_next (&gsi);
    }

  /* Look for assignments to SSA names with constant RHS.  */
  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
    {
      gimple stmt = gsi_stmt (gsi);
      tree lhs;

      if (is_gimple_assign (stmt)
	  && gimple_assign_rhs_code (stmt) == INTEGER_CST
	  && (lhs = gimple_assign_lhs (stmt), TREE_CODE (lhs) == SSA_NAME)
	  && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
	{
	  propagate_into_all_uses (lhs, gimple_assign_rhs1 (stmt));
	  gsi_remove (&gsi, true);
	  release_ssa_name (lhs);
	}
      else
	gsi_next (&gsi);
    }
}
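
/* For instance, once unrolling has disconnected the loop back edge, a
   hypothetical degenerate PHI and constant assignment such as

     i_7 = PHI <5(3)>
     n_8 = i_7;

   collapse: 5 is propagated into every use of i_7 and the uses are
   re-folded, which in turn makes n_8 a constant-RHS assignment that is
   propagated and removed the same way.  */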
/* Process loops from innermost to outer, stopping at the innermost
   loop we unrolled.  */

static bool
tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
				vec<loop_p, va_heap>& father_stack,
				struct loop *loop)
{
  struct loop *loop_father;
  bool changed = false;
  struct loop *inner;
  enum unroll_level ul;

  /* Process inner loops first.  */
  for (inner = loop->inner; inner != NULL; inner = inner->next)
    changed |= tree_unroll_loops_completely_1 (may_increase_size,
					       unroll_outer, father_stack,
					       inner);

  /* If we changed an inner loop we cannot process outer loops in this
     iteration because SSA form is not up-to-date.  Continue with
     siblings of outer loops instead.  */
  if (changed)
    return true;

  /* Don't unroll #pragma omp simd loops until the vectorizer
     attempts to vectorize those.  */
  if (loop->force_vectorize)
    return false;

  /* Try to unroll this loop.  */
  loop_father = loop_outer (loop);
  if (!loop_father)
    return false;

  if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
      /* Unroll outermost loops only if asked to do so or they do
	 not cause code growth.  */
      && (unroll_outer || loop_outer (loop_father)))
    ul = UL_ALL;
  else
    ul = UL_NO_GROWTH;

  if (canonicalize_loop_induction_variables
	(loop, false, ul, !flag_tree_loop_ivcanon))
    {
      /* If we'll continue unrolling, we need to propagate constants
	 within the new basic blocks to fold away induction variable
	 computations; otherwise, the size might blow up before the
	 iteration is complete and the IR eventually cleaned up.  */
      if (loop_outer (loop_father) && !loop_father->aux)
	{
	  father_stack.safe_push (loop_father);
	  loop_father->aux = loop_father;
	}

      return true;
    }

  return false;
}
/* Unroll LOOPS completely if they iterate just a few times.  Unless
   MAY_INCREASE_SIZE is true, perform the unrolling only if the
   size of the code does not increase.  */

unsigned int
tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
{
  auto_vec<loop_p, 16> father_stack;
  bool changed;
  int iteration = 0;
  bool irred_invalidated = false;

  do
    {
      changed = false;
      bitmap loop_closed_ssa_invalidated = NULL;

      if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
	loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);

      free_numbers_of_iterations_estimates ();
      estimate_numbers_of_iterations ();

      changed = tree_unroll_loops_completely_1 (may_increase_size,
						unroll_outer, father_stack,
						current_loops->tree_root);
      if (changed)
	{
	  struct loop **iter;
	  unsigned i;

	  /* Be sure to skip unlooped loops while processing the
	     father_stack array.  */
	  FOR_EACH_VEC_ELT (loops_to_unloop, i, iter)
	    (*iter)->aux = NULL;
	  FOR_EACH_VEC_ELT (father_stack, i, iter)
	    if (!(*iter)->aux)
	      *iter = NULL;
	  unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);

	  /* We cannot use TODO_update_ssa_no_phi because VOPS gets
	     confused.  */
	  if (loop_closed_ssa_invalidated
	      && !bitmap_empty_p (loop_closed_ssa_invalidated))
	    rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
					  TODO_update_ssa);
	  else
	    update_ssa (TODO_update_ssa);

	  /* Propagate the constants within the new basic blocks.  */
	  FOR_EACH_VEC_ELT (father_stack, i, iter)
	    if (*iter)
	      {
		unsigned j;
		basic_block *body = get_loop_body_in_dom_order (*iter);
		for (j = 0; j < (*iter)->num_nodes; j++)
		  propagate_constants_for_unrolling (body[j]);
		free (body);
		(*iter)->aux = NULL;
	      }
	  father_stack.truncate (0);

	  /* This will take care of removing completely unrolled loops
	     from the loop structures so we can continue unrolling now
	     innermost loops.  */
	  if (cleanup_tree_cfg ())
	    update_ssa (TODO_update_ssa_only_virtuals);

	  /* Clean up the information about numbers of iterations, since
	     complete unrolling might have invalidated it.  */
	  scev_reset ();
#ifdef ENABLE_CHECKING
	  if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
	    verify_loop_closed_ssa (true);
#endif
	}
      if (loop_closed_ssa_invalidated)
	BITMAP_FREE (loop_closed_ssa_invalidated);
    }
  while (changed
	 && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));

  father_stack.release ();

  if (irred_invalidated
      && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
    mark_irreducible_loops ();

  return 0;
}
/* Canonical induction variable creation pass.  */

namespace {

const pass_data pass_data_iv_canon =
{
  GIMPLE_PASS, /* type */
  "ivcanon", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_IVCANON, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_iv_canon : public gimple_opt_pass
{
public:
  pass_iv_canon (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_iv_canon, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_tree_loop_ivcanon != 0; }
  virtual unsigned int execute (function *fun);

}; // class pass_iv_canon

unsigned int
pass_iv_canon::execute (function *fun)
{
  if (number_of_loops (fun) <= 1)
    return 0;

  return canonicalize_induction_variables ();
}

} // anon namespace

gimple_opt_pass *
make_pass_iv_canon (gcc::context *ctxt)
{
  return new pass_iv_canon (ctxt);
}
/* Complete unrolling of loops.  */

namespace {

const pass_data pass_data_complete_unroll =
{
  GIMPLE_PASS, /* type */
  "cunroll", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_COMPLETE_UNROLL, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_complete_unroll : public gimple_opt_pass
{
public:
  pass_complete_unroll (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_complete_unroll, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *);

}; // class pass_complete_unroll

unsigned int
pass_complete_unroll::execute (function *fun)
{
  if (number_of_loops (fun) <= 1)
    return 0;

  return tree_unroll_loops_completely (flag_unroll_loops
				       || flag_peel_loops
				       || optimize >= 3, true);
}

} // anon namespace

gimple_opt_pass *
make_pass_complete_unroll (gcc::context *ctxt)
{
  return new pass_complete_unroll (ctxt);
}
/* Complete unrolling of inner loops.  */

namespace {

const pass_data pass_data_complete_unrolli =
{
  GIMPLE_PASS, /* type */
  "cunrolli", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_COMPLETE_UNROLL, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_complete_unrolli : public gimple_opt_pass
{
public:
  pass_complete_unrolli (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return optimize >= 2; }
  virtual unsigned int execute (function *);

}; // class pass_complete_unrolli

unsigned int
pass_complete_unrolli::execute (function *fun)
{
  unsigned ret = 0;

  loop_optimizer_init (LOOPS_NORMAL
		       | LOOPS_HAVE_RECORDED_EXITS);
  if (number_of_loops (fun) > 1)
    {
      scev_initialize ();
      ret = tree_unroll_loops_completely (optimize >= 3, false);
      free_numbers_of_iterations_estimates ();
      scev_finalize ();
    }
  loop_optimizer_finalize ();

  return ret;
}

} // anon namespace

gimple_opt_pass *
make_pass_complete_unrolli (gcc::context *ctxt)
{
  return new pass_complete_unrolli (ctxt);
}