gcc/tree-ssa-loop-ivcanon.c

   1 /* Induction variable canonicalization and loop peeling.
   2    Copyright (C) 2004-2017 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
/* This pass detects the loops that iterate a constant number of times,
   adds a canonical induction variable (step -1, tested against 0)
   and replaces the exit test.  This enables the less powerful rtl
   level analysis to use this information.

   This might spoil the code in some cases (by increasing register pressure).
   Note that in the case the new variable is not needed, ivopts will get rid
   of it, so it might only be a problem when there are no other linear induction
   variables.  In that case the created optimization possibilities are likely
   to pay off.

   We also perform
     - complete unrolling (or peeling) when the loop rolls few enough
       times
     - simple peeling (i.e. copying a few initial iterations prior to the
       loop) when an estimate of the number of iterations is known
       (typically from the profile info).  */
  37
  38 #include "config.h"
  39 #include "system.h"
  40 #include "coretypes.h"
  41 #include "backend.h"
  42 #include "tree.h"
  43 #include "gimple.h"
  44 #include "cfghooks.h"
  45 #include "tree-pass.h"
  46 #include "ssa.h"
  47 #include "cgraph.h"
  48 #include "gimple-pretty-print.h"
  49 #include "fold-const.h"
  50 #include "profile.h"
  51 #include "gimple-fold.h"
  52 #include "tree-eh.h"
  53 #include "gimple-iterator.h"
  54 #include "tree-cfg.h"
  55 #include "tree-ssa-loop-manip.h"
  56 #include "tree-ssa-loop-niter.h"
  57 #include "tree-ssa-loop.h"
  58 #include "tree-into-ssa.h"
  59 #include "cfgloop.h"
  60 #include "tree-chrec.h"
  61 #include "tree-scalar-evolution.h"
  62 #include "params.h"
  63 #include "tree-inline.h"
  64 #include "tree-cfgcleanup.h"
  65 #include "builtins.h"
  66
/* Specifies types of loops that may be unrolled.  The levels are listed
   from the most restrictive to the least restrictive one.  */

enum unroll_level
{
  UL_SINGLE_ITER,       /* Only loops that exit immediately in the first
                           iteration.  */
  UL_NO_GROWTH,         /* Only loops whose unrolling will not cause increase
                           of code size.  */
  UL_ALL                /* All suitable loops.  */
};
  77
  78 /* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
  79    is the exit edge whose condition is replaced.  */
  80
  81 static void
  82 create_canonical_iv (struct loop *loop, edge exit, tree niter)
  83 {
  84   edge in;
  85   tree type, var;
  86   gcond *cond;
  87   gimple_stmt_iterator incr_at;
  88   enum tree_code cmp;
  89
  90   if (dump_file && (dump_flags & TDF_DETAILS))
  91     {
  92       fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
  93       print_generic_expr (dump_file, niter, TDF_SLIM);
  94       fprintf (dump_file, " iterations.\n");
  95     }
  96
  97   cond = as_a <gcond *> (last_stmt (exit->src));
  98   in = EDGE_SUCC (exit->src, 0);
  99   if (in == exit)
 100     in = EDGE_SUCC (exit->src, 1);
 101
 102   /* Note that we do not need to worry about overflows, since
 103      type of niter is always unsigned and all comparisons are
 104      just for equality/nonequality -- i.e. everything works
 105      with a modulo arithmetics.  */
 106
 107   type = TREE_TYPE (niter);
 108   niter = fold_build2 (PLUS_EXPR, type,
 109                        niter,
 110                        build_int_cst (type, 1));
 111   incr_at = gsi_last_bb (in->src);
 112   create_iv (niter,
 113              build_int_cst (type, -1),
 114              NULL_TREE, loop,
 115              &incr_at, false, NULL, &var);
 116
 117   cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
 118   gimple_cond_set_code (cond, cmp);
 119   gimple_cond_set_lhs (cond, var);
 120   gimple_cond_set_rhs (cond, build_int_cst (type, 0));
 121   update_stmt (cond);
 122 }
 123
/* Describe size of loop as detected by tree_estimate_loop_size.  The first
   four fields accumulate the per-statement values returned by
   estimate_num_insns; the *_on_hot_path fields count statements found while
   walking the blocks returned by get_loop_hot_path.  */
struct loop_size
{
  /* Number of instructions in the loop.  */
  int overall;

  /* Number of instructions that will be likely optimized out in
     peeled iterations of loop  (i.e. computation based on induction
     variable where induction variable starts at known constant.)  */
  int eliminated_by_peeling;

  /* Same statistics for last iteration of loop: it is smaller because
     instructions after exit are not executed.  */
  int last_iteration;
  int last_iteration_eliminated_by_peeling;

  /* If some IV computation will become constant.  Set when an assignment
     to an SSA name has only operands that are constant after peeling.  */
  bool constant_iv;

  /* Number of call stmts on the hot path that are not inexpensive
     (per gimple_inexpensive_call_p) and are pure or const.  */
  int num_pure_calls_on_hot_path;
  /* Number of call stmts on the hot path that are not inexpensive
     (per gimple_inexpensive_call_p) and are neither pure nor const.  */
  int num_non_pure_calls_on_hot_path;
  /* Number of hot-path statements that are neither debug statements nor
     expensive calls; inexpensive calls are counted here.  */
  int non_call_stmts_on_hot_path;
  /* Number of branches seen on the hot path.  tree_estimate_loop_size also
     bumps this once for every expensive call.  */
  int num_branches_on_hot_path;
};
 154
 155 /* Return true if OP in STMT will be constant after peeling LOOP.  */
 156
 157 static bool
 158 constant_after_peeling (tree op, gimple *stmt, struct loop *loop)
 159 {
 160   if (is_gimple_min_invariant (op))
 161     return true;
 162
 163   /* We can still fold accesses to constant arrays when index is known.  */
 164   if (TREE_CODE (op) != SSA_NAME)
 165     {
 166       tree base = op;
 167
 168       /* First make fast look if we see constant array inside.  */
 169       while (handled_component_p (base))
 170         base = TREE_OPERAND (base, 0);
 171       if ((DECL_P (base)
 172            && ctor_for_folding (base) != error_mark_node)
 173           || CONSTANT_CLASS_P (base))
 174         {
 175           /* If so, see if we understand all the indices.  */
 176           base = op;
 177           while (handled_component_p (base))
 178             {
 179               if (TREE_CODE (base) == ARRAY_REF
 180                   && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
 181                 return false;
 182               base = TREE_OPERAND (base, 0);
 183             }
 184           return true;
 185         }
 186       return false;
 187     }
 188
 189   /* Induction variables are constants when defined in loop.  */
 190   if (loop_containing_stmt (stmt) != loop)
 191     return false;
 192   tree ev = analyze_scalar_evolution (loop, op);
 193   if (chrec_contains_undetermined (ev)
 194       || chrec_contains_symbols (ev))
 195     return false;
 196   return true;
 197 }
 198
 199 /* Computes an estimated number of insns in LOOP.
 200    EXIT (if non-NULL) is an exite edge that will be eliminated in all but last
 201    iteration of the loop.
 202    EDGE_TO_CANCEL (if non-NULL) is an non-exit edge eliminated in the last iteration
 203    of loop.
 204    Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.
 205    Stop estimating after UPPER_BOUND is met.  Return true in this case.  */
 206
 207 static bool
 208 tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel,
 209                          struct loop_size *size, int upper_bound)
 210 {
 211   basic_block *body = get_loop_body (loop);
 212   gimple_stmt_iterator gsi;
 213   unsigned int i;
 214   bool after_exit;
 215   vec<basic_block> path = get_loop_hot_path (loop);
 216
 217   size->overall = 0;
 218   size->eliminated_by_peeling = 0;
 219   size->last_iteration = 0;
 220   size->last_iteration_eliminated_by_peeling = 0;
 221   size->num_pure_calls_on_hot_path = 0;
 222   size->num_non_pure_calls_on_hot_path = 0;
 223   size->non_call_stmts_on_hot_path = 0;
 224   size->num_branches_on_hot_path = 0;
 225   size->constant_iv = 0;
 226
 227   if (dump_file && (dump_flags & TDF_DETAILS))
 228     fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
 229   for (i = 0; i < loop->num_nodes; i++)
 230     {
 231       if (edge_to_cancel && body[i] != edge_to_cancel->src
 232           && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
 233         after_exit = true;
 234       else
 235         after_exit = false;
 236       if (dump_file && (dump_flags & TDF_DETAILS))
 237         fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
 238                  after_exit);
 239
 240       for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
 241         {
 242           gimple *stmt = gsi_stmt (gsi);
 243           int num = estimate_num_insns (stmt, &eni_size_weights);
 244           bool likely_eliminated = false;
 245           bool likely_eliminated_last = false;
 246           bool likely_eliminated_peeled = false;
 247
 248           if (dump_file && (dump_flags & TDF_DETAILS))
 249             {
 250               fprintf (dump_file, "  size: %3i ", num);
 251               print_gimple_stmt (dump_file, gsi_stmt (gsi), 0);
 252             }
 253
 254           /* Look for reasons why we might optimize this stmt away. */
 255
 256           if (!gimple_has_side_effects (stmt))
 257             {
 258               /* Exit conditional.  */
 259               if (exit && body[i] == exit->src
 260                   && stmt == last_stmt (exit->src))
 261                 {
 262                   if (dump_file && (dump_flags & TDF_DETAILS))
 263                     fprintf (dump_file, "   Exit condition will be eliminated "
 264                              "in peeled copies.\n");
 265                   likely_eliminated_peeled = true;
 266                 }
 267               if (edge_to_cancel && body[i] == edge_to_cancel->src
 268                   && stmt == last_stmt (edge_to_cancel->src))
 269                 {
 270                   if (dump_file && (dump_flags & TDF_DETAILS))
 271                     fprintf (dump_file, "   Exit condition will be eliminated "
 272                              "in last copy.\n");
 273                   likely_eliminated_last = true;
 274                 }
 275               /* Sets of IV variables  */
 276               if (gimple_code (stmt) == GIMPLE_ASSIGN
 277                   && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
 278                 {
 279                   if (dump_file && (dump_flags & TDF_DETAILS))
 280                     fprintf (dump_file, "   Induction variable computation will"
 281                              " be folded away.\n");
 282                   likely_eliminated = true;
 283                 }
 284               /* Assignments of IV variables.  */
 285               else if (gimple_code (stmt) == GIMPLE_ASSIGN
 286                        && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
 287                        && constant_after_peeling (gimple_assign_rhs1 (stmt),
 288                                                   stmt, loop)
 289                        && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
 290                            || constant_after_peeling (gimple_assign_rhs2 (stmt),
 291                                                       stmt, loop)))
 292                 {
 293                   size->constant_iv = true;
 294                   if (dump_file && (dump_flags & TDF_DETAILS))
 295                     fprintf (dump_file,
 296                              "   Constant expression will be folded away.\n");
 297                   likely_eliminated = true;
 298                 }
 299               /* Conditionals.  */
 300               else if ((gimple_code (stmt) == GIMPLE_COND
 301                         && constant_after_peeling (gimple_cond_lhs (stmt), stmt,
 302                                                    loop)
 303                         && constant_after_peeling (gimple_cond_rhs (stmt), stmt,
 304                                                    loop)
 305                         /* We don't simplify all constant compares so make sure
 306                            they are not both constant already.  See PR70288.  */
 307                         && (! is_gimple_min_invariant (gimple_cond_lhs (stmt))
 308                             || ! is_gimple_min_invariant
 309                                  (gimple_cond_rhs (stmt))))
 310                        || (gimple_code (stmt) == GIMPLE_SWITCH
 311                            && constant_after_peeling (gimple_switch_index (
 312                                                         as_a <gswitch *>
 313                                                           (stmt)),
 314                                                       stmt, loop)
 315                            && ! is_gimple_min_invariant
 316                                    (gimple_switch_index
 317                                       (as_a <gswitch *> (stmt)))))
 318                 {
 319                   if (dump_file && (dump_flags & TDF_DETAILS))
 320                     fprintf (dump_file, "   Constant conditional.\n");
 321                   likely_eliminated = true;
 322                 }
 323             }
 324
 325           size->overall += num;
 326           if (likely_eliminated || likely_eliminated_peeled)
 327             size->eliminated_by_peeling += num;
 328           if (!after_exit)
 329             {
 330               size->last_iteration += num;
 331               if (likely_eliminated || likely_eliminated_last)
 332                 size->last_iteration_eliminated_by_peeling += num;
 333             }
 334           if ((size->overall * 3 / 2 - size->eliminated_by_peeling
 335               - size->last_iteration_eliminated_by_peeling) > upper_bound)
 336             {
 337               free (body);
 338               path.release ();
 339               return true;
 340             }
 341         }
 342     }
 343   while (path.length ())
 344     {
 345       basic_block bb = path.pop ();
 346       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 347         {
 348           gimple *stmt = gsi_stmt (gsi);
 349           if (gimple_code (stmt) == GIMPLE_CALL
 350               && !gimple_inexpensive_call_p (as_a <gcall *>  (stmt)))
 351             {
 352               int flags = gimple_call_flags (stmt);
 353               if (flags & (ECF_PURE | ECF_CONST))
 354                 size->num_pure_calls_on_hot_path++;
 355               else
 356                 size->num_non_pure_calls_on_hot_path++;
 357               size->num_branches_on_hot_path ++;
 358             }
 359           /* Count inexpensive calls as non-calls, because they will likely
 360              expand inline.  */
 361           else if (gimple_code (stmt) != GIMPLE_DEBUG)
 362             size->non_call_stmts_on_hot_path++;
 363           if (((gimple_code (stmt) == GIMPLE_COND
 364                 && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
 365                     || constant_after_peeling (gimple_cond_rhs (stmt), stmt,
 366                                                loop)))
 367                || (gimple_code (stmt) == GIMPLE_SWITCH
 368                    && !constant_after_peeling (gimple_switch_index (
 369                                                  as_a <gswitch *> (stmt)),
 370                                                stmt, loop)))
 371               && (!exit || bb != exit->src))
 372             size->num_branches_on_hot_path++;
 373         }
 374     }
 375   path.release ();
 376   if (dump_file && (dump_flags & TDF_DETAILS))
 377     fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
 378              size->eliminated_by_peeling, size->last_iteration,
 379              size->last_iteration_eliminated_by_peeling);
 380
 381   free (body);
 382   return false;
 383 }
 384
 385 /* Estimate number of insns of completely unrolled loop.
 386    It is (NUNROLL + 1) * size of loop body with taking into account
 387    the fact that in last copy everything after exit conditional
 388    is dead and that some instructions will be eliminated after
 389    peeling.
 390
 391    Loop body is likely going to simplify further, this is difficult
 392    to guess, we just decrease the result by 1/3.  */
 393
 394 static unsigned HOST_WIDE_INT
 395 estimated_unrolled_size (struct loop_size *size,
 396                          unsigned HOST_WIDE_INT nunroll)
 397 {
 398   HOST_WIDE_INT unr_insns = ((nunroll)
 399                              * (HOST_WIDE_INT) (size->overall
 400                                                 - size->eliminated_by_peeling));
 401   if (!nunroll)
 402     unr_insns = 0;
 403   unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
 404
 405   unr_insns = unr_insns * 2 / 3;
 406   if (unr_insns <= 0)
 407     unr_insns = 1;
 408
 409   return unr_insns;
 410 }
 411
 412 /* Loop LOOP is known to not loop.  See if there is an edge in the loop
 413    body that can be remove to make the loop to always exit and at
 414    the same time it does not make any code potentially executed
 415    during the last iteration dead.
 416
 417    After complete unrolling we still may get rid of the conditional
 418    on the exit in the last copy even if we have no idea what it does.
 419    This is quite common case for loops of form
 420
 421      int a[5];
 422      for (i=0;i<b;i++)
 423        a[i]=0;
 424
 425    Here we prove the loop to iterate 5 times but we do not know
 426    it from induction variable.
 427
 428    For now we handle only simple case where there is exit condition
 429    just before the latch block and the latch block contains no statements
 430    with side effect that may otherwise terminate the execution of loop
 431    (such as by EH or by terminating the program or longjmp).
 432
 433    In the general case we may want to cancel the paths leading to statements
 434    loop-niter identified as having undefined effect in the last iteration.
 435    The other cases are hopefully rare and will be cleaned up later.  */
 436
 437 static edge
 438 loop_edge_to_cancel (struct loop *loop)
 439 {
 440   vec<edge> exits;
 441   unsigned i;
 442   edge edge_to_cancel;
 443   gimple_stmt_iterator gsi;
 444
 445   /* We want only one predecestor of the loop.  */
 446   if (EDGE_COUNT (loop->latch->preds) > 1)
 447     return NULL;
 448
 449   exits = get_loop_exit_edges (loop);
 450
 451   FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
 452     {
 453        /* Find the other edge than the loop exit
 454           leaving the conditoinal.  */
 455        if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
 456          continue;
 457        if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
 458          edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
 459        else
 460          edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);
 461
 462       /* We only can handle conditionals.  */
 463       if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
 464         continue;
 465
 466       /* We should never have conditionals in the loop latch. */
 467       gcc_assert (edge_to_cancel->dest != loop->header);
 468
 469       /* Check that it leads to loop latch.  */
 470       if (edge_to_cancel->dest != loop->latch)
 471         continue;
 472
 473       exits.release ();
 474
 475       /* Verify that the code in loop latch does nothing that may end program
 476          execution without really reaching the exit.  This may include
 477          non-pure/const function calls, EH statements, volatile ASMs etc.  */
 478       for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
 479         if (gimple_has_side_effects (gsi_stmt (gsi)))
 480            return NULL;
 481       return edge_to_cancel;
 482     }
 483   exits.release ();
 484   return NULL;
 485 }
 486
 487 /* Remove all tests for exits that are known to be taken after LOOP was
 488    peeled NPEELED times. Put gcc_unreachable before every statement
 489    known to not be executed.  */
 490
 491 static bool
 492 remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
 493 {
 494   struct nb_iter_bound *elt;
 495   bool changed = false;
 496
 497   for (elt = loop->bounds; elt; elt = elt->next)
 498     {
 499       /* If statement is known to be undefined after peeling, turn it
 500          into unreachable (or trap when debugging experience is supposed
 501          to be good).  */
 502       if (!elt->is_exit
 503           && wi::ltu_p (elt->bound, npeeled))
 504         {
 505           gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
 506           gcall *stmt = gimple_build_call
 507               (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
 508           gimple_set_location (stmt, gimple_location (elt->stmt));
 509           gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
 510           split_block (gimple_bb (stmt), stmt);
 511           changed = true;
 512           if (dump_file && (dump_flags & TDF_DETAILS))
 513             {
 514               fprintf (dump_file, "Forced statement unreachable: ");
 515               print_gimple_stmt (dump_file, elt->stmt, 0);
 516             }
 517         }
 518       /* If we know the exit will be taken after peeling, update.  */
 519       else if (elt->is_exit
 520                && wi::leu_p (elt->bound, npeeled))
 521         {
 522           basic_block bb = gimple_bb (elt->stmt);
 523           edge exit_edge = EDGE_SUCC (bb, 0);
 524
 525           if (dump_file && (dump_flags & TDF_DETAILS))
 526             {
 527               fprintf (dump_file, "Forced exit to be taken: ");
 528               print_gimple_stmt (dump_file, elt->stmt, 0);
 529             }
 530           if (!loop_exit_edge_p (loop, exit_edge))
 531             exit_edge = EDGE_SUCC (bb, 1);
 532           exit_edge->probability = profile_probability::always ();
 533           exit_edge->count = exit_edge->src->count;
 534           gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
 535           gcond *cond_stmt = as_a <gcond *> (elt->stmt);
 536           if (exit_edge->flags & EDGE_TRUE_VALUE)
 537             gimple_cond_make_true (cond_stmt);
 538           else
 539             gimple_cond_make_false (cond_stmt);
 540           update_stmt (cond_stmt);
 541           changed = true;
 542         }
 543     }
 544   return changed;
 545 }
 546
 547 /* Remove all exits that are known to be never taken because of the loop bound
 548    discovered.  */
 549
 550 static bool
 551 remove_redundant_iv_tests (struct loop *loop)
 552 {
 553   struct nb_iter_bound *elt;
 554   bool changed = false;
 555
 556   if (!loop->any_upper_bound)
 557     return false;
 558   for (elt = loop->bounds; elt; elt = elt->next)
 559     {
 560       /* Exit is pointless if it won't be taken before loop reaches
 561          upper bound.  */
 562       if (elt->is_exit && loop->any_upper_bound
 563           && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
 564         {
 565           basic_block bb = gimple_bb (elt->stmt);
 566           edge exit_edge = EDGE_SUCC (bb, 0);
 567           struct tree_niter_desc niter;
 568
 569           if (!loop_exit_edge_p (loop, exit_edge))
 570             exit_edge = EDGE_SUCC (bb, 1);
 571
 572           /* Only when we know the actual number of iterations, not
 573              just a bound, we can remove the exit.  */
 574           if (!number_of_iterations_exit (loop, exit_edge,
 575                                           &niter, false, false)
 576               || !integer_onep (niter.assumptions)
 577               || !integer_zerop (niter.may_be_zero)
 578               || !niter.niter
 579               || TREE_CODE (niter.niter) != INTEGER_CST
 580               || !wi::ltu_p (loop->nb_iterations_upper_bound,
 581                              wi::to_widest (niter.niter)))
 582             continue;
 583
 584           if (dump_file && (dump_flags & TDF_DETAILS))
 585             {
 586               fprintf (dump_file, "Removed pointless exit: ");
 587               print_gimple_stmt (dump_file, elt->stmt, 0);
 588             }
 589           gcond *cond_stmt = as_a <gcond *> (elt->stmt);
 590           if (exit_edge->flags & EDGE_TRUE_VALUE)
 591             gimple_cond_make_false (cond_stmt);
 592           else
 593             gimple_cond_make_true (cond_stmt);
 594           update_stmt (cond_stmt);
 595           changed = true;
 596         }
 597     }
 598   return changed;
 599 }
 600
/* Stores loops that will be unlooped and edges that will be removed
   after we process whole loop tree.  unloop_loops drains and releases all
   three vectors.  loops_to_unloop_nunroll is popped in lockstep with
   loops_to_unloop; its entry is the count handed to
   remove_exits_and_undefined_stmts for the matching loop.  */
static vec<loop_p> loops_to_unloop;
static vec<int> loops_to_unloop_nunroll;
static vec<edge> edges_to_remove;
/* Stores loops that have been peeled.  */
static bitmap peeled_loops;
 608
 609 /* Cancel all fully unrolled loops by putting __builtin_unreachable
 610    on the latch edge.
 611    We do it after all unrolling since unlooping moves basic blocks
 612    across loop boundaries trashing loop closed SSA form as well
 613    as SCEV info needed to be intact during unrolling.
 614
 615    IRRED_INVALIDATED is used to bookkeep if information about
 616    irreducible regions may become invalid as a result
 617    of the transformation.
 618    LOOP_CLOSED_SSA_INVALIDATED is used to bookkepp the case
 619    when we need to go into loop closed SSA form.  */
 620
 621 static void
 622 unloop_loops (bitmap loop_closed_ssa_invalidated,
 623               bool *irred_invalidated)
 624 {
 625   while (loops_to_unloop.length ())
 626     {
 627       struct loop *loop = loops_to_unloop.pop ();
 628       int n_unroll = loops_to_unloop_nunroll.pop ();
 629       basic_block latch = loop->latch;
 630       edge latch_edge = loop_latch_edge (loop);
 631       int flags = latch_edge->flags;
 632       location_t locus = latch_edge->goto_locus;
 633       gcall *stmt;
 634       gimple_stmt_iterator gsi;
 635
 636       remove_exits_and_undefined_stmts (loop, n_unroll);
 637
 638       /* Unloop destroys the latch edge.  */
 639       unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);
 640
 641       /* Create new basic block for the latch edge destination and wire
 642          it in.  */
 643       stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
 644       latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
 645       latch_edge->probability = profile_probability::never ();
 646       latch_edge->count = profile_count::zero ();
 647       latch_edge->flags |= flags;
 648       latch_edge->goto_locus = locus;
 649
 650       add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
 651       latch_edge->dest->count = profile_count::zero ();
 652       latch_edge->dest->frequency = 0;
 653       set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);
 654
 655       gsi = gsi_start_bb (latch_edge->dest);
 656       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
 657     }
 658   loops_to_unloop.release ();
 659   loops_to_unloop_nunroll.release ();
 660
 661   /* Remove edges in peeled copies.  */
 662   unsigned i;
 663   edge e;
 664   FOR_EACH_VEC_ELT (edges_to_remove, i, e)
 665     {
 666       bool ok = remove_path (e, irred_invalidated, loop_closed_ssa_invalidated);
 667       gcc_assert (ok);
 668     }
 669   edges_to_remove.release ();
 670 }
 671
/* Tries to unroll LOOP completely, i.e. NITER times.
   UL determines which loops we are allowed to unroll.
   EXIT is the exit of the loop that should be eliminated.
   MAXITER specifies a bound on the number of iterations, -1 if it is
   not known or too large for HOST_WIDE_INT.  The location
   LOCUS corresponding to the loop is used when emitting
   a summary of the unroll to the dump file.  */
 679
 680 static bool
 681 try_unroll_loop_completely (struct loop *loop,
 682                             edge exit, tree niter,
 683                             enum unroll_level ul,
 684                             HOST_WIDE_INT maxiter,
 685                             location_t locus)
 686 {
 687   unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns;
 688   struct loop_size size;
 689   bool n_unroll_found = false;
 690   edge edge_to_cancel = NULL;
 691   dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS;
 692
 693   /* See if we proved number of iterations to be low constant.
 694
 695      EXIT is an edge that will be removed in all but last iteration of
 696      the loop.
 697
 698      EDGE_TO_CACNEL is an edge that will be removed from the last iteration
 699      of the unrolled sequence and is expected to make the final loop not
 700      rolling.
 701
 702      If the number of execution of loop is determined by standard induction
 703      variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving
 704      from the iv test.  */
 705   if (tree_fits_uhwi_p (niter))
 706     {
 707       n_unroll = tree_to_uhwi (niter);
 708       n_unroll_found = true;
 709       edge_to_cancel = EDGE_SUCC (exit->src, 0);
 710       if (edge_to_cancel == exit)
 711         edge_to_cancel = EDGE_SUCC (exit->src, 1);
 712     }
 713   /* We do not know the number of iterations and thus we can not eliminate
 714      the EXIT edge.  */
 715   else
 716     exit = NULL;
 717
 718   /* See if we can improve our estimate by using recorded loop bounds.  */
 719   if (maxiter >= 0
 720       && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
 721     {
 722       n_unroll = maxiter;
 723       n_unroll_found = true;
 724       /* Loop terminates before the IV variable test, so we can not
 725          remove it in the last iteration.  */
 726       edge_to_cancel = NULL;
 727     }
 728
 729   if (!n_unroll_found)
 730     return false;
 731
 732   if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
 733     {
 734       if (dump_file && (dump_flags & TDF_DETAILS))
 735         fprintf (dump_file, "Not unrolling loop %d "
 736                  "(--param max-completely-peel-times limit reached).\n",
 737                  loop->num);
 738       return false;
 739     }
 740
 741   if (!edge_to_cancel)
 742     edge_to_cancel = loop_edge_to_cancel (loop);
 743
 744   if (n_unroll)
 745     {
 746       bool large;
 747       if (ul == UL_SINGLE_ITER)
 748         return false;
 749
 750       /* EXIT can be removed only if we are sure it passes first N_UNROLL
 751          iterations.  */
 752       bool remove_exit = (exit && niter
 753                           && TREE_CODE (niter) == INTEGER_CST
 754                           && wi::leu_p (n_unroll, wi::to_widest (niter)));
 755
 756       large = tree_estimate_loop_size
 757                  (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
 758                   PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
 759       ninsns = size.overall;
 760       if (large)
 761         {
 762           if (dump_file && (dump_flags & TDF_DETAILS))
 763             fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
 764                      loop->num);
 765           return false;
 766         }
 767
 768       unr_insns = estimated_unrolled_size (&size, n_unroll);
 769       if (dump_file && (dump_flags & TDF_DETAILS))
 770         {
 771           fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
 772           fprintf (dump_file, "  Estimated size after unrolling: %d\n",
 773                    (int) unr_insns);
 774         }
 775
 776       /* If the code is going to shrink, we don't need to be extra cautious
 777          on guessing if the unrolling is going to be profitable.  */
 778       if (unr_insns
 779           /* If there is IV variable that will become constant, we save
 780              one instruction in the loop prologue we do not account
 781              otherwise.  */
 782           <= ninsns + (size.constant_iv != false))
 783         ;
 784       /* We unroll only inner loops, because we do not consider it profitable
 785          otheriwse.  We still can cancel loopback edge of not rolling loop;
 786          this is always a good idea.  */
 787       else if (ul == UL_NO_GROWTH)
 788         {
 789           if (dump_file && (dump_flags & TDF_DETAILS))
 790             fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
 791                      loop->num);
 792           return false;
 793         }
 794       /* Outer loops tend to be less interesting candidates for complete
 795          unrolling unless we can do a lot of propagation into the inner loop
 796          body.  For now we disable outer loop unrolling when the code would
 797          grow.  */
 798       else if (loop->inner)
 799         {
 800           if (dump_file && (dump_flags & TDF_DETAILS))
 801             fprintf (dump_file, "Not unrolling loop %d: "
 802                      "it is not innermost and code would grow.\n",
 803                      loop->num);
 804           return false;
 805         }
 806       /* If there is call on a hot path through the loop, then
 807          there is most probably not much to optimize.  */
 808       else if (size.num_non_pure_calls_on_hot_path)
 809         {
 810           if (dump_file && (dump_flags & TDF_DETAILS))
 811             fprintf (dump_file, "Not unrolling loop %d: "
 812                      "contains call and code would grow.\n",
 813                      loop->num);
 814           return false;
 815         }
 816       /* If there is pure/const call in the function, then we
 817          can still optimize the unrolled loop body if it contains
 818          some other interesting code than the calls and code
 819          storing or cumulating the return value.  */
 820       else if (size.num_pure_calls_on_hot_path
 821                /* One IV increment, one test, one ivtmp store
 822                   and one useful stmt.  That is about minimal loop
 823                   doing pure call.  */
 824                && (size.non_call_stmts_on_hot_path
 825                    <= 3 + size.num_pure_calls_on_hot_path))
 826         {
 827           if (dump_file && (dump_flags & TDF_DETAILS))
 828             fprintf (dump_file, "Not unrolling loop %d: "
 829                      "contains just pure calls and code would grow.\n",
 830                      loop->num);
 831           return false;
 832         }
 833       /* Complete unrolling is a major win when control flow is removed and
 834          one big basic block is created.  If the loop contains control flow
 835          the optimization may still be a win because of eliminating the loop
 836          overhead but it also may blow the branch predictor tables.
 837          Limit number of branches on the hot path through the peeled
 838          sequence.  */
 839       else if (size.num_branches_on_hot_path * (int)n_unroll
 840                > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
 841         {
 842           if (dump_file && (dump_flags & TDF_DETAILS))
 843             fprintf (dump_file, "Not unrolling loop %d: "
 844                      " number of branches on hot path in the unrolled sequence"
 845                      " reach --param max-peel-branches limit.\n",
 846                      loop->num);
 847           return false;
 848         }
 849       else if (unr_insns
 850                > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
 851         {
 852           if (dump_file && (dump_flags & TDF_DETAILS))
 853             fprintf (dump_file, "Not unrolling loop %d: "
 854                      "(--param max-completely-peeled-insns limit reached).\n",
 855                      loop->num);
 856           return false;
 857         }
 858       if (!n_unroll)
 859         dump_printf_loc (report_flags, locus,
 860                          "loop turned into non-loop; it never loops.\n");
 861
 862       initialize_original_copy_tables ();
 863       auto_sbitmap wont_exit (n_unroll + 1);
 864       if (exit && niter
 865           && TREE_CODE (niter) == INTEGER_CST
 866           && wi::leu_p (n_unroll, wi::to_widest (niter)))
 867         {
 868           bitmap_ones (wont_exit);
 869           if (wi::eq_p (wi::to_widest (niter), n_unroll)
 870               || edge_to_cancel)
 871             bitmap_clear_bit (wont_exit, 0);
 872         }
 873       else
 874         {
 875           exit = NULL;
 876           bitmap_clear (wont_exit);
 877         }
 878
 879       if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 880                                                  n_unroll, wont_exit,
 881                                                  exit, &edges_to_remove,
 882                                                  DLTHE_FLAG_UPDATE_FREQ
 883                                                  | DLTHE_FLAG_COMPLETTE_PEEL))
 884         {
 885           free_original_copy_tables ();
 886           if (dump_file && (dump_flags & TDF_DETAILS))
 887             fprintf (dump_file, "Failed to duplicate the loop\n");
 888           return false;
 889         }
 890
 891       free_original_copy_tables ();
 892     }
 893
 894   /* Remove the conditional from the last copy of the loop.  */
 895   if (edge_to_cancel)
 896     {
 897       gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src));
 898       force_edge_cold (edge_to_cancel, true);
 899       if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
 900         gimple_cond_make_false (cond);
 901       else
 902         gimple_cond_make_true (cond);
 903       update_stmt (cond);
 904       /* Do not remove the path. Doing so may remove outer loop
 905          and confuse bookkeeping code in tree_unroll_loops_completelly.  */
 906     }
 907
 908   /* Store the loop for later unlooping and exit removal.  */
 909   loops_to_unloop.safe_push (loop);
 910   loops_to_unloop_nunroll.safe_push (n_unroll);
 911
 912   if (dump_enabled_p ())
 913     {
 914       if (!n_unroll)
 915         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
 916                          "loop turned into non-loop; it never loops\n");
 917       else
 918         {
 919           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
 920                            "loop with %d iterations completely unrolled",
 921                            (int) (n_unroll + 1));
 922           if (loop->header->count.initialized_p ())
 923             dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
 924                          " (header execution count %d)",
 925                          (int)loop->header->count.to_gcov_type ());
 926           dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
 927         }
 928     }
 929
 930   if (dump_file && (dump_flags & TDF_DETAILS))
 931     {
 932       if (exit)
 933         fprintf (dump_file, "Exit condition of peeled iterations was "
 934                  "eliminated.\n");
 935       if (edge_to_cancel)
 936         fprintf (dump_file, "Last iteration exit edge was proved true.\n");
 937       else
 938         fprintf (dump_file, "Latch of last iteration was marked by "
 939                  "__builtin_unreachable ().\n");
 940     }
 941
 942   return true;
 943 }
 944
 945 /* Return the estimated number of instructions in the peeled sequence.
        The estimate is NPEEL times the per-copy size, where the per-copy
        size is SIZE->overall minus SIZE->eliminated_by_peeling.  The
        result is clamped from below by MAX so that it is never less
        than 1.  */
 946 static unsigned HOST_WIDE_INT
 947 estimated_peeled_sequence_size (struct loop_size *size,
 948                                 unsigned HOST_WIDE_INT npeel)
 949 {
 950   return MAX (npeel * (HOST_WIDE_INT) (size->overall
 951                                        - size->eliminated_by_peeling), 1);
 952 }
 953
 954 /* If the loop is expected to iterate N times and is
 955    small enough, duplicate the loop body N+1 times before
 956    the loop itself.  This way the hot path will never
 957    enter the loop.
 958
        LOOP is the loop to peel.  EXIT and NITER are the exit edge and the
        iteration count supplied by the caller; EXIT is passed on to the
        duplication code only when NITER is an INTEGER_CST that is at least
        the number of peeled copies, otherwise it is dropped (set to NULL).
        MAXITER is an upper bound on the number of iterations; a negative
        value is treated as "no bound known".

        Nothing is done when peeling is disabled (flag_peel_loops is off,
        PARAM_MAX_PEEL_TIMES is not positive or the PEELED_LOOPS bitmap is
        not allocated), when LOOP was already peeled, is not innermost, is
        not optimized for speed, has no iteration estimate, could be unrolled
        completely instead, or would exceed the peeling limits.

        Returns true if the loop was peeled.  */
 959
 960 static bool
 961 try_peel_loop (struct loop *loop,
 962                edge exit, tree niter,
 963                HOST_WIDE_INT maxiter)
 964 {
 965   HOST_WIDE_INT npeel;
 966   struct loop_size size;
 967   int peeled_size;
 968
 969   if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0
 970       || !peeled_loops)
 971     return false;
 972
 973   if (bitmap_bit_p (peeled_loops, loop->num))
 974     {
 975       if (dump_file)
 976         fprintf (dump_file, "Not peeling: loop is already peeled\n");
 977       return false;
 978     }
 979
 980   /* Peel only innermost loops.
 981      While the code is perfectly capable of peeling non-innermost loops,
 982      the heuristics would probably need some improvements. */
 983   if (loop->inner)
 984     {
 985       if (dump_file)
 986         fprintf (dump_file, "Not peeling: outer loop\n");
 987       return false;
 988     }
 989
 990   if (!optimize_loop_for_speed_p (loop))
 991     {
 992       if (dump_file)
 993         fprintf (dump_file, "Not peeling: cold loop\n");
 994       return false;
 995     }
 996
 997   /* Check if there is an estimate on the number of iterations.  Prefer
          the recorded estimate and fall back to the likely upper bound.  */
 998   npeel = estimated_loop_iterations_int (loop);
 999   if (npeel < 0)
1000     npeel = likely_max_loop_iterations_int (loop);
1001   if (npeel < 0)
1002     {
1003       if (dump_file)
1004         fprintf (dump_file, "Not peeling: number of iterations is not "
1005                  "estimated\n");
1006       return false;
1007     }
       /* A known upper bound that does not exceed the estimate means the
          loop is a candidate for complete unrolling, not for peeling.  */
1008   if (maxiter >= 0 && maxiter <= npeel)
1009     {
1010       if (dump_file)
1011         fprintf (dump_file, "Not peeling: upper bound is known so can "
1012                  "unroll completely\n");
1013       return false;
1014     }
1015
1016   /* We want to peel estimated number of iterations + 1 (so we never
1017      enter the loop on quick path).  Check against PARAM_MAX_PEEL_TIMES
1018      and be sure to avoid overflows.  */
1019   if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
1020     {
1021       if (dump_file)
1022         fprintf (dump_file, "Not peeling: rolls too much "
1023                  "(%i + 1 > --param max-peel-times)\n", (int) npeel);
1024       return false;
1025     }
1026   npeel++;
1027
1028   /* Check peeled loops size.  */
1029   tree_estimate_loop_size (loop, exit, NULL, &size,
1030                            PARAM_VALUE (PARAM_MAX_PEELED_INSNS));
1031   if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
1032       > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
1033     {
1034       if (dump_file)
1035         fprintf (dump_file, "Not peeling: peeled sequence size is too large "
1036                  "(%i insns > --param max-peeled-insns)\n", peeled_size);
1037       return false;
1038     }
1039
1040   /* Duplicate possibly eliminating the exits.  */
1041   initialize_original_copy_tables ();
1042   auto_sbitmap wont_exit (npeel + 1);
1043   if (exit && niter
1044       && TREE_CODE (niter) == INTEGER_CST
1045       && wi::leu_p (npeel, wi::to_widest (niter)))
1046     {
           /* The loop is known to iterate at least NPEEL times, so EXIT is
              marked as not taken in every copy except the one for bit 0.
              NOTE(review): bit 0 presumably stands for the copy that stays
              the loop itself; confirm against
              gimple_duplicate_loop_to_header_edge.  */
1047       bitmap_ones (wont_exit);
1048       bitmap_clear_bit (wont_exit, 0);
1049     }
1050   else
1051     {
1052       exit = NULL;
1053       bitmap_clear (wont_exit);
1054     }
1055   if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1056                                              npeel, wont_exit,
1057                                              exit, &edges_to_remove,
1058                                              DLTHE_FLAG_UPDATE_FREQ))
1059     {
1060       free_original_copy_tables ();
1061       return false;
1062     }
1063   free_original_copy_tables ();
1064   if (dump_file && (dump_flags & TDF_DETAILS))
1065     {
1066       fprintf (dump_file, "Peeled loop %d, %i times.\n",
1067                loop->num, (int) npeel);
1068     }
       /* NPEEL iterations now run in front of the loop, so reduce each
          recorded iteration bound of the remaining loop by NPEEL, saturating
          at zero.  */
1069   if (loop->any_estimate)
1070     {
1071       if (wi::ltu_p (npeel, loop->nb_iterations_estimate))
1072         loop->nb_iterations_estimate -= npeel;
1073       else
1074         loop->nb_iterations_estimate = 0;
1075     }
1076   if (loop->any_upper_bound)
1077     {
1078       if (wi::ltu_p (npeel, loop->nb_iterations_upper_bound))
1079         loop->nb_iterations_upper_bound -= npeel;
1080       else
1081         loop->nb_iterations_upper_bound = 0;
1082     }
1083   if (loop->any_likely_upper_bound)
1084     {
1085       if (wi::ltu_p (npeel, loop->nb_iterations_likely_upper_bound))
1086         loop->nb_iterations_likely_upper_bound -= npeel;
1087       else
1088         {
               /* The likely bound is exhausted; also record an estimate of
                  zero iterations for the remaining loop.  */
1089           loop->any_estimate = true;
1090           loop->nb_iterations_estimate = 0;
1091           loop->nb_iterations_likely_upper_bound = 0;
1092         }
1093     }
1094   profile_count entry_count = profile_count::zero ();
1095   int entry_freq = 0;
1096
       /* Sum the profile of all blocks entering the header from outside the
          loop (every predecessor except the latch).  Both the count and the
          frequency must be accumulated over all such edges; assigning the
          count instead of adding it would keep only the last edge.  */
1097   edge e;
1098   edge_iterator ei;
1099   FOR_EACH_EDGE (e, ei, loop->header->preds)
1100     if (e->src != loop->latch)
1101       {
1102         if (e->src->count.initialized_p ())
1103           entry_count += e->src->count;
1104         entry_freq += e->src->frequency;
1105         gcc_assert (!flow_bb_inside_loop_p (loop, e->src));
1106       }
       /* Scale the remaining loop's profile by the ratio of its entry
          profile to its header profile.  Counts are preferred, frequencies
          are the fallback, and very_unlikely is used when neither is
          available.  */
1107   profile_probability p = profile_probability::very_unlikely ();
1108   if (loop->header->count > 0)
1109     p = entry_count.probability_in (loop->header->count);
1110   else if (loop->header->frequency)
1111     p = profile_probability::probability_in_gcov_type
1112                  (entry_freq, loop->header->frequency);
1113   scale_loop_profile (loop, p, 0);
       /* Remember the loop so that it is not peeled again.  */
1114   bitmap_set_bit (peeled_loops, loop->num);
1115   return true;
1116 }
1117 /* Adds a canonical induction variable to LOOP if suitable.
1118    CREATE_IV is true if we may create a new iv.  UL determines
1119    which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
1120    to determine the number of iterations of a loop by direct evaluation.
1121    Returns true if cfg is changed.

        The steps are, in order: determine the number of latch executions
        and the exit it is associated with, record a constant iteration count
        as a loop bound, remove IV tests made redundant by the bounds, try
        complete unrolling, optionally create the canonical IV and, for
        UL_ALL only, try loop peeling.  If complete unrolling succeeds the
        function returns true immediately and the later steps are skipped.  */
1122
1123 static bool
1124 canonicalize_loop_induction_variables (struct loop *loop,
1125                                        bool create_iv, enum unroll_level ul,
1126                                        bool try_eval)
1127 {
1128   edge exit = NULL;
1129   tree niter;
1130   HOST_WIDE_INT maxiter;
1131   bool modified = false;
1132   location_t locus = UNKNOWN_LOCATION;
1133
1134   niter = number_of_latch_executions (loop);
1135   exit = single_exit (loop);
       /* NOTE(review): EXIT is dereferenced here without a NULL check; this
          presumably relies on number_of_latch_executions yielding a constant
          only for loops with a single exit -- confirm.  */
1136   if (TREE_CODE (niter) == INTEGER_CST)
1137     locus = gimple_location (last_stmt (exit->src));
1138   else
1139     {
1140       /* If the loop has more than one exit, try checking all of them
1141          for # of iterations determinable through scev.  */
1142       if (!exit)
1143         niter = find_loop_niter (loop, &exit);
1144
1145       /* Finally if everything else fails, try brute force evaluation.  */
1146       if (try_eval
1147           && (chrec_contains_undetermined (niter)
1148               || TREE_CODE (niter) != INTEGER_CST))
1149         niter = find_loop_niter_by_eval (loop, &exit);
1150
1151       if (exit)
1152         locus = gimple_location (last_stmt (exit->src));
1153
           /* Without a constant iteration count the exit is not used by the
              transformations below.  */
1154       if (TREE_CODE (niter) != INTEGER_CST)
1155         exit = NULL;
1156     }
1157
1158   /* We work exceptionally hard here to estimate the bound
1159      by find_loop_niter_by_eval.  Be sure to keep it for future.  */
1160   if (niter && TREE_CODE (niter) == INTEGER_CST)
1161     {
1162       record_niter_bound (loop, wi::to_widest (niter),
1163                           exit == single_likely_exit (loop), true);
1164     }
1165
1166   /* Force re-computation of loop bounds so we can remove redundant exits.  */
1167   maxiter = max_loop_iterations_int (loop);
1168
       /* The three blocks below only write diagnostics to the detailed
          dump.  */
1169   if (dump_file && (dump_flags & TDF_DETAILS)
1170       && TREE_CODE (niter) == INTEGER_CST)
1171     {
1172       fprintf (dump_file, "Loop %d iterates ", loop->num);
1173       print_generic_expr (dump_file, niter, TDF_SLIM);
1174       fprintf (dump_file, " times.\n");
1175     }
1176   if (dump_file && (dump_flags & TDF_DETAILS)
1177       && maxiter >= 0)
1178     {
1179       fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
1180                (int)maxiter);
1181     }
1182   if (dump_file && (dump_flags & TDF_DETAILS)
1183       && likely_max_loop_iterations_int (loop) >= 0)
1184     {
1185       fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
1186                loop->num, (int)likely_max_loop_iterations_int (loop));
1187     }
1188
1189   /* Remove exits that are known to be never taken based on loop bound.
1190      Needs to be called after computation of max_loop_iterations_int that
1191      populates the loop bounds.  */
1192   modified |= remove_redundant_iv_tests (loop);
1193
1194   if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus))
1195     return true;
1196
       /* Create the canonical IV only when allowed, when the iteration count
          is determined, and when the exit test runs exactly once per
          iteration.  */
1197   if (create_iv
1198       && niter && !chrec_contains_undetermined (niter)
1199       && exit && just_once_each_iteration_p (loop, exit->src))
1200     create_canonical_iv (loop, exit, niter);
1201
1202   if (ul == UL_ALL)
1203     modified |= try_peel_loop (loop, exit, niter, maxiter);
1204
1205   return modified;
1206 }
1207
1208 /* The main entry point of the pass.  Adds canonical induction variables
1209    to the suitable loops.

        Every loop is visited from the innermost outwards and handed to
        canonicalize_loop_induction_variables with IV creation and brute
        force evaluation enabled and with the unroll level UL_SINGLE_ITER.
        Returns TODO_cleanup_cfg if any loop was changed and 0 otherwise.  */
1210
1211 unsigned int
1212 canonicalize_induction_variables (void)
1213 {
1214   struct loop *loop;
1215   bool changed = false;
1216   bool irred_invalidated = false;
1217   bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1218
1219   estimate_numbers_of_iterations (cfun);
1220
1221   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1222     {
1223       changed |= canonicalize_loop_induction_variables (loop,
1224                                                         true, UL_SINGLE_ITER,
1225                                                         true);
1226     }
1227   gcc_assert (!need_ssa_update_p (cfun));
1228
       /* Process the loops queued for unlooping; this reports whether loop
          closed SSA or irreducible region marking were invalidated.  */
1229   unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
1230   if (irred_invalidated
1231       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1232     mark_irreducible_loops ();
1233
1234   /* Clean up the information about numbers of iterations, since brute force
1235      evaluation could reveal new information.  */
1236   free_numbers_of_iterations_estimates (cfun);
1237   scev_reset ();
1238
1239   if (!bitmap_empty_p (loop_closed_ssa_invalidated))
1240     {
1241       gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
1242       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1243     }
1244   BITMAP_FREE (loop_closed_ssa_invalidated);
1245
1246   if (changed)
1247     return TODO_cleanup_cfg;
1248   return 0;
1249 }
1250
1251 /* Propagate constant SSA_NAMEs defined in basic block BB.

        Two kinds of definitions are handled: PHI nodes that have exactly one
        argument and whose argument is a constant, and assignments to an
        SSA name whose right-hand side code is of class tcc_constant.  In
        both cases all uses of the defined name are replaced by the constant,
        the defining statement is removed and the SSA name is released.
        Names that occur in abnormal PHIs are left untouched.  */
1252
1253 static void
1254 propagate_constants_for_unrolling (basic_block bb)
1255 {
1256   /* Look for degenerate PHI nodes with constant argument.  */
       /* The iterator is advanced explicitly only when nothing is removed.
          NOTE(review): presumably gsi_remove leaves GSI at the following
          PHI -- confirm.  */
1257   for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); )
1258     {
1259       gphi *phi = gsi.phi ();
1260       tree result = gimple_phi_result (phi);
1261       tree arg = gimple_phi_arg_def (phi, 0);
1262
1263       if (! SSA_NAME_OCCURS_IN_ABNORMAL_PHI (result)
1264           && gimple_phi_num_args (phi) == 1
1265           && CONSTANT_CLASS_P (arg))
1266         {
1267           replace_uses_by (result, arg);
1268           gsi_remove (&gsi, true);
1269           release_ssa_name (result);
1270         }
1271       else
1272         gsi_next (&gsi);
1273     }
1274
1275   /* Look for assignments to SSA names with constant RHS.  */
1276   for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
1277     {
1278       gimple *stmt = gsi_stmt (gsi);
1279       tree lhs;
1280
           /* LHS is assigned inside the condition (comma expression) so that
              it is only read once STMT is known to be an assignment.  */
1281       if (is_gimple_assign (stmt)
1282           && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_constant
1283           && (lhs = gimple_assign_lhs (stmt), TREE_CODE (lhs) == SSA_NAME)
1284           && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
1285         {
1286           replace_uses_by (lhs, gimple_assign_rhs1 (stmt));
1287           gsi_remove (&gsi, true);
1288           release_ssa_name (lhs);
1289         }
1290       else
1291         gsi_next (&gsi);
1292     }
1293 }
1294
1295 /* Process loops from innermost to outer, stopping at the innermost
1296    loop we unrolled.

        LOOP is the root of the loop subtree to process; its inner loops are
        handled recursively before LOOP itself.  MAY_INCREASE_SIZE and
        UNROLL_OUTER select the unroll level used for LOOP (UL_ALL or
        UL_NO_GROWTH).  When LOOP is changed and its father is not the
        outermost loop, the index of the father's header block is recorded
        in FATHER_BBS.  Returns true if LOOP or one of its inner loops was
        changed.  */
1297
1298 static bool
1299 tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
1300                                 bitmap father_bbs, struct loop *loop)
1301 {
1302   struct loop *loop_father;
1303   bool changed = false;
1304   struct loop *inner;
1305   enum unroll_level ul;
1306
1307   /* Process inner loops first.  */
1308   for (inner = loop->inner; inner != NULL; inner = inner->next)
1309     changed |= tree_unroll_loops_completely_1 (may_increase_size,
1310                                                unroll_outer, father_bbs,
1311                                                inner);
1312
1313   /* If we changed an inner loop we cannot process outer loops in this
1314      iteration because SSA form is not up-to-date.  Continue with
1315      siblings of outer loops instead.  */
1316   if (changed)
1317     return true;
1318
1319   /* Don't unroll #pragma omp simd loops until the vectorizer
1320      attempts to vectorize those.  */
1321   if (loop->force_vectorize)
1322     return false;
1323
1324   /* Try to unroll this loop.  A loop without an outer loop (the one the
          caller starts the walk from) is never unrolled itself.  */
1325   loop_father = loop_outer (loop);
1326   if (!loop_father)
1327     return false;
1328
1329   if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
1330       /* Unroll outermost loops only if asked to do so or they do
1331          not cause code growth.  */
1332       && (unroll_outer || loop_outer (loop_father)))
1333     ul = UL_ALL;
1334   else
1335     ul = UL_NO_GROWTH;
1336
       /* No canonical IV is created here; brute force evaluation of the
          iteration count is requested only when flag_tree_loop_ivcanon is
          off.  */
1337   if (canonicalize_loop_induction_variables
1338         (loop, false, ul, !flag_tree_loop_ivcanon))
1339     {
1340       /* If we'll continue unrolling, we need to propagate constants
1341          within the new basic blocks to fold away induction variable
1342          computations; otherwise, the size might blow up before the
1343          iteration is complete and the IR eventually cleaned up.  */
1344       if (loop_outer (loop_father))
1345         bitmap_set_bit (father_bbs, loop_father->header->index);
1346
1347       return true;
1348     }
1349
1350   return false;
1351 }
1352
1353 /* Unroll LOOPS completely if they iterate just few times.  Unless
1354    MAY_INCREASE_SIZE is true, perform the unrolling only if the
1355    size of the code does not increase.  UNROLL_OUTER is passed on to
        tree_unroll_loops_completely_1, where it permits code growing
        unrolling of loops whose father is the outermost loop.

        The loop tree is processed repeatedly, starting from the tree root,
        for as long as a round changes something and the number of rounds
        stays within PARAM_MAX_UNROLL_ITERATIONS.  After each changing round
        SSA form is updated, constants are propagated in the bodies of the
        affected father loops and the CFG is cleaned up.  Always returns
        0.  */
1356
1357 unsigned int
1358 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
1359 {
1360   bitmap father_bbs = BITMAP_ALLOC (NULL);
1361   bool changed;
1362   int iteration = 0;
1363   bool irred_invalidated = false;
1364
1365   estimate_numbers_of_iterations (cfun);
1366
1367   do
1368     {
1369       changed = false;
1370       bitmap loop_closed_ssa_invalidated = NULL;
1371
           /* Track invalidated loop closed SSA only when the function is in
              that form.  */
1372       if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
1373         loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1374
           /* Recompute the iteration estimates from scratch for each
              round.  */
1375       free_numbers_of_iterations_estimates (cfun);
1376       estimate_numbers_of_iterations (cfun);
1377
1378       changed = tree_unroll_loops_completely_1 (may_increase_size,
1379                                                 unroll_outer, father_bbs,
1380                                                 current_loops->tree_root);
1381       if (changed)
1382         {
1383           unsigned i;
1384
1385           unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
1386
1387           /* We can not use TODO_update_ssa_no_phi because VOPS gets confused.  */
1388           if (loop_closed_ssa_invalidated
1389               && !bitmap_empty_p (loop_closed_ssa_invalidated))
1390             rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
1391                                           TODO_update_ssa);
1392           else
1393             update_ssa (TODO_update_ssa);
1394
1395           /* father_bbs is a bitmap of loop father header BB indices.
1396              Translate that to what non-root loops these BBs belong to now.  */
1397           bitmap_iterator bi;
1398           bitmap fathers = BITMAP_ALLOC (NULL);
1399           EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi)
1400             {
1401               basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i);
                   /* Skip indices for which no basic block exists any
                      more.  */
1402               if (! unrolled_loop_bb)
1403                 continue;
1404               if (loop_outer (unrolled_loop_bb->loop_father))
1405                 bitmap_set_bit (fathers,
1406                                 unrolled_loop_bb->loop_father->num);
1407             }
1408           bitmap_clear (father_bbs);
1409           /* Propagate the constants within the new basic blocks.  */
1410           EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
1411             {
1412               loop_p father = get_loop (cfun, i);
1413               basic_block *body = get_loop_body_in_dom_order (father);
1414               for (unsigned j = 0; j < father->num_nodes; j++)
1415                 propagate_constants_for_unrolling (body[j]);
1416               free (body);
1417             }
1418           BITMAP_FREE (fathers);
1419
1420           /* This will take care of removing completely unrolled loops
1421              from the loop structures so we can continue unrolling now
1422              innermost loops.  */
1423           if (cleanup_tree_cfg ())
1424             update_ssa (TODO_update_ssa_only_virtuals);
1425
1426           /* Clean up the information about numbers of iterations, since
1427              complete unrolling might have invalidated it.  */
1428           scev_reset ();
1429           if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA))
1430             verify_loop_closed_ssa (true);
1431         }
1432       if (loop_closed_ssa_invalidated)
1433         BITMAP_FREE (loop_closed_ssa_invalidated);
1434     }
1435   while (changed
1436          && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));
1437
1438   BITMAP_FREE (father_bbs);
1439
       /* Irreducible region marking is recomputed once, after the last
          round.  */
1440   if (irred_invalidated
1441       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1442     mark_irreducible_loops ();
1443
1444   return 0;
1445 }
1446
1447 /* Canonical induction variable creation pass.  */
1448
1449 namespace {
1450
     /* Static description of the "ivcanon" pass.  */
1451 const pass_data pass_data_iv_canon =
1452 {
1453   GIMPLE_PASS, /* type */
1454   "ivcanon", /* name */
1455   OPTGROUP_LOOP, /* optinfo_flags */
1456   TV_TREE_LOOP_IVCANON, /* tv_id */
1457   ( PROP_cfg | PROP_ssa ), /* properties_required */
1458   0, /* properties_provided */
1459   0, /* properties_destroyed */
1460   0, /* todo_flags_start */
1461   0, /* todo_flags_finish */
1462 };
1463
     /* Pass object for "ivcanon".  The pass is gated on
        flag_tree_loop_ivcanon.  */
1464 class pass_iv_canon : public gimple_opt_pass
1465 {
1466 public:
1467   pass_iv_canon (gcc::context *ctxt)
1468     : gimple_opt_pass (pass_data_iv_canon, ctxt)
1469   {}
1470
1471   /* opt_pass methods: */
1472   virtual bool gate (function *) { return flag_tree_loop_ivcanon != 0; }
1473   virtual unsigned int execute (function *fun);
1474
1475 }; // class pass_iv_canon
1476
     /* Run canonicalize_induction_variables on FUN and return its TODO
        flags.  Nothing is done, and 0 is returned, when number_of_loops
        reports at most one loop.  */
1477 unsigned int
1478 pass_iv_canon::execute (function *fun)
1479 {
1480   if (number_of_loops (fun) <= 1)
1481     return 0;
1482
1483   return canonicalize_induction_variables ();
1484 }
1485
1486 } // anon namespace
1487
1488 gimple_opt_pass *
1489 make_pass_iv_canon (gcc::context *ctxt)
1490 {
       /* Factory for the "ivcanon" pass: returns a newly allocated
          pass_iv_canon constructed with CTXT.  */
1491   return new pass_iv_canon (ctxt);
1492 }
1493
1494 /* Complete unrolling of loops.  */
1495
1496 namespace {
1497
     /* Static description of the "cunroll" pass.  */
1498 const pass_data pass_data_complete_unroll =
1499 {
1500   GIMPLE_PASS, /* type */
1501   "cunroll", /* name */
1502   OPTGROUP_LOOP, /* optinfo_flags */
1503   TV_COMPLETE_UNROLL, /* tv_id */
1504   ( PROP_cfg | PROP_ssa ), /* properties_required */
1505   0, /* properties_provided */
1506   0, /* properties_destroyed */
1507   0, /* todo_flags_start */
1508   0, /* todo_flags_finish */
1509 };
1510
     /* Pass object for "cunroll".  It declares no gate of its own.  */
1511 class pass_complete_unroll : public gimple_opt_pass
1512 {
1513 public:
1514   pass_complete_unroll (gcc::context *ctxt)
1515     : gimple_opt_pass (pass_data_complete_unroll, ctxt)
1516   {}
1517
1518   /* opt_pass methods: */
1519   virtual unsigned int execute (function *);
1520
1521 }; // class pass_complete_unroll
1522
     /* Completely unroll the loops of FUN, including loops whose father is
        the outermost loop (UNROLL_OUTER is true).  Code growth is allowed
        when flag_unroll_loops or flag_peel_loops is set or when optimize is
        at least 3.  Returns the value of tree_unroll_loops_completely, or 0
        when number_of_loops reports at most one loop.  */
1523 unsigned int
1524 pass_complete_unroll::execute (function *fun)
1525 {
1526   if (number_of_loops (fun) <= 1)
1527     return 0;
1528
1529   /* If we ever decide to run loop peeling more than once, we will need to
1530      track loops already peeled in loop structures themselves to avoid
1531      re-peeling the same loop multiple times.  */
1532   if (flag_peel_loops)
1533     peeled_loops = BITMAP_ALLOC (NULL);
1534   int val = tree_unroll_loops_completely (flag_unroll_loops
1535                                           || flag_peel_loops
1536                                           || optimize >= 3, true);
       /* Release the bitmap and reset the pointer so that peeling stays
          disabled outside of this pass.  */
1537   if (peeled_loops)
1538     {
1539       BITMAP_FREE (peeled_loops);
1540       peeled_loops = NULL;
1541     }
1542   return val;
1543 }
1544
1545 } // anon namespace
1546
1547 gimple_opt_pass *
1548 make_pass_complete_unroll (gcc::context *ctxt)
1549 {
       /* Factory for the "cunroll" pass: returns a newly allocated
          pass_complete_unroll constructed with CTXT.  */
1550   return new pass_complete_unroll (ctxt);
1551 }
1552
1553 /* Complete unrolling of inner loops.  */
1554
1555 namespace {
1556
     /* Static description of the "cunrolli" pass.  */
1557 const pass_data pass_data_complete_unrolli =
1558 {
1559   GIMPLE_PASS, /* type */
1560   "cunrolli", /* name */
1561   OPTGROUP_LOOP, /* optinfo_flags */
1562   TV_COMPLETE_UNROLL, /* tv_id */
1563   ( PROP_cfg | PROP_ssa ), /* properties_required */
1564   0, /* properties_provided */
1565   0, /* properties_destroyed */
1566   0, /* todo_flags_start */
1567   0, /* todo_flags_finish */
1568 };
1569
     /* Pass object for "cunrolli".  The pass runs only when optimize is at
        least 2.  */
1570 class pass_complete_unrolli : public gimple_opt_pass
1571 {
1572 public:
1573   pass_complete_unrolli (gcc::context *ctxt)
1574     : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
1575   {}
1576
1577   /* opt_pass methods: */
1578   virtual bool gate (function *) { return optimize >= 2; }
1579   virtual unsigned int execute (function *);
1580
1581 }; // class pass_complete_unrolli
1582
     /* Completely unroll the loops of FUN without unrolling loops whose
        father is the outermost loop when that would grow the code
        (UNROLL_OUTER is false).  Code growth is allowed only when optimize
        is at least 3.  This pass sets up and tears down the loop optimizer
        and scalar evolution analysis itself.  Returns the value of
        tree_unroll_loops_completely, or 0 when there is nothing to do.  */
1583 unsigned int
1584 pass_complete_unrolli::execute (function *fun)
1585 {
1586   unsigned ret = 0;
1587
1588   loop_optimizer_init (LOOPS_NORMAL
1589                        | LOOPS_HAVE_RECORDED_EXITS);
1590   if (number_of_loops (fun) > 1)
1591     {
1592       scev_initialize ();
1593       ret = tree_unroll_loops_completely (optimize >= 3, false);
1594       scev_finalize ();
1595     }
1596   loop_optimizer_finalize ();
1597
1598   return ret;
1599 }
1600
1601 } // anon namespace
1602
1603 gimple_opt_pass *
1604 make_pass_complete_unrolli (gcc::context *ctxt)
1605 {
       /* Factory for the "cunrolli" pass: returns a newly allocated
          pass_complete_unrolli constructed with CTXT.  */
1606   return new pass_complete_unrolli (ctxt);
1607 }
1608
1609