gcc/tree-ssa-loop-ivcanon.c

   1 /* Induction variable canonicalization and loop peeling.
   2    Copyright (C) 2004-2019 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass detects the loops that iterate a constant number of times,
  21    adds a canonical induction variable (step -1, tested against 0)
  22    and replaces the exit test.  This enables the less powerful rtl
  23    level analysis to use this information.
  24
  25    This might spoil the code in some cases (by increasing register pressure).
  26    Note that in the case the new variable is not needed, ivopts will get rid
  27    of it, so it might only be a problem when there are no other linear induction
  28    variables.  In that case the created optimization possibilities are likely
  29    to pay up.
  30
  31    We also perform
  32      - complete unrolling (or peeling) when the loop rolls few enough
  33        times
  34      - simple peeling (i.e. copying a few initial iterations before the loop)
  35        when an estimate of the number of iterations is known (typically from
  36        the profile info).  */
  37
  38 #include "config.h"
  39 #include "system.h"
  40 #include "coretypes.h"
  41 #include "backend.h"
  42 #include "tree.h"
  43 #include "gimple.h"
  44 #include "cfghooks.h"
  45 #include "tree-pass.h"
  46 #include "ssa.h"
  47 #include "cgraph.h"
  48 #include "gimple-pretty-print.h"
  49 #include "fold-const.h"
  50 #include "profile.h"
  51 #include "gimple-fold.h"
  52 #include "tree-eh.h"
  53 #include "gimple-iterator.h"
  54 #include "tree-cfg.h"
  55 #include "tree-ssa-loop-manip.h"
  56 #include "tree-ssa-loop-niter.h"
  57 #include "tree-ssa-loop.h"
  58 #include "tree-into-ssa.h"
  59 #include "cfgloop.h"
  60 #include "tree-chrec.h"
  61 #include "tree-scalar-evolution.h"
  62 #include "tree-inline.h"
  63 #include "tree-cfgcleanup.h"
  64 #include "builtins.h"
  65 #include "tree-ssa-sccvn.h"
  66 #include "dbgcnt.h"
  67
  68 /* Specifies types of loops that may be unrolled.  */
  69
  70 enum unroll_level
  71 {
  72   UL_SINGLE_ITER,       /* Only loops that exit immediately in the first
  73                            iteration.  */
  74   UL_NO_GROWTH,         /* Only loops whose unrolling will not cause increase
  75                            of code size.  */
  76   UL_ALL                /* All suitable loops.  */
  77 };
  78
  79 /* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
  80    is the exit edge whose condition is replaced.  The ssa versions of the new
  81    IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER
  82    if they are not NULL.  */
  83
  84 void
  85 create_canonical_iv (class loop *loop, edge exit, tree niter,
  86                      tree *var_before = NULL, tree *var_after = NULL)
  87 {
  88   edge in;
  89   tree type, var;
  90   gcond *cond;
  91   gimple_stmt_iterator incr_at;
  92   enum tree_code cmp;
  93
  94   if (dump_file && (dump_flags & TDF_DETAILS))
  95     {
  96       fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
  97       print_generic_expr (dump_file, niter, TDF_SLIM);
  98       fprintf (dump_file, " iterations.\n");
  99     }
 100
 101   cond = as_a <gcond *> (last_stmt (exit->src));
 102   in = EDGE_SUCC (exit->src, 0);
 103   if (in == exit)
 104     in = EDGE_SUCC (exit->src, 1);
 105
 106   /* Note that we do not need to worry about overflows, since
 107      type of niter is always unsigned and all comparisons are
 108      just for equality/nonequality -- i.e. everything works
 109      with a modulo arithmetics.  */
 110
 111   type = TREE_TYPE (niter);
 112   niter = fold_build2 (PLUS_EXPR, type,
 113                        niter,
 114                        build_int_cst (type, 1));
 115   incr_at = gsi_last_bb (in->src);
 116   create_iv (niter,
 117              build_int_cst (type, -1),
 118              NULL_TREE, loop,
 119              &incr_at, false, var_before, &var);
 120   if (var_after)
 121     *var_after = var;
 122
 123   cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
 124   gimple_cond_set_code (cond, cmp);
 125   gimple_cond_set_lhs (cond, var);
 126   gimple_cond_set_rhs (cond, build_int_cst (type, 0));
 127   update_stmt (cond);
 128 }
 129
 130 /* Describe size of loop as detected by tree_estimate_loop_size.  */
 131 struct loop_size
 132 {
 133   /* Number of instructions in the loop.  */
 134   int overall;
 135
 136   /* Number of instructions that will be likely optimized out in
 137      peeled iterations of loop  (i.e. computation based on induction
 138      variable where induction variable starts at known constant.)  */
 139   int eliminated_by_peeling;
 140
 141   /* Same statistics for last iteration of loop: it is smaller because
 142      instructions after exit are not executed.  */
 143   int last_iteration;
 144   int last_iteration_eliminated_by_peeling;
 145
 146   /* If some IV computation will become constant.  */
 147   bool constant_iv;
 148
 149   /* Number of call stmts that are not a builtin and are pure or const
 150      present on the hot path.  */
 151   int num_pure_calls_on_hot_path;
 152   /* Number of call stmts that are not a builtin and are not pure nor const
 153      present on the hot path.  */
 154   int num_non_pure_calls_on_hot_path;
 155   /* Number of statements other than calls in the loop.  */
 156   int non_call_stmts_on_hot_path;
 157   /* Number of branches seen on the hot path.  */
 158   int num_branches_on_hot_path;
 159 };
 160
 161 /* Return true if OP in STMT will be constant after peeling LOOP.  */
 162
 163 static bool
 164 constant_after_peeling (tree op, gimple *stmt, class loop *loop)
 165 {
 166   if (CONSTANT_CLASS_P (op))
 167     return true;
 168
 169   /* We can still fold accesses to constant arrays when index is known.  */
 170   if (TREE_CODE (op) != SSA_NAME)
 171     {
 172       tree base = op;
 173
 174       /* First make fast look if we see constant array inside.  */
 175       while (handled_component_p (base))
 176         base = TREE_OPERAND (base, 0);
 177       if ((DECL_P (base)
 178            && ctor_for_folding (base) != error_mark_node)
 179           || CONSTANT_CLASS_P (base))
 180         {
 181           /* If so, see if we understand all the indices.  */
 182           base = op;
 183           while (handled_component_p (base))
 184             {
 185               if (TREE_CODE (base) == ARRAY_REF
 186                   && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
 187                 return false;
 188               base = TREE_OPERAND (base, 0);
 189             }
 190           return true;
 191         }
 192       return false;
 193     }
 194
 195   /* Induction variables are constants when defined in loop.  */
 196   if (loop_containing_stmt (stmt) != loop)
 197     return false;
 198   tree ev = analyze_scalar_evolution (loop, op);
 199   if (chrec_contains_undetermined (ev)
 200       || chrec_contains_symbols (ev))
 201     return false;
 202   return true;
 203 }
 204
 205 /* Computes an estimated number of insns in LOOP.
 206    EXIT (if non-NULL) is an exite edge that will be eliminated in all but last
 207    iteration of the loop.
 208    EDGE_TO_CANCEL (if non-NULL) is an non-exit edge eliminated in the last iteration
 209    of loop.
 210    Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.
 211    Stop estimating after UPPER_BOUND is met.  Return true in this case.  */
 212
 213 static bool
 214 tree_estimate_loop_size (class loop *loop, edge exit, edge edge_to_cancel,
 215                          struct loop_size *size, int upper_bound)
 216 {
 217   basic_block *body = get_loop_body (loop);
 218   gimple_stmt_iterator gsi;
 219   unsigned int i;
 220   bool after_exit;
 221   vec<basic_block> path = get_loop_hot_path (loop);
 222
 223   size->overall = 0;
 224   size->eliminated_by_peeling = 0;
 225   size->last_iteration = 0;
 226   size->last_iteration_eliminated_by_peeling = 0;
 227   size->num_pure_calls_on_hot_path = 0;
 228   size->num_non_pure_calls_on_hot_path = 0;
 229   size->non_call_stmts_on_hot_path = 0;
 230   size->num_branches_on_hot_path = 0;
 231   size->constant_iv = 0;
 232
 233   if (dump_file && (dump_flags & TDF_DETAILS))
 234     fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
 235   for (i = 0; i < loop->num_nodes; i++)
 236     {
 237       if (edge_to_cancel && body[i] != edge_to_cancel->src
 238           && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
 239         after_exit = true;
 240       else
 241         after_exit = false;
 242       if (dump_file && (dump_flags & TDF_DETAILS))
 243         fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
 244                  after_exit);
 245
 246       for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
 247         {
 248           gimple *stmt = gsi_stmt (gsi);
 249           int num = estimate_num_insns (stmt, &eni_size_weights);
 250           bool likely_eliminated = false;
 251           bool likely_eliminated_last = false;
 252           bool likely_eliminated_peeled = false;
 253
 254           if (dump_file && (dump_flags & TDF_DETAILS))
 255             {
 256               fprintf (dump_file, "  size: %3i ", num);
 257               print_gimple_stmt (dump_file, gsi_stmt (gsi), 0);
 258             }
 259
 260           /* Look for reasons why we might optimize this stmt away. */
 261
 262           if (!gimple_has_side_effects (stmt))
 263             {
 264               /* Exit conditional.  */
 265               if (exit && body[i] == exit->src
 266                   && stmt == last_stmt (exit->src))
 267                 {
 268                   if (dump_file && (dump_flags & TDF_DETAILS))
 269                     fprintf (dump_file, "   Exit condition will be eliminated "
 270                              "in peeled copies.\n");
 271                   likely_eliminated_peeled = true;
 272                 }
 273               if (edge_to_cancel && body[i] == edge_to_cancel->src
 274                   && stmt == last_stmt (edge_to_cancel->src))
 275                 {
 276                   if (dump_file && (dump_flags & TDF_DETAILS))
 277                     fprintf (dump_file, "   Exit condition will be eliminated "
 278                              "in last copy.\n");
 279                   likely_eliminated_last = true;
 280                 }
 281               /* Sets of IV variables  */
 282               if (gimple_code (stmt) == GIMPLE_ASSIGN
 283                   && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
 284                 {
 285                   if (dump_file && (dump_flags & TDF_DETAILS))
 286                     fprintf (dump_file, "   Induction variable computation will"
 287                              " be folded away.\n");
 288                   likely_eliminated = true;
 289                 }
 290               /* Assignments of IV variables.  */
 291               else if (gimple_code (stmt) == GIMPLE_ASSIGN
 292                        && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
 293                        && constant_after_peeling (gimple_assign_rhs1 (stmt),
 294                                                   stmt, loop)
 295                        && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
 296                            || constant_after_peeling (gimple_assign_rhs2 (stmt),
 297                                                       stmt, loop))
 298                        && gimple_assign_rhs_class (stmt) != GIMPLE_TERNARY_RHS)
 299                 {
 300                   size->constant_iv = true;
 301                   if (dump_file && (dump_flags & TDF_DETAILS))
 302                     fprintf (dump_file,
 303                              "   Constant expression will be folded away.\n");
 304                   likely_eliminated = true;
 305                 }
 306               /* Conditionals.  */
 307               else if ((gimple_code (stmt) == GIMPLE_COND
 308                         && constant_after_peeling (gimple_cond_lhs (stmt), stmt,
 309                                                    loop)
 310                         && constant_after_peeling (gimple_cond_rhs (stmt), stmt,
 311                                                    loop)
 312                         /* We don't simplify all constant compares so make sure
 313                            they are not both constant already.  See PR70288.  */
 314                         && (! is_gimple_min_invariant (gimple_cond_lhs (stmt))
 315                             || ! is_gimple_min_invariant
 316                                  (gimple_cond_rhs (stmt))))
 317                        || (gimple_code (stmt) == GIMPLE_SWITCH
 318                            && constant_after_peeling (gimple_switch_index (
 319                                                         as_a <gswitch *>
 320                                                           (stmt)),
 321                                                       stmt, loop)
 322                            && ! is_gimple_min_invariant
 323                                    (gimple_switch_index
 324                                       (as_a <gswitch *> (stmt)))))
 325                 {
 326                   if (dump_file && (dump_flags & TDF_DETAILS))
 327                     fprintf (dump_file, "   Constant conditional.\n");
 328                   likely_eliminated = true;
 329                 }
 330             }
 331
 332           size->overall += num;
 333           if (likely_eliminated || likely_eliminated_peeled)
 334             size->eliminated_by_peeling += num;
 335           if (!after_exit)
 336             {
 337               size->last_iteration += num;
 338               if (likely_eliminated || likely_eliminated_last)
 339                 size->last_iteration_eliminated_by_peeling += num;
 340             }
 341           if ((size->overall * 3 / 2 - size->eliminated_by_peeling
 342               - size->last_iteration_eliminated_by_peeling) > upper_bound)
 343             {
 344               free (body);
 345               path.release ();
 346               return true;
 347             }
 348         }
 349     }
 350   while (path.length ())
 351     {
 352       basic_block bb = path.pop ();
 353       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 354         {
 355           gimple *stmt = gsi_stmt (gsi);
 356           if (gimple_code (stmt) == GIMPLE_CALL
 357               && !gimple_inexpensive_call_p (as_a <gcall *>  (stmt)))
 358             {
 359               int flags = gimple_call_flags (stmt);
 360               if (flags & (ECF_PURE | ECF_CONST))
 361                 size->num_pure_calls_on_hot_path++;
 362               else
 363                 size->num_non_pure_calls_on_hot_path++;
 364               size->num_branches_on_hot_path ++;
 365             }
 366           /* Count inexpensive calls as non-calls, because they will likely
 367              expand inline.  */
 368           else if (gimple_code (stmt) != GIMPLE_DEBUG)
 369             size->non_call_stmts_on_hot_path++;
 370           if (((gimple_code (stmt) == GIMPLE_COND
 371                 && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
 372                     || !constant_after_peeling (gimple_cond_rhs (stmt), stmt,
 373                                                 loop)))
 374                || (gimple_code (stmt) == GIMPLE_SWITCH
 375                    && !constant_after_peeling (gimple_switch_index (
 376                                                  as_a <gswitch *> (stmt)),
 377                                                stmt, loop)))
 378               && (!exit || bb != exit->src))
 379             size->num_branches_on_hot_path++;
 380         }
 381     }
 382   path.release ();
 383   if (dump_file && (dump_flags & TDF_DETAILS))
 384     fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
 385              size->eliminated_by_peeling, size->last_iteration,
 386              size->last_iteration_eliminated_by_peeling);
 387
 388   free (body);
 389   return false;
 390 }
 391
 392 /* Estimate number of insns of completely unrolled loop.
 393    It is (NUNROLL + 1) * size of loop body with taking into account
 394    the fact that in last copy everything after exit conditional
 395    is dead and that some instructions will be eliminated after
 396    peeling.
 397
 398    Loop body is likely going to simplify further, this is difficult
 399    to guess, we just decrease the result by 1/3.  */
 400
 401 static unsigned HOST_WIDE_INT
 402 estimated_unrolled_size (struct loop_size *size,
 403                          unsigned HOST_WIDE_INT nunroll)
 404 {
 405   HOST_WIDE_INT unr_insns = ((nunroll)
 406                              * (HOST_WIDE_INT) (size->overall
 407                                                 - size->eliminated_by_peeling));
 408   if (!nunroll)
 409     unr_insns = 0;
 410   unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
 411
 412   unr_insns = unr_insns * 2 / 3;
 413   if (unr_insns <= 0)
 414     unr_insns = 1;
 415
 416   return unr_insns;
 417 }
 418
 419 /* Loop LOOP is known to not loop.  See if there is an edge in the loop
 420    body that can be remove to make the loop to always exit and at
 421    the same time it does not make any code potentially executed
 422    during the last iteration dead.
 423
 424    After complete unrolling we still may get rid of the conditional
 425    on the exit in the last copy even if we have no idea what it does.
 426    This is quite common case for loops of form
 427
 428      int a[5];
 429      for (i=0;i<b;i++)
 430        a[i]=0;
 431
 432    Here we prove the loop to iterate 5 times but we do not know
 433    it from induction variable.
 434
 435    For now we handle only simple case where there is exit condition
 436    just before the latch block and the latch block contains no statements
 437    with side effect that may otherwise terminate the execution of loop
 438    (such as by EH or by terminating the program or longjmp).
 439
 440    In the general case we may want to cancel the paths leading to statements
 441    loop-niter identified as having undefined effect in the last iteration.
 442    The other cases are hopefully rare and will be cleaned up later.  */
 443
 444 static edge
 445 loop_edge_to_cancel (class loop *loop)
 446 {
 447   vec<edge> exits;
 448   unsigned i;
 449   edge edge_to_cancel;
 450   gimple_stmt_iterator gsi;
 451
 452   /* We want only one predecestor of the loop.  */
 453   if (EDGE_COUNT (loop->latch->preds) > 1)
 454     return NULL;
 455
 456   exits = get_loop_exit_edges (loop);
 457
 458   FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
 459     {
 460        /* Find the other edge than the loop exit
 461           leaving the conditoinal.  */
 462        if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
 463          continue;
 464        if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
 465          edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
 466        else
 467          edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);
 468
 469       /* We only can handle conditionals.  */
 470       if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
 471         continue;
 472
 473       /* We should never have conditionals in the loop latch. */
 474       gcc_assert (edge_to_cancel->dest != loop->header);
 475
 476       /* Check that it leads to loop latch.  */
 477       if (edge_to_cancel->dest != loop->latch)
 478         continue;
 479
 480       exits.release ();
 481
 482       /* Verify that the code in loop latch does nothing that may end program
 483          execution without really reaching the exit.  This may include
 484          non-pure/const function calls, EH statements, volatile ASMs etc.  */
 485       for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
 486         if (gimple_has_side_effects (gsi_stmt (gsi)))
 487            return NULL;
 488       return edge_to_cancel;
 489     }
 490   exits.release ();
 491   return NULL;
 492 }
 493
 494 /* Remove all tests for exits that are known to be taken after LOOP was
 495    peeled NPEELED times. Put gcc_unreachable before every statement
 496    known to not be executed.  */
 497
 498 static bool
 499 remove_exits_and_undefined_stmts (class loop *loop, unsigned int npeeled)
 500 {
 501   class nb_iter_bound *elt;
 502   bool changed = false;
 503
 504   for (elt = loop->bounds; elt; elt = elt->next)
 505     {
 506       /* If statement is known to be undefined after peeling, turn it
 507          into unreachable (or trap when debugging experience is supposed
 508          to be good).  */
 509       if (!elt->is_exit
 510           && wi::ltu_p (elt->bound, npeeled))
 511         {
 512           gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
 513           gcall *stmt = gimple_build_call
 514               (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
 515           gimple_set_location (stmt, gimple_location (elt->stmt));
 516           gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
 517           split_block (gimple_bb (stmt), stmt);
 518           changed = true;
 519           if (dump_file && (dump_flags & TDF_DETAILS))
 520             {
 521               fprintf (dump_file, "Forced statement unreachable: ");
 522               print_gimple_stmt (dump_file, elt->stmt, 0);
 523             }
 524         }
 525       /* If we know the exit will be taken after peeling, update.  */
 526       else if (elt->is_exit
 527                && wi::leu_p (elt->bound, npeeled))
 528         {
 529           basic_block bb = gimple_bb (elt->stmt);
 530           edge exit_edge = EDGE_SUCC (bb, 0);
 531
 532           if (dump_file && (dump_flags & TDF_DETAILS))
 533             {
 534               fprintf (dump_file, "Forced exit to be taken: ");
 535               print_gimple_stmt (dump_file, elt->stmt, 0);
 536             }
 537           if (!loop_exit_edge_p (loop, exit_edge))
 538             exit_edge = EDGE_SUCC (bb, 1);
 539           exit_edge->probability = profile_probability::always ();
 540           gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
 541           gcond *cond_stmt = as_a <gcond *> (elt->stmt);
 542           if (exit_edge->flags & EDGE_TRUE_VALUE)
 543             gimple_cond_make_true (cond_stmt);
 544           else
 545             gimple_cond_make_false (cond_stmt);
 546           update_stmt (cond_stmt);
 547           changed = true;
 548         }
 549     }
 550   return changed;
 551 }
 552
 553 /* Remove all exits that are known to be never taken because of the loop bound
 554    discovered.  */
 555
 556 static bool
 557 remove_redundant_iv_tests (class loop *loop)
 558 {
 559   class nb_iter_bound *elt;
 560   bool changed = false;
 561
 562   if (!loop->any_upper_bound)
 563     return false;
 564   for (elt = loop->bounds; elt; elt = elt->next)
 565     {
 566       /* Exit is pointless if it won't be taken before loop reaches
 567          upper bound.  */
 568       if (elt->is_exit && loop->any_upper_bound
 569           && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
 570         {
 571           basic_block bb = gimple_bb (elt->stmt);
 572           edge exit_edge = EDGE_SUCC (bb, 0);
 573           class tree_niter_desc niter;
 574
 575           if (!loop_exit_edge_p (loop, exit_edge))
 576             exit_edge = EDGE_SUCC (bb, 1);
 577
 578           /* Only when we know the actual number of iterations, not
 579              just a bound, we can remove the exit.  */
 580           if (!number_of_iterations_exit (loop, exit_edge,
 581                                           &niter, false, false)
 582               || !integer_onep (niter.assumptions)
 583               || !integer_zerop (niter.may_be_zero)
 584               || !niter.niter
 585               || TREE_CODE (niter.niter) != INTEGER_CST
 586               || !wi::ltu_p (loop->nb_iterations_upper_bound,
 587                              wi::to_widest (niter.niter)))
 588             continue;
 589
 590           if (dump_file && (dump_flags & TDF_DETAILS))
 591             {
 592               fprintf (dump_file, "Removed pointless exit: ");
 593               print_gimple_stmt (dump_file, elt->stmt, 0);
 594             }
 595           gcond *cond_stmt = as_a <gcond *> (elt->stmt);
 596           if (exit_edge->flags & EDGE_TRUE_VALUE)
 597             gimple_cond_make_false (cond_stmt);
 598           else
 599             gimple_cond_make_true (cond_stmt);
 600           update_stmt (cond_stmt);
 601           changed = true;
 602         }
 603     }
 604   return changed;
 605 }
 606
 607 /* Stores loops that will be unlooped and edges that will be removed
 608    after we process whole loop tree. */
 609 static vec<loop_p> loops_to_unloop;
 610 static vec<int> loops_to_unloop_nunroll;
 611 static vec<edge> edges_to_remove;
 612 /* Stores loops that has been peeled.  */
 613 static bitmap peeled_loops;
 614
 615 /* Cancel all fully unrolled loops by putting __builtin_unreachable
 616    on the latch edge.
 617    We do it after all unrolling since unlooping moves basic blocks
 618    across loop boundaries trashing loop closed SSA form as well
 619    as SCEV info needed to be intact during unrolling.
 620
 621    IRRED_INVALIDATED is used to bookkeep if information about
 622    irreducible regions may become invalid as a result
 623    of the transformation.
 624    LOOP_CLOSED_SSA_INVALIDATED is used to bookkepp the case
 625    when we need to go into loop closed SSA form.  */
 626
 627 static void
 628 unloop_loops (bitmap loop_closed_ssa_invalidated,
 629               bool *irred_invalidated)
 630 {
 631   while (loops_to_unloop.length ())
 632     {
 633       class loop *loop = loops_to_unloop.pop ();
 634       int n_unroll = loops_to_unloop_nunroll.pop ();
 635       basic_block latch = loop->latch;
 636       edge latch_edge = loop_latch_edge (loop);
 637       int flags = latch_edge->flags;
 638       location_t locus = latch_edge->goto_locus;
 639       gcall *stmt;
 640       gimple_stmt_iterator gsi;
 641
 642       remove_exits_and_undefined_stmts (loop, n_unroll);
 643
 644       /* Unloop destroys the latch edge.  */
 645       unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);
 646
 647       /* Create new basic block for the latch edge destination and wire
 648          it in.  */
 649       stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
 650       latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
 651       latch_edge->probability = profile_probability::never ();
 652       latch_edge->flags |= flags;
 653       latch_edge->goto_locus = locus;
 654
 655       add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
 656       latch_edge->dest->count = profile_count::zero ();
 657       set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);
 658
 659       gsi = gsi_start_bb (latch_edge->dest);
 660       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
 661     }
 662   loops_to_unloop.release ();
 663   loops_to_unloop_nunroll.release ();
 664
 665   /* Remove edges in peeled copies.  Given remove_path removes dominated
 666      regions we need to cope with removal of already removed paths.  */
 667   unsigned i;
 668   edge e;
 669   auto_vec<int, 20> src_bbs;
 670   src_bbs.reserve_exact (edges_to_remove.length ());
 671   FOR_EACH_VEC_ELT (edges_to_remove, i, e)
 672     src_bbs.quick_push (e->src->index);
 673   FOR_EACH_VEC_ELT (edges_to_remove, i, e)
 674     if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i]))
 675       {
 676         bool ok = remove_path (e, irred_invalidated,
 677                                loop_closed_ssa_invalidated);
 678         gcc_assert (ok);
 679       }
 680   edges_to_remove.release ();
 681 }
 682
 683 /* Tries to unroll LOOP completely, i.e. NITER times.
 684    UL determines which loops we are allowed to unroll.
 685    EXIT is the exit of the loop that should be eliminated.
  686    MAXITER specifies a bound on the number of iterations, -1 if it is
 687    not known or too large for HOST_WIDE_INT.  The location
 688    LOCUS corresponding to the loop is used when emitting
 689    a summary of the unroll to the dump file.  */
 690
 691 static bool
 692 try_unroll_loop_completely (class loop *loop,
 693                             edge exit, tree niter, bool may_be_zero,
 694                             enum unroll_level ul,
 695                             HOST_WIDE_INT maxiter,
 696                             dump_user_location_t locus, bool allow_peel)
 697 {
 698   unsigned HOST_WIDE_INT n_unroll = 0;
 699   bool n_unroll_found = false;
 700   edge edge_to_cancel = NULL;
 701
 702   /* See if we proved number of iterations to be low constant.
 703
 704      EXIT is an edge that will be removed in all but last iteration of
 705      the loop.
 706
 707      EDGE_TO_CACNEL is an edge that will be removed from the last iteration
 708      of the unrolled sequence and is expected to make the final loop not
 709      rolling.
 710
 711      If the number of execution of loop is determined by standard induction
 712      variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving
 713      from the iv test.  */
 714   if (tree_fits_uhwi_p (niter))
 715     {
 716       n_unroll = tree_to_uhwi (niter);
 717       n_unroll_found = true;
 718       edge_to_cancel = EDGE_SUCC (exit->src, 0);
 719       if (edge_to_cancel == exit)
 720         edge_to_cancel = EDGE_SUCC (exit->src, 1);
 721     }
 722   /* We do not know the number of iterations and thus we cannot eliminate
 723      the EXIT edge.  */
 724   else
 725     exit = NULL;
 726
 727   /* See if we can improve our estimate by using recorded loop bounds.  */
 728   if ((allow_peel || maxiter == 0 || ul == UL_NO_GROWTH)
 729       && maxiter >= 0
 730       && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
 731     {
 732       n_unroll = maxiter;
 733       n_unroll_found = true;
 734       /* Loop terminates before the IV variable test, so we cannot
 735          remove it in the last iteration.  */
 736       edge_to_cancel = NULL;
 737     }
 738
 739   if (!n_unroll_found)
 740     return false;
 741
 742   if (!loop->unroll
 743       && n_unroll > (unsigned) param_max_completely_peel_times)
 744     {
 745       if (dump_file && (dump_flags & TDF_DETAILS))
 746         fprintf (dump_file, "Not unrolling loop %d "
 747                  "(--param max-completely-peel-times limit reached).\n",
 748                  loop->num);
 749       return false;
 750     }
 751
 752   if (!edge_to_cancel)
 753     edge_to_cancel = loop_edge_to_cancel (loop);
 754
 755   if (n_unroll)
 756     {
 757       if (ul == UL_SINGLE_ITER)
 758         return false;
 759
 760       if (loop->unroll)
 761         {
 762           /* If the unrolling factor is too large, bail out.  */
 763           if (n_unroll > (unsigned)loop->unroll)
 764             {
 765               if (dump_file && (dump_flags & TDF_DETAILS))
 766                 fprintf (dump_file,
 767                          "Not unrolling loop %d: "
 768                          "user didn't want it unrolled completely.\n",
 769                          loop->num);
 770               return false;
 771             }
 772         }
 773       else
 774         {
 775           struct loop_size size;
 776           /* EXIT can be removed only if we are sure it passes first N_UNROLL
 777              iterations.  */
 778           bool remove_exit = (exit && niter
 779                               && TREE_CODE (niter) == INTEGER_CST
 780                               && wi::leu_p (n_unroll, wi::to_widest (niter)));
 781           bool large
 782             = tree_estimate_loop_size
 783                 (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
 784                  param_max_completely_peeled_insns);
 785           if (large)
 786             {
 787               if (dump_file && (dump_flags & TDF_DETAILS))
 788                 fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
 789                          loop->num);
 790               return false;
 791             }
 792
 793           unsigned HOST_WIDE_INT ninsns = size.overall;
 794           unsigned HOST_WIDE_INT unr_insns
 795             = estimated_unrolled_size (&size, n_unroll);
 796           if (dump_file && (dump_flags & TDF_DETAILS))
 797             {
 798               fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
 799               fprintf (dump_file, "  Estimated size after unrolling: %d\n",
 800                        (int) unr_insns);
 801             }
 802
 803           /* If the code is going to shrink, we don't need to be extra
 804              cautious on guessing if the unrolling is going to be
 805              profitable.  */
 806           if (unr_insns
 807               /* If there is IV variable that will become constant, we
 808                  save one instruction in the loop prologue we do not
 809                  account otherwise.  */
 810               <= ninsns + (size.constant_iv != false))
 811             ;
 812           /* We unroll only inner loops, because we do not consider it
 813              profitable otherwise.  We still can cancel loopback edge
 814              of not rolling loop; this is always a good idea.  */
 815           else if (ul == UL_NO_GROWTH)
 816             {
 817               if (dump_file && (dump_flags & TDF_DETAILS))
 818                 fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
 819                          loop->num);
 820               return false;
 821             }
 822           /* Outer loops tend to be less interesting candidates for
 823              complete unrolling unless we can do a lot of propagation
 824              into the inner loop body.  For now we disable outer loop
 825              unrolling when the code would grow.  */
 826           else if (loop->inner)
 827             {
 828               if (dump_file && (dump_flags & TDF_DETAILS))
 829                 fprintf (dump_file, "Not unrolling loop %d: "
 830                          "it is not innermost and code would grow.\n",
 831                          loop->num);
 832               return false;
 833             }
 834           /* If there is call on a hot path through the loop, then
 835              there is most probably not much to optimize.  */
 836           else if (size.num_non_pure_calls_on_hot_path)
 837             {
 838               if (dump_file && (dump_flags & TDF_DETAILS))
 839                 fprintf (dump_file, "Not unrolling loop %d: "
 840                          "contains call and code would grow.\n",
 841                          loop->num);
 842               return false;
 843             }
 844           /* If there is pure/const call in the function, then we can
 845              still optimize the unrolled loop body if it contains some
 846              other interesting code than the calls and code storing or
 847              cumulating the return value.  */
 848           else if (size.num_pure_calls_on_hot_path
 849                    /* One IV increment, one test, one ivtmp store and
 850                       one useful stmt.  That is about minimal loop
 851                       doing pure call.  */
 852                    && (size.non_call_stmts_on_hot_path
 853                        <= 3 + size.num_pure_calls_on_hot_path))
 854             {
 855               if (dump_file && (dump_flags & TDF_DETAILS))
 856                 fprintf (dump_file, "Not unrolling loop %d: "
 857                          "contains just pure calls and code would grow.\n",
 858                          loop->num);
 859               return false;
 860             }
 861           /* Complete unrolling is major win when control flow is
 862              removed and one big basic block is created.  If the loop
 863              contains control flow the optimization may still be a win
 864              because of eliminating the loop overhead but it also may
 865              blow the branch predictor tables.  Limit number of
 866              branches on the hot path through the peeled sequence.  */
 867           else if (size.num_branches_on_hot_path * (int)n_unroll
 868                    > param_max_peel_branches)
 869             {
 870               if (dump_file && (dump_flags & TDF_DETAILS))
 871                 fprintf (dump_file, "Not unrolling loop %d: "
 872                          "number of branches on hot path in the unrolled "
 873                          "sequence reaches --param max-peel-branches limit.\n",
 874                          loop->num);
 875               return false;
 876             }
 877           else if (unr_insns
 878                    > (unsigned) param_max_completely_peeled_insns)
 879             {
 880               if (dump_file && (dump_flags & TDF_DETAILS))
 881                 fprintf (dump_file, "Not unrolling loop %d: "
 882                          "number of insns in the unrolled sequence reaches "
 883                          "--param max-completely-peeled-insns limit.\n",
 884                          loop->num);
 885               return false;
 886             }
 887         }
 888
 889       if (!dbg_cnt (gimple_unroll))
 890         return false;
 891
 892       initialize_original_copy_tables ();
 893       auto_sbitmap wont_exit (n_unroll + 1);
 894       if (exit && niter
 895           && TREE_CODE (niter) == INTEGER_CST
 896           && wi::leu_p (n_unroll, wi::to_widest (niter)))
 897         {
 898           bitmap_ones (wont_exit);
 899           if (wi::eq_p (wi::to_widest (niter), n_unroll)
 900               || edge_to_cancel)
 901             bitmap_clear_bit (wont_exit, 0);
 902         }
 903       else
 904         {
 905           exit = NULL;
 906           bitmap_clear (wont_exit);
 907         }
 908       if (may_be_zero)
 909         bitmap_clear_bit (wont_exit, 1);
 910
 911       if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 912                                                  n_unroll, wont_exit,
 913                                                  exit, &edges_to_remove,
 914                                                  DLTHE_FLAG_UPDATE_FREQ
 915                                                  | DLTHE_FLAG_COMPLETTE_PEEL))
 916         {
 917           free_original_copy_tables ();
 918           if (dump_file && (dump_flags & TDF_DETAILS))
 919             fprintf (dump_file, "Failed to duplicate the loop\n");
 920           return false;
 921         }
 922
 923       free_original_copy_tables ();
 924     }
 925
 926   /* Remove the conditional from the last copy of the loop.  */
 927   if (edge_to_cancel)
 928     {
 929       gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src));
 930       force_edge_cold (edge_to_cancel, true);
 931       if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
 932         gimple_cond_make_false (cond);
 933       else
 934         gimple_cond_make_true (cond);
 935       update_stmt (cond);
 936       /* Do not remove the path, as doing so may remove outer loop and
 937          confuse bookkeeping code in tree_unroll_loops_completely.  */
 938     }
 939
 940   /* Store the loop for later unlooping and exit removal.  */
 941   loops_to_unloop.safe_push (loop);
 942   loops_to_unloop_nunroll.safe_push (n_unroll);
 943
 944   if (dump_enabled_p ())
 945     {
 946       if (!n_unroll)
 947         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
 948                          "loop turned into non-loop; it never loops\n");
 949       else
 950         {
 951           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
 952                            "loop with %d iterations completely unrolled",
 953                            (int) n_unroll);
 954           if (loop->header->count.initialized_p ())
 955             dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
 956                          " (header execution count %d)",
 957                          (int)loop->header->count.to_gcov_type ());
 958           dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
 959         }
 960     }
 961
 962   if (dump_file && (dump_flags & TDF_DETAILS))
 963     {
 964       if (exit)
 965         fprintf (dump_file, "Exit condition of peeled iterations was "
 966                  "eliminated.\n");
 967       if (edge_to_cancel)
 968         fprintf (dump_file, "Last iteration exit edge was proved true.\n");
 969       else
 970         fprintf (dump_file, "Latch of last iteration was marked by "
 971                  "__builtin_unreachable ().\n");
 972     }
 973
 974   return true;
 975 }
 976
 977 /* Return number of instructions after peeling.  */
 978 static unsigned HOST_WIDE_INT
 979 estimated_peeled_sequence_size (struct loop_size *size,
 980                                 unsigned HOST_WIDE_INT npeel)
 981 {
 982   return MAX (npeel * (HOST_WIDE_INT) (size->overall
 983                                        - size->eliminated_by_peeling), 1);
 984 }
 985
 986 /* If the loop is expected to iterate N times and is
 987    small enough, duplicate the loop body N+1 times before
 988    the loop itself.  This way the hot path will never
 989    enter the loop.
 990    Parameters are the same as for try_unroll_loops_completely */
 991
 992 static bool
 993 try_peel_loop (class loop *loop,
 994                edge exit, tree niter, bool may_be_zero,
 995                HOST_WIDE_INT maxiter)
 996 {
 997   HOST_WIDE_INT npeel;
 998   struct loop_size size;
 999   int peeled_size;
1000
1001   if (!flag_peel_loops
1002       || param_max_peel_times <= 0
1003       || !peeled_loops)
1004     return false;
1005
1006   if (bitmap_bit_p (peeled_loops, loop->num))
1007     {
1008       if (dump_file)
1009         fprintf (dump_file, "Not peeling: loop is already peeled\n");
1010       return false;
1011     }
1012
1013   /* We don't peel loops that will be unrolled as this can duplicate a
1014      loop more times than the user requested.  */
1015   if (loop->unroll)
1016     {
1017       if (dump_file)
1018         fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
1019       return false;
1020     }
1021
1022   /* Peel only innermost loops.
1023      While the code is perfectly capable of peeling non-innermost loops,
1024      the heuristics would probably need some improvements. */
1025   if (loop->inner)
1026     {
1027       if (dump_file)
1028         fprintf (dump_file, "Not peeling: outer loop\n");
1029       return false;
1030     }
1031
1032   if (!optimize_loop_for_speed_p (loop))
1033     {
1034       if (dump_file)
1035         fprintf (dump_file, "Not peeling: cold loop\n");
1036       return false;
1037     }
1038
1039   /* Check if there is an estimate on the number of iterations.  */
1040   npeel = estimated_loop_iterations_int (loop);
1041   if (npeel < 0)
1042     npeel = likely_max_loop_iterations_int (loop);
1043   if (npeel < 0)
1044     {
1045       if (dump_file)
1046         fprintf (dump_file, "Not peeling: number of iterations is not "
1047                  "estimated\n");
1048       return false;
1049     }
1050   if (maxiter >= 0 && maxiter <= npeel)
1051     {
1052       if (dump_file)
1053         fprintf (dump_file, "Not peeling: upper bound is known so can "
1054                  "unroll completely\n");
1055       return false;
1056     }
1057
1058   /* We want to peel estimated number of iterations + 1 (so we never
1059      enter the loop on quick path).  Check against PARAM_MAX_PEEL_TIMES
1060      and be sure to avoid overflows.  */
1061   if (npeel > param_max_peel_times - 1)
1062     {
1063       if (dump_file)
1064         fprintf (dump_file, "Not peeling: rolls too much "
1065                  "(%i + 1 > --param max-peel-times)\n", (int) npeel);
1066       return false;
1067     }
1068   npeel++;
1069
1070   /* Check peeled loops size.  */
1071   tree_estimate_loop_size (loop, exit, NULL, &size,
1072                            param_max_peeled_insns);
1073   if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
1074       > param_max_peeled_insns)
1075     {
1076       if (dump_file)
1077         fprintf (dump_file, "Not peeling: peeled sequence size is too large "
1078                  "(%i insns > --param max-peel-insns)", peeled_size);
1079       return false;
1080     }
1081
1082   if (!dbg_cnt (gimple_unroll))
1083     return false;
1084
1085   /* Duplicate possibly eliminating the exits.  */
1086   initialize_original_copy_tables ();
1087   auto_sbitmap wont_exit (npeel + 1);
1088   if (exit && niter
1089       && TREE_CODE (niter) == INTEGER_CST
1090       && wi::leu_p (npeel, wi::to_widest (niter)))
1091     {
1092       bitmap_ones (wont_exit);
1093       bitmap_clear_bit (wont_exit, 0);
1094     }
1095   else
1096     {
1097       exit = NULL;
1098       bitmap_clear (wont_exit);
1099     }
1100   if (may_be_zero)
1101     bitmap_clear_bit (wont_exit, 1);
1102   if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1103                                              npeel, wont_exit,
1104                                              exit, &edges_to_remove,
1105                                              DLTHE_FLAG_UPDATE_FREQ))
1106     {
1107       free_original_copy_tables ();
1108       return false;
1109     }
1110   free_original_copy_tables ();
1111   if (dump_file && (dump_flags & TDF_DETAILS))
1112     {
1113       fprintf (dump_file, "Peeled loop %d, %i times.\n",
1114                loop->num, (int) npeel);
1115     }
1116   if (loop->any_estimate)
1117     {
1118       if (wi::ltu_p (npeel, loop->nb_iterations_estimate))
1119         loop->nb_iterations_estimate -= npeel;
1120       else
1121         loop->nb_iterations_estimate = 0;
1122     }
1123   if (loop->any_upper_bound)
1124     {
1125       if (wi::ltu_p (npeel, loop->nb_iterations_upper_bound))
1126         loop->nb_iterations_upper_bound -= npeel;
1127       else
1128         loop->nb_iterations_upper_bound = 0;
1129     }
1130   if (loop->any_likely_upper_bound)
1131     {
1132       if (wi::ltu_p (npeel, loop->nb_iterations_likely_upper_bound))
1133         loop->nb_iterations_likely_upper_bound -= npeel;
1134       else
1135         {
1136           loop->any_estimate = true;
1137           loop->nb_iterations_estimate = 0;
1138           loop->nb_iterations_likely_upper_bound = 0;
1139         }
1140     }
1141   profile_count entry_count = profile_count::zero ();
1142
1143   edge e;
1144   edge_iterator ei;
1145   FOR_EACH_EDGE (e, ei, loop->header->preds)
1146     if (e->src != loop->latch)
1147       {
1148         if (e->src->count.initialized_p ())
1149           entry_count += e->src->count;
1150         gcc_assert (!flow_bb_inside_loop_p (loop, e->src));
1151       }
1152   profile_probability p;
1153   p = entry_count.probability_in (loop->header->count);
1154   scale_loop_profile (loop, p, 0);
1155   bitmap_set_bit (peeled_loops, loop->num);
1156   return true;
1157 }
/* Adds a canonical induction variable to LOOP if suitable.
   CREATE_IV is true if we may create a new iv.  UL determines
   which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
   to determine the number of iterations of a loop by direct evaluation.
   ALLOW_PEEL is passed through unchanged to try_unroll_loop_completely.
   Returns true if cfg is changed.   */

static bool
canonicalize_loop_induction_variables (class loop *loop,
                                       bool create_iv, enum unroll_level ul,
                                       bool try_eval, bool allow_peel)
{
  edge exit = NULL;
  tree niter;
  HOST_WIDE_INT maxiter;
  bool modified = false;
  dump_user_location_t locus;
  class tree_niter_desc niter_desc;
  bool may_be_zero = false;

  /* For unrolling allow conditional constant or zero iterations, thus
     perform loop-header copying on-the-fly.  */
  exit = single_exit (loop);
  niter = chrec_dont_know;
  if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
    {
      niter = niter_desc.niter;
      may_be_zero
        = niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero);
    }
  /* NITER starts out as chrec_dont_know, so it can only be a constant
     here when the single exit was analyzed successfully above; EXIT is
     therefore non-NULL in this branch.  */
  if (TREE_CODE (niter) == INTEGER_CST)
    locus = last_stmt (exit->src);
  else
    {
      /* For non-constant niter fold may_be_zero into niter again.  */
      if (may_be_zero)
        {
          if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
            niter = fold_build3 (COND_EXPR, TREE_TYPE (niter),
                                 niter_desc.may_be_zero,
                                 build_int_cst (TREE_TYPE (niter), 0), niter);
          else
            niter = chrec_dont_know;
          may_be_zero = false;
        }

      /* If the loop has more than one exit, try checking all of them
         for # of iterations determinable through scev.  */
      if (!exit)
        niter = find_loop_niter (loop, &exit);

      /* Finally if everything else fails, try brute force evaluation.  */
      if (try_eval
          && (chrec_contains_undetermined (niter)
              || TREE_CODE (niter) != INTEGER_CST))
        niter = find_loop_niter_by_eval (loop, &exit);

      if (exit)
        locus = last_stmt (exit->src);

      /* Without a constant iteration count EXIT is not used below.  */
      if (TREE_CODE (niter) != INTEGER_CST)
        exit = NULL;
    }

  /* We work exceptionally hard here to estimate the bound
     by find_loop_niter_by_eval.  Be sure to keep it for future.  */
  if (niter && TREE_CODE (niter) == INTEGER_CST)
    {
      record_niter_bound (loop, wi::to_widest (niter),
                          exit == single_likely_exit (loop), true);
    }

  /* Force re-computation of loop bounds so we can remove redundant exits.  */
  maxiter = max_loop_iterations_int (loop);

  /* Detailed dumps: the exact count, the upper bound and the likely
     upper bound, each printed only when it is known.  */
  if (dump_file && (dump_flags & TDF_DETAILS)
      && TREE_CODE (niter) == INTEGER_CST)
    {
      fprintf (dump_file, "Loop %d iterates ", loop->num);
      print_generic_expr (dump_file, niter, TDF_SLIM);
      fprintf (dump_file, " times.\n");
    }
  if (dump_file && (dump_flags & TDF_DETAILS)
      && maxiter >= 0)
    {
      fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
               (int)maxiter);
    }
  if (dump_file && (dump_flags & TDF_DETAILS)
      && likely_max_loop_iterations_int (loop) >= 0)
    {
      fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
               loop->num, (int)likely_max_loop_iterations_int (loop));
    }

  /* Remove exits that are known to be never taken based on loop bound.
     Needs to be called after compilation of max_loop_iterations_int that
     populates the loop bounds.  */
  modified |= remove_redundant_iv_tests (loop);

  /* A successful complete unroll is reported as a change right away;
     neither iv creation nor peeling is attempted afterwards.  */
  if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul,
                                  maxiter, locus, allow_peel))
    return true;

  /* Create the canonical iv only for a known iteration count and an
     exit test that just_once_each_iteration_p accepts.  */
  if (create_iv
      && niter && !chrec_contains_undetermined (niter)
      && exit && just_once_each_iteration_p (loop, exit->src))
    {
      tree iv_niter = niter;
      if (may_be_zero)
        {
          /* Fold the may-be-zero condition into the count, or give up
             on the iv when the condition is not a comparison.  */
          if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
            iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter),
                                    niter_desc.may_be_zero,
                                    build_int_cst (TREE_TYPE (iv_niter), 0),
                                    iv_niter);
          else
            iv_niter = NULL_TREE;
        }
      if (iv_niter)
        create_canonical_iv (loop, exit, iv_niter);
    }

  /* Peeling is attempted only at the UL_ALL level.  */
  if (ul == UL_ALL)
    modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter);

  return modified;
}
1285
1286 /* The main entry point of the pass.  Adds canonical induction variables
1287    to the suitable loops.  */
1288
1289 unsigned int
1290 canonicalize_induction_variables (void)
1291 {
1292   class loop *loop;
1293   bool changed = false;
1294   bool irred_invalidated = false;
1295   bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1296
1297   estimate_numbers_of_iterations (cfun);
1298
1299   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1300     {
1301       changed |= canonicalize_loop_induction_variables (loop,
1302                                                         true, UL_SINGLE_ITER,
1303                                                         true, false);
1304     }
1305   gcc_assert (!need_ssa_update_p (cfun));
1306
1307   unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
1308   if (irred_invalidated
1309       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1310     mark_irreducible_loops ();
1311
1312   /* Clean up the information about numbers of iterations, since brute force
1313      evaluation could reveal new information.  */
1314   free_numbers_of_iterations_estimates (cfun);
1315   scev_reset ();
1316
1317   if (!bitmap_empty_p (loop_closed_ssa_invalidated))
1318     {
1319       gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
1320       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1321     }
1322   BITMAP_FREE (loop_closed_ssa_invalidated);
1323
1324   if (changed)
1325     return TODO_cleanup_cfg;
1326   return 0;
1327 }
1328
1329 /* Process loops from innermost to outer, stopping at the innermost
1330    loop we unrolled.  */
1331
1332 static bool
1333 tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
1334                                 bitmap father_bbs, class loop *loop)
1335 {
1336   class loop *loop_father;
1337   bool changed = false;
1338   class loop *inner;
1339   enum unroll_level ul;
1340   unsigned num = number_of_loops (cfun);
1341
1342   /* Process inner loops first.  Don't walk loops added by the recursive
1343      calls because SSA form is not up-to-date.  They can be handled in the
1344      next iteration.  */
1345   bitmap child_father_bbs = NULL;
1346   for (inner = loop->inner; inner != NULL; inner = inner->next)
1347     if ((unsigned) inner->num < num)
1348       {
1349         if (!child_father_bbs)
1350           child_father_bbs = BITMAP_ALLOC (NULL);
1351         if (tree_unroll_loops_completely_1 (may_increase_size, unroll_outer,
1352                                             child_father_bbs, inner))
1353           {
1354             bitmap_ior_into (father_bbs, child_father_bbs);
1355             bitmap_clear (child_father_bbs);
1356             changed = true;
1357           }
1358       }
1359   if (child_father_bbs)
1360     BITMAP_FREE (child_father_bbs);
1361
1362   /* If we changed an inner loop we cannot process outer loops in this
1363      iteration because SSA form is not up-to-date.  Continue with
1364      siblings of outer loops instead.  */
1365   if (changed)
1366     {
1367       /* If we are recorded as father clear all other fathers that
1368          are necessarily covered already to avoid redundant work.  */
1369       if (bitmap_bit_p (father_bbs, loop->header->index))
1370         {
1371           bitmap_clear (father_bbs);
1372           bitmap_set_bit (father_bbs, loop->header->index);
1373         }
1374       return true;
1375     }
1376
1377   /* Don't unroll #pragma omp simd loops until the vectorizer
1378      attempts to vectorize those.  */
1379   if (loop->force_vectorize)
1380     return false;
1381
1382   /* Try to unroll this loop.  */
1383   loop_father = loop_outer (loop);
1384   if (!loop_father)
1385     return false;
1386
1387   if (loop->unroll > 1)
1388     ul = UL_ALL;
1389   else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
1390       /* Unroll outermost loops only if asked to do so or they do
1391          not cause code growth.  */
1392       && (unroll_outer || loop_outer (loop_father)))
1393     ul = UL_ALL;
1394   else
1395     ul = UL_NO_GROWTH;
1396
1397   if (canonicalize_loop_induction_variables
1398         (loop, false, ul, !flag_tree_loop_ivcanon, unroll_outer))
1399     {
1400       /* If we'll continue unrolling, we need to propagate constants
1401          within the new basic blocks to fold away induction variable
1402          computations; otherwise, the size might blow up before the
1403          iteration is complete and the IR eventually cleaned up.  */
1404       if (loop_outer (loop_father))
1405         {
1406           /* Once we process our father we will have processed
1407              the fathers of our children as well, so avoid doing
1408              redundant work and clear fathers we've gathered sofar.  */
1409           bitmap_clear (father_bbs);
1410           bitmap_set_bit (father_bbs, loop_father->header->index);
1411         }
1412
1413       return true;
1414     }
1415
1416   return false;
1417 }
1418
1419 /* Unroll LOOPS completely if they iterate just few times.  Unless
1420    MAY_INCREASE_SIZE is true, perform the unrolling only if the
1421    size of the code does not increase.  */
1422
1423 static unsigned int
1424 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
1425 {
1426   bitmap father_bbs = BITMAP_ALLOC (NULL);
1427   bool changed;
1428   int iteration = 0;
1429   bool irred_invalidated = false;
1430
1431   estimate_numbers_of_iterations (cfun);
1432
1433   do
1434     {
1435       changed = false;
1436       bitmap loop_closed_ssa_invalidated = NULL;
1437
1438       if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
1439         loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1440
1441       free_numbers_of_iterations_estimates (cfun);
1442       estimate_numbers_of_iterations (cfun);
1443
1444       changed = tree_unroll_loops_completely_1 (may_increase_size,
1445                                                 unroll_outer, father_bbs,
1446                                                 current_loops->tree_root);
1447       if (changed)
1448         {
1449           unsigned i;
1450
1451           unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
1452
1453           /* We cannot use TODO_update_ssa_no_phi because VOPS gets confused.  */
1454           if (loop_closed_ssa_invalidated
1455               && !bitmap_empty_p (loop_closed_ssa_invalidated))
1456             rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
1457                                           TODO_update_ssa);
1458           else
1459             update_ssa (TODO_update_ssa);
1460
1461           /* father_bbs is a bitmap of loop father header BB indices.
1462              Translate that to what non-root loops these BBs belong to now.  */
1463           bitmap_iterator bi;
1464           bitmap fathers = BITMAP_ALLOC (NULL);
1465           EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi)
1466             {
1467               basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i);
1468               if (! unrolled_loop_bb)
1469                 continue;
1470               if (loop_outer (unrolled_loop_bb->loop_father))
1471                 bitmap_set_bit (fathers,
1472                                 unrolled_loop_bb->loop_father->num);
1473             }
1474           bitmap_clear (father_bbs);
1475           /* Propagate the constants within the new basic blocks.  */
1476           EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
1477             {
1478               loop_p father = get_loop (cfun, i);
1479               bitmap exit_bbs = BITMAP_ALLOC (NULL);
1480               loop_exit *exit = father->exits->next;
1481               while (exit->e)
1482                 {
1483                   bitmap_set_bit (exit_bbs, exit->e->dest->index);
1484                   exit = exit->next;
1485                 }
1486               do_rpo_vn (cfun, loop_preheader_edge (father), exit_bbs);
1487             }
1488           BITMAP_FREE (fathers);
1489
1490           /* This will take care of removing completely unrolled loops
1491              from the loop structures so we can continue unrolling now
1492              innermost loops.  */
1493           if (cleanup_tree_cfg ())
1494             update_ssa (TODO_update_ssa_only_virtuals);
1495
1496           /* Clean up the information about numbers of iterations, since
1497              complete unrolling might have invalidated it.  */
1498           scev_reset ();
1499           if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA))
1500             verify_loop_closed_ssa (true);
1501         }
1502       if (loop_closed_ssa_invalidated)
1503         BITMAP_FREE (loop_closed_ssa_invalidated);
1504     }
1505   while (changed
1506          && ++iteration <= param_max_unroll_iterations);
1507
1508   BITMAP_FREE (father_bbs);
1509
1510   if (irred_invalidated
1511       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1512     mark_irreducible_loops ();
1513
1514   return 0;
1515 }
1516
1517 /* Canonical induction variable creation pass.  */
1518
1519 namespace {
1520
1521 const pass_data pass_data_iv_canon =
1522 {
1523   GIMPLE_PASS, /* type */
1524   "ivcanon", /* name */
1525   OPTGROUP_LOOP, /* optinfo_flags */
1526   TV_TREE_LOOP_IVCANON, /* tv_id */
1527   ( PROP_cfg | PROP_ssa ), /* properties_required */
1528   0, /* properties_provided */
1529   0, /* properties_destroyed */
1530   0, /* todo_flags_start */
1531   0, /* todo_flags_finish */
1532 };
1533
1534 class pass_iv_canon : public gimple_opt_pass
1535 {
1536 public:
1537   pass_iv_canon (gcc::context *ctxt)
1538     : gimple_opt_pass (pass_data_iv_canon, ctxt)
1539   {}
1540
1541   /* opt_pass methods: */
1542   virtual bool gate (function *) { return flag_tree_loop_ivcanon != 0; }
1543   virtual unsigned int execute (function *fun);
1544
1545 }; // class pass_iv_canon
1546
1547 unsigned int
1548 pass_iv_canon::execute (function *fun)
1549 {
1550   if (number_of_loops (fun) <= 1)
1551     return 0;
1552
1553   return canonicalize_induction_variables ();
1554 }
1555
1556 } // anon namespace
1557
1558 gimple_opt_pass *
1559 make_pass_iv_canon (gcc::context *ctxt)
1560 {
1561   return new pass_iv_canon (ctxt);
1562 }
1563
1564 /* Complete unrolling of loops.  */
1565
1566 namespace {
1567
1568 const pass_data pass_data_complete_unroll =
1569 {
1570   GIMPLE_PASS, /* type */
1571   "cunroll", /* name */
1572   OPTGROUP_LOOP, /* optinfo_flags */
1573   TV_COMPLETE_UNROLL, /* tv_id */
1574   ( PROP_cfg | PROP_ssa ), /* properties_required */
1575   0, /* properties_provided */
1576   0, /* properties_destroyed */
1577   0, /* todo_flags_start */
1578   0, /* todo_flags_finish */
1579 };
1580
1581 class pass_complete_unroll : public gimple_opt_pass
1582 {
1583 public:
1584   pass_complete_unroll (gcc::context *ctxt)
1585     : gimple_opt_pass (pass_data_complete_unroll, ctxt)
1586   {}
1587
1588   /* opt_pass methods: */
1589   virtual unsigned int execute (function *);
1590
1591 }; // class pass_complete_unroll
1592
1593 unsigned int
1594 pass_complete_unroll::execute (function *fun)
1595 {
1596   if (number_of_loops (fun) <= 1)
1597     return 0;
1598
1599   /* If we ever decide to run loop peeling more than once, we will need to
1600      track loops already peeled in loop structures themselves to avoid
1601      re-peeling the same loop multiple times.  */
1602   if (flag_peel_loops)
1603     peeled_loops = BITMAP_ALLOC (NULL);
1604   unsigned int val = tree_unroll_loops_completely (flag_unroll_loops
1605                                                    || flag_peel_loops
1606                                                    || optimize >= 3, true);
1607   if (peeled_loops)
1608     {
1609       BITMAP_FREE (peeled_loops);
1610       peeled_loops = NULL;
1611     }
1612   return val;
1613 }
1614
1615 } // anon namespace
1616
1617 gimple_opt_pass *
1618 make_pass_complete_unroll (gcc::context *ctxt)
1619 {
1620   return new pass_complete_unroll (ctxt);
1621 }
1622
1623 /* Complete unrolling of inner loops.  */
1624
1625 namespace {
1626
1627 const pass_data pass_data_complete_unrolli =
1628 {
1629   GIMPLE_PASS, /* type */
1630   "cunrolli", /* name */
1631   OPTGROUP_LOOP, /* optinfo_flags */
1632   TV_COMPLETE_UNROLL, /* tv_id */
1633   ( PROP_cfg | PROP_ssa ), /* properties_required */
1634   0, /* properties_provided */
1635   0, /* properties_destroyed */
1636   0, /* todo_flags_start */
1637   0, /* todo_flags_finish */
1638 };
1639
1640 class pass_complete_unrolli : public gimple_opt_pass
1641 {
1642 public:
1643   pass_complete_unrolli (gcc::context *ctxt)
1644     : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
1645   {}
1646
1647   /* opt_pass methods: */
1648   virtual bool gate (function *) { return optimize >= 2; }
1649   virtual unsigned int execute (function *);
1650
1651 }; // class pass_complete_unrolli
1652
1653 unsigned int
1654 pass_complete_unrolli::execute (function *fun)
1655 {
1656   unsigned ret = 0;
1657
1658   loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
1659   if (number_of_loops (fun) > 1)
1660     {
1661       scev_initialize ();
1662       ret = tree_unroll_loops_completely (optimize >= 3, false);
1663       scev_finalize ();
1664     }
1665   loop_optimizer_finalize ();
1666
1667   return ret;
1668 }
1669
1670 } // anon namespace
1671
1672 gimple_opt_pass *
1673 make_pass_complete_unrolli (gcc::context *ctxt)
1674 {
1675   return new pass_complete_unrolli (ctxt);
1676 }
1677
1678