gcc/tree-ssa-loop-ivcanon.cc

   1 /* Induction variable canonicalization and loop peeling.
   2    Copyright (C) 2004-2023 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
/* This pass detects the loops that iterate a constant number of times,
   adds a canonical induction variable (step -1, tested against 0)
   and replaces the exit test.  This enables the less powerful rtl
   level analysis to use this information.

   This might spoil the code in some cases (by increasing register pressure).
   Note that in the case the new variable is not needed, ivopts will get rid
   of it, so it might only be a problem when there are no other linear
   induction variables.  In that case the created optimization possibilities
   are likely to pay off.

   We also perform
     - complete unrolling (or peeling) when the loop rolls few enough
       times
     - simple peeling (i.e. copying a few initial iterations before the
       loop) when an estimate of the number of iterations is known
       (typically from the profile info).  */
  37
  38 #include "config.h"
  39 #include "system.h"
  40 #include "coretypes.h"
  41 #include "backend.h"
  42 #include "tree.h"
  43 #include "gimple.h"
  44 #include "cfghooks.h"
  45 #include "tree-pass.h"
  46 #include "ssa.h"
  47 #include "cgraph.h"
  48 #include "gimple-pretty-print.h"
  49 #include "fold-const.h"
  50 #include "profile.h"
  51 #include "gimple-iterator.h"
  52 #include "gimple-fold.h"
  53 #include "tree-eh.h"
  54 #include "tree-cfg.h"
  55 #include "tree-ssa-loop-manip.h"
  56 #include "tree-ssa-loop-niter.h"
  57 #include "tree-ssa-loop.h"
  58 #include "tree-into-ssa.h"
  59 #include "cfgloop.h"
  60 #include "tree-chrec.h"
  61 #include "tree-scalar-evolution.h"
  62 #include "tree-inline.h"
  63 #include "tree-cfgcleanup.h"
  64 #include "builtins.h"
  65 #include "tree-ssa-sccvn.h"
  66 #include "tree-vectorizer.h" /* For find_loop_location */
  67 #include "dbgcnt.h"
  68
  69 /* Specifies types of loops that may be unrolled.  */
  70
  71 enum unroll_level
  72 {
  73   UL_SINGLE_ITER,       /* Only loops that exit immediately in the first
  74                            iteration.  */
  75   UL_NO_GROWTH,         /* Only loops whose unrolling will not cause increase
  76                            of code size.  */
  77   UL_ALL                /* All suitable loops.  */
  78 };
  79
  80 /* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
  81    is the exit edge whose condition is replaced.  The ssa versions of the new
  82    IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER
  83    if they are not NULL.  */
  84
  85 void
  86 create_canonical_iv (class loop *loop, edge exit, tree niter,
  87                      tree *var_before = NULL, tree *var_after = NULL)
  88 {
  89   edge in;
  90   tree type, var;
  91   gcond *cond;
  92   gimple_stmt_iterator incr_at;
  93   enum tree_code cmp;
  94
  95   if (dump_file && (dump_flags & TDF_DETAILS))
  96     {
  97       fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
  98       print_generic_expr (dump_file, niter, TDF_SLIM);
  99       fprintf (dump_file, " iterations.\n");
 100     }
 101
 102   cond = as_a <gcond *> (*gsi_last_bb (exit->src));
 103   in = EDGE_SUCC (exit->src, 0);
 104   if (in == exit)
 105     in = EDGE_SUCC (exit->src, 1);
 106
 107   /* Note that we do not need to worry about overflows, since
 108      type of niter is always unsigned and all comparisons are
 109      just for equality/nonequality -- i.e. everything works
 110      with a modulo arithmetics.  */
 111
 112   type = TREE_TYPE (niter);
 113   niter = fold_build2 (PLUS_EXPR, type,
 114                        niter,
 115                        build_int_cst (type, 1));
 116   incr_at = gsi_last_bb (in->src);
 117   create_iv (niter, PLUS_EXPR,
 118              build_int_cst (type, -1),
 119              NULL_TREE, loop,
 120              &incr_at, false, var_before, &var);
 121   if (var_after)
 122     *var_after = var;
 123
 124   cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
 125   gimple_cond_set_code (cond, cmp);
 126   gimple_cond_set_lhs (cond, var);
 127   gimple_cond_set_rhs (cond, build_int_cst (type, 0));
 128   update_stmt (cond);
 129 }
 130
 131 /* Describe size of loop as detected by tree_estimate_loop_size.  */
 132 struct loop_size
 133 {
 134   /* Number of instructions in the loop.  */
 135   int overall;
 136
 137   /* Number of instructions that will be likely optimized out in
 138      peeled iterations of loop  (i.e. computation based on induction
 139      variable where induction variable starts at known constant.)  */
 140   int eliminated_by_peeling;
 141
 142   /* Same statistics for last iteration of loop: it is smaller because
 143      instructions after exit are not executed.  */
 144   int last_iteration;
 145   int last_iteration_eliminated_by_peeling;
 146
 147   /* If some IV computation will become constant.  */
 148   bool constant_iv;
 149
 150   /* Number of call stmts that are not a builtin and are pure or const
 151      present on the hot path.  */
 152   int num_pure_calls_on_hot_path;
 153   /* Number of call stmts that are not a builtin and are not pure nor const
 154      present on the hot path.  */
 155   int num_non_pure_calls_on_hot_path;
 156   /* Number of statements other than calls in the loop.  */
 157   int non_call_stmts_on_hot_path;
 158   /* Number of branches seen on the hot path.  */
 159   int num_branches_on_hot_path;
 160 };
 161
 162 /* Return true if OP in STMT will be constant after peeling LOOP.  */
 163
 164 static bool
 165 constant_after_peeling (tree op, gimple *stmt, class loop *loop)
 166 {
 167   if (CONSTANT_CLASS_P (op))
 168     return true;
 169
 170   /* Get at the actual SSA operand.  */
 171   if (handled_component_p (op)
 172       && TREE_CODE (TREE_OPERAND (op, 0)) == SSA_NAME)
 173     op = TREE_OPERAND (op, 0);
 174
 175   /* We can still fold accesses to constant arrays when index is known.  */
 176   if (TREE_CODE (op) != SSA_NAME)
 177     {
 178       tree base = op;
 179
 180       /* First make fast look if we see constant array inside.  */
 181       while (handled_component_p (base))
 182         base = TREE_OPERAND (base, 0);
 183       if ((DECL_P (base)
 184            && ctor_for_folding (base) != error_mark_node)
 185           || CONSTANT_CLASS_P (base))
 186         {
 187           /* If so, see if we understand all the indices.  */
 188           base = op;
 189           while (handled_component_p (base))
 190             {
 191               if (TREE_CODE (base) == ARRAY_REF
 192                   && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
 193                 return false;
 194               base = TREE_OPERAND (base, 0);
 195             }
 196           return true;
 197         }
 198       return false;
 199     }
 200
 201   /* Induction variables are constants when defined in loop.  */
 202   if (loop_containing_stmt (stmt) != loop)
 203     return false;
 204   tree ev = analyze_scalar_evolution (loop, op);
 205   if (chrec_contains_undetermined (ev)
 206       || chrec_contains_symbols (ev))
 207     {
 208       if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (op)))
 209         {
 210           gassign *ass = nullptr;
 211           gphi *phi = nullptr;
 212           if (is_a <gassign *> (SSA_NAME_DEF_STMT (op)))
 213             {
 214               ass = as_a <gassign *> (SSA_NAME_DEF_STMT (op));
 215               if (TREE_CODE (gimple_assign_rhs1 (ass)) == SSA_NAME)
 216                 phi = dyn_cast <gphi *>
 217                         (SSA_NAME_DEF_STMT (gimple_assign_rhs1  (ass)));
 218             }
 219           else if (is_a <gphi *> (SSA_NAME_DEF_STMT (op)))
 220             {
 221               phi = as_a <gphi *> (SSA_NAME_DEF_STMT (op));
 222               if (gimple_bb (phi) == loop->header)
 223                 {
 224                   tree def = gimple_phi_arg_def_from_edge
 225                     (phi, loop_latch_edge (loop));
 226                   if (TREE_CODE (def) == SSA_NAME
 227                       && is_a <gassign *> (SSA_NAME_DEF_STMT (def)))
 228                     ass = as_a <gassign *> (SSA_NAME_DEF_STMT (def));
 229                 }
 230             }
 231           if (ass && phi)
 232             {
 233               tree rhs1 = gimple_assign_rhs1 (ass);
 234               if (gimple_assign_rhs_class (ass) == GIMPLE_BINARY_RHS
 235                   && CONSTANT_CLASS_P (gimple_assign_rhs2 (ass))
 236                   && rhs1 == gimple_phi_result (phi)
 237                   && gimple_bb (phi) == loop->header
 238                   && (gimple_phi_arg_def_from_edge (phi, loop_latch_edge (loop))
 239                       == gimple_assign_lhs (ass))
 240                   && (CONSTANT_CLASS_P (gimple_phi_arg_def_from_edge
 241                                          (phi, loop_preheader_edge (loop)))))
 242                 return true;
 243             }
 244         }
 245       return false;
 246     }
 247   return true;
 248 }
 249
 250 /* Computes an estimated number of insns in LOOP.
 251    EXIT (if non-NULL) is an exite edge that will be eliminated in all but last
 252    iteration of the loop.
 253    EDGE_TO_CANCEL (if non-NULL) is an non-exit edge eliminated in the last iteration
 254    of loop.
 255    Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.
 256    Stop estimating after UPPER_BOUND is met.  Return true in this case.  */
 257
 258 static bool
 259 tree_estimate_loop_size (class loop *loop, edge exit, edge edge_to_cancel,
 260                          struct loop_size *size, int upper_bound)
 261 {
 262   basic_block *body = get_loop_body (loop);
 263   gimple_stmt_iterator gsi;
 264   unsigned int i;
 265   bool after_exit;
 266   auto_vec<basic_block> path = get_loop_hot_path (loop);
 267
 268   size->overall = 0;
 269   size->eliminated_by_peeling = 0;
 270   size->last_iteration = 0;
 271   size->last_iteration_eliminated_by_peeling = 0;
 272   size->num_pure_calls_on_hot_path = 0;
 273   size->num_non_pure_calls_on_hot_path = 0;
 274   size->non_call_stmts_on_hot_path = 0;
 275   size->num_branches_on_hot_path = 0;
 276   size->constant_iv = 0;
 277
 278   if (dump_file && (dump_flags & TDF_DETAILS))
 279     fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
 280   for (i = 0; i < loop->num_nodes; i++)
 281     {
 282       if (edge_to_cancel && body[i] != edge_to_cancel->src
 283           && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
 284         after_exit = true;
 285       else
 286         after_exit = false;
 287       if (dump_file && (dump_flags & TDF_DETAILS))
 288         fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
 289                  after_exit);
 290
 291       for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
 292         {
 293           gimple *stmt = gsi_stmt (gsi);
 294           int num = estimate_num_insns (stmt, &eni_size_weights);
 295           bool likely_eliminated = false;
 296           bool likely_eliminated_last = false;
 297           bool likely_eliminated_peeled = false;
 298
 299           if (dump_file && (dump_flags & TDF_DETAILS))
 300             {
 301               fprintf (dump_file, "  size: %3i ", num);
 302               print_gimple_stmt (dump_file, gsi_stmt (gsi), 0);
 303             }
 304
 305           /* Look for reasons why we might optimize this stmt away. */
 306
 307           if (!gimple_has_side_effects (stmt))
 308             {
 309               /* Exit conditional.  */
 310               if (exit && body[i] == exit->src
 311                   && stmt == *gsi_last_bb (exit->src))
 312                 {
 313                   if (dump_file && (dump_flags & TDF_DETAILS))
 314                     fprintf (dump_file, "   Exit condition will be eliminated "
 315                              "in peeled copies.\n");
 316                   likely_eliminated_peeled = true;
 317                 }
 318               if (edge_to_cancel && body[i] == edge_to_cancel->src
 319                   && stmt == *gsi_last_bb (edge_to_cancel->src))
 320                 {
 321                   if (dump_file && (dump_flags & TDF_DETAILS))
 322                     fprintf (dump_file, "   Exit condition will be eliminated "
 323                              "in last copy.\n");
 324                   likely_eliminated_last = true;
 325                 }
 326               /* Sets of IV variables  */
 327               if (gimple_code (stmt) == GIMPLE_ASSIGN
 328                   && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
 329                 {
 330                   if (dump_file && (dump_flags & TDF_DETAILS))
 331                     fprintf (dump_file, "   Induction variable computation will"
 332                              " be folded away.\n");
 333                   likely_eliminated = true;
 334                 }
 335               /* Assignments of IV variables.  */
 336               else if (gimple_code (stmt) == GIMPLE_ASSIGN
 337                        && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
 338                        && constant_after_peeling (gimple_assign_rhs1 (stmt),
 339                                                   stmt, loop)
 340                        && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
 341                            || constant_after_peeling (gimple_assign_rhs2 (stmt),
 342                                                       stmt, loop))
 343                        && gimple_assign_rhs_class (stmt) != GIMPLE_TERNARY_RHS)
 344                 {
 345                   size->constant_iv = true;
 346                   if (dump_file && (dump_flags & TDF_DETAILS))
 347                     fprintf (dump_file,
 348                              "   Constant expression will be folded away.\n");
 349                   likely_eliminated = true;
 350                 }
 351               /* Conditionals.  */
 352               else if ((gimple_code (stmt) == GIMPLE_COND
 353                         && constant_after_peeling (gimple_cond_lhs (stmt), stmt,
 354                                                    loop)
 355                         && constant_after_peeling (gimple_cond_rhs (stmt), stmt,
 356                                                    loop)
 357                         /* We don't simplify all constant compares so make sure
 358                            they are not both constant already.  See PR70288.  */
 359                         && (! is_gimple_min_invariant (gimple_cond_lhs (stmt))
 360                             || ! is_gimple_min_invariant
 361                                  (gimple_cond_rhs (stmt))))
 362                        || (gimple_code (stmt) == GIMPLE_SWITCH
 363                            && constant_after_peeling (gimple_switch_index (
 364                                                         as_a <gswitch *>
 365                                                           (stmt)),
 366                                                       stmt, loop)
 367                            && ! is_gimple_min_invariant
 368                                    (gimple_switch_index
 369                                       (as_a <gswitch *> (stmt)))))
 370                 {
 371                   if (dump_file && (dump_flags & TDF_DETAILS))
 372                     fprintf (dump_file, "   Constant conditional.\n");
 373                   likely_eliminated = true;
 374                 }
 375             }
 376
 377           size->overall += num;
 378           if (likely_eliminated || likely_eliminated_peeled)
 379             size->eliminated_by_peeling += num;
 380           if (!after_exit)
 381             {
 382               size->last_iteration += num;
 383               if (likely_eliminated || likely_eliminated_last)
 384                 size->last_iteration_eliminated_by_peeling += num;
 385             }
 386           if ((size->overall * 3 / 2 - size->eliminated_by_peeling
 387               - size->last_iteration_eliminated_by_peeling) > upper_bound)
 388             {
 389               free (body);
 390               return true;
 391             }
 392         }
 393     }
 394   while (path.length ())
 395     {
 396       basic_block bb = path.pop ();
 397       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 398         {
 399           gimple *stmt = gsi_stmt (gsi);
 400           if (gimple_code (stmt) == GIMPLE_CALL
 401               && !gimple_inexpensive_call_p (as_a <gcall *>  (stmt)))
 402             {
 403               int flags = gimple_call_flags (stmt);
 404               if (flags & (ECF_PURE | ECF_CONST))
 405                 size->num_pure_calls_on_hot_path++;
 406               else
 407                 size->num_non_pure_calls_on_hot_path++;
 408               size->num_branches_on_hot_path ++;
 409             }
 410           /* Count inexpensive calls as non-calls, because they will likely
 411              expand inline.  */
 412           else if (gimple_code (stmt) != GIMPLE_DEBUG)
 413             size->non_call_stmts_on_hot_path++;
 414           if (((gimple_code (stmt) == GIMPLE_COND
 415                 && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
 416                     || !constant_after_peeling (gimple_cond_rhs (stmt), stmt,
 417                                                 loop)))
 418                || (gimple_code (stmt) == GIMPLE_SWITCH
 419                    && !constant_after_peeling (gimple_switch_index (
 420                                                  as_a <gswitch *> (stmt)),
 421                                                stmt, loop)))
 422               && (!exit || bb != exit->src))
 423             size->num_branches_on_hot_path++;
 424         }
 425     }
 426
 427   if (dump_file && (dump_flags & TDF_DETAILS))
 428     fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
 429              size->eliminated_by_peeling, size->last_iteration,
 430              size->last_iteration_eliminated_by_peeling);
 431
 432   free (body);
 433   return false;
 434 }
 435
 436 /* Estimate number of insns of completely unrolled loop.
 437    It is (NUNROLL + 1) * size of loop body with taking into account
 438    the fact that in last copy everything after exit conditional
 439    is dead and that some instructions will be eliminated after
 440    peeling.
 441
 442    Loop body is likely going to simplify further, this is difficult
 443    to guess, we just decrease the result by 1/3.  */
 444
 445 static unsigned HOST_WIDE_INT
 446 estimated_unrolled_size (struct loop_size *size,
 447                          unsigned HOST_WIDE_INT nunroll)
 448 {
 449   HOST_WIDE_INT unr_insns = ((nunroll)
 450                              * (HOST_WIDE_INT) (size->overall
 451                                                 - size->eliminated_by_peeling));
 452   if (!nunroll)
 453     unr_insns = 0;
 454   unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
 455
 456   unr_insns = unr_insns * 2 / 3;
 457   if (unr_insns <= 0)
 458     unr_insns = 1;
 459
 460   return unr_insns;
 461 }
 462
 463 /* Loop LOOP is known to not loop.  See if there is an edge in the loop
 464    body that can be remove to make the loop to always exit and at
 465    the same time it does not make any code potentially executed
 466    during the last iteration dead.
 467
 468    After complete unrolling we still may get rid of the conditional
 469    on the exit in the last copy even if we have no idea what it does.
 470    This is quite common case for loops of form
 471
 472      int a[5];
 473      for (i=0;i<b;i++)
 474        a[i]=0;
 475
 476    Here we prove the loop to iterate 5 times but we do not know
 477    it from induction variable.
 478
 479    For now we handle only simple case where there is exit condition
 480    just before the latch block and the latch block contains no statements
 481    with side effect that may otherwise terminate the execution of loop
 482    (such as by EH or by terminating the program or longjmp).
 483
 484    In the general case we may want to cancel the paths leading to statements
 485    loop-niter identified as having undefined effect in the last iteration.
 486    The other cases are hopefully rare and will be cleaned up later.  */
 487
 488 static edge
 489 loop_edge_to_cancel (class loop *loop)
 490 {
 491   unsigned i;
 492   edge edge_to_cancel;
 493   gimple_stmt_iterator gsi;
 494
 495   /* We want only one predecestor of the loop.  */
 496   if (EDGE_COUNT (loop->latch->preds) > 1)
 497     return NULL;
 498
 499   auto_vec<edge> exits = get_loop_exit_edges (loop);
 500
 501   FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
 502     {
 503        /* Find the other edge than the loop exit
 504           leaving the conditoinal.  */
 505        if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
 506          continue;
 507        if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
 508          edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
 509        else
 510          edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);
 511
 512       /* We only can handle conditionals.  */
 513       if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
 514         continue;
 515
 516       /* We should never have conditionals in the loop latch. */
 517       gcc_assert (edge_to_cancel->dest != loop->header);
 518
 519       /* Check that it leads to loop latch.  */
 520       if (edge_to_cancel->dest != loop->latch)
 521         continue;
 522
 523       /* Verify that the code in loop latch does nothing that may end program
 524          execution without really reaching the exit.  This may include
 525          non-pure/const function calls, EH statements, volatile ASMs etc.  */
 526       for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
 527         if (gimple_has_side_effects (gsi_stmt (gsi)))
 528            return NULL;
 529       return edge_to_cancel;
 530     }
 531   return NULL;
 532 }
 533
 534 /* Remove all tests for exits that are known to be taken after LOOP was
 535    peeled NPEELED times. Put gcc_unreachable before every statement
 536    known to not be executed.  */
 537
 538 static bool
 539 remove_exits_and_undefined_stmts (class loop *loop, unsigned int npeeled)
 540 {
 541   class nb_iter_bound *elt;
 542   bool changed = false;
 543
 544   for (elt = loop->bounds; elt; elt = elt->next)
 545     {
 546       /* If statement is known to be undefined after peeling, turn it
 547          into unreachable (or trap when debugging experience is supposed
 548          to be good).  */
 549       if (!elt->is_exit
 550           && wi::ltu_p (elt->bound, npeeled))
 551         {
 552           gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
 553           location_t loc = gimple_location (elt->stmt);
 554           gcall *stmt = gimple_build_builtin_unreachable (loc);
 555           gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
 556           split_block (gimple_bb (stmt), stmt);
 557           changed = true;
 558           if (dump_file && (dump_flags & TDF_DETAILS))
 559             {
 560               fprintf (dump_file, "Forced statement unreachable: ");
 561               print_gimple_stmt (dump_file, elt->stmt, 0);
 562             }
 563         }
 564       /* If we know the exit will be taken after peeling, update.  */
 565       else if (elt->is_exit
 566                && wi::leu_p (elt->bound, npeeled))
 567         {
 568           basic_block bb = gimple_bb (elt->stmt);
 569           edge exit_edge = EDGE_SUCC (bb, 0);
 570
 571           if (dump_file && (dump_flags & TDF_DETAILS))
 572             {
 573               fprintf (dump_file, "Forced exit to be taken: ");
 574               print_gimple_stmt (dump_file, elt->stmt, 0);
 575             }
 576           if (!loop_exit_edge_p (loop, exit_edge))
 577             exit_edge = EDGE_SUCC (bb, 1);
 578           exit_edge->probability = profile_probability::always ();
 579           gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
 580           gcond *cond_stmt = as_a <gcond *> (elt->stmt);
 581           if (exit_edge->flags & EDGE_TRUE_VALUE)
 582             gimple_cond_make_true (cond_stmt);
 583           else
 584             gimple_cond_make_false (cond_stmt);
 585           update_stmt (cond_stmt);
 586           changed = true;
 587         }
 588     }
 589   return changed;
 590 }
 591
 592 /* Remove all exits that are known to be never taken because of the loop bound
 593    discovered.  */
 594
 595 static bool
 596 remove_redundant_iv_tests (class loop *loop)
 597 {
 598   class nb_iter_bound *elt;
 599   bool changed = false;
 600
 601   if (!loop->any_upper_bound)
 602     return false;
 603   for (elt = loop->bounds; elt; elt = elt->next)
 604     {
 605       /* Exit is pointless if it won't be taken before loop reaches
 606          upper bound.  */
 607       if (elt->is_exit && loop->any_upper_bound
 608           && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
 609         {
 610           basic_block bb = gimple_bb (elt->stmt);
 611           edge exit_edge = EDGE_SUCC (bb, 0);
 612           class tree_niter_desc niter;
 613
 614           if (!loop_exit_edge_p (loop, exit_edge))
 615             exit_edge = EDGE_SUCC (bb, 1);
 616
 617           /* Only when we know the actual number of iterations, not
 618              just a bound, we can remove the exit.  */
 619           if (!number_of_iterations_exit (loop, exit_edge,
 620                                           &niter, false, false)
 621               || !integer_onep (niter.assumptions)
 622               || !integer_zerop (niter.may_be_zero)
 623               || !niter.niter
 624               || TREE_CODE (niter.niter) != INTEGER_CST
 625               || !wi::ltu_p (loop->nb_iterations_upper_bound,
 626                              wi::to_widest (niter.niter)))
 627             continue;
 628
 629           if (dump_file && (dump_flags & TDF_DETAILS))
 630             {
 631               fprintf (dump_file, "Removed pointless exit: ");
 632               print_gimple_stmt (dump_file, elt->stmt, 0);
 633             }
 634           gcond *cond_stmt = as_a <gcond *> (elt->stmt);
 635           if (exit_edge->flags & EDGE_TRUE_VALUE)
 636             gimple_cond_make_false (cond_stmt);
 637           else
 638             gimple_cond_make_true (cond_stmt);
 639           update_stmt (cond_stmt);
 640           changed = true;
 641         }
 642     }
 643   return changed;
 644 }
 645
 646 /* Stores loops that will be unlooped and edges that will be removed
 647    after we process whole loop tree. */
 648 static vec<loop_p> loops_to_unloop;
 649 static vec<int> loops_to_unloop_nunroll;
 650 static vec<edge> edges_to_remove;
 651 /* Stores loops that has been peeled.  */
 652 static bitmap peeled_loops;
 653
 654 /* Cancel all fully unrolled loops by putting __builtin_unreachable
 655    on the latch edge.
 656    We do it after all unrolling since unlooping moves basic blocks
 657    across loop boundaries trashing loop closed SSA form as well
 658    as SCEV info needed to be intact during unrolling.
 659
 660    IRRED_INVALIDATED is used to bookkeep if information about
 661    irreducible regions may become invalid as a result
 662    of the transformation.
 663    LOOP_CLOSED_SSA_INVALIDATED is used to bookkepp the case
 664    when we need to go into loop closed SSA form.  */
 665
 666 void
 667 unloop_loops (vec<class loop *> &loops_to_unloop,
 668               vec<int> &loops_to_unloop_nunroll,
 669               bitmap loop_closed_ssa_invalidated,
 670               bool *irred_invalidated)
 671 {
 672   while (loops_to_unloop.length ())
 673     {
 674       class loop *loop = loops_to_unloop.pop ();
 675       int n_unroll = loops_to_unloop_nunroll.pop ();
 676       basic_block latch = loop->latch;
 677       edge latch_edge = loop_latch_edge (loop);
 678       int flags = latch_edge->flags;
 679       location_t locus = latch_edge->goto_locus;
 680       gcall *stmt;
 681       gimple_stmt_iterator gsi;
 682
 683       remove_exits_and_undefined_stmts (loop, n_unroll);
 684
 685       /* Unloop destroys the latch edge.  */
 686       unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);
 687
 688       /* Create new basic block for the latch edge destination and wire
 689          it in.  */
 690       stmt = gimple_build_builtin_unreachable (locus);
 691       latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
 692       latch_edge->probability = profile_probability::never ();
 693       latch_edge->flags |= flags;
 694       latch_edge->goto_locus = locus;
 695
 696       add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
 697       latch_edge->dest->count = profile_count::zero ();
 698       set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);
 699
 700       gsi = gsi_start_bb (latch_edge->dest);
 701       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
 702     }
 703
 704   /* Remove edges in peeled copies.  Given remove_path removes dominated
 705      regions we need to cope with removal of already removed paths.  */
 706   unsigned i;
 707   edge e;
 708   auto_vec<int, 20> src_bbs;
 709   src_bbs.reserve_exact (edges_to_remove.length ());
 710   FOR_EACH_VEC_ELT (edges_to_remove, i, e)
 711     src_bbs.quick_push (e->src->index);
 712   FOR_EACH_VEC_ELT (edges_to_remove, i, e)
 713     if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i]))
 714       {
 715         bool ok = remove_path (e, irred_invalidated,
 716                                loop_closed_ssa_invalidated);
 717         gcc_assert (ok);
 718       }
 719   edges_to_remove.release ();
 720 }
 721
 722 /* Tries to unroll LOOP completely, i.e. NITER times.
 723    UL determines which loops we are allowed to unroll.
 724    EXIT is the exit of the loop that should be eliminated.
 725    MAXITER specifies a bound on the number of iterations, -1 if it is
 726    not known or too large for HOST_WIDE_INT.  The location
 727    LOCUS corresponding to the loop is used when emitting
 728    a summary of the unroll to the dump file.
        If MAY_BE_ZERO is true, bit 1 of the WONT_EXIT bitmap handed to the
        loop body duplication is cleared.  If ALLOW_PEEL is false and the
        MAXITER bound (rather than NITER) determines the unroll count, UL is
        demoted to UL_NO_GROWTH for a nonzero MAXITER.

        Returns true if LOOP was pushed onto loops_to_unloop, false
        otherwise.  */
 729
 730 static bool
 731 try_unroll_loop_completely (class loop *loop,
 732                             edge exit, tree niter, bool may_be_zero,
 733                             enum unroll_level ul,
 734                             HOST_WIDE_INT maxiter,
 735                             dump_user_location_t locus, bool allow_peel)
 736 {
 737   unsigned HOST_WIDE_INT n_unroll = 0;
 738   bool n_unroll_found = false;
 739   edge edge_to_cancel = NULL;
 740
 741   /* See if we proved number of iterations to be low constant.
 742
 743      EXIT is an edge that will be removed in all but last iteration of
 744      the loop.
 745
 746      EDGE_TO_CANCEL is an edge that will be removed from the last iteration
 747      of the unrolled sequence and is expected to make the final loop not
 748      rolling.
 749
 750      If the number of execution of loop is determined by standard induction
 751      variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving
 752      from the iv test.  */
 753   if (tree_fits_uhwi_p (niter))
 754     {
 755       n_unroll = tree_to_uhwi (niter);
 756       n_unroll_found = true;
 757       edge_to_cancel = EDGE_SUCC (exit->src, 0);
 758       if (edge_to_cancel == exit)
 759         edge_to_cancel = EDGE_SUCC (exit->src, 1);
 760     }
 761   /* We do not know the number of iterations and thus we cannot eliminate
 762      the EXIT edge.  */
 763   else
 764     exit = NULL;
 765
 766   /* See if we can improve our estimate by using recorded loop bounds.  */
 767   if ((maxiter == 0 || ul != UL_SINGLE_ITER)
 768       && maxiter >= 0
 769       && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
 770     {
 771       n_unroll = maxiter;
 772       n_unroll_found = true;
 773       /* Loop terminates before the IV variable test, so we cannot
 774          remove it in the last iteration.  */
 775       edge_to_cancel = NULL;
 776       /* If we do not allow peeling and we iterate just allow cases
 777          that do not grow code.  */
 778       if (!allow_peel && maxiter != 0)
 779         ul = UL_NO_GROWTH;
 780     }
 781
 782   if (!n_unroll_found)
 783     return false;
 784
 785   if (!loop->unroll
 786       && n_unroll > (unsigned) param_max_completely_peel_times)
 787     {
 788       if (dump_file && (dump_flags & TDF_DETAILS))
 789         fprintf (dump_file, "Not unrolling loop %d "
 790                  "(--param max-completely-peel-times limit reached).\n",
 791                  loop->num);
 792       return false;
 793     }
 794
       /* No edge was picked from the IV test above, or it was dropped because
          the MAXITER bound is used; ask loop_edge_to_cancel for one.  */
 795   if (!edge_to_cancel)
 796     edge_to_cancel = loop_edge_to_cancel (loop);
 797
       /* N_UNROLL == 0 needs no copies of the body; in that case only the
          profile is scaled (see the else arm below).  */
 798   if (n_unroll)
 799     {
           /* UL_SINGLE_ITER permits only the N_UNROLL == 0 case.  */
 800       if (ul == UL_SINGLE_ITER)
 801         return false;
 802
           /* A nonzero loop->unroll skips the size heuristics of the else
              arm; only the requested factor limits the unrolling.  */
 803       if (loop->unroll)
 804         {
 805           /* If the unrolling factor is too large, bail out.  */
 806           if (n_unroll > (unsigned)loop->unroll)
 807             {
 808               if (dump_file && (dump_flags & TDF_DETAILS))
 809                 fprintf (dump_file,
 810                          "Not unrolling loop %d: "
 811                          "user didn't want it unrolled completely.\n",
 812                          loop->num);
 813               return false;
 814             }
 815         }
 816       else
 817         {
 818           struct loop_size size;
 819           /* EXIT can be removed only if we are sure it passes first N_UNROLL
 820              iterations.  */
 821           bool remove_exit = (exit && niter
 822                               && TREE_CODE (niter) == INTEGER_CST
 823                               && wi::leu_p (n_unroll, wi::to_widest (niter)));
 824           bool large
 825             = tree_estimate_loop_size
 826                 (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
 827                  param_max_completely_peeled_insns);
 828           if (large)
 829             {
 830               if (dump_file && (dump_flags & TDF_DETAILS))
 831                 fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
 832                          loop->num);
 833               return false;
 834             }
 835
 836           unsigned HOST_WIDE_INT ninsns = size.overall;
 837           unsigned HOST_WIDE_INT unr_insns
 838             = estimated_unrolled_size (&size, n_unroll);
 839           if (dump_file && (dump_flags & TDF_DETAILS))
 840             {
 841               fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
 842               fprintf (dump_file, "  Estimated size after unrolling: %d\n",
 843                        (int) unr_insns);
 844             }
 845
 846           /* If the code is going to shrink, we don't need to be extra
 847              cautious on guessing if the unrolling is going to be
 848              profitable.  */
 849           if (unr_insns
 850               /* If there is IV variable that will become constant, we
 851                  save one instruction in the loop prologue we do not
 852                  account otherwise.  */
 853               <= ninsns + (size.constant_iv != false))
 854             ;
 855           /* We unroll only inner loops, because we do not consider it
 856              profitable otherwise.  We still can cancel loopback edge
 857              of not rolling loop; this is always a good idea.  */
 858           else if (ul == UL_NO_GROWTH)
 859             {
 860               if (dump_file && (dump_flags & TDF_DETAILS))
 861                 fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
 862                          loop->num);
 863               return false;
 864             }
 865           /* Outer loops tend to be less interesting candidates for
 866              complete unrolling unless we can do a lot of propagation
 867              into the inner loop body.  For now we disable outer loop
 868              unrolling when the code would grow.  */
 869           else if (loop->inner)
 870             {
 871               if (dump_file && (dump_flags & TDF_DETAILS))
 872                 fprintf (dump_file, "Not unrolling loop %d: "
 873                          "it is not innermost and code would grow.\n",
 874                          loop->num);
 875               return false;
 876             }
 877           /* If there is call on a hot path through the loop, then
 878              there is most probably not much to optimize.  */
 879           else if (size.num_non_pure_calls_on_hot_path)
 880             {
 881               if (dump_file && (dump_flags & TDF_DETAILS))
 882                 fprintf (dump_file, "Not unrolling loop %d: "
 883                          "contains call and code would grow.\n",
 884                          loop->num);
 885               return false;
 886             }
 887           /* If there is pure/const call in the function, then we can
 888              still optimize the unrolled loop body if it contains some
 889              other interesting code than the calls and code storing or
 890              cumulating the return value.  */
 891           else if (size.num_pure_calls_on_hot_path
 892                    /* One IV increment, one test, one ivtmp store and
 893                       one useful stmt.  That is about minimal loop
 894                       doing pure call.  */
 895                    && (size.non_call_stmts_on_hot_path
 896                        <= 3 + size.num_pure_calls_on_hot_path))
 897             {
 898               if (dump_file && (dump_flags & TDF_DETAILS))
 899                 fprintf (dump_file, "Not unrolling loop %d: "
 900                          "contains just pure calls and code would grow.\n",
 901                          loop->num);
 902               return false;
 903             }
 904           /* Complete unrolling is major win when control flow is
 905              removed and one big basic block is created.  If the loop
 906              contains control flow the optimization may still be a win
 907              because of eliminating the loop overhead but it also may
 908              blow the branch predictor tables.  Limit number of
 909              branches on the hot path through the peeled sequence.  */
 910           else if (size.num_branches_on_hot_path * (int)n_unroll
 911                    > param_max_peel_branches)
 912             {
 913               if (dump_file && (dump_flags & TDF_DETAILS))
 914                 fprintf (dump_file, "Not unrolling loop %d: "
 915                          "number of branches on hot path in the unrolled "
 916                          "sequence reaches --param max-peel-branches limit.\n",
 917                          loop->num);
 918               return false;
 919             }
 920           else if (unr_insns
 921                    > (unsigned) param_max_completely_peeled_insns)
 922             {
 923               if (dump_file && (dump_flags & TDF_DETAILS))
 924                 fprintf (dump_file, "Not unrolling loop %d: "
 925                          "number of insns in the unrolled sequence reaches "
 926                          "--param max-completely-peeled-insns limit.\n",
 927                          loop->num);
 928               return false;
 929             }
 930         }
 931
 932       if (!dbg_cnt (gimple_unroll))
 933         return false;
 934
 935       initialize_original_copy_tables ();
           /* WONT_EXIT has N_UNROLL + 1 bits.  It is set to all ones when EXIT
              is present and NITER is a constant not smaller than N_UNROLL
              (bit 0 is then cleared again if NITER equals N_UNROLL or an edge
              to cancel exists); otherwise EXIT is dropped and all bits are
              cleared.  */
 936       auto_sbitmap wont_exit (n_unroll + 1);
 937       if (exit && niter
 938           && TREE_CODE (niter) == INTEGER_CST
 939           && wi::leu_p (n_unroll, wi::to_widest (niter)))
 940         {
 941           bitmap_ones (wont_exit);
 942           if (wi::eq_p (wi::to_widest (niter), n_unroll)
 943               || edge_to_cancel)
 944             bitmap_clear_bit (wont_exit, 0);
 945         }
 946       else
 947         {
 948           exit = NULL;
 949           bitmap_clear (wont_exit);
 950         }
 951       if (may_be_zero)
 952         bitmap_clear_bit (wont_exit, 1);
 953
 954       /* If loop was originally estimated to iterate too many times,
 955          reduce the profile to avoid new profile inconsistencies.  */
 956       scale_loop_profile (loop, profile_probability::always (), n_unroll);
 957
 958       if (!gimple_duplicate_loop_body_to_header_edge (
 959             loop, loop_preheader_edge (loop), n_unroll, wont_exit, exit,
 960             &edges_to_remove,
 961             DLTHE_FLAG_UPDATE_FREQ | DLTHE_FLAG_COMPLETTE_PEEL))
 962         {
 963           free_original_copy_tables ();
 964           if (dump_file && (dump_flags & TDF_DETAILS))
 965             fprintf (dump_file, "Failed to duplicate the loop\n");
 966           return false;
 967         }
 968
 969       free_original_copy_tables ();
 970     }
 971   else
 972     scale_loop_profile (loop, profile_probability::always (), 0);
 973
 974   /* Remove the conditional from the last copy of the loop.  */
 975   if (edge_to_cancel)
 976     {
 977       gcond *cond = as_a <gcond *> (*gsi_last_bb (edge_to_cancel->src));
 978       force_edge_cold (edge_to_cancel, true);
 979       if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
 980         gimple_cond_make_false (cond);
 981       else
 982         gimple_cond_make_true (cond);
 983       update_stmt (cond);
 984       /* Do not remove the path, as doing so may remove outer loop and
 985          confuse bookkeeping code in tree_unroll_loops_completely.  */
 986     }
 987
 988   /* Store the loop for later unlooping and exit removal.  */
 989   loops_to_unloop.safe_push (loop);
 990   loops_to_unloop_nunroll.safe_push (n_unroll);
 991
 992   if (dump_enabled_p ())
 993     {
 994       if (!n_unroll)
 995         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
 996                          "loop turned into non-loop; it never loops\n");
 997       else
 998         {
 999           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
1000                            "loop with %d iterations completely unrolled",
1001                            (int) n_unroll);
1002           if (loop->header->count.initialized_p ())
1003             dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
1004                          " (header execution count %d)",
1005                          (int)loop->header->count.to_gcov_type ());
1006           dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
1007         }
1008     }
1009
1010   if (dump_file && (dump_flags & TDF_DETAILS))
1011     {
1012       if (exit)
1013         fprintf (dump_file, "Exit condition of peeled iterations was "
1014                  "eliminated.\n");
1015       if (edge_to_cancel)
1016         fprintf (dump_file, "Last iteration exit edge was proved true.\n");
1017       else
1018         fprintf (dump_file, "Latch of last iteration was marked by "
1019                  "__builtin_unreachable ().\n");
1020     }
1021
1022   return true;
1023 }
1024
1025 /* Estimate the size of NPEEL peeled copies of the loop described by SIZE.
     The result is never smaller than 1.  */
1026 static unsigned HOST_WIDE_INT
1027 estimated_peeled_sequence_size (struct loop_size *size,
1028                                 unsigned HOST_WIDE_INT npeel)
1029 {
1030   HOST_WIDE_INT per_copy = size->overall - size->eliminated_by_peeling;
1031   unsigned HOST_WIDE_INT total = npeel * per_copy;
       return total > 1 ? total : 1;
1032 }
1033
1034 /* Adjust the iteration-count information recorded for LOOP after NPEEL
1035    iterations have been peeled off.  When PRECISE is false only likely exits
1036    were duplicated, so bounds that must always hold are left untouched.  */
1037 void
1038 adjust_loop_info_after_peeling (class loop *loop, int npeel, bool precise)
1039 {
1040   if (loop->any_estimate)
1041     {
1042       /* Peeling mostly targets loops whose first few iterations
1043          are special, so assuming that the remaining iterations
1044          behave the same way is not quite correct.  Lacking
1045          better information we still lower the estimate, since
1046          that is likely better than leaving it unchanged.
1047
1048          The estimate is dropped when it would look wrong.
1049
1050          TODO: We likely want to special case the situation where
1051          peeling is optimizing out exit edges and only update
1052          estimates here.  */
1053
1054       if (!wi::leu_p (npeel, loop->nb_iterations_estimate))
1055         loop->any_estimate = false;
1056       else
1057         loop->nb_iterations_estimate -= npeel;
1058     }
1059   if (precise && loop->any_upper_bound)
1060     {
1061       if (!wi::leu_p (npeel, loop->nb_iterations_upper_bound))
1062         {
1063           /* Peeling the maximal number of iterations or more
1064              makes no sense and is a bug; such a loop should
1065              have been peeled completely.  */
1066           gcc_unreachable ();
1067         }
1068       else
1069         loop->nb_iterations_upper_bound -= npeel;
1070     }
1071   if (loop->any_likely_upper_bound)
1072     {
1073       if (!wi::leu_p (npeel, loop->nb_iterations_likely_upper_bound))
1074         {
1075           loop->nb_iterations_likely_upper_bound = 0;
1076           loop->nb_iterations_estimate = 0;
1077           loop->any_estimate = true;
1078         }
1079       else
1080         loop->nb_iterations_likely_upper_bound -= npeel;
1081     }
1082 }
1083
1084 /* If the loop is expected to iterate N times and is
1085    small enough, duplicate the loop body N+1 times before
1086    the loop itself.  This way the hot path will never
1087    enter the loop.
1088    LOOP, EXIT, NITER, MAY_BE_ZERO and MAXITER have the same meaning as
        for try_unroll_loop_completely.  Returns true if LOOP was peeled.  */
1089
1090 static bool
1091 try_peel_loop (class loop *loop,
1092                edge exit, tree niter, bool may_be_zero,
1093                HOST_WIDE_INT maxiter)
1094 {
1095   HOST_WIDE_INT npeel;
1096   struct loop_size size;
1097   int peeled_size;
1098
       /* Peeling must be enabled and the PEELED_LOOPS bitmap must exist.  */
1099   if (!flag_peel_loops
1100       || param_max_peel_times <= 0
1101       || !peeled_loops)
1102     return false;
1103
1104   if (bitmap_bit_p (peeled_loops, loop->num))
1105     {
1106       if (dump_file)
1107         fprintf (dump_file, "Not peeling: loop is already peeled\n");
1108       return false;
1109     }
1110
1111   /* We don't peel loops that will be unrolled as this can duplicate a
1112      loop more times than the user requested.  */
1113   if (loop->unroll)
1114     {
1115       if (dump_file)
1116         fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
1117       return false;
1118     }
1119
1120   /* Peel only innermost loops.
1121      While the code is perfectly capable of peeling non-innermost loops,
1122      the heuristics would probably need some improvements. */
1123   if (loop->inner)
1124     {
1125       if (dump_file)
1126         fprintf (dump_file, "Not peeling: outer loop\n");
1127       return false;
1128     }
1129
1130   if (!optimize_loop_for_speed_p (loop))
1131     {
1132       if (dump_file)
1133         fprintf (dump_file, "Not peeling: cold loop\n");
1134       return false;
1135     }
1136
1137   /* Check if there is an estimate on the number of iterations.  */
1138   npeel = estimated_loop_iterations_int (loop);
1139   if (npeel < 0)
1140     npeel = likely_max_loop_iterations_int (loop);
1141   if (npeel < 0)
1142     {
1143       if (dump_file)
1144         fprintf (dump_file, "Not peeling: number of iterations is not "
1145                  "estimated\n");
1146       return false;
1147     }
1148   if (maxiter >= 0 && maxiter <= npeel)
1149     {
1150       if (dump_file)
1151         fprintf (dump_file, "Not peeling: upper bound is known so can "
1152                  "unroll completely\n");
1153       return false;
1154     }
1155
1156   /* We want to peel estimated number of iterations + 1 (so we never
1157      enter the loop on quick path).  Check against PARAM_MAX_PEEL_TIMES
1158      and be sure to avoid overflows.  */
1159   if (npeel > param_max_peel_times - 1)
1160     {
1161       if (dump_file)
1162         fprintf (dump_file, "Not peeling: rolls too much "
1163                  "(%i + 1 > --param max-peel-times)\n", (int) npeel);
1164       return false;
1165     }
1166   npeel++;
1167
1168   /* Check peeled loops size.  */
1169   tree_estimate_loop_size (loop, exit, NULL, &size,
1170                            param_max_peeled_insns);
1171   if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
1172       > param_max_peeled_insns)
1173     {
           /* The limit checked above is param_max_peeled_insns, so name that
              parameter and terminate the line like every other dump message.  */
1174       if (dump_file)
1175         fprintf (dump_file, "Not peeling: peeled sequence size is too large "
1176                  "(%i insns > --param max-peeled-insns)\n", peeled_size);
1177       return false;
1178     }
1179
1180   if (!dbg_cnt (gimple_unroll))
1181     return false;
1182
1183   /* Duplicate possibly eliminating the exits.  */
1184   initialize_original_copy_tables ();
1185   auto_sbitmap wont_exit (npeel + 1);
1186   if (exit && niter
1187       && TREE_CODE (niter) == INTEGER_CST
1188       && wi::leu_p (npeel, wi::to_widest (niter)))
1189     {
1190       bitmap_ones (wont_exit);
1191       bitmap_clear_bit (wont_exit, 0);
1192     }
1193   else
1194     {
1195       exit = NULL;
1196       bitmap_clear (wont_exit);
1197     }
1198   if (may_be_zero)
1199     bitmap_clear_bit (wont_exit, 1);
1200
1201   if (!gimple_duplicate_loop_body_to_header_edge (
1202         loop, loop_preheader_edge (loop), npeel, wont_exit, exit,
1203         &edges_to_remove, DLTHE_FLAG_UPDATE_FREQ))
1204     {
1205       free_original_copy_tables ();
1206       return false;
1207     }
1208   free_original_copy_tables ();
1209   if (dump_file && (dump_flags & TDF_DETAILS))
1210     {
1211       fprintf (dump_file, "Peeled loop %d, %i times.\n",
1212                loop->num, (int) npeel);
1213     }
1214   adjust_loop_info_after_peeling (loop, npeel, true);
1215
       /* Remember the loop so that it is not peeled a second time.  */
1216   bitmap_set_bit (peeled_loops, loop->num);
1217   return true;
1218 }
1219 /* Adds a canonical induction variable to LOOP if suitable.
1220    CREATE_IV is true if we may create a new iv.  UL determines
1221    which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
1222    to determine the number of iterations of a loop by direct evaluation.
1223    ALLOW_PEEL is passed through to try_unroll_loop_completely.
        Returns true if cfg is changed.   */
1224
1225 static bool
1226 canonicalize_loop_induction_variables (class loop *loop,
1227                                        bool create_iv, enum unroll_level ul,
1228                                        bool try_eval, bool allow_peel)
1229 {
1230   edge exit = NULL;
1231   tree niter;
1232   HOST_WIDE_INT maxiter;
1233   bool modified = false;
1234   class tree_niter_desc niter_desc;
1235   bool may_be_zero = false;
1236
1237   /* For unrolling allow conditional constant or zero iterations, thus
1238      perform loop-header copying on-the-fly.  */
1239   exit = single_exit (loop);
1240   niter = chrec_dont_know;
1241   if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
1242     {
1243       niter = niter_desc.niter;
1244       may_be_zero
1245         = niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero);
1246     }
1247   if (TREE_CODE (niter) != INTEGER_CST)
1248     {
1249       /* For non-constant niter fold may_be_zero into niter again.  */
1250       if (may_be_zero)
1251         {
1252           if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
1253             niter = fold_build3 (COND_EXPR, TREE_TYPE (niter),
1254                                  niter_desc.may_be_zero,
1255                                  build_int_cst (TREE_TYPE (niter), 0), niter);
1256           else
1257             niter = chrec_dont_know;
1258           may_be_zero = false;
1259         }
1260
1261       /* If the loop has more than one exit, try checking all of them
1262          for # of iterations determinable through scev.  */
1263       if (!exit)
1264         niter = find_loop_niter (loop, &exit);
1265
1266       /* Finally if everything else fails, try brute force evaluation.  */
1267       if (try_eval
1268           && (chrec_contains_undetermined (niter)
1269               || TREE_CODE (niter) != INTEGER_CST))
1270         niter = find_loop_niter_by_eval (loop, &exit);
1271
           /* Without a constant iteration count no exit is treated as the
              one to eliminate.  */
1272       if (TREE_CODE (niter) != INTEGER_CST)
1273         exit = NULL;
1274     }
1275
1276   /* We work exceptionally hard here to estimate the bound
1277      by find_loop_niter_by_eval.  Be sure to keep it for future.  */
1278   if (niter && TREE_CODE (niter) == INTEGER_CST)
1279     {
1280       auto_vec<edge> exits = get_loop_exit_edges  (loop);
1281       record_niter_bound (loop, wi::to_widest (niter),
1282                           exit == single_likely_exit (loop, exits), true);
1283     }
1284
1285   /* Force re-computation of loop bounds so we can remove redundant exits.  */
1286   maxiter = max_loop_iterations_int (loop);
1287
1288   if (dump_file && (dump_flags & TDF_DETAILS)
1289       && TREE_CODE (niter) == INTEGER_CST)
1290     {
1291       fprintf (dump_file, "Loop %d iterates ", loop->num);
1292       print_generic_expr (dump_file, niter, TDF_SLIM);
1293       fprintf (dump_file, " times.\n");
1294     }
1295   if (dump_file && (dump_flags & TDF_DETAILS)
1296       && maxiter >= 0)
1297     {
1298       fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
1299                (int)maxiter);
1300     }
1301   if (dump_file && (dump_flags & TDF_DETAILS)
1302       && likely_max_loop_iterations_int (loop) >= 0)
1303     {
1304       fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
1305                loop->num, (int)likely_max_loop_iterations_int (loop));
1306     }
1307
1308   /* Remove exits that are known to be never taken based on loop bound.
1309      Needs to be called after max_loop_iterations_int has been computed,
1310      which populates the loop bounds.  */
1311   modified |= remove_redundant_iv_tests (loop);
1312
       /* Complete unrolling takes precedence; when it succeeds neither a
          canonical iv is created nor peeling is attempted.  */
1313   dump_user_location_t locus = find_loop_location (loop);
1314   if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul,
1315                                   maxiter, locus, allow_peel))
1316     return true;
1317
1318   if (create_iv
1319       && niter && !chrec_contains_undetermined (niter)
1320       && exit && just_once_each_iteration_p (loop, exit->src))
1321     {
1322       tree iv_niter = niter;
           /* Fold MAY_BE_ZERO into the count used for the new iv; if it is
              not a comparison no iv is created.  */
1323       if (may_be_zero)
1324         {
1325           if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
1326             iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter),
1327                                     niter_desc.may_be_zero,
1328                                     build_int_cst (TREE_TYPE (iv_niter), 0),
1329                                     iv_niter);
1330           else
1331             iv_niter = NULL_TREE;
1332         }
1333       if (iv_niter)
1334         create_canonical_iv (loop, exit, iv_niter);
1335     }
1336
       /* Peeling is attempted only at the UL_ALL level.  */
1337   if (ul == UL_ALL)
1338     modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter);
1339
1340   return modified;
1341 }
1342
1343 /* Entry point of the pass.  Tries to add a canonical induction variable
1344    to every suitable loop of the current function.  */
1345
1346 unsigned int
1347 canonicalize_induction_variables (void)
1348 {
1349   bitmap lcssa_stale = BITMAP_ALLOC (NULL);
1350   bool irreducible_stale = false;
1351   bool cfg_changed = false;
1352
1353   estimate_numbers_of_iterations (cfun);
1354
1355   /* Visit the loops in LI_FROM_INNERMOST order.  */
1356   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1357     if (canonicalize_loop_induction_variables (loop, true, UL_SINGLE_ITER,
1358                                                true, false))
1359       cfg_changed = true;
1360
1361   gcc_assert (!need_ssa_update_p (cfun));
1362
1363   unloop_loops (loops_to_unloop, loops_to_unloop_nunroll, lcssa_stale,
1364                 &irreducible_stale);
1365   loops_to_unloop.release ();
1366   loops_to_unloop_nunroll.release ();
1367   if (irreducible_stale
1368       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1369     mark_irreducible_loops ();
1370
1371   /* Brute force evaluation may have revealed new information, so drop the
1372      recorded iteration count estimates and reset scalar evolutions.  */
1373   free_numbers_of_iterations_estimates (cfun);
1374   scev_reset ();
1375
1376   if (!bitmap_empty_p (lcssa_stale))
1377     {
1378       gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
1379       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1380     }
1381   BITMAP_FREE (lcssa_stale);
1382
1383   /* Request a CFG cleanup (TODO_cleanup_cfg) only when some loop was
1384      modified.  */
1385   return cfg_changed ? TODO_cleanup_cfg : 0;
1386 }
1387
1388 /* Process loops from innermost to outer, stopping at the innermost
1389    loop we unrolled.
        LOOP is the root of the loop subtree to process.  MAY_INCREASE_SIZE
        and UNROLL_OUTER select the unroll level used for LOOP (see below).
        FATHER_BBS collects header block indices of the fathers of unrolled
        loops.  Returns true if LOOP or one of its inner loops was changed.  */
1390
1391 static bool
1392 tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
1393                                 bitmap father_bbs, class loop *loop)
1394 {
1395   class loop *loop_father;
1396   bool changed = false;
1397   class loop *inner;
1398   enum unroll_level ul;
       /* Loop count on entry; loops with a number at or above it are skipped
          in the walk below.  */
1399   unsigned num = number_of_loops (cfun);
1400
1401   /* Process inner loops first.  Don't walk loops added by the recursive
1402      calls because SSA form is not up-to-date.  They can be handled in the
1403      next iteration.  */
1404   bitmap child_father_bbs = NULL;
1405   for (inner = loop->inner; inner != NULL; inner = inner->next)
1406     if ((unsigned) inner->num < num)
1407       {
1408         if (!child_father_bbs)
1409           child_father_bbs = BITMAP_ALLOC (NULL);
1410         if (tree_unroll_loops_completely_1 (may_increase_size, unroll_outer,
1411                                             child_father_bbs, inner))
1412           {
                 /* Merge what the child recorded into our own set and reuse
                    the scratch bitmap for the next sibling.  */
1413             bitmap_ior_into (father_bbs, child_father_bbs);
1414             bitmap_clear (child_father_bbs);
1415             changed = true;
1416           }
1417       }
1418   if (child_father_bbs)
1419     BITMAP_FREE (child_father_bbs);
1420
1421   /* If we changed an inner loop we cannot process outer loops in this
1422      iteration because SSA form is not up-to-date.  Continue with
1423      siblings of outer loops instead.  */
1424   if (changed)
1425     {
1426       /* If we are recorded as father clear all other fathers that
1427          are necessarily covered already to avoid redundant work.  */
1428       if (bitmap_bit_p (father_bbs, loop->header->index))
1429         {
1430           bitmap_clear (father_bbs);
1431           bitmap_set_bit (father_bbs, loop->header->index);
1432         }
1433       return true;
1434     }
1435
1436   /* Don't unroll #pragma omp simd loops until the vectorizer
1437      attempts to vectorize those.  */
1438   if (loop->force_vectorize)
1439     return false;
1440
1441   /* Try to unroll this loop.  */
1442   loop_father = loop_outer (loop);
       /* A loop without an outer loop is not unrolled.  */
1443   if (!loop_father)
1444     return false;
1445
1446   if (loop->unroll > 1)
1447     ul = UL_ALL;
1448   else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
1449       /* Unroll outermost loops only if asked to do so or they do
1450          not cause code growth.  */
1451       && (unroll_outer || loop_outer (loop_father)))
1452     ul = UL_ALL;
1453   else
1454     ul = UL_NO_GROWTH;
1455
1456   if (canonicalize_loop_induction_variables
1457         (loop, false, ul, !flag_tree_loop_ivcanon, unroll_outer))
1458     {
1459       /* If we'll continue unrolling, we need to propagate constants
1460          within the new basic blocks to fold away induction variable
1461          computations; otherwise, the size might blow up before the
1462          iteration is complete and the IR eventually cleaned up.  */
1463       if (loop_outer (loop_father))
1464         {
1465           /* Once we process our father we will have processed
1466              the fathers of our children as well, so avoid doing
1467              redundant work and clear fathers we've gathered sofar.  */
1468           bitmap_clear (father_bbs);
1469           bitmap_set_bit (father_bbs, loop_father->header->index);
1470         }
1471       else if (unroll_outer)
1472         /* Trigger scalar cleanup once any outermost loop gets unrolled.  */
1473         cfun->pending_TODOs |= PENDING_TODO_force_next_scalar_cleanup;
1474
1475       return true;
1476     }
1477
1478   return false;
1479 }
1480
1481 /* Unroll LOOPS completely if they iterate just few times.  Unless
1482    MAY_INCREASE_SIZE is true, perform the unrolling only if the
1483    size of the code does not increase.  UNROLL_OUTER is passed on to
        tree_unroll_loops_completely_1.  The unrolling is repeated while it
        changes something, at most param_max_unroll_iterations further
        times.  Always returns 0.  */
1484
1485 static unsigned int
1486 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
1487 {
1488   bitmap father_bbs = BITMAP_ALLOC (NULL);
1489   bool changed;
1490   int iteration = 0;
1491   bool irred_invalidated = false;
1492
1493   estimate_numbers_of_iterations (cfun);
1494
1495   do
1496     {
1497       changed = false;
1498       bitmap loop_closed_ssa_invalidated = NULL;
1499
1500       if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
1501         loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1502
1503       free_numbers_of_iterations_estimates (cfun);
1504       estimate_numbers_of_iterations (cfun);
1505
1506       changed = tree_unroll_loops_completely_1 (may_increase_size,
1507                                                 unroll_outer, father_bbs,
1508                                                 current_loops->tree_root);
1509       if (changed)
1510         {
1511           unsigned i;
1512
1513           unloop_loops (loops_to_unloop,
1514                         loops_to_unloop_nunroll,
1515                         loop_closed_ssa_invalidated,
1516                         &irred_invalidated);
1517           loops_to_unloop.release ();
1518           loops_to_unloop_nunroll.release ();
1519
1520           /* We cannot use TODO_update_ssa_no_phi because VOPS gets confused.  */
1521           if (loop_closed_ssa_invalidated
1522               && !bitmap_empty_p (loop_closed_ssa_invalidated))
1523             rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
1524                                           TODO_update_ssa);
1525           else
1526             update_ssa (TODO_update_ssa);
1527
1528           /* father_bbs is a bitmap of loop father header BB indices.
1529              Translate that to what non-root loops these BBs belong to now.  */
1530           bitmap_iterator bi;
1531           bitmap fathers = BITMAP_ALLOC (NULL);
1532           EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi)
1533             {
1534               basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i);
1535               if (! unrolled_loop_bb)
1536                 continue;
1537               if (loop_outer (unrolled_loop_bb->loop_father))
1538                 bitmap_set_bit (fathers,
1539                                 unrolled_loop_bb->loop_father->num);
1540             }
1541           bitmap_clear (father_bbs);
1542           /* Propagate the constants within the new basic blocks.  */
1543           EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
1544             {
1545               loop_p father = get_loop (cfun, i);
1546               bitmap exit_bbs = BITMAP_ALLOC (NULL);
1547               loop_exit *exit = father->exits->next;
1548               while (exit->e)
1549                 {
1550                   bitmap_set_bit (exit_bbs, exit->e->dest->index);
1551                   exit = exit->next;
1552                 }
1553               do_rpo_vn (cfun, loop_preheader_edge (father), exit_bbs);
                   /* The bitmap of exit destinations is private to this
                      iteration; release it like the other bitmaps allocated
                      in this function.  */
                   BITMAP_FREE (exit_bbs);
1554             }
1555           BITMAP_FREE (fathers);
1556
1557           /* Clean up the information about numbers of iterations, since
1558              complete unrolling might have invalidated it.  */
1559           scev_reset ();
1560
1561           /* This will take care of removing completely unrolled loops
1562              from the loop structures so we can continue unrolling now
1563              innermost loops.  */
1564           if (cleanup_tree_cfg ())
1565             update_ssa (TODO_update_ssa_only_virtuals);
1566
1567           if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA))
1568             verify_loop_closed_ssa (true);
1569         }
1570       if (loop_closed_ssa_invalidated)
1571         BITMAP_FREE (loop_closed_ssa_invalidated);
1572     }
1573   while (changed
1574          && ++iteration <= param_max_unroll_iterations);
1575
1576   BITMAP_FREE (father_bbs);
1577
1578   if (irred_invalidated
1579       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1580     mark_irreducible_loops ();
1581
1582   return 0;
1583 }
1584
1585 /* Canonical induction variable creation pass.  */
1586
1587 namespace {
1588
1589 const pass_data pass_data_iv_canon =
1590 {
1591   GIMPLE_PASS, /* type */
1592   "ivcanon", /* name */
1593   OPTGROUP_LOOP, /* optinfo_flags */
1594   TV_TREE_LOOP_IVCANON, /* tv_id */
1595   ( PROP_cfg | PROP_ssa ), /* properties_required */
1596   0, /* properties_provided */
1597   0, /* properties_destroyed */
1598   0, /* todo_flags_start */
1599   0, /* todo_flags_finish */
1600 };
1601
1602 class pass_iv_canon : public gimple_opt_pass
1603 {
1604 public:
1605   pass_iv_canon (gcc::context *ctxt)
1606     : gimple_opt_pass (pass_data_iv_canon, ctxt)
1607   {}
1608
1609   /* opt_pass methods: */
1610   bool gate (function *) final override { return flag_tree_loop_ivcanon != 0; }
1611   unsigned int execute (function *fun) final override;
1612
1613 }; // class pass_iv_canon
1614
1615 unsigned int
1616 pass_iv_canon::execute (function *fun)
1617 {
1618   if (number_of_loops (fun) <= 1)
1619     return 0;
1620
1621   return canonicalize_induction_variables ();
1622 }
1623
1624 } // anon namespace
1625
1626 gimple_opt_pass *
1627 make_pass_iv_canon (gcc::context *ctxt)
1628 {
1629   return new pass_iv_canon (ctxt);
1630 }
1631
1632 /* Complete unrolling of loops.  */
1633
1634 namespace {
1635
1636 const pass_data pass_data_complete_unroll =
1637 {
1638   GIMPLE_PASS, /* type */
1639   "cunroll", /* name */
1640   OPTGROUP_LOOP, /* optinfo_flags */
1641   TV_COMPLETE_UNROLL, /* tv_id */
1642   ( PROP_cfg | PROP_ssa ), /* properties_required */
1643   0, /* properties_provided */
1644   0, /* properties_destroyed */
1645   0, /* todo_flags_start */
1646   0, /* todo_flags_finish */
1647 };
1648
1649 class pass_complete_unroll : public gimple_opt_pass
1650 {
1651 public:
1652   pass_complete_unroll (gcc::context *ctxt)
1653     : gimple_opt_pass (pass_data_complete_unroll, ctxt)
1654   {}
1655
1656   /* opt_pass methods: */
1657   unsigned int execute (function *) final override;
1658
1659 }; // class pass_complete_unroll
1660
1661 unsigned int
1662 pass_complete_unroll::execute (function *fun)
1663 {
1664   if (number_of_loops (fun) <= 1)
1665     return 0;
1666
1667   /* If we ever decide to run loop peeling more than once, we will need to
1668      track loops already peeled in loop structures themselves to avoid
1669      re-peeling the same loop multiple times.  */
1670   if (flag_peel_loops)
1671     peeled_loops = BITMAP_ALLOC (NULL);
1672   unsigned int val = tree_unroll_loops_completely (flag_cunroll_grow_size,
1673                                                    true);
1674   if (peeled_loops)
1675     {
1676       BITMAP_FREE (peeled_loops);
1677       peeled_loops = NULL;
1678     }
1679   return val;
1680 }
1681
1682 } // anon namespace
1683
1684 gimple_opt_pass *
1685 make_pass_complete_unroll (gcc::context *ctxt)
1686 {
1687   return new pass_complete_unroll (ctxt);
1688 }
1689
1690 /* Complete unrolling of inner loops.  */
1691
1692 namespace {
1693
1694 const pass_data pass_data_complete_unrolli =
1695 {
1696   GIMPLE_PASS, /* type */
1697   "cunrolli", /* name */
1698   OPTGROUP_LOOP, /* optinfo_flags */
1699   TV_COMPLETE_UNROLL, /* tv_id */
1700   ( PROP_cfg | PROP_ssa ), /* properties_required */
1701   0, /* properties_provided */
1702   0, /* properties_destroyed */
1703   0, /* todo_flags_start */
1704   0, /* todo_flags_finish */
1705 };
1706
1707 class pass_complete_unrolli : public gimple_opt_pass
1708 {
1709 public:
1710   pass_complete_unrolli (gcc::context *ctxt)
1711     : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
1712   {}
1713
1714   /* opt_pass methods: */
1715   bool gate (function *) final override { return optimize >= 2; }
1716   unsigned int execute (function *) final override;
1717
1718 }; // class pass_complete_unrolli
1719
1720 unsigned int
1721 pass_complete_unrolli::execute (function *fun)
1722 {
1723   unsigned ret = 0;
1724
1725   loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
1726   if (number_of_loops (fun) > 1)
1727     {
1728       scev_initialize ();
1729       ret = tree_unroll_loops_completely (optimize >= 3, false);
1730       scev_finalize ();
1731     }
1732   loop_optimizer_finalize ();
1733
1734   return ret;
1735 }
1736
1737 } // anon namespace
1738
1739 gimple_opt_pass *
1740 make_pass_complete_unrolli (gcc::context *ctxt)
1741 {
1742   return new pass_complete_unrolli (ctxt);
1743 }
1744
1745