gcc/tree-ssa-loop-ivcanon.c

   1 /* Induction variable canonicalization and loop peeling.
   2    Copyright (C) 2004-2017 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
/* This pass detects the loops that iterate a constant number of times,
   adds a canonical induction variable (step -1, tested against 0)
   and replaces the exit test.  This enables the less powerful rtl
   level analysis to use this information.

   This might spoil the code in some cases (by increasing register pressure).
   Note that if the new variable is not needed, ivopts will get rid
   of it, so it might only be a problem when there are no other linear
   induction variables.  In that case the created optimization possibilities
   are likely to pay off.

   We also perform
     - complete unrolling (or peeling) when the loop rolls few enough
       times
     - simple peeling (i.e. copying a few initial iterations before the loop)
       when an estimate of the number of iterations is known (typically from
       the profile info).  */
  37
  38 #include "config.h"
  39 #include "system.h"
  40 #include "coretypes.h"
  41 #include "backend.h"
  42 #include "tree.h"
  43 #include "gimple.h"
  44 #include "cfghooks.h"
  45 #include "tree-pass.h"
  46 #include "ssa.h"
  47 #include "cgraph.h"
  48 #include "gimple-pretty-print.h"
  49 #include "fold-const.h"
  50 #include "profile.h"
  51 #include "gimple-fold.h"
  52 #include "tree-eh.h"
  53 #include "gimple-iterator.h"
  54 #include "tree-cfg.h"
  55 #include "tree-ssa-loop-manip.h"
  56 #include "tree-ssa-loop-niter.h"
  57 #include "tree-ssa-loop.h"
  58 #include "tree-into-ssa.h"
  59 #include "cfgloop.h"
  60 #include "tree-chrec.h"
  61 #include "tree-scalar-evolution.h"
  62 #include "params.h"
  63 #include "tree-inline.h"
  64 #include "tree-cfgcleanup.h"
  65 #include "builtins.h"
  66
  67 /* Specifies types of loops that may be unrolled.  */
  68
  69 enum unroll_level
  70 {
  71   UL_SINGLE_ITER,       /* Only loops that exit immediately in the first
  72                            iteration.  */
  73   UL_NO_GROWTH,         /* Only loops whose unrolling will not cause increase
  74                            of code size.  */
  75   UL_ALL                /* All suitable loops.  */
  76 };
  77
  78 /* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
  79    is the exit edge whose condition is replaced.  */
  80
  81 static void
  82 create_canonical_iv (struct loop *loop, edge exit, tree niter)
  83 {
  84   edge in;
  85   tree type, var;
  86   gcond *cond;
  87   gimple_stmt_iterator incr_at;
  88   enum tree_code cmp;
  89
  90   if (dump_file && (dump_flags & TDF_DETAILS))
  91     {
  92       fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
  93       print_generic_expr (dump_file, niter, TDF_SLIM);
  94       fprintf (dump_file, " iterations.\n");
  95     }
  96
  97   cond = as_a <gcond *> (last_stmt (exit->src));
  98   in = EDGE_SUCC (exit->src, 0);
  99   if (in == exit)
 100     in = EDGE_SUCC (exit->src, 1);
 101
 102   /* Note that we do not need to worry about overflows, since
 103      type of niter is always unsigned and all comparisons are
 104      just for equality/nonequality -- i.e. everything works
 105      with a modulo arithmetics.  */
 106
 107   type = TREE_TYPE (niter);
 108   niter = fold_build2 (PLUS_EXPR, type,
 109                        niter,
 110                        build_int_cst (type, 1));
 111   incr_at = gsi_last_bb (in->src);
 112   create_iv (niter,
 113              build_int_cst (type, -1),
 114              NULL_TREE, loop,
 115              &incr_at, false, NULL, &var);
 116
 117   cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
 118   gimple_cond_set_code (cond, cmp);
 119   gimple_cond_set_lhs (cond, var);
 120   gimple_cond_set_rhs (cond, build_int_cst (type, 0));
 121   update_stmt (cond);
 122 }
 123
 124 /* Describe size of loop as detected by tree_estimate_loop_size.  */
 125 struct loop_size
 126 {
 127   /* Number of instructions in the loop.  */
 128   int overall;
 129
 130   /* Number of instructions that will be likely optimized out in
 131      peeled iterations of loop  (i.e. computation based on induction
 132      variable where induction variable starts at known constant.)  */
 133   int eliminated_by_peeling;
 134
 135   /* Same statistics for last iteration of loop: it is smaller because
 136      instructions after exit are not executed.  */
 137   int last_iteration;
 138   int last_iteration_eliminated_by_peeling;
 139
 140   /* If some IV computation will become constant.  */
 141   bool constant_iv;
 142
 143   /* Number of call stmts that are not a builtin and are pure or const
 144      present on the hot path.  */
 145   int num_pure_calls_on_hot_path;
 146   /* Number of call stmts that are not a builtin and are not pure nor const
 147      present on the hot path.  */
 148   int num_non_pure_calls_on_hot_path;
 149   /* Number of statements other than calls in the loop.  */
 150   int non_call_stmts_on_hot_path;
 151   /* Number of branches seen on the hot path.  */
 152   int num_branches_on_hot_path;
 153 };
 154
 155 /* Return true if OP in STMT will be constant after peeling LOOP.  */
 156
 157 static bool
 158 constant_after_peeling (tree op, gimple *stmt, struct loop *loop)
 159 {
 160   if (is_gimple_min_invariant (op))
 161     return true;
 162
 163   /* We can still fold accesses to constant arrays when index is known.  */
 164   if (TREE_CODE (op) != SSA_NAME)
 165     {
 166       tree base = op;
 167
 168       /* First make fast look if we see constant array inside.  */
 169       while (handled_component_p (base))
 170         base = TREE_OPERAND (base, 0);
 171       if ((DECL_P (base)
 172            && ctor_for_folding (base) != error_mark_node)
 173           || CONSTANT_CLASS_P (base))
 174         {
 175           /* If so, see if we understand all the indices.  */
 176           base = op;
 177           while (handled_component_p (base))
 178             {
 179               if (TREE_CODE (base) == ARRAY_REF
 180                   && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
 181                 return false;
 182               base = TREE_OPERAND (base, 0);
 183             }
 184           return true;
 185         }
 186       return false;
 187     }
 188
 189   /* Induction variables are constants when defined in loop.  */
 190   if (loop_containing_stmt (stmt) != loop)
 191     return false;
 192   tree ev = analyze_scalar_evolution (loop, op);
 193   if (chrec_contains_undetermined (ev)
 194       || chrec_contains_symbols (ev))
 195     return false;
 196   return true;
 197 }
 198
 199 /* Computes an estimated number of insns in LOOP.
 200    EXIT (if non-NULL) is an exite edge that will be eliminated in all but last
 201    iteration of the loop.
 202    EDGE_TO_CANCEL (if non-NULL) is an non-exit edge eliminated in the last iteration
 203    of loop.
 204    Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.
 205    Stop estimating after UPPER_BOUND is met.  Return true in this case.  */
 206
 207 static bool
 208 tree_estimate_loop_size (struct loop *loop, edge exit, edge edge_to_cancel,
 209                          struct loop_size *size, int upper_bound)
 210 {
 211   basic_block *body = get_loop_body (loop);
 212   gimple_stmt_iterator gsi;
 213   unsigned int i;
 214   bool after_exit;
 215   vec<basic_block> path = get_loop_hot_path (loop);
 216
 217   size->overall = 0;
 218   size->eliminated_by_peeling = 0;
 219   size->last_iteration = 0;
 220   size->last_iteration_eliminated_by_peeling = 0;
 221   size->num_pure_calls_on_hot_path = 0;
 222   size->num_non_pure_calls_on_hot_path = 0;
 223   size->non_call_stmts_on_hot_path = 0;
 224   size->num_branches_on_hot_path = 0;
 225   size->constant_iv = 0;
 226
 227   if (dump_file && (dump_flags & TDF_DETAILS))
 228     fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
 229   for (i = 0; i < loop->num_nodes; i++)
 230     {
 231       if (edge_to_cancel && body[i] != edge_to_cancel->src
 232           && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
 233         after_exit = true;
 234       else
 235         after_exit = false;
 236       if (dump_file && (dump_flags & TDF_DETAILS))
 237         fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
 238                  after_exit);
 239
 240       for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
 241         {
 242           gimple *stmt = gsi_stmt (gsi);
 243           int num = estimate_num_insns (stmt, &eni_size_weights);
 244           bool likely_eliminated = false;
 245           bool likely_eliminated_last = false;
 246           bool likely_eliminated_peeled = false;
 247
 248           if (dump_file && (dump_flags & TDF_DETAILS))
 249             {
 250               fprintf (dump_file, "  size: %3i ", num);
 251               print_gimple_stmt (dump_file, gsi_stmt (gsi), 0);
 252             }
 253
 254           /* Look for reasons why we might optimize this stmt away. */
 255
 256           if (!gimple_has_side_effects (stmt))
 257             {
 258               /* Exit conditional.  */
 259               if (exit && body[i] == exit->src
 260                   && stmt == last_stmt (exit->src))
 261                 {
 262                   if (dump_file && (dump_flags & TDF_DETAILS))
 263                     fprintf (dump_file, "   Exit condition will be eliminated "
 264                              "in peeled copies.\n");
 265                   likely_eliminated_peeled = true;
 266                 }
 267               if (edge_to_cancel && body[i] == edge_to_cancel->src
 268                   && stmt == last_stmt (edge_to_cancel->src))
 269                 {
 270                   if (dump_file && (dump_flags & TDF_DETAILS))
 271                     fprintf (dump_file, "   Exit condition will be eliminated "
 272                              "in last copy.\n");
 273                   likely_eliminated_last = true;
 274                 }
 275               /* Sets of IV variables  */
 276               if (gimple_code (stmt) == GIMPLE_ASSIGN
 277                   && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
 278                 {
 279                   if (dump_file && (dump_flags & TDF_DETAILS))
 280                     fprintf (dump_file, "   Induction variable computation will"
 281                              " be folded away.\n");
 282                   likely_eliminated = true;
 283                 }
 284               /* Assignments of IV variables.  */
 285               else if (gimple_code (stmt) == GIMPLE_ASSIGN
 286                        && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
 287                        && constant_after_peeling (gimple_assign_rhs1 (stmt),
 288                                                   stmt, loop)
 289                        && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
 290                            || constant_after_peeling (gimple_assign_rhs2 (stmt),
 291                                                       stmt, loop)))
 292                 {
 293                   size->constant_iv = true;
 294                   if (dump_file && (dump_flags & TDF_DETAILS))
 295                     fprintf (dump_file,
 296                              "   Constant expression will be folded away.\n");
 297                   likely_eliminated = true;
 298                 }
 299               /* Conditionals.  */
 300               else if ((gimple_code (stmt) == GIMPLE_COND
 301                         && constant_after_peeling (gimple_cond_lhs (stmt), stmt,
 302                                                    loop)
 303                         && constant_after_peeling (gimple_cond_rhs (stmt), stmt,
 304                                                    loop)
 305                         /* We don't simplify all constant compares so make sure
 306                            they are not both constant already.  See PR70288.  */
 307                         && (! is_gimple_min_invariant (gimple_cond_lhs (stmt))
 308                             || ! is_gimple_min_invariant
 309                                  (gimple_cond_rhs (stmt))))
 310                        || (gimple_code (stmt) == GIMPLE_SWITCH
 311                            && constant_after_peeling (gimple_switch_index (
 312                                                         as_a <gswitch *>
 313                                                           (stmt)),
 314                                                       stmt, loop)
 315                            && ! is_gimple_min_invariant
 316                                    (gimple_switch_index
 317                                       (as_a <gswitch *> (stmt)))))
 318                 {
 319                   if (dump_file && (dump_flags & TDF_DETAILS))
 320                     fprintf (dump_file, "   Constant conditional.\n");
 321                   likely_eliminated = true;
 322                 }
 323             }
 324
 325           size->overall += num;
 326           if (likely_eliminated || likely_eliminated_peeled)
 327             size->eliminated_by_peeling += num;
 328           if (!after_exit)
 329             {
 330               size->last_iteration += num;
 331               if (likely_eliminated || likely_eliminated_last)
 332                 size->last_iteration_eliminated_by_peeling += num;
 333             }
 334           if ((size->overall * 3 / 2 - size->eliminated_by_peeling
 335               - size->last_iteration_eliminated_by_peeling) > upper_bound)
 336             {
 337               free (body);
 338               path.release ();
 339               return true;
 340             }
 341         }
 342     }
 343   while (path.length ())
 344     {
 345       basic_block bb = path.pop ();
 346       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 347         {
 348           gimple *stmt = gsi_stmt (gsi);
 349           if (gimple_code (stmt) == GIMPLE_CALL
 350               && !gimple_inexpensive_call_p (as_a <gcall *>  (stmt)))
 351             {
 352               int flags = gimple_call_flags (stmt);
 353               if (flags & (ECF_PURE | ECF_CONST))
 354                 size->num_pure_calls_on_hot_path++;
 355               else
 356                 size->num_non_pure_calls_on_hot_path++;
 357               size->num_branches_on_hot_path ++;
 358             }
 359           /* Count inexpensive calls as non-calls, because they will likely
 360              expand inline.  */
 361           else if (gimple_code (stmt) != GIMPLE_DEBUG)
 362             size->non_call_stmts_on_hot_path++;
 363           if (((gimple_code (stmt) == GIMPLE_COND
 364                 && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
 365                     || constant_after_peeling (gimple_cond_rhs (stmt), stmt,
 366                                                loop)))
 367                || (gimple_code (stmt) == GIMPLE_SWITCH
 368                    && !constant_after_peeling (gimple_switch_index (
 369                                                  as_a <gswitch *> (stmt)),
 370                                                stmt, loop)))
 371               && (!exit || bb != exit->src))
 372             size->num_branches_on_hot_path++;
 373         }
 374     }
 375   path.release ();
 376   if (dump_file && (dump_flags & TDF_DETAILS))
 377     fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
 378              size->eliminated_by_peeling, size->last_iteration,
 379              size->last_iteration_eliminated_by_peeling);
 380
 381   free (body);
 382   return false;
 383 }
 384
 385 /* Estimate number of insns of completely unrolled loop.
 386    It is (NUNROLL + 1) * size of loop body with taking into account
 387    the fact that in last copy everything after exit conditional
 388    is dead and that some instructions will be eliminated after
 389    peeling.
 390
 391    Loop body is likely going to simplify further, this is difficult
 392    to guess, we just decrease the result by 1/3.  */
 393
 394 static unsigned HOST_WIDE_INT
 395 estimated_unrolled_size (struct loop_size *size,
 396                          unsigned HOST_WIDE_INT nunroll)
 397 {
 398   HOST_WIDE_INT unr_insns = ((nunroll)
 399                              * (HOST_WIDE_INT) (size->overall
 400                                                 - size->eliminated_by_peeling));
 401   if (!nunroll)
 402     unr_insns = 0;
 403   unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
 404
 405   unr_insns = unr_insns * 2 / 3;
 406   if (unr_insns <= 0)
 407     unr_insns = 1;
 408
 409   return unr_insns;
 410 }
 411
 412 /* Loop LOOP is known to not loop.  See if there is an edge in the loop
 413    body that can be remove to make the loop to always exit and at
 414    the same time it does not make any code potentially executed
 415    during the last iteration dead.
 416
 417    After complete unrolling we still may get rid of the conditional
 418    on the exit in the last copy even if we have no idea what it does.
 419    This is quite common case for loops of form
 420
 421      int a[5];
 422      for (i=0;i<b;i++)
 423        a[i]=0;
 424
 425    Here we prove the loop to iterate 5 times but we do not know
 426    it from induction variable.
 427
 428    For now we handle only simple case where there is exit condition
 429    just before the latch block and the latch block contains no statements
 430    with side effect that may otherwise terminate the execution of loop
 431    (such as by EH or by terminating the program or longjmp).
 432
 433    In the general case we may want to cancel the paths leading to statements
 434    loop-niter identified as having undefined effect in the last iteration.
 435    The other cases are hopefully rare and will be cleaned up later.  */
 436
 437 static edge
 438 loop_edge_to_cancel (struct loop *loop)
 439 {
 440   vec<edge> exits;
 441   unsigned i;
 442   edge edge_to_cancel;
 443   gimple_stmt_iterator gsi;
 444
 445   /* We want only one predecestor of the loop.  */
 446   if (EDGE_COUNT (loop->latch->preds) > 1)
 447     return NULL;
 448
 449   exits = get_loop_exit_edges (loop);
 450
 451   FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
 452     {
 453        /* Find the other edge than the loop exit
 454           leaving the conditoinal.  */
 455        if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
 456          continue;
 457        if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
 458          edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
 459        else
 460          edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);
 461
 462       /* We only can handle conditionals.  */
 463       if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
 464         continue;
 465
 466       /* We should never have conditionals in the loop latch. */
 467       gcc_assert (edge_to_cancel->dest != loop->header);
 468
 469       /* Check that it leads to loop latch.  */
 470       if (edge_to_cancel->dest != loop->latch)
 471         continue;
 472
 473       exits.release ();
 474
 475       /* Verify that the code in loop latch does nothing that may end program
 476          execution without really reaching the exit.  This may include
 477          non-pure/const function calls, EH statements, volatile ASMs etc.  */
 478       for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
 479         if (gimple_has_side_effects (gsi_stmt (gsi)))
 480            return NULL;
 481       return edge_to_cancel;
 482     }
 483   exits.release ();
 484   return NULL;
 485 }
 486
 487 /* Remove all tests for exits that are known to be taken after LOOP was
 488    peeled NPEELED times. Put gcc_unreachable before every statement
 489    known to not be executed.  */
 490
 491 static bool
 492 remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
 493 {
 494   struct nb_iter_bound *elt;
 495   bool changed = false;
 496
 497   for (elt = loop->bounds; elt; elt = elt->next)
 498     {
 499       /* If statement is known to be undefined after peeling, turn it
 500          into unreachable (or trap when debugging experience is supposed
 501          to be good).  */
 502       if (!elt->is_exit
 503           && wi::ltu_p (elt->bound, npeeled))
 504         {
 505           gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
 506           gcall *stmt = gimple_build_call
 507               (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
 508           gimple_set_location (stmt, gimple_location (elt->stmt));
 509           gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
 510           split_block (gimple_bb (stmt), stmt);
 511           changed = true;
 512           if (dump_file && (dump_flags & TDF_DETAILS))
 513             {
 514               fprintf (dump_file, "Forced statement unreachable: ");
 515               print_gimple_stmt (dump_file, elt->stmt, 0);
 516             }
 517         }
 518       /* If we know the exit will be taken after peeling, update.  */
 519       else if (elt->is_exit
 520                && wi::leu_p (elt->bound, npeeled))
 521         {
 522           basic_block bb = gimple_bb (elt->stmt);
 523           edge exit_edge = EDGE_SUCC (bb, 0);
 524
 525           if (dump_file && (dump_flags & TDF_DETAILS))
 526             {
 527               fprintf (dump_file, "Forced exit to be taken: ");
 528               print_gimple_stmt (dump_file, elt->stmt, 0);
 529             }
 530           if (!loop_exit_edge_p (loop, exit_edge))
 531             exit_edge = EDGE_SUCC (bb, 1);
 532           exit_edge->probability = profile_probability::always ();
 533           gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
 534           gcond *cond_stmt = as_a <gcond *> (elt->stmt);
 535           if (exit_edge->flags & EDGE_TRUE_VALUE)
 536             gimple_cond_make_true (cond_stmt);
 537           else
 538             gimple_cond_make_false (cond_stmt);
 539           update_stmt (cond_stmt);
 540           changed = true;
 541         }
 542     }
 543   return changed;
 544 }
 545
 546 /* Remove all exits that are known to be never taken because of the loop bound
 547    discovered.  */
 548
 549 static bool
 550 remove_redundant_iv_tests (struct loop *loop)
 551 {
 552   struct nb_iter_bound *elt;
 553   bool changed = false;
 554
 555   if (!loop->any_upper_bound)
 556     return false;
 557   for (elt = loop->bounds; elt; elt = elt->next)
 558     {
 559       /* Exit is pointless if it won't be taken before loop reaches
 560          upper bound.  */
 561       if (elt->is_exit && loop->any_upper_bound
 562           && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
 563         {
 564           basic_block bb = gimple_bb (elt->stmt);
 565           edge exit_edge = EDGE_SUCC (bb, 0);
 566           struct tree_niter_desc niter;
 567
 568           if (!loop_exit_edge_p (loop, exit_edge))
 569             exit_edge = EDGE_SUCC (bb, 1);
 570
 571           /* Only when we know the actual number of iterations, not
 572              just a bound, we can remove the exit.  */
 573           if (!number_of_iterations_exit (loop, exit_edge,
 574                                           &niter, false, false)
 575               || !integer_onep (niter.assumptions)
 576               || !integer_zerop (niter.may_be_zero)
 577               || !niter.niter
 578               || TREE_CODE (niter.niter) != INTEGER_CST
 579               || !wi::ltu_p (loop->nb_iterations_upper_bound,
 580                              wi::to_widest (niter.niter)))
 581             continue;
 582
 583           if (dump_file && (dump_flags & TDF_DETAILS))
 584             {
 585               fprintf (dump_file, "Removed pointless exit: ");
 586               print_gimple_stmt (dump_file, elt->stmt, 0);
 587             }
 588           gcond *cond_stmt = as_a <gcond *> (elt->stmt);
 589           if (exit_edge->flags & EDGE_TRUE_VALUE)
 590             gimple_cond_make_false (cond_stmt);
 591           else
 592             gimple_cond_make_true (cond_stmt);
 593           update_stmt (cond_stmt);
 594           changed = true;
 595         }
 596     }
 597   return changed;
 598 }
 599
 600 /* Stores loops that will be unlooped and edges that will be removed
 601    after we process whole loop tree. */
 602 static vec<loop_p> loops_to_unloop;
 603 static vec<int> loops_to_unloop_nunroll;
 604 static vec<edge> edges_to_remove;
 605 /* Stores loops that has been peeled.  */
 606 static bitmap peeled_loops;
 607
 608 /* Cancel all fully unrolled loops by putting __builtin_unreachable
 609    on the latch edge.
 610    We do it after all unrolling since unlooping moves basic blocks
 611    across loop boundaries trashing loop closed SSA form as well
 612    as SCEV info needed to be intact during unrolling.
 613
 614    IRRED_INVALIDATED is used to bookkeep if information about
 615    irreducible regions may become invalid as a result
 616    of the transformation.
 617    LOOP_CLOSED_SSA_INVALIDATED is used to bookkepp the case
 618    when we need to go into loop closed SSA form.  */
 619
 620 static void
 621 unloop_loops (bitmap loop_closed_ssa_invalidated,
 622               bool *irred_invalidated)
 623 {
 624   while (loops_to_unloop.length ())
 625     {
 626       struct loop *loop = loops_to_unloop.pop ();
 627       int n_unroll = loops_to_unloop_nunroll.pop ();
 628       basic_block latch = loop->latch;
 629       edge latch_edge = loop_latch_edge (loop);
 630       int flags = latch_edge->flags;
 631       location_t locus = latch_edge->goto_locus;
 632       gcall *stmt;
 633       gimple_stmt_iterator gsi;
 634
 635       remove_exits_and_undefined_stmts (loop, n_unroll);
 636
 637       /* Unloop destroys the latch edge.  */
 638       unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);
 639
 640       /* Create new basic block for the latch edge destination and wire
 641          it in.  */
 642       stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
 643       latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
 644       latch_edge->probability = profile_probability::never ();
 645       latch_edge->flags |= flags;
 646       latch_edge->goto_locus = locus;
 647
 648       add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
 649       latch_edge->dest->count = profile_count::zero ();
 650       set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);
 651
 652       gsi = gsi_start_bb (latch_edge->dest);
 653       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
 654     }
 655   loops_to_unloop.release ();
 656   loops_to_unloop_nunroll.release ();
 657
 658   /* Remove edges in peeled copies.  */
 659   unsigned i;
 660   edge e;
 661   FOR_EACH_VEC_ELT (edges_to_remove, i, e)
 662     {
 663       bool ok = remove_path (e, irred_invalidated, loop_closed_ssa_invalidated);
 664       gcc_assert (ok);
 665     }
 666   edges_to_remove.release ();
 667 }
 668
/* Tries to unroll LOOP completely, i.e. NITER times.
   UL determines which loops we are allowed to unroll.
   EXIT is the exit of the loop that should be eliminated.
   MAXITER specifies a bound on the number of iterations, -1 if it is
   not known or too large for HOST_WIDE_INT.  The location
   LOCUS corresponding to the loop is used when emitting
   a summary of the unroll to the dump file.  */
 676
 677 static bool
 678 try_unroll_loop_completely (struct loop *loop,
 679                             edge exit, tree niter,
 680                             enum unroll_level ul,
 681                             HOST_WIDE_INT maxiter,
 682                             location_t locus)
 683 {
 684   unsigned HOST_WIDE_INT n_unroll = 0;
 685   bool n_unroll_found = false;
 686   edge edge_to_cancel = NULL;
 687
 688   /* See if we proved number of iterations to be low constant.
 689
 690      EXIT is an edge that will be removed in all but last iteration of
 691      the loop.
 692
 693      EDGE_TO_CACNEL is an edge that will be removed from the last iteration
 694      of the unrolled sequence and is expected to make the final loop not
 695      rolling.
 696
 697      If the number of execution of loop is determined by standard induction
 698      variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving
 699      from the iv test.  */
 700   if (tree_fits_uhwi_p (niter))
 701     {
 702       n_unroll = tree_to_uhwi (niter);
 703       n_unroll_found = true;
 704       edge_to_cancel = EDGE_SUCC (exit->src, 0);
 705       if (edge_to_cancel == exit)
 706         edge_to_cancel = EDGE_SUCC (exit->src, 1);
 707     }
 708   /* We do not know the number of iterations and thus we can not eliminate
 709      the EXIT edge.  */
 710   else
 711     exit = NULL;
 712
 713   /* See if we can improve our estimate by using recorded loop bounds.  */
 714   if (maxiter >= 0
 715       && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
 716     {
 717       n_unroll = maxiter;
 718       n_unroll_found = true;
 719       /* Loop terminates before the IV variable test, so we can not
 720          remove it in the last iteration.  */
 721       edge_to_cancel = NULL;
 722     }
 723
 724   if (!n_unroll_found)
 725     return false;
 726
 727   if (!loop->unroll
 728       && n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
 729     {
 730       if (dump_file && (dump_flags & TDF_DETAILS))
 731         fprintf (dump_file, "Not unrolling loop %d "
 732                  "(--param max-completely-peel-times limit reached).\n",
 733                  loop->num);
 734       return false;
 735     }
 736
 737   if (!edge_to_cancel)
 738     edge_to_cancel = loop_edge_to_cancel (loop);
 739
 740   if (n_unroll)
 741     {
 742       if (ul == UL_SINGLE_ITER)
 743         return false;
 744
 745       if (loop->unroll)
 746         {
 747           /* If the unrolling factor is too large, bail out.  */
 748           if (n_unroll > (unsigned)loop->unroll)
 749             {
 750               if (dump_file && (dump_flags & TDF_DETAILS))
 751                 fprintf (dump_file,
 752                          "Not unrolling loop %d: "
 753                          "user didn't want it unrolled completely.\n",
 754                          loop->num);
 755               return false;
 756             }
 757         }
 758       else
 759         {
 760           struct loop_size size;
 761           /* EXIT can be removed only if we are sure it passes first N_UNROLL
 762              iterations.  */
 763           bool remove_exit = (exit && niter
 764                               && TREE_CODE (niter) == INTEGER_CST
 765                               && wi::leu_p (n_unroll, wi::to_widest (niter)));
 766           bool large
 767             = tree_estimate_loop_size
 768                 (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
 769                  PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
 770           if (large)
 771             {
 772               if (dump_file && (dump_flags & TDF_DETAILS))
 773                 fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
 774                          loop->num);
 775               return false;
 776             }
 777
 778           unsigned HOST_WIDE_INT ninsns = size.overall;
 779           unsigned HOST_WIDE_INT unr_insns
 780             = estimated_unrolled_size (&size, n_unroll);
 781           if (dump_file && (dump_flags & TDF_DETAILS))
 782             {
 783               fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
 784               fprintf (dump_file, "  Estimated size after unrolling: %d\n",
 785                        (int) unr_insns);
 786             }
 787
 788           /* If the code is going to shrink, we don't need to be extra
 789              cautious on guessing if the unrolling is going to be
 790              profitable.  */
 791           if (unr_insns
 792               /* If there is IV variable that will become constant, we
 793                  save one instruction in the loop prologue we do not
 794                  account otherwise.  */
 795               <= ninsns + (size.constant_iv != false))
 796             ;
 797           /* We unroll only inner loops, because we do not consider it
 798              profitable otheriwse.  We still can cancel loopback edge
 799              of not rolling loop; this is always a good idea.  */
 800           else if (ul == UL_NO_GROWTH)
 801             {
 802               if (dump_file && (dump_flags & TDF_DETAILS))
 803                 fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
 804                          loop->num);
 805               return false;
 806             }
 807           /* Outer loops tend to be less interesting candidates for
 808              complete unrolling unless we can do a lot of propagation
 809              into the inner loop body.  For now we disable outer loop
 810              unrolling when the code would grow.  */
 811           else if (loop->inner)
 812             {
 813               if (dump_file && (dump_flags & TDF_DETAILS))
 814                 fprintf (dump_file, "Not unrolling loop %d: "
 815                          "it is not innermost and code would grow.\n",
 816                          loop->num);
 817               return false;
 818             }
 819           /* If there is call on a hot path through the loop, then
 820              there is most probably not much to optimize.  */
 821           else if (size.num_non_pure_calls_on_hot_path)
 822             {
 823               if (dump_file && (dump_flags & TDF_DETAILS))
 824                 fprintf (dump_file, "Not unrolling loop %d: "
 825                          "contains call and code would grow.\n",
 826                          loop->num);
 827               return false;
 828             }
 829           /* If there is pure/const call in the function, then we can
 830              still optimize the unrolled loop body if it contains some
 831              other interesting code than the calls and code storing or
 832              cumulating the return value.  */
 833           else if (size.num_pure_calls_on_hot_path
 834                    /* One IV increment, one test, one ivtmp store and
 835                       one useful stmt.  That is about minimal loop
 836                       doing pure call.  */
 837                    && (size.non_call_stmts_on_hot_path
 838                        <= 3 + size.num_pure_calls_on_hot_path))
 839             {
 840               if (dump_file && (dump_flags & TDF_DETAILS))
 841                 fprintf (dump_file, "Not unrolling loop %d: "
 842                          "contains just pure calls and code would grow.\n",
 843                          loop->num);
 844               return false;
 845             }
 846           /* Complete unrolling is major win when control flow is
 847              removed and one big basic block is created.  If the loop
 848              contains control flow the optimization may still be a win
 849              because of eliminating the loop overhead but it also may
 850              blow the branch predictor tables.  Limit number of
 851              branches on the hot path through the peeled sequence.  */
 852           else if (size.num_branches_on_hot_path * (int)n_unroll
 853                    > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
 854             {
 855               if (dump_file && (dump_flags & TDF_DETAILS))
 856                 fprintf (dump_file, "Not unrolling loop %d: "
 857                          "number of branches on hot path in the unrolled "
 858                          "sequence reaches --param max-peel-branches limit.\n",
 859                          loop->num);
 860               return false;
 861             }
 862           else if (unr_insns
 863                    > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
 864             {
 865               if (dump_file && (dump_flags & TDF_DETAILS))
 866                 fprintf (dump_file, "Not unrolling loop %d: "
 867                          "number of insns in the unrolled sequence reaches "
 868                          "--param max-completely-peeled-insns limit.\n",
 869                          loop->num);
 870               return false;
 871             }
 872         }
 873
 874       initialize_original_copy_tables ();
 875       auto_sbitmap wont_exit (n_unroll + 1);
 876       if (exit && niter
 877           && TREE_CODE (niter) == INTEGER_CST
 878           && wi::leu_p (n_unroll, wi::to_widest (niter)))
 879         {
 880           bitmap_ones (wont_exit);
 881           if (wi::eq_p (wi::to_widest (niter), n_unroll)
 882               || edge_to_cancel)
 883             bitmap_clear_bit (wont_exit, 0);
 884         }
 885       else
 886         {
 887           exit = NULL;
 888           bitmap_clear (wont_exit);
 889         }
 890
 891       if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 892                                                  n_unroll, wont_exit,
 893                                                  exit, &edges_to_remove,
 894                                                  DLTHE_FLAG_UPDATE_FREQ
 895                                                  | DLTHE_FLAG_COMPLETTE_PEEL))
 896         {
 897           free_original_copy_tables ();
 898           if (dump_file && (dump_flags & TDF_DETAILS))
 899             fprintf (dump_file, "Failed to duplicate the loop\n");
 900           return false;
 901         }
 902
 903       free_original_copy_tables ();
 904     }
 905
 906   /* Remove the conditional from the last copy of the loop.  */
 907   if (edge_to_cancel)
 908     {
 909       gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src));
 910       force_edge_cold (edge_to_cancel, true);
 911       if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
 912         gimple_cond_make_false (cond);
 913       else
 914         gimple_cond_make_true (cond);
 915       update_stmt (cond);
 916       /* Do not remove the path, as doing so may remove outer loop and
 917          confuse bookkeeping code in tree_unroll_loops_completely.  */
 918     }
 919
 920   /* Store the loop for later unlooping and exit removal.  */
 921   loops_to_unloop.safe_push (loop);
 922   loops_to_unloop_nunroll.safe_push (n_unroll);
 923
 924   if (dump_enabled_p ())
 925     {
 926       if (!n_unroll)
 927         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
 928                          "loop turned into non-loop; it never loops\n");
 929       else
 930         {
 931           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
 932                            "loop with %d iterations completely unrolled",
 933                            (int) n_unroll);
 934           if (loop->header->count.initialized_p ())
 935             dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
 936                          " (header execution count %d)",
 937                          (int)loop->header->count.to_gcov_type ());
 938           dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
 939         }
 940     }
 941
 942   if (dump_file && (dump_flags & TDF_DETAILS))
 943     {
 944       if (exit)
 945         fprintf (dump_file, "Exit condition of peeled iterations was "
 946                  "eliminated.\n");
 947       if (edge_to_cancel)
 948         fprintf (dump_file, "Last iteration exit edge was proved true.\n");
 949       else
 950         fprintf (dump_file, "Latch of last iteration was marked by "
 951                  "__builtin_unreachable ().\n");
 952     }
 953
 954   return true;
 955 }
 956
 957 /* Return number of instructions after peeling.  */
 958 static unsigned HOST_WIDE_INT
 959 estimated_peeled_sequence_size (struct loop_size *size,
 960                                 unsigned HOST_WIDE_INT npeel)
 961 {
 962   return MAX (npeel * (HOST_WIDE_INT) (size->overall
 963                                        - size->eliminated_by_peeling), 1);
 964 }
 965
 966 /* If the loop is expected to iterate N times and is
 967    small enough, duplicate the loop body N+1 times before
 968    the loop itself.  This way the hot path will never
 969    enter the loop.
 970    Parameters are the same as for try_unroll_loops_completely */
 971
 972 static bool
 973 try_peel_loop (struct loop *loop,
 974                edge exit, tree niter,
 975                HOST_WIDE_INT maxiter)
 976 {
 977   HOST_WIDE_INT npeel;
 978   struct loop_size size;
 979   int peeled_size;
 980
 981   if (!flag_peel_loops
 982       || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0
 983       || !peeled_loops)
 984     return false;
 985
 986   if (bitmap_bit_p (peeled_loops, loop->num))
 987     {
 988       if (dump_file)
 989         fprintf (dump_file, "Not peeling: loop is already peeled\n");
 990       return false;
 991     }
 992
 993   /* We don't peel loops that will be unrolled as this can duplicate a
 994      loop more times than the user requested.  */
 995   if (loop->unroll)
 996     {
 997       if (dump_file)
 998         fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
 999       return false;
1000     }
1001
1002   /* Peel only innermost loops.
1003      While the code is perfectly capable of peeling non-innermost loops,
1004      the heuristics would probably need some improvements. */
1005   if (loop->inner)
1006     {
1007       if (dump_file)
1008         fprintf (dump_file, "Not peeling: outer loop\n");
1009       return false;
1010     }
1011
1012   if (!optimize_loop_for_speed_p (loop))
1013     {
1014       if (dump_file)
1015         fprintf (dump_file, "Not peeling: cold loop\n");
1016       return false;
1017     }
1018
1019   /* Check if there is an estimate on the number of iterations.  */
1020   npeel = estimated_loop_iterations_int (loop);
1021   if (npeel < 0)
1022     npeel = likely_max_loop_iterations_int (loop);
1023   if (npeel < 0)
1024     {
1025       if (dump_file)
1026         fprintf (dump_file, "Not peeling: number of iterations is not "
1027                  "estimated\n");
1028       return false;
1029     }
1030   if (maxiter >= 0 && maxiter <= npeel)
1031     {
1032       if (dump_file)
1033         fprintf (dump_file, "Not peeling: upper bound is known so can "
1034                  "unroll completely\n");
1035       return false;
1036     }
1037
1038   /* We want to peel estimated number of iterations + 1 (so we never
1039      enter the loop on quick path).  Check against PARAM_MAX_PEEL_TIMES
1040      and be sure to avoid overflows.  */
1041   if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
1042     {
1043       if (dump_file)
1044         fprintf (dump_file, "Not peeling: rolls too much "
1045                  "(%i + 1 > --param max-peel-times)\n", (int) npeel);
1046       return false;
1047     }
1048   npeel++;
1049
1050   /* Check peeled loops size.  */
1051   tree_estimate_loop_size (loop, exit, NULL, &size,
1052                            PARAM_VALUE (PARAM_MAX_PEELED_INSNS));
1053   if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
1054       > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
1055     {
1056       if (dump_file)
1057         fprintf (dump_file, "Not peeling: peeled sequence size is too large "
1058                  "(%i insns > --param max-peel-insns)", peeled_size);
1059       return false;
1060     }
1061
1062   /* Duplicate possibly eliminating the exits.  */
1063   initialize_original_copy_tables ();
1064   auto_sbitmap wont_exit (npeel + 1);
1065   if (exit && niter
1066       && TREE_CODE (niter) == INTEGER_CST
1067       && wi::leu_p (npeel, wi::to_widest (niter)))
1068     {
1069       bitmap_ones (wont_exit);
1070       bitmap_clear_bit (wont_exit, 0);
1071     }
1072   else
1073     {
1074       exit = NULL;
1075       bitmap_clear (wont_exit);
1076     }
1077   if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1078                                              npeel, wont_exit,
1079                                              exit, &edges_to_remove,
1080                                              DLTHE_FLAG_UPDATE_FREQ))
1081     {
1082       free_original_copy_tables ();
1083       return false;
1084     }
1085   free_original_copy_tables ();
1086   if (dump_file && (dump_flags & TDF_DETAILS))
1087     {
1088       fprintf (dump_file, "Peeled loop %d, %i times.\n",
1089                loop->num, (int) npeel);
1090     }
1091   if (loop->any_estimate)
1092     {
1093       if (wi::ltu_p (npeel, loop->nb_iterations_estimate))
1094         loop->nb_iterations_estimate -= npeel;
1095       else
1096         loop->nb_iterations_estimate = 0;
1097     }
1098   if (loop->any_upper_bound)
1099     {
1100       if (wi::ltu_p (npeel, loop->nb_iterations_upper_bound))
1101         loop->nb_iterations_upper_bound -= npeel;
1102       else
1103         loop->nb_iterations_upper_bound = 0;
1104     }
1105   if (loop->any_likely_upper_bound)
1106     {
1107       if (wi::ltu_p (npeel, loop->nb_iterations_likely_upper_bound))
1108         loop->nb_iterations_likely_upper_bound -= npeel;
1109       else
1110         {
1111           loop->any_estimate = true;
1112           loop->nb_iterations_estimate = 0;
1113           loop->nb_iterations_likely_upper_bound = 0;
1114         }
1115     }
1116   profile_count entry_count = profile_count::zero ();
1117
1118   edge e;
1119   edge_iterator ei;
1120   FOR_EACH_EDGE (e, ei, loop->header->preds)
1121     if (e->src != loop->latch)
1122       {
1123         if (e->src->count.initialized_p ())
1124           entry_count = e->src->count + e->src->count;
1125         gcc_assert (!flow_bb_inside_loop_p (loop, e->src));
1126       }
1127   profile_probability p = profile_probability::very_unlikely ();
1128   p = entry_count.probability_in (loop->header->count);
1129   scale_loop_profile (loop, p, 0);
1130   bitmap_set_bit (peeled_loops, loop->num);
1131   return true;
1132 }
1133 /* Adds a canonical induction variable to LOOP if suitable.
1134    CREATE_IV is true if we may create a new iv.  UL determines
1135    which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
1136    to determine the number of iterations of a loop by direct evaluation.
1137    Returns true if cfg is changed.   */
1138
1139 static bool
1140 canonicalize_loop_induction_variables (struct loop *loop,
1141                                        bool create_iv, enum unroll_level ul,
1142                                        bool try_eval)
1143 {
1144   edge exit = NULL;
1145   tree niter;
1146   HOST_WIDE_INT maxiter;
1147   bool modified = false;
1148   location_t locus = UNKNOWN_LOCATION;
1149
1150   niter = number_of_latch_executions (loop);
1151   exit = single_exit (loop);
1152   if (TREE_CODE (niter) == INTEGER_CST)
1153     locus = gimple_location (last_stmt (exit->src));
1154   else
1155     {
1156       /* If the loop has more than one exit, try checking all of them
1157          for # of iterations determinable through scev.  */
1158       if (!exit)
1159         niter = find_loop_niter (loop, &exit);
1160
1161       /* Finally if everything else fails, try brute force evaluation.  */
1162       if (try_eval
1163           && (chrec_contains_undetermined (niter)
1164               || TREE_CODE (niter) != INTEGER_CST))
1165         niter = find_loop_niter_by_eval (loop, &exit);
1166
1167       if (exit)
1168         locus = gimple_location (last_stmt (exit->src));
1169
1170       if (TREE_CODE (niter) != INTEGER_CST)
1171         exit = NULL;
1172     }
1173
1174   /* We work exceptionally hard here to estimate the bound
1175      by find_loop_niter_by_eval.  Be sure to keep it for future.  */
1176   if (niter && TREE_CODE (niter) == INTEGER_CST)
1177     {
1178       record_niter_bound (loop, wi::to_widest (niter),
1179                           exit == single_likely_exit (loop), true);
1180     }
1181
1182   /* Force re-computation of loop bounds so we can remove redundant exits.  */
1183   maxiter = max_loop_iterations_int (loop);
1184
1185   if (dump_file && (dump_flags & TDF_DETAILS)
1186       && TREE_CODE (niter) == INTEGER_CST)
1187     {
1188       fprintf (dump_file, "Loop %d iterates ", loop->num);
1189       print_generic_expr (dump_file, niter, TDF_SLIM);
1190       fprintf (dump_file, " times.\n");
1191     }
1192   if (dump_file && (dump_flags & TDF_DETAILS)
1193       && maxiter >= 0)
1194     {
1195       fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
1196                (int)maxiter);
1197     }
1198   if (dump_file && (dump_flags & TDF_DETAILS)
1199       && likely_max_loop_iterations_int (loop) >= 0)
1200     {
1201       fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
1202                loop->num, (int)likely_max_loop_iterations_int (loop));
1203     }
1204
1205   /* Remove exits that are known to be never taken based on loop bound.
1206      Needs to be called after compilation of max_loop_iterations_int that
1207      populates the loop bounds.  */
1208   modified |= remove_redundant_iv_tests (loop);
1209
1210   if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus))
1211     return true;
1212
1213   if (create_iv
1214       && niter && !chrec_contains_undetermined (niter)
1215       && exit && just_once_each_iteration_p (loop, exit->src))
1216     create_canonical_iv (loop, exit, niter);
1217
1218   if (ul == UL_ALL)
1219     modified |= try_peel_loop (loop, exit, niter, maxiter);
1220
1221   return modified;
1222 }
1223
1224 /* The main entry point of the pass.  Adds canonical induction variables
1225    to the suitable loops.  */
1226
1227 unsigned int
1228 canonicalize_induction_variables (void)
1229 {
1230   struct loop *loop;
1231   bool changed = false;
1232   bool irred_invalidated = false;
1233   bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1234
1235   estimate_numbers_of_iterations (cfun);
1236
1237   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1238     {
1239       changed |= canonicalize_loop_induction_variables (loop,
1240                                                         true, UL_SINGLE_ITER,
1241                                                         true);
1242     }
1243   gcc_assert (!need_ssa_update_p (cfun));
1244
1245   unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
1246   if (irred_invalidated
1247       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1248     mark_irreducible_loops ();
1249
1250   /* Clean up the information about numbers of iterations, since brute force
1251      evaluation could reveal new information.  */
1252   free_numbers_of_iterations_estimates (cfun);
1253   scev_reset ();
1254
1255   if (!bitmap_empty_p (loop_closed_ssa_invalidated))
1256     {
1257       gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
1258       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1259     }
1260   BITMAP_FREE (loop_closed_ssa_invalidated);
1261
1262   if (changed)
1263     return TODO_cleanup_cfg;
1264   return 0;
1265 }
1266
1267 /* Propagate constant SSA_NAMEs defined in basic block BB.  */
1268
1269 static void
1270 propagate_constants_for_unrolling (basic_block bb)
1271 {
1272   /* Look for degenerate PHI nodes with constant argument.  */
1273   for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); )
1274     {
1275       gphi *phi = gsi.phi ();
1276       tree result = gimple_phi_result (phi);
1277       tree arg = gimple_phi_arg_def (phi, 0);
1278
1279       if (! SSA_NAME_OCCURS_IN_ABNORMAL_PHI (result)
1280           && gimple_phi_num_args (phi) == 1
1281           && CONSTANT_CLASS_P (arg))
1282         {
1283           replace_uses_by (result, arg);
1284           gsi_remove (&gsi, true);
1285           release_ssa_name (result);
1286         }
1287       else
1288         gsi_next (&gsi);
1289     }
1290
1291   /* Look for assignments to SSA names with constant RHS.  */
1292   for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
1293     {
1294       gimple *stmt = gsi_stmt (gsi);
1295       tree lhs;
1296
1297       if (is_gimple_assign (stmt)
1298           && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_constant
1299           && (lhs = gimple_assign_lhs (stmt), TREE_CODE (lhs) == SSA_NAME)
1300           && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
1301         {
1302           replace_uses_by (lhs, gimple_assign_rhs1 (stmt));
1303           gsi_remove (&gsi, true);
1304           release_ssa_name (lhs);
1305         }
1306       else
1307         gsi_next (&gsi);
1308     }
1309 }
1310
1311 /* Process loops from innermost to outer, stopping at the innermost
1312    loop we unrolled.  */
1313
1314 static bool
1315 tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
1316                                 bitmap father_bbs, struct loop *loop)
1317 {
1318   struct loop *loop_father;
1319   bool changed = false;
1320   struct loop *inner;
1321   enum unroll_level ul;
1322
1323   /* Process inner loops first.  */
1324   for (inner = loop->inner; inner != NULL; inner = inner->next)
1325     changed |= tree_unroll_loops_completely_1 (may_increase_size,
1326                                                unroll_outer, father_bbs,
1327                                                inner);
1328
1329   /* If we changed an inner loop we cannot process outer loops in this
1330      iteration because SSA form is not up-to-date.  Continue with
1331      siblings of outer loops instead.  */
1332   if (changed)
1333     return true;
1334
1335   /* Don't unroll #pragma omp simd loops until the vectorizer
1336      attempts to vectorize those.  */
1337   if (loop->force_vectorize)
1338     return false;
1339
1340   /* Try to unroll this loop.  */
1341   loop_father = loop_outer (loop);
1342   if (!loop_father)
1343     return false;
1344
1345   if (loop->unroll > 1)
1346     ul = UL_ALL;
1347   else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
1348       /* Unroll outermost loops only if asked to do so or they do
1349          not cause code growth.  */
1350       && (unroll_outer || loop_outer (loop_father)))
1351     ul = UL_ALL;
1352   else
1353     ul = UL_NO_GROWTH;
1354
1355   if (canonicalize_loop_induction_variables
1356         (loop, false, ul, !flag_tree_loop_ivcanon))
1357     {
1358       /* If we'll continue unrolling, we need to propagate constants
1359          within the new basic blocks to fold away induction variable
1360          computations; otherwise, the size might blow up before the
1361          iteration is complete and the IR eventually cleaned up.  */
1362       if (loop_outer (loop_father))
1363         bitmap_set_bit (father_bbs, loop_father->header->index);
1364
1365       return true;
1366     }
1367
1368   return false;
1369 }
1370
1371 /* Unroll LOOPS completely if they iterate just few times.  Unless
1372    MAY_INCREASE_SIZE is true, perform the unrolling only if the
1373    size of the code does not increase.  */
1374
1375 static unsigned int
1376 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
1377 {
1378   bitmap father_bbs = BITMAP_ALLOC (NULL);
1379   bool changed;
1380   int iteration = 0;
1381   bool irred_invalidated = false;
1382
1383   estimate_numbers_of_iterations (cfun);
1384
1385   do
1386     {
1387       changed = false;
1388       bitmap loop_closed_ssa_invalidated = NULL;
1389
1390       if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
1391         loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1392
1393       free_numbers_of_iterations_estimates (cfun);
1394       estimate_numbers_of_iterations (cfun);
1395
1396       changed = tree_unroll_loops_completely_1 (may_increase_size,
1397                                                 unroll_outer, father_bbs,
1398                                                 current_loops->tree_root);
1399       if (changed)
1400         {
1401           unsigned i;
1402
1403           unloop_loops (loop_closed_ssa_invalidated, &irred_invalidated);
1404
1405           /* We can not use TODO_update_ssa_no_phi because VOPS gets confused.  */
1406           if (loop_closed_ssa_invalidated
1407               && !bitmap_empty_p (loop_closed_ssa_invalidated))
1408             rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
1409                                           TODO_update_ssa);
1410           else
1411             update_ssa (TODO_update_ssa);
1412
1413           /* father_bbs is a bitmap of loop father header BB indices.
1414              Translate that to what non-root loops these BBs belong to now.  */
1415           bitmap_iterator bi;
1416           bitmap fathers = BITMAP_ALLOC (NULL);
1417           EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi)
1418             {
1419               basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i);
1420               if (! unrolled_loop_bb)
1421                 continue;
1422               if (loop_outer (unrolled_loop_bb->loop_father))
1423                 bitmap_set_bit (fathers,
1424                                 unrolled_loop_bb->loop_father->num);
1425             }
1426           bitmap_clear (father_bbs);
1427           /* Propagate the constants within the new basic blocks.  */
1428           EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
1429             {
1430               loop_p father = get_loop (cfun, i);
1431               basic_block *body = get_loop_body_in_dom_order (father);
1432               for (unsigned j = 0; j < father->num_nodes; j++)
1433                 propagate_constants_for_unrolling (body[j]);
1434               free (body);
1435             }
1436           BITMAP_FREE (fathers);
1437
1438           /* This will take care of removing completely unrolled loops
1439              from the loop structures so we can continue unrolling now
1440              innermost loops.  */
1441           if (cleanup_tree_cfg ())
1442             update_ssa (TODO_update_ssa_only_virtuals);
1443
1444           /* Clean up the information about numbers of iterations, since
1445              complete unrolling might have invalidated it.  */
1446           scev_reset ();
1447           if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA))
1448             verify_loop_closed_ssa (true);
1449         }
1450       if (loop_closed_ssa_invalidated)
1451         BITMAP_FREE (loop_closed_ssa_invalidated);
1452     }
1453   while (changed
1454          && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));
1455
1456   BITMAP_FREE (father_bbs);
1457
1458   if (irred_invalidated
1459       && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1460     mark_irreducible_loops ();
1461
1462   return 0;
1463 }
1464
1465 /* Canonical induction variable creation pass.  */
1466
1467 namespace {
1468
1469 const pass_data pass_data_iv_canon =
1470 {
1471   GIMPLE_PASS, /* type */
1472   "ivcanon", /* name */
1473   OPTGROUP_LOOP, /* optinfo_flags */
1474   TV_TREE_LOOP_IVCANON, /* tv_id */
1475   ( PROP_cfg | PROP_ssa ), /* properties_required */
1476   0, /* properties_provided */
1477   0, /* properties_destroyed */
1478   0, /* todo_flags_start */
1479   0, /* todo_flags_finish */
1480 };
1481
1482 class pass_iv_canon : public gimple_opt_pass
1483 {
1484 public:
1485   pass_iv_canon (gcc::context *ctxt)
1486     : gimple_opt_pass (pass_data_iv_canon, ctxt)
1487   {}
1488
1489   /* opt_pass methods: */
1490   virtual bool gate (function *) { return flag_tree_loop_ivcanon != 0; }
1491   virtual unsigned int execute (function *fun);
1492
1493 }; // class pass_iv_canon
1494
1495 unsigned int
1496 pass_iv_canon::execute (function *fun)
1497 {
1498   if (number_of_loops (fun) <= 1)
1499     return 0;
1500
1501   return canonicalize_induction_variables ();
1502 }
1503
1504 } // anon namespace
1505
1506 gimple_opt_pass *
1507 make_pass_iv_canon (gcc::context *ctxt)
1508 {
1509   return new pass_iv_canon (ctxt);
1510 }
1511
1512 /* Complete unrolling of loops.  */
1513
1514 namespace {
1515
1516 const pass_data pass_data_complete_unroll =
1517 {
1518   GIMPLE_PASS, /* type */
1519   "cunroll", /* name */
1520   OPTGROUP_LOOP, /* optinfo_flags */
1521   TV_COMPLETE_UNROLL, /* tv_id */
1522   ( PROP_cfg | PROP_ssa ), /* properties_required */
1523   0, /* properties_provided */
1524   0, /* properties_destroyed */
1525   0, /* todo_flags_start */
1526   0, /* todo_flags_finish */
1527 };
1528
1529 class pass_complete_unroll : public gimple_opt_pass
1530 {
1531 public:
1532   pass_complete_unroll (gcc::context *ctxt)
1533     : gimple_opt_pass (pass_data_complete_unroll, ctxt)
1534   {}
1535
1536   /* opt_pass methods: */
1537   virtual unsigned int execute (function *);
1538
1539 }; // class pass_complete_unroll
1540
1541 unsigned int
1542 pass_complete_unroll::execute (function *fun)
1543 {
1544   if (number_of_loops (fun) <= 1)
1545     return 0;
1546
1547   /* If we ever decide to run loop peeling more than once, we will need to
1548      track loops already peeled in loop structures themselves to avoid
1549      re-peeling the same loop multiple times.  */
1550   if (flag_peel_loops)
1551     peeled_loops = BITMAP_ALLOC (NULL);
1552   unsigned int val = tree_unroll_loops_completely (flag_unroll_loops
1553                                                    || flag_peel_loops
1554                                                    || optimize >= 3, true);
1555   if (peeled_loops)
1556     {
1557       BITMAP_FREE (peeled_loops);
1558       peeled_loops = NULL;
1559     }
1560   return val;
1561 }
1562
1563 } // anon namespace
1564
1565 gimple_opt_pass *
1566 make_pass_complete_unroll (gcc::context *ctxt)
1567 {
1568   return new pass_complete_unroll (ctxt);
1569 }
1570
1571 /* Complete unrolling of inner loops.  */
1572
1573 namespace {
1574
1575 const pass_data pass_data_complete_unrolli =
1576 {
1577   GIMPLE_PASS, /* type */
1578   "cunrolli", /* name */
1579   OPTGROUP_LOOP, /* optinfo_flags */
1580   TV_COMPLETE_UNROLL, /* tv_id */
1581   ( PROP_cfg | PROP_ssa ), /* properties_required */
1582   0, /* properties_provided */
1583   0, /* properties_destroyed */
1584   0, /* todo_flags_start */
1585   0, /* todo_flags_finish */
1586 };
1587
1588 class pass_complete_unrolli : public gimple_opt_pass
1589 {
1590 public:
1591   pass_complete_unrolli (gcc::context *ctxt)
1592     : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
1593   {}
1594
1595   /* opt_pass methods: */
1596   virtual bool gate (function *) { return optimize >= 2; }
1597   virtual unsigned int execute (function *);
1598
1599 }; // class pass_complete_unrolli
1600
1601 unsigned int
1602 pass_complete_unrolli::execute (function *fun)
1603 {
1604   unsigned ret = 0;
1605
1606   loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
1607   if (number_of_loops (fun) > 1)
1608     {
1609       scev_initialize ();
1610       ret = tree_unroll_loops_completely (optimize >= 3, false);
1611       scev_finalize ();
1612     }
1613   loop_optimizer_finalize ();
1614
1615   return ret;
1616 }
1617
1618 } // anon namespace
1619
1620 gimple_opt_pass *
1621 make_pass_complete_unrolli (gcc::context *ctxt)
1622 {
1623   return new pass_complete_unrolli (ctxt);
1624 }
1625
1626