gcc/tree-ssa-loop-ivcanon.c

   1 /* Induction variable canonicalization.
   2    Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass detects the loops that iterate a constant number of times,
  21    adds a canonical induction variable (step -1, tested against 0)
  22    and replaces the exit test.  This enables the less powerful rtl
  23    level analysis to use this information.
  24
  25    This might spoil the code in some cases (by increasing register pressure).
  26    Note that in the case the new variable is not needed, ivopts will get rid
  27    of it, so it might only be a problem when there are no other linear induction
  28    variables.  In that case the created optimization possibilities are likely
  29    to pay up.
  30
  31    Additionally in case we detect that it is beneficial to unroll the
  32    loop completely, we do it right here to expose the optimization
  33    possibilities to the following passes.  */
  34
  35 #include "config.h"
  36 #include "system.h"
  37 #include "coretypes.h"
  38 #include "tm.h"
  39 #include "tree.h"
  40 #include "rtl.h"
  41 #include "tm_p.h"
  42 #include "hard-reg-set.h"
  43 #include "basic-block.h"
  44 #include "output.h"
  45 #include "diagnostic.h"
  46 #include "tree-flow.h"
  47 #include "tree-dump.h"
  48 #include "cfgloop.h"
  49 #include "tree-pass.h"
  50 #include "ggc.h"
  51 #include "tree-chrec.h"
  52 #include "tree-scalar-evolution.h"
  53 #include "params.h"
  54 #include "flags.h"
  55 #include "tree-inline.h"
  56 #include "target.h"
  57
  58 /* Specifies types of loops that may be unrolled.  */
  59
  60 enum unroll_level
  61 {
  62   UL_SINGLE_ITER,       /* Only loops that exit immediately in the first
  63                            iteration.  */
  64   UL_NO_GROWTH,         /* Only loops whose unrolling will not cause increase
  65                            of code size.  */
  66   UL_ALL                /* All suitable loops.  */
  67 };
  68
  69 /* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
  70    is the exit edge whose condition is replaced.  */
  71
  72 static void
  73 create_canonical_iv (struct loop *loop, edge exit, tree niter)
  74 {
  75   edge in;
  76   tree type, var;
  77   gimple cond;
  78   gimple_stmt_iterator incr_at;
  79   enum tree_code cmp;
  80
  81   if (dump_file && (dump_flags & TDF_DETAILS))
  82     {
  83       fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
  84       print_generic_expr (dump_file, niter, TDF_SLIM);
  85       fprintf (dump_file, " iterations.\n");
  86     }
  87
  88   cond = last_stmt (exit->src);
  89   in = EDGE_SUCC (exit->src, 0);
  90   if (in == exit)
  91     in = EDGE_SUCC (exit->src, 1);
  92
  93   /* Note that we do not need to worry about overflows, since
  94      type of niter is always unsigned and all comparisons are
  95      just for equality/nonequality -- i.e. everything works
  96      with a modulo arithmetics.  */
  97
  98   type = TREE_TYPE (niter);
  99   niter = fold_build2 (PLUS_EXPR, type,
 100                        niter,
 101                        build_int_cst (type, 1));
 102   incr_at = gsi_last_bb (in->src);
 103   create_iv (niter,
 104              build_int_cst (type, -1),
 105              NULL_TREE, loop,
 106              &incr_at, false, NULL, &var);
 107
 108   cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
 109   gimple_cond_set_code (cond, cmp);
 110   gimple_cond_set_lhs (cond, var);
 111   gimple_cond_set_rhs (cond, build_int_cst (type, 0));
 112   update_stmt (cond);
 113 }
 114
 115 /* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.  */
 116
 117 unsigned
 118 tree_num_loop_insns (struct loop *loop, eni_weights *weights)
 119 {
 120   basic_block *body = get_loop_body (loop);
 121   gimple_stmt_iterator gsi;
 122   unsigned size = 0, i;
 123
 124   for (i = 0; i < loop->num_nodes; i++)
 125     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
 126       size += estimate_num_insns (gsi_stmt (gsi), weights);
 127   free (body);
 128
 129   return size;
 130 }
 131
 132 /* Describe size of loop as detected by tree_estimate_loop_size.  */
 133 struct loop_size
 134 {
 135   /* Number of instructions in the loop.  */
 136   int overall;
 137
 138   /* Number of instructions that will be likely optimized out in
 139      peeled iterations of loop  (i.e. computation based on induction
 140      variable where induction variable starts at known constant.)  */
 141   int eliminated_by_peeling;
 142
 143   /* Same statistics for last iteration of loop: it is smaller because
 144      instructions after exit are not executed.  */
 145   int last_iteration;
 146   int last_iteration_eliminated_by_peeling;
 147 };
 148
 149 /* Return true if OP in STMT will be constant after peeling LOOP.  */
 150
 151 static bool
 152 constant_after_peeling (tree op, gimple stmt, struct loop *loop)
 153 {
 154   affine_iv iv;
 155
 156   if (is_gimple_min_invariant (op))
 157     return true;
 158
 159   /* We can still fold accesses to constant arrays when index is known.  */
 160   if (TREE_CODE (op) != SSA_NAME)
 161     {
 162       tree base = op;
 163
 164       /* First make fast look if we see constant array inside.  */
 165       while (handled_component_p (base))
 166         base = TREE_OPERAND (base, 0);
 167       if ((DECL_P (base)
 168            && TREE_STATIC (base)
 169            && TREE_READONLY (base)
 170            && (DECL_INITIAL (base)
 171                || (!DECL_EXTERNAL (base)
 172                    && targetm.binds_local_p (base))))
 173           || CONSTANT_CLASS_P (base))
 174         {
 175           /* If so, see if we understand all the indices.  */
 176           base = op;
 177           while (handled_component_p (base))
 178             {
 179               if (TREE_CODE (base) == ARRAY_REF
 180                   && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
 181                 return false;
 182               base = TREE_OPERAND (base, 0);
 183             }
 184           return true;
 185         }
 186       return false;
 187     }
 188
 189   /* Induction variables are constants.  */
 190   if (!simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false))
 191     return false;
 192   if (!is_gimple_min_invariant (iv.base))
 193     return false;
 194   if (!is_gimple_min_invariant (iv.step))
 195     return false;
 196   return true;
 197 }
 198
 199 /* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.
 200    Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.  */
 201
 202 static void
 203 tree_estimate_loop_size (struct loop *loop, edge exit, struct loop_size *size)
 204 {
 205   basic_block *body = get_loop_body (loop);
 206   gimple_stmt_iterator gsi;
 207   unsigned int i;
 208   bool after_exit;
 209
 210   size->overall = 0;
 211   size->eliminated_by_peeling = 0;
 212   size->last_iteration = 0;
 213   size->last_iteration_eliminated_by_peeling = 0;
 214
 215   if (dump_file && (dump_flags & TDF_DETAILS))
 216     fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
 217   for (i = 0; i < loop->num_nodes; i++)
 218     {
 219       if (exit && body[i] != exit->src
 220           && dominated_by_p (CDI_DOMINATORS, body[i], exit->src))
 221         after_exit = true;
 222       else
 223         after_exit = false;
 224       if (dump_file && (dump_flags & TDF_DETAILS))
 225         fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index, after_exit);
 226
 227       for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
 228         {
 229           gimple stmt = gsi_stmt (gsi);
 230           int num = estimate_num_insns (stmt, &eni_size_weights);
 231           bool likely_eliminated = false;
 232
 233           if (dump_file && (dump_flags & TDF_DETAILS))
 234             {
 235               fprintf (dump_file, "  size: %3i ", num);
 236               print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, 0);
 237             }
 238
 239           /* Look for reasons why we might optimize this stmt away. */
 240
 241           /* Exit conditional.  */
 242           if (body[i] == exit->src && stmt == last_stmt (exit->src))
 243             {
 244               if (dump_file && (dump_flags & TDF_DETAILS))
 245                 fprintf (dump_file, "   Exit condition will be eliminated.\n");
 246               likely_eliminated = true;
 247             }
 248           /* Sets of IV variables  */
 249           else if (gimple_code (stmt) == GIMPLE_ASSIGN
 250               && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
 251             {
 252               if (dump_file && (dump_flags & TDF_DETAILS))
 253                 fprintf (dump_file, "   Induction variable computation will"
 254                          " be folded away.\n");
 255               likely_eliminated = true;
 256             }
 257           /* Assignments of IV variables.  */
 258           else if (gimple_code (stmt) == GIMPLE_ASSIGN
 259                    && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
 260                    && constant_after_peeling (gimple_assign_rhs1 (stmt), stmt,loop)
 261                    && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
 262                        || constant_after_peeling (gimple_assign_rhs2 (stmt),
 263                                                   stmt, loop)))
 264             {
 265               if (dump_file && (dump_flags & TDF_DETAILS))
 266                 fprintf (dump_file, "   Constant expression will be folded away.\n");
 267               likely_eliminated = true;
 268             }
 269           /* Conditionals.  */
 270           else if (gimple_code (stmt) == GIMPLE_COND
 271                    && constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
 272                    && constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop))
 273             {
 274               if (dump_file && (dump_flags & TDF_DETAILS))
 275                 fprintf (dump_file, "   Constant conditional.\n");
 276               likely_eliminated = true;
 277             }
 278
 279           size->overall += num;
 280           if (likely_eliminated)
 281             size->eliminated_by_peeling += num;
 282           if (!after_exit)
 283             {
 284               size->last_iteration += num;
 285               if (likely_eliminated)
 286                 size->last_iteration_eliminated_by_peeling += num;
 287             }
 288         }
 289     }
 290   if (dump_file && (dump_flags & TDF_DETAILS))
 291     fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
 292              size->eliminated_by_peeling, size->last_iteration,
 293              size->last_iteration_eliminated_by_peeling);
 294
 295   free (body);
 296 }
 297
 298 /* Estimate number of insns of completely unrolled loop.
 299    It is (NUNROLL + 1) * size of loop body with taking into account
 300    the fact that in last copy everything after exit conditional
 301    is dead and that some instructions will be eliminated after
 302    peeling.
 303
 304    Loop body is likely going to simplify futher, this is difficult
 305    to guess, we just decrease the result by 1/3.  */
 306
 307 static unsigned HOST_WIDE_INT
 308 estimated_unrolled_size (struct loop_size *size,
 309                          unsigned HOST_WIDE_INT nunroll)
 310 {
 311   HOST_WIDE_INT unr_insns = ((nunroll)
 312                              * (HOST_WIDE_INT) (size->overall
 313                                                 - size->eliminated_by_peeling));
 314   if (!nunroll)
 315     unr_insns = 0;
 316   unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
 317
 318   unr_insns = unr_insns * 2 / 3;
 319   if (unr_insns <= 0)
 320     unr_insns = 1;
 321
 322   return unr_insns;
 323 }
 324
 325 /* Tries to unroll LOOP completely, i.e. NITER times.
 326    UL determines which loops we are allowed to unroll.
 327    EXIT is the exit of the loop that should be eliminated.  */
 328
 329 static bool
 330 try_unroll_loop_completely (struct loop *loop,
 331                             edge exit, tree niter,
 332                             enum unroll_level ul)
 333 {
 334   unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
 335   gimple cond;
 336   struct loop_size size;
 337
 338   if (loop->inner)
 339     return false;
 340
 341   if (!host_integerp (niter, 1))
 342     return false;
 343   n_unroll = tree_low_cst (niter, 1);
 344
 345   max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
 346   if (n_unroll > max_unroll)
 347     return false;
 348
 349   if (n_unroll)
 350     {
 351       if (ul == UL_SINGLE_ITER)
 352         return false;
 353
 354       tree_estimate_loop_size (loop, exit, &size);
 355       ninsns = size.overall;
 356
 357       unr_insns = estimated_unrolled_size (&size, n_unroll);
 358       if (dump_file && (dump_flags & TDF_DETAILS))
 359         {
 360           fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
 361           fprintf (dump_file, "  Estimated size after unrolling: %d\n",
 362                    (int) unr_insns);
 363         }
 364
 365       if (unr_insns > ninsns
 366           && (unr_insns
 367               > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)))
 368         {
 369           if (dump_file && (dump_flags & TDF_DETAILS))
 370             fprintf (dump_file, "Not unrolling loop %d "
 371                      "(--param max-completely-peeled-insns limit reached).\n",
 372                      loop->num);
 373           return false;
 374         }
 375
 376       if (ul == UL_NO_GROWTH
 377           && unr_insns > ninsns)
 378         {
 379           if (dump_file && (dump_flags & TDF_DETAILS))
 380             fprintf (dump_file, "Not unrolling loop %d.\n", loop->num);
 381           return false;
 382         }
 383     }
 384
 385   if (n_unroll)
 386     {
 387       sbitmap wont_exit;
 388       edge e;
 389       unsigned i;
 390       VEC (edge, heap) *to_remove = NULL;
 391
 392       initialize_original_copy_tables ();
 393       wont_exit = sbitmap_alloc (n_unroll + 1);
 394       sbitmap_ones (wont_exit);
 395       RESET_BIT (wont_exit, 0);
 396
 397       if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 398                                                  n_unroll, wont_exit,
 399                                                  exit, &to_remove,
 400                                                  DLTHE_FLAG_UPDATE_FREQ
 401                                                  | DLTHE_FLAG_COMPLETTE_PEEL))
 402         {
 403           free_original_copy_tables ();
 404           free (wont_exit);
 405           return false;
 406         }
 407
 408       for (i = 0; VEC_iterate (edge, to_remove, i, e); i++)
 409         {
 410           bool ok = remove_path (e);
 411           gcc_assert (ok);
 412         }
 413
 414       VEC_free (edge, heap, to_remove);
 415       free (wont_exit);
 416       free_original_copy_tables ();
 417     }
 418
 419   cond = last_stmt (exit->src);
 420   if (exit->flags & EDGE_TRUE_VALUE)
 421     gimple_cond_make_true (cond);
 422   else
 423     gimple_cond_make_false (cond);
 424   update_stmt (cond);
 425   update_ssa (TODO_update_ssa);
 426
 427   if (dump_file && (dump_flags & TDF_DETAILS))
 428     fprintf (dump_file, "Unrolled loop %d completely.\n", loop->num);
 429
 430   return true;
 431 }
 432
 433 /* Adds a canonical induction variable to LOOP if suitable.
 434    CREATE_IV is true if we may create a new iv.  UL determines
 435    which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
 436    to determine the number of iterations of a loop by direct evaluation.
 437    Returns true if cfg is changed.  */
 438
 439 static bool
 440 canonicalize_loop_induction_variables (struct loop *loop,
 441                                        bool create_iv, enum unroll_level ul,
 442                                        bool try_eval)
 443 {
 444   edge exit = NULL;
 445   tree niter;
 446
 447   niter = number_of_latch_executions (loop);
 448   if (TREE_CODE (niter) == INTEGER_CST)
 449     {
 450       exit = single_exit (loop);
 451       if (!just_once_each_iteration_p (loop, exit->src))
 452         return false;
 453     }
 454   else
 455     {
 456       /* If the loop has more than one exit, try checking all of them
 457          for # of iterations determinable through scev.  */
 458       if (!single_exit (loop))
 459         niter = find_loop_niter (loop, &exit);
 460
 461       /* Finally if everything else fails, try brute force evaluation.  */
 462       if (try_eval
 463           && (chrec_contains_undetermined (niter)
 464               || TREE_CODE (niter) != INTEGER_CST))
 465         niter = find_loop_niter_by_eval (loop, &exit);
 466
 467       if (chrec_contains_undetermined (niter)
 468           || TREE_CODE (niter) != INTEGER_CST)
 469         return false;
 470     }
 471
 472   if (dump_file && (dump_flags & TDF_DETAILS))
 473     {
 474       fprintf (dump_file, "Loop %d iterates ", loop->num);
 475       print_generic_expr (dump_file, niter, TDF_SLIM);
 476       fprintf (dump_file, " times.\n");
 477     }
 478
 479   if (try_unroll_loop_completely (loop, exit, niter, ul))
 480     return true;
 481
 482   if (create_iv)
 483     create_canonical_iv (loop, exit, niter);
 484
 485   return false;
 486 }
 487
 488 /* The main entry point of the pass.  Adds canonical induction variables
 489    to the suitable loops.  */
 490
 491 unsigned int
 492 canonicalize_induction_variables (void)
 493 {
 494   loop_iterator li;
 495   struct loop *loop;
 496   bool changed = false;
 497
 498   FOR_EACH_LOOP (li, loop, 0)
 499     {
 500       changed |= canonicalize_loop_induction_variables (loop,
 501                                                         true, UL_SINGLE_ITER,
 502                                                         true);
 503     }
 504
 505   /* Clean up the information about numbers of iterations, since brute force
 506      evaluation could reveal new information.  */
 507   scev_reset ();
 508
 509   if (changed)
 510     return TODO_cleanup_cfg;
 511   return 0;
 512 }
 513
 514 /* Unroll LOOPS completely if they iterate just few times.  Unless
 515    MAY_INCREASE_SIZE is true, perform the unrolling only if the
 516    size of the code does not increase.  */
 517
 518 unsigned int
 519 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
 520 {
 521   loop_iterator li;
 522   struct loop *loop;
 523   bool changed;
 524   enum unroll_level ul;
 525
 526   do
 527     {
 528       changed = false;
 529
 530       FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
 531         {
 532           if (may_increase_size && optimize_loop_for_speed_p (loop)
 533               /* Unroll outermost loops only if asked to do so or they do
 534                  not cause code growth.  */
 535               && (unroll_outer
 536                   || loop_outer (loop_outer (loop))))
 537             ul = UL_ALL;
 538           else
 539             ul = UL_NO_GROWTH;
 540           changed |= canonicalize_loop_induction_variables
 541                        (loop, false, ul, !flag_tree_loop_ivcanon);
 542         }
 543
 544       if (changed)
 545         {
 546           /* This will take care of removing completely unrolled loops
 547              from the loop structures so we can continue unrolling now
 548              innermost loops.  */
 549           if (cleanup_tree_cfg ())
 550             update_ssa (TODO_update_ssa_only_virtuals);
 551
 552           /* Clean up the information about numbers of iterations, since
 553              complete unrolling might have invalidated it.  */
 554           scev_reset ();
 555         }
 556     }
 557   while (changed);
 558
 559   return 0;
 560 }