gcc/loop-unroll.c

   1 /* Loop unrolling and peeling.
   2    Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 2, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "rtl.h"
  26 #include "hard-reg-set.h"
  27 #include "obstack.h"
  28 #include "basic-block.h"
  29 #include "cfgloop.h"
  30 #include "cfglayout.h"
  31 #include "params.h"
  32 #include "output.h"
  33 #include "expr.h"
  34 #include "hashtab.h"
  35 #include "recog.h"
  36
  37 /* This pass performs loop unrolling and peeling.  We only perform these
  38    optimizations on innermost loops (with single exception) because
  39    the impact on performance is greatest here, and we want to avoid
  40    unnecessary code size growth.  The gain is caused by greater sequentiality
  41    of code, better code to optimize for further passes and in some cases
  42    by fewer testings of exit conditions.  The main problem is code growth,
  43    that impacts performance negatively due to effect of caches.
  44
  45    What we do:
  46
  47    -- complete peeling of once-rolling loops; this is the above mentioned
  48       exception, as this causes loop to be cancelled completely and
  49       does not cause code growth
  50    -- complete peeling of loops that roll (small) constant times.
  51    -- simple peeling of first iterations of loops that do not roll much
  52       (according to profile feedback)
  53    -- unrolling of loops that roll constant times; this is almost always
  54       win, as we get rid of exit condition tests.
  55    -- unrolling of loops that roll number of times that we can compute
  56       in runtime; we also get rid of exit condition tests here, but there
  57       is the extra expense for calculating the number of iterations
  58    -- simple unrolling of remaining loops; this is performed only if we
  59       are asked to, as the gain is questionable in this case and often
  60       it may even slow down the code
  61    For more detailed descriptions of each of those, see comments at
  62    appropriate function below.
  63
  64    There is a lot of parameters (defined and described in params.def) that
  65    control how much we unroll/peel.
  66
  67    ??? A great problem is that we don't have a good way how to determine
  68    how many times we should unroll the loop; the experiments I have made
  69    showed that this choice may affect performance in order of several %.
  70    */
  71
  72 /* Information about induction variables to split.  */
  73
  74 struct iv_to_split
  75 {
  76   rtx insn;             /* The insn in that the induction variable occurs.  */
  77   rtx base_var;         /* The variable on that the values in the further
  78                            iterations are based.  */
  79   rtx step;             /* Step of the induction variable.  */
  80   unsigned n_loc;
  81   unsigned loc[3];      /* Location where the definition of the induction
  82                            variable occurs in the insn.  For example if
  83                            N_LOC is 2, the expression is located at
  84                            XEXP (XEXP (single_set, loc[0]), loc[1]).  */
  85 };
  86
  87 /* Information about accumulators to expand.  */
  88
  89 struct var_to_expand
  90 {
  91   rtx insn;                        /* The insn in that the variable expansion occurs.  */
  92   rtx reg;                         /* The accumulator which is expanded.  */
  93   VEC(rtx,heap) *var_expansions;   /* The copies of the accumulator which is expanded.  */
  94   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
  95                                       or multiplication.  */
  96   int expansion_count;             /* Count the number of expansions generated so far.  */
  97   int reuse_expansion;             /* The expansion we intend to reuse to expand
  98                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
  99                                       the original accumulator.  Else use
 100                                       var_expansions[REUSE_EXPANSION - 1].  */
 101 };
 102
 103 /* Information about optimization applied in
 104    the unrolled loop.  */
 105
 106 struct opt_info
 107 {
 108   htab_t insns_to_split;           /* A hashtable of insns to split.  */
 109   htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
 110                                       to expand.  */
 111   unsigned first_new_block;        /* The first basic block that was
 112                                       duplicated.  */
 113   basic_block loop_exit;           /* The loop exit basic block.  */
 114   basic_block loop_preheader;      /* The loop preheader basic block.  */
 115 };
 116
 117 static void decide_unrolling_and_peeling (int);
 118 static void peel_loops_completely (int);
 119 static void decide_peel_simple (struct loop *, int);
 120 static void decide_peel_once_rolling (struct loop *, int);
 121 static void decide_peel_completely (struct loop *, int);
 122 static void decide_unroll_stupid (struct loop *, int);
 123 static void decide_unroll_constant_iterations (struct loop *, int);
 124 static void decide_unroll_runtime_iterations (struct loop *, int);
 125 static void peel_loop_simple (struct loop *);
 126 static void peel_loop_completely (struct loop *);
 127 static void unroll_loop_stupid (struct loop *);
 128 static void unroll_loop_constant_iterations (struct loop *);
 129 static void unroll_loop_runtime_iterations (struct loop *);
 130 static struct opt_info *analyze_insns_in_loop (struct loop *);
 131 static void opt_info_start_duplication (struct opt_info *);
 132 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
 133 static void free_opt_info (struct opt_info *);
 134 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
 135 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
 136 static struct iv_to_split *analyze_iv_to_split_insn (rtx);
 137 static void expand_var_during_unrolling (struct var_to_expand *, rtx);
 138 static int insert_var_expansion_initialization (void **, void *);
 139 static int combine_var_copies_in_loop_exit (void **, void *);
 140 static int release_var_copies (void **, void *);
 141 static rtx get_expansion (struct var_to_expand *);
 142
 143 /* Unroll and/or peel (depending on FLAGS) LOOPS.  */
 144 void
 145 unroll_and_peel_loops (int flags)
 146 {
 147   struct loop *loop;
 148   bool check;
 149   loop_iterator li;
 150
 151   /* First perform complete loop peeling (it is almost surely a win,
 152      and affects parameters for further decision a lot).  */
 153   peel_loops_completely (flags);
 154
 155   /* Now decide rest of unrolling and peeling.  */
 156   decide_unrolling_and_peeling (flags);
 157
 158   /* Scan the loops, inner ones first.  */
 159   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
 160     {
 161       check = true;
 162       /* And perform the appropriate transformations.  */
 163       switch (loop->lpt_decision.decision)
 164         {
 165         case LPT_PEEL_COMPLETELY:
 166           /* Already done.  */
 167           gcc_unreachable ();
 168         case LPT_PEEL_SIMPLE:
 169           peel_loop_simple (loop);
 170           break;
 171         case LPT_UNROLL_CONSTANT:
 172           unroll_loop_constant_iterations (loop);
 173           break;
 174         case LPT_UNROLL_RUNTIME:
 175           unroll_loop_runtime_iterations (loop);
 176           break;
 177         case LPT_UNROLL_STUPID:
 178           unroll_loop_stupid (loop);
 179           break;
 180         case LPT_NONE:
 181           check = false;
 182           break;
 183         default:
 184           gcc_unreachable ();
 185         }
 186       if (check)
 187         {
 188 #ifdef ENABLE_CHECKING
 189           verify_dominators (CDI_DOMINATORS);
 190           verify_loop_structure ();
 191 #endif
 192         }
 193     }
 194
 195   iv_analysis_done ();
 196 }
 197
 198 /* Check whether exit of the LOOP is at the end of loop body.  */
 199
 200 static bool
 201 loop_exit_at_end_p (struct loop *loop)
 202 {
 203   struct niter_desc *desc = get_simple_loop_desc (loop);
 204   rtx insn;
 205
 206   if (desc->in_edge->dest != loop->latch)
 207     return false;
 208
 209   /* Check that the latch is empty.  */
 210   FOR_BB_INSNS (loop->latch, insn)
 211     {
 212       if (INSN_P (insn))
 213         return false;
 214     }
 215
 216   return true;
 217 }
 218
 219 /* Depending on FLAGS, check whether to peel loops completely and do so.  */
 220 static void
 221 peel_loops_completely (int flags)
 222 {
 223   struct loop *loop;
 224   loop_iterator li;
 225
 226   /* Scan the loops, the inner ones first.  */
 227   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
 228     {
 229       loop->lpt_decision.decision = LPT_NONE;
 230
 231       if (dump_file)
 232         fprintf (dump_file,
 233                  "\n;; *** Considering loop %d for complete peeling ***\n",
 234                  loop->num);
 235
 236       loop->ninsns = num_loop_insns (loop);
 237
 238       decide_peel_once_rolling (loop, flags);
 239       if (loop->lpt_decision.decision == LPT_NONE)
 240         decide_peel_completely (loop, flags);
 241
 242       if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
 243         {
 244           peel_loop_completely (loop);
 245 #ifdef ENABLE_CHECKING
 246           verify_dominators (CDI_DOMINATORS);
 247           verify_loop_structure ();
 248 #endif
 249         }
 250     }
 251 }
 252
 253 /* Decide whether unroll or peel loops (depending on FLAGS) and how much.  */
 254 static void
 255 decide_unrolling_and_peeling (int flags)
 256 {
 257   struct loop *loop;
 258   loop_iterator li;
 259
 260   /* Scan the loops, inner ones first.  */
 261   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
 262     {
 263       loop->lpt_decision.decision = LPT_NONE;
 264
 265       if (dump_file)
 266         fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
 267
 268       /* Do not peel cold areas.  */
 269       if (!maybe_hot_bb_p (loop->header))
 270         {
 271           if (dump_file)
 272             fprintf (dump_file, ";; Not considering loop, cold area\n");
 273           continue;
 274         }
 275
 276       /* Can the loop be manipulated?  */
 277       if (!can_duplicate_loop_p (loop))
 278         {
 279           if (dump_file)
 280             fprintf (dump_file,
 281                      ";; Not considering loop, cannot duplicate\n");
 282           continue;
 283         }
 284
 285       /* Skip non-innermost loops.  */
 286       if (loop->inner)
 287         {
 288           if (dump_file)
 289             fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 290           continue;
 291         }
 292
 293       loop->ninsns = num_loop_insns (loop);
 294       loop->av_ninsns = average_num_loop_insns (loop);
 295
 296       /* Try transformations one by one in decreasing order of
 297          priority.  */
 298
 299       decide_unroll_constant_iterations (loop, flags);
 300       if (loop->lpt_decision.decision == LPT_NONE)
 301         decide_unroll_runtime_iterations (loop, flags);
 302       if (loop->lpt_decision.decision == LPT_NONE)
 303         decide_unroll_stupid (loop, flags);
 304       if (loop->lpt_decision.decision == LPT_NONE)
 305         decide_peel_simple (loop, flags);
 306     }
 307 }
 308
 309 /* Decide whether the LOOP is once rolling and suitable for complete
 310    peeling.  */
 311 static void
 312 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 313 {
 314   struct niter_desc *desc;
 315
 316   if (dump_file)
 317     fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
 318
 319   /* Is the loop small enough?  */
 320   if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
 321     {
 322       if (dump_file)
 323         fprintf (dump_file, ";; Not considering loop, is too big\n");
 324       return;
 325     }
 326
 327   /* Check for simple loops.  */
 328   desc = get_simple_loop_desc (loop);
 329
 330   /* Check number of iterations.  */
 331   if (!desc->simple_p
 332       || desc->assumptions
 333       || desc->infinite
 334       || !desc->const_iter
 335       || desc->niter != 0)
 336     {
 337       if (dump_file)
 338         fprintf (dump_file,
 339                  ";; Unable to prove that the loop rolls exactly once\n");
 340       return;
 341     }
 342
 343   /* Success.  */
 344   if (dump_file)
 345     fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
 346   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 347 }
 348
 349 /* Decide whether the LOOP is suitable for complete peeling.  */
 350 static void
 351 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 352 {
 353   unsigned npeel;
 354   struct niter_desc *desc;
 355
 356   if (dump_file)
 357     fprintf (dump_file, "\n;; Considering peeling completely\n");
 358
 359   /* Skip non-innermost loops.  */
 360   if (loop->inner)
 361     {
 362       if (dump_file)
 363         fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 364       return;
 365     }
 366
 367   /* Do not peel cold areas.  */
 368   if (!maybe_hot_bb_p (loop->header))
 369     {
 370       if (dump_file)
 371         fprintf (dump_file, ";; Not considering loop, cold area\n");
 372       return;
 373     }
 374
 375   /* Can the loop be manipulated?  */
 376   if (!can_duplicate_loop_p (loop))
 377     {
 378       if (dump_file)
 379         fprintf (dump_file,
 380                  ";; Not considering loop, cannot duplicate\n");
 381       return;
 382     }
 383
 384   /* npeel = number of iterations to peel.  */
 385   npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
 386   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
 387     npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
 388
 389   /* Is the loop small enough?  */
 390   if (!npeel)
 391     {
 392       if (dump_file)
 393         fprintf (dump_file, ";; Not considering loop, is too big\n");
 394       return;
 395     }
 396
 397   /* Check for simple loops.  */
 398   desc = get_simple_loop_desc (loop);
 399
 400   /* Check number of iterations.  */
 401   if (!desc->simple_p
 402       || desc->assumptions
 403       || !desc->const_iter
 404       || desc->infinite)
 405     {
 406       if (dump_file)
 407         fprintf (dump_file,
 408                  ";; Unable to prove that the loop iterates constant times\n");
 409       return;
 410     }
 411
 412   if (desc->niter > npeel - 1)
 413     {
 414       if (dump_file)
 415         {
 416           fprintf (dump_file,
 417                    ";; Not peeling loop completely, rolls too much (");
 418           fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
 419           fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
 420         }
 421       return;
 422     }
 423
 424   /* Success.  */
 425   if (dump_file)
 426     fprintf (dump_file, ";; Decided to peel loop completely\n");
 427   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 428 }
 429
 430 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
 431    completely.  The transformation done:
 432
 433    for (i = 0; i < 4; i++)
 434      body;
 435
 436    ==>
 437
 438    i = 0;
 439    body; i++;
 440    body; i++;
 441    body; i++;
 442    body; i++;
 443    */
 444 static void
 445 peel_loop_completely (struct loop *loop)
 446 {
 447   sbitmap wont_exit;
 448   unsigned HOST_WIDE_INT npeel;
 449   unsigned n_remove_edges, i;
 450   edge *remove_edges, ein;
 451   struct niter_desc *desc = get_simple_loop_desc (loop);
 452   struct opt_info *opt_info = NULL;
 453
 454   npeel = desc->niter;
 455
 456   if (npeel)
 457     {
 458       bool ok;
 459
 460       wont_exit = sbitmap_alloc (npeel + 1);
 461       sbitmap_ones (wont_exit);
 462       RESET_BIT (wont_exit, 0);
 463       if (desc->noloop_assumptions)
 464         RESET_BIT (wont_exit, 1);
 465
 466       remove_edges = XCNEWVEC (edge, npeel);
 467       n_remove_edges = 0;
 468
 469       if (flag_split_ivs_in_unroller)
 470         opt_info = analyze_insns_in_loop (loop);
 471
 472       opt_info_start_duplication (opt_info);
 473       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 474                                           npeel,
 475                                           wont_exit, desc->out_edge,
 476                                           remove_edges, &n_remove_edges,
 477                                           DLTHE_FLAG_UPDATE_FREQ
 478                                           | DLTHE_FLAG_COMPLETTE_PEEL
 479                                           | (opt_info
 480                                              ? DLTHE_RECORD_COPY_NUMBER : 0));
 481       gcc_assert (ok);
 482
 483       free (wont_exit);
 484
 485       if (opt_info)
 486         {
 487           apply_opt_in_copies (opt_info, npeel, false, true);
 488           free_opt_info (opt_info);
 489         }
 490
 491       /* Remove the exit edges.  */
 492       for (i = 0; i < n_remove_edges; i++)
 493         remove_path (remove_edges[i]);
 494       free (remove_edges);
 495     }
 496
 497   ein = desc->in_edge;
 498   free_simple_loop_desc (loop);
 499
 500   /* Now remove the unreachable part of the last iteration and cancel
 501      the loop.  */
 502   remove_path (ein);
 503
 504   if (dump_file)
 505     fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
 506 }
 507
 508 /* Decide whether to unroll LOOP iterating constant number of times
 509    and how much.  */
 510
 511 static void
 512 decide_unroll_constant_iterations (struct loop *loop, int flags)
 513 {
 514   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
 515   struct niter_desc *desc;
 516
 517   if (!(flags & UAP_UNROLL))
 518     {
 519       /* We were not asked to, just return back silently.  */
 520       return;
 521     }
 522
 523   if (dump_file)
 524     fprintf (dump_file,
 525              "\n;; Considering unrolling loop with constant "
 526              "number of iterations\n");
 527
 528   /* nunroll = total number of copies of the original loop body in
 529      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 530   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 531   nunroll_by_av
 532     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 533   if (nunroll > nunroll_by_av)
 534     nunroll = nunroll_by_av;
 535   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 536     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 537
 538   /* Skip big loops.  */
 539   if (nunroll <= 1)
 540     {
 541       if (dump_file)
 542         fprintf (dump_file, ";; Not considering loop, is too big\n");
 543       return;
 544     }
 545
 546   /* Check for simple loops.  */
 547   desc = get_simple_loop_desc (loop);
 548
 549   /* Check number of iterations.  */
 550   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
 551     {
 552       if (dump_file)
 553         fprintf (dump_file,
 554                  ";; Unable to prove that the loop iterates constant times\n");
 555       return;
 556     }
 557
 558   /* Check whether the loop rolls enough to consider.  */
 559   if (desc->niter < 2 * nunroll)
 560     {
 561       if (dump_file)
 562         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 563       return;
 564     }
 565
 566   /* Success; now compute number of iterations to unroll.  We alter
 567      nunroll so that as few as possible copies of loop body are
 568      necessary, while still not decreasing the number of unrollings
 569      too much (at most by 1).  */
 570   best_copies = 2 * nunroll + 10;
 571
 572   i = 2 * nunroll + 2;
 573   if (i - 1 >= desc->niter)
 574     i = desc->niter - 2;
 575
 576   for (; i >= nunroll - 1; i--)
 577     {
 578       unsigned exit_mod = desc->niter % (i + 1);
 579
 580       if (!loop_exit_at_end_p (loop))
 581         n_copies = exit_mod + i + 1;
 582       else if (exit_mod != (unsigned) i
 583                || desc->noloop_assumptions != NULL_RTX)
 584         n_copies = exit_mod + i + 2;
 585       else
 586         n_copies = i + 1;
 587
 588       if (n_copies < best_copies)
 589         {
 590           best_copies = n_copies;
 591           best_unroll = i;
 592         }
 593     }
 594
 595   if (dump_file)
 596     fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
 597              best_unroll + 1, best_copies, nunroll);
 598
 599   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
 600   loop->lpt_decision.times = best_unroll;
 601
 602   if (dump_file)
 603     fprintf (dump_file,
 604              ";; Decided to unroll the constant times rolling loop, %d times.\n",
 605              loop->lpt_decision.times);
 606 }
 607
 608 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
 609    times.  The transformation does this:
 610
 611    for (i = 0; i < 102; i++)
 612      body;
 613
 614    ==>
 615
 616    i = 0;
 617    body; i++;
 618    body; i++;
 619    while (i < 102)
 620      {
 621        body; i++;
 622        body; i++;
 623        body; i++;
 624        body; i++;
 625      }
 626   */
 627 static void
 628 unroll_loop_constant_iterations (struct loop *loop)
 629 {
 630   unsigned HOST_WIDE_INT niter;
 631   unsigned exit_mod;
 632   sbitmap wont_exit;
 633   unsigned n_remove_edges, i;
 634   edge *remove_edges;
 635   unsigned max_unroll = loop->lpt_decision.times;
 636   struct niter_desc *desc = get_simple_loop_desc (loop);
 637   bool exit_at_end = loop_exit_at_end_p (loop);
 638   struct opt_info *opt_info = NULL;
 639   bool ok;
 640
 641   niter = desc->niter;
 642
 643   /* Should not get here (such loop should be peeled instead).  */
 644   gcc_assert (niter > max_unroll + 1);
 645
 646   exit_mod = niter % (max_unroll + 1);
 647
 648   wont_exit = sbitmap_alloc (max_unroll + 1);
 649   sbitmap_ones (wont_exit);
 650
 651   remove_edges = XCNEWVEC (edge, max_unroll + exit_mod + 1);
 652   n_remove_edges = 0;
 653   if (flag_split_ivs_in_unroller
 654       || flag_variable_expansion_in_unroller)
 655     opt_info = analyze_insns_in_loop (loop);
 656
 657   if (!exit_at_end)
 658     {
 659       /* The exit is not at the end of the loop; leave exit test
 660          in the first copy, so that the loops that start with test
 661          of exit condition have continuous body after unrolling.  */
 662
 663       if (dump_file)
 664         fprintf (dump_file, ";; Condition on beginning of loop.\n");
 665
 666       /* Peel exit_mod iterations.  */
 667       RESET_BIT (wont_exit, 0);
 668       if (desc->noloop_assumptions)
 669         RESET_BIT (wont_exit, 1);
 670
 671       if (exit_mod)
 672         {
 673           opt_info_start_duplication (opt_info);
 674           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 675                                               exit_mod,
 676                                               wont_exit, desc->out_edge,
 677                                               remove_edges, &n_remove_edges,
 678                                               DLTHE_FLAG_UPDATE_FREQ
 679                                               | (opt_info && exit_mod > 1
 680                                                  ? DLTHE_RECORD_COPY_NUMBER
 681                                                    : 0));
 682           gcc_assert (ok);
 683
 684           if (opt_info && exit_mod > 1)
 685             apply_opt_in_copies (opt_info, exit_mod, false, false);
 686
 687           desc->noloop_assumptions = NULL_RTX;
 688           desc->niter -= exit_mod;
 689           desc->niter_max -= exit_mod;
 690         }
 691
 692       SET_BIT (wont_exit, 1);
 693     }
 694   else
 695     {
 696       /* Leave exit test in last copy, for the same reason as above if
 697          the loop tests the condition at the end of loop body.  */
 698
 699       if (dump_file)
 700         fprintf (dump_file, ";; Condition on end of loop.\n");
 701
 702       /* We know that niter >= max_unroll + 2; so we do not need to care of
 703          case when we would exit before reaching the loop.  So just peel
 704          exit_mod + 1 iterations.  */
 705       if (exit_mod != max_unroll
 706           || desc->noloop_assumptions)
 707         {
 708           RESET_BIT (wont_exit, 0);
 709           if (desc->noloop_assumptions)
 710             RESET_BIT (wont_exit, 1);
 711
 712           opt_info_start_duplication (opt_info);
 713           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 714                                               exit_mod + 1,
 715                                               wont_exit, desc->out_edge,
 716                                               remove_edges, &n_remove_edges,
 717                                               DLTHE_FLAG_UPDATE_FREQ
 718                                               | (opt_info && exit_mod > 0
 719                                                  ? DLTHE_RECORD_COPY_NUMBER
 720                                                    : 0));
 721           gcc_assert (ok);
 722
 723           if (opt_info && exit_mod > 0)
 724             apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
 725
 726           desc->niter -= exit_mod + 1;
 727           desc->niter_max -= exit_mod + 1;
 728           desc->noloop_assumptions = NULL_RTX;
 729
 730           SET_BIT (wont_exit, 0);
 731           SET_BIT (wont_exit, 1);
 732         }
 733
 734       RESET_BIT (wont_exit, max_unroll);
 735     }
 736
 737   /* Now unroll the loop.  */
 738
 739   opt_info_start_duplication (opt_info);
 740   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 741                                       max_unroll,
 742                                       wont_exit, desc->out_edge,
 743                                       remove_edges, &n_remove_edges,
 744                                       DLTHE_FLAG_UPDATE_FREQ
 745                                       | (opt_info
 746                                          ? DLTHE_RECORD_COPY_NUMBER
 747                                            : 0));
 748   gcc_assert (ok);
 749
 750   if (opt_info)
 751     {
 752       apply_opt_in_copies (opt_info, max_unroll, true, true);
 753       free_opt_info (opt_info);
 754     }
 755
 756   free (wont_exit);
 757
 758   if (exit_at_end)
 759     {
 760       basic_block exit_block = get_bb_copy (desc->in_edge->src);
 761       /* Find a new in and out edge; they are in the last copy we have made.  */
 762
 763       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
 764         {
 765           desc->out_edge = EDGE_SUCC (exit_block, 0);
 766           desc->in_edge = EDGE_SUCC (exit_block, 1);
 767         }
 768       else
 769         {
 770           desc->out_edge = EDGE_SUCC (exit_block, 1);
 771           desc->in_edge = EDGE_SUCC (exit_block, 0);
 772         }
 773     }
 774
 775   desc->niter /= max_unroll + 1;
 776   desc->niter_max /= max_unroll + 1;
 777   desc->niter_expr = GEN_INT (desc->niter);
 778
 779   /* Remove the edges.  */
 780   for (i = 0; i < n_remove_edges; i++)
 781     remove_path (remove_edges[i]);
 782   free (remove_edges);
 783
 784   if (dump_file)
 785     fprintf (dump_file,
 786              ";; Unrolled loop %d times, constant # of iterations %i insns\n",
 787              max_unroll, num_loop_insns (loop));
 788 }
 789
 790 /* Decide whether to unroll LOOP iterating runtime computable number of times
 791    and how much.  */
 792 static void
 793 decide_unroll_runtime_iterations (struct loop *loop, int flags)
 794 {
 795   unsigned nunroll, nunroll_by_av, i;
 796   struct niter_desc *desc;
 797
 798   if (!(flags & UAP_UNROLL))
 799     {
 800       /* We were not asked to, just return back silently.  */
 801       return;
 802     }
 803
 804   if (dump_file)
 805     fprintf (dump_file,
 806              "\n;; Considering unrolling loop with runtime "
 807              "computable number of iterations\n");
 808
 809   /* nunroll = total number of copies of the original loop body in
 810      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 811   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 812   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 813   if (nunroll > nunroll_by_av)
 814     nunroll = nunroll_by_av;
 815   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 816     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 817
 818   /* Skip big loops.  */
 819   if (nunroll <= 1)
 820     {
 821       if (dump_file)
 822         fprintf (dump_file, ";; Not considering loop, is too big\n");
 823       return;
 824     }
 825
 826   /* Check for simple loops.  */
 827   desc = get_simple_loop_desc (loop);
 828
 829   /* Check simpleness.  */
 830   if (!desc->simple_p || desc->assumptions)
 831     {
 832       if (dump_file)
 833         fprintf (dump_file,
 834                  ";; Unable to prove that the number of iterations "
 835                  "can be counted in runtime\n");
 836       return;
 837     }
 838
 839   if (desc->const_iter)
 840     {
 841       if (dump_file)
 842         fprintf (dump_file, ";; Loop iterates constant times\n");
 843       return;
 844     }
 845
 846   /* If we have profile feedback, check whether the loop rolls.  */
 847   if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
 848     {
 849       if (dump_file)
 850         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 851       return;
 852     }
 853
 854   /* Success; now force nunroll to be power of 2, as we are unable to
 855      cope with overflows in computation of number of iterations.  */
 856   for (i = 1; 2 * i <= nunroll; i *= 2)
 857     continue;
 858
 859   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
 860   loop->lpt_decision.times = i - 1;
 861
 862   if (dump_file)
 863     fprintf (dump_file,
 864              ";; Decided to unroll the runtime computable "
 865              "times rolling loop, %d times.\n",
 866              loop->lpt_decision.times);
 867 }
 868
 869 /* Splits edge E and inserts the sequence of instructions INSNS on it, and
 870    returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
 871    and NULL is returned instead.  */
 872
 873 basic_block
 874 split_edge_and_insert (edge e, rtx insns)
 875 {
 876   basic_block bb;
 877
 878   if (!insns)
 879     return NULL;
 880   bb = split_edge (e);
 881   emit_insn_after (insns, BB_END (bb));
 882   bb->flags |= BB_SUPERBLOCK;
 883   return bb;
 884 }
 885
 886 /* Unroll LOOP for that we are able to count number of iterations in runtime
 887    LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this (with some
 888    extra care for case n < 0):
 889
 890    for (i = 0; i < n; i++)
 891      body;
 892
 893    ==>
 894
 895    i = 0;
 896    mod = n % 4;
 897
 898    switch (mod)
 899      {
 900        case 3:
 901          body; i++;
 902        case 2:
 903          body; i++;
 904        case 1:
 905          body; i++;
 906        case 0: ;
 907      }
 908
 909    while (i < n)
 910      {
 911        body; i++;
 912        body; i++;
 913        body; i++;
 914        body; i++;
 915      }
 916    */
 917 static void
 918 unroll_loop_runtime_iterations (struct loop *loop)
 919 {
 920   rtx old_niter, niter, init_code, branch_code, tmp;
 921   unsigned i, j, p;
 922   basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
 923   unsigned n_dom_bbs;
 924   sbitmap wont_exit;
 925   int may_exit_copy;
 926   unsigned n_peel, n_remove_edges;
 927   edge *remove_edges, e;
 928   bool extra_zero_check, last_may_exit;
 929   unsigned max_unroll = loop->lpt_decision.times;
 930   struct niter_desc *desc = get_simple_loop_desc (loop);
 931   bool exit_at_end = loop_exit_at_end_p (loop);
 932   struct opt_info *opt_info = NULL;
 933   bool ok;
 934
 935   if (flag_split_ivs_in_unroller
 936       || flag_variable_expansion_in_unroller)
 937     opt_info = analyze_insns_in_loop (loop);
 938
 939   /* Remember blocks whose dominators will have to be updated.  */
 940   dom_bbs = XCNEWVEC (basic_block, n_basic_blocks);
 941   n_dom_bbs = 0;
 942
 943   body = get_loop_body (loop);
 944   for (i = 0; i < loop->num_nodes; i++)
 945     {
 946       unsigned nldom;
 947       basic_block *ldom;
 948
 949       nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
 950       for (j = 0; j < nldom; j++)
 951         if (!flow_bb_inside_loop_p (loop, ldom[j]))
 952           dom_bbs[n_dom_bbs++] = ldom[j];
 953
 954       free (ldom);
 955     }
 956   free (body);
 957
 958   if (!exit_at_end)
 959     {
 960       /* Leave exit in first copy (for explanation why see comment in
 961          unroll_loop_constant_iterations).  */
 962       may_exit_copy = 0;
 963       n_peel = max_unroll - 1;
 964       extra_zero_check = true;
 965       last_may_exit = false;
 966     }
 967   else
 968     {
 969       /* Leave exit in last copy (for explanation why see comment in
 970          unroll_loop_constant_iterations).  */
 971       may_exit_copy = max_unroll;
 972       n_peel = max_unroll;
 973       extra_zero_check = false;
 974       last_may_exit = true;
 975     }
 976
 977   /* Get expression for number of iterations.  */
 978   start_sequence ();
 979   old_niter = niter = gen_reg_rtx (desc->mode);
 980   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
 981   if (tmp != niter)
 982     emit_move_insn (niter, tmp);
 983
 984   /* Count modulo by ANDing it with max_unroll; we use the fact that
 985      the number of unrollings is a power of two, and thus this is correct
 986      even if there is overflow in the computation.  */
 987   niter = expand_simple_binop (desc->mode, AND,
 988                                niter,
 989                                GEN_INT (max_unroll),
 990                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
 991
 992   init_code = get_insns ();
 993   end_sequence ();
 994
 995   /* Precondition the loop.  */
 996   split_edge_and_insert (loop_preheader_edge (loop), init_code);
 997
 998   remove_edges = XCNEWVEC (edge, max_unroll + n_peel + 1);
 999   n_remove_edges = 0;
1000
1001   wont_exit = sbitmap_alloc (max_unroll + 2);
1002
1003   /* Peel the first copy of loop body (almost always we must leave exit test
1004      here; the only exception is when we have extra zero check and the number
1005      of iterations is reliable.  Also record the place of (possible) extra
1006      zero check.  */
1007   sbitmap_zero (wont_exit);
1008   if (extra_zero_check
1009       && !desc->noloop_assumptions)
1010     SET_BIT (wont_exit, 1);
1011   ezc_swtch = loop_preheader_edge (loop)->src;
1012   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1013                                       1, wont_exit, desc->out_edge,
1014                                       remove_edges, &n_remove_edges,
1015                                       DLTHE_FLAG_UPDATE_FREQ);
1016   gcc_assert (ok);
1017
1018   /* Record the place where switch will be built for preconditioning.  */
1019   swtch = split_edge (loop_preheader_edge (loop));
1020
1021   for (i = 0; i < n_peel; i++)
1022     {
1023       /* Peel the copy.  */
1024       sbitmap_zero (wont_exit);
1025       if (i != n_peel - 1 || !last_may_exit)
1026         SET_BIT (wont_exit, 1);
1027       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1028                                           1, wont_exit, desc->out_edge,
1029                                           remove_edges, &n_remove_edges,
1030                                           DLTHE_FLAG_UPDATE_FREQ);
1031       gcc_assert (ok);
1032
1033       /* Create item for switch.  */
1034       j = n_peel - i - (extra_zero_check ? 0 : 1);
1035       p = REG_BR_PROB_BASE / (i + 2);
1036
1037       preheader = split_edge (loop_preheader_edge (loop));
1038       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1039                                           block_label (preheader), p,
1040                                           NULL_RTX);
1041
1042       /* We rely on the fact that the compare and jump cannot be optimized out,
1043          and hence the cfg we create is correct.  */
1044       gcc_assert (branch_code != NULL_RTX);
1045
1046       swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
1047       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1048       single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1049       e = make_edge (swtch, preheader,
1050                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1051       e->probability = p;
1052     }
1053
1054   if (extra_zero_check)
1055     {
1056       /* Add branch for zero iterations.  */
1057       p = REG_BR_PROB_BASE / (max_unroll + 1);
1058       swtch = ezc_swtch;
1059       preheader = split_edge (loop_preheader_edge (loop));
1060       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1061                                           block_label (preheader), p,
1062                                           NULL_RTX);
1063       gcc_assert (branch_code != NULL_RTX);
1064
1065       swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
1066       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1067       single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1068       e = make_edge (swtch, preheader,
1069                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1070       e->probability = p;
1071     }
1072
1073   /* Recount dominators for outer blocks.  */
1074   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
1075
1076   /* And unroll loop.  */
1077
1078   sbitmap_ones (wont_exit);
1079   RESET_BIT (wont_exit, may_exit_copy);
1080   opt_info_start_duplication (opt_info);
1081
1082   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1083                                       max_unroll,
1084                                       wont_exit, desc->out_edge,
1085                                       remove_edges, &n_remove_edges,
1086                                       DLTHE_FLAG_UPDATE_FREQ
1087                                       | (opt_info
1088                                          ? DLTHE_RECORD_COPY_NUMBER
1089                                            : 0));
1090   gcc_assert (ok);
1091
1092   if (opt_info)
1093     {
1094       apply_opt_in_copies (opt_info, max_unroll, true, true);
1095       free_opt_info (opt_info);
1096     }
1097
1098   free (wont_exit);
1099
1100   if (exit_at_end)
1101     {
1102       basic_block exit_block = get_bb_copy (desc->in_edge->src);
1103       /* Find a new in and out edge; they are in the last copy we have
1104          made.  */
1105
1106       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1107         {
1108           desc->out_edge = EDGE_SUCC (exit_block, 0);
1109           desc->in_edge = EDGE_SUCC (exit_block, 1);
1110         }
1111       else
1112         {
1113           desc->out_edge = EDGE_SUCC (exit_block, 1);
1114           desc->in_edge = EDGE_SUCC (exit_block, 0);
1115         }
1116     }
1117
1118   /* Remove the edges.  */
1119   for (i = 0; i < n_remove_edges; i++)
1120     remove_path (remove_edges[i]);
1121   free (remove_edges);
1122
1123   /* We must be careful when updating the number of iterations due to
1124      preconditioning and the fact that the value must be valid at entry
1125      of the loop.  After passing through the above code, we see that
1126      the correct new number of iterations is this:  */
1127   gcc_assert (!desc->const_iter);
1128   desc->niter_expr =
1129     simplify_gen_binary (UDIV, desc->mode, old_niter,
1130                          GEN_INT (max_unroll + 1));
1131   desc->niter_max /= max_unroll + 1;
1132   if (exit_at_end)
1133     {
1134       desc->niter_expr =
1135         simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1136       desc->noloop_assumptions = NULL_RTX;
1137       desc->niter_max--;
1138     }
1139
1140   if (dump_file)
1141     fprintf (dump_file,
1142              ";; Unrolled loop %d times, counting # of iterations "
1143              "in runtime, %i insns\n",
1144              max_unroll, num_loop_insns (loop));
1145
1146   if (dom_bbs)
1147     free (dom_bbs);
1148 }
1149
1150 /* Decide whether to simply peel LOOP and how much.  */
1151 static void
1152 decide_peel_simple (struct loop *loop, int flags)
1153 {
1154   unsigned npeel;
1155   struct niter_desc *desc;
1156
1157   if (!(flags & UAP_PEEL))
1158     {
1159       /* We were not asked to, just return back silently.  */
1160       return;
1161     }
1162
1163   if (dump_file)
1164     fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1165
1166   /* npeel = number of iterations to peel.  */
1167   npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1168   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1169     npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1170
1171   /* Skip big loops.  */
1172   if (!npeel)
1173     {
1174       if (dump_file)
1175         fprintf (dump_file, ";; Not considering loop, is too big\n");
1176       return;
1177     }
1178
1179   /* Check for simple loops.  */
1180   desc = get_simple_loop_desc (loop);
1181
1182   /* Check number of iterations.  */
1183   if (desc->simple_p && !desc->assumptions && desc->const_iter)
1184     {
1185       if (dump_file)
1186         fprintf (dump_file, ";; Loop iterates constant times\n");
1187       return;
1188     }
1189
1190   /* Do not simply peel loops with branches inside -- it increases number
1191      of mispredicts.  */
1192   if (num_loop_branches (loop) > 1)
1193     {
1194       if (dump_file)
1195         fprintf (dump_file, ";; Not peeling, contains branches\n");
1196       return;
1197     }
1198
1199   if (loop->header->count)
1200     {
1201       unsigned niter = expected_loop_iterations (loop);
1202       if (niter + 1 > npeel)
1203         {
1204           if (dump_file)
1205             {
1206               fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1207               fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1208                        (HOST_WIDEST_INT) (niter + 1));
1209               fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1210                        npeel);
1211             }
1212           return;
1213         }
1214       npeel = niter + 1;
1215     }
1216   else
1217     {
1218       /* For now we have no good heuristics to decide whether loop peeling
1219          will be effective, so disable it.  */
1220       if (dump_file)
1221         fprintf (dump_file,
1222                  ";; Not peeling loop, no evidence it will be profitable\n");
1223       return;
1224     }
1225
1226   /* Success.  */
1227   loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1228   loop->lpt_decision.times = npeel;
1229
1230   if (dump_file)
1231     fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1232              loop->lpt_decision.times);
1233 }
1234
1235 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1236    while (cond)
1237      body;
1238
1239    ==>
1240
1241    if (!cond) goto end;
1242    body;
1243    if (!cond) goto end;
1244    body;
1245    while (cond)
1246      body;
1247    end: ;
1248    */
1249 static void
1250 peel_loop_simple (struct loop *loop)
1251 {
1252   sbitmap wont_exit;
1253   unsigned npeel = loop->lpt_decision.times;
1254   struct niter_desc *desc = get_simple_loop_desc (loop);
1255   struct opt_info *opt_info = NULL;
1256   bool ok;
1257
1258   if (flag_split_ivs_in_unroller && npeel > 1)
1259     opt_info = analyze_insns_in_loop (loop);
1260
1261   wont_exit = sbitmap_alloc (npeel + 1);
1262   sbitmap_zero (wont_exit);
1263
1264   opt_info_start_duplication (opt_info);
1265
1266   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1267                                       npeel, wont_exit,
1268                                       NULL, NULL,
1269                                       NULL, DLTHE_FLAG_UPDATE_FREQ
1270                                       | (opt_info
1271                                          ? DLTHE_RECORD_COPY_NUMBER
1272                                            : 0));
1273   gcc_assert (ok);
1274
1275   free (wont_exit);
1276
1277   if (opt_info)
1278     {
1279       apply_opt_in_copies (opt_info, npeel, false, false);
1280       free_opt_info (opt_info);
1281     }
1282
1283   if (desc->simple_p)
1284     {
1285       if (desc->const_iter)
1286         {
1287           desc->niter -= npeel;
1288           desc->niter_expr = GEN_INT (desc->niter);
1289           desc->noloop_assumptions = NULL_RTX;
1290         }
1291       else
1292         {
1293           /* We cannot just update niter_expr, as its value might be clobbered
1294              inside loop.  We could handle this by counting the number into
1295              temporary just like we do in runtime unrolling, but it does not
1296              seem worthwhile.  */
1297           free_simple_loop_desc (loop);
1298         }
1299     }
1300   if (dump_file)
1301     fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1302 }
1303
1304 /* Decide whether to unroll LOOP stupidly and how much.  */
1305 static void
1306 decide_unroll_stupid (struct loop *loop, int flags)
1307 {
1308   unsigned nunroll, nunroll_by_av, i;
1309   struct niter_desc *desc;
1310
1311   if (!(flags & UAP_UNROLL_ALL))
1312     {
1313       /* We were not asked to, just return back silently.  */
1314       return;
1315     }
1316
1317   if (dump_file)
1318     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1319
1320   /* nunroll = total number of copies of the original loop body in
1321      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1322   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1323   nunroll_by_av
1324     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1325   if (nunroll > nunroll_by_av)
1326     nunroll = nunroll_by_av;
1327   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1328     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1329
1330   /* Skip big loops.  */
1331   if (nunroll <= 1)
1332     {
1333       if (dump_file)
1334         fprintf (dump_file, ";; Not considering loop, is too big\n");
1335       return;
1336     }
1337
1338   /* Check for simple loops.  */
1339   desc = get_simple_loop_desc (loop);
1340
1341   /* Check simpleness.  */
1342   if (desc->simple_p && !desc->assumptions)
1343     {
1344       if (dump_file)
1345         fprintf (dump_file, ";; The loop is simple\n");
1346       return;
1347     }
1348
1349   /* Do not unroll loops with branches inside -- it increases number
1350      of mispredicts.  */
1351   if (num_loop_branches (loop) > 1)
1352     {
1353       if (dump_file)
1354         fprintf (dump_file, ";; Not unrolling, contains branches\n");
1355       return;
1356     }
1357
1358   /* If we have profile feedback, check whether the loop rolls.  */
1359   if (loop->header->count
1360       && expected_loop_iterations (loop) < 2 * nunroll)
1361     {
1362       if (dump_file)
1363         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1364       return;
1365     }
1366
1367   /* Success.  Now force nunroll to be power of 2, as it seems that this
1368      improves results (partially because of better alignments, partially
1369      because of some dark magic).  */
1370   for (i = 1; 2 * i <= nunroll; i *= 2)
1371     continue;
1372
1373   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1374   loop->lpt_decision.times = i - 1;
1375
1376   if (dump_file)
1377     fprintf (dump_file,
1378              ";; Decided to unroll the loop stupidly, %d times.\n",
1379              loop->lpt_decision.times);
1380 }
1381
1382 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1383    while (cond)
1384      body;
1385
1386    ==>
1387
1388    while (cond)
1389      {
1390        body;
1391        if (!cond) break;
1392        body;
1393        if (!cond) break;
1394        body;
1395        if (!cond) break;
1396        body;
1397      }
1398    */
1399 static void
1400 unroll_loop_stupid (struct loop *loop)
1401 {
1402   sbitmap wont_exit;
1403   unsigned nunroll = loop->lpt_decision.times;
1404   struct niter_desc *desc = get_simple_loop_desc (loop);
1405   struct opt_info *opt_info = NULL;
1406   bool ok;
1407
1408   if (flag_split_ivs_in_unroller
1409       || flag_variable_expansion_in_unroller)
1410     opt_info = analyze_insns_in_loop (loop);
1411
1412
1413   wont_exit = sbitmap_alloc (nunroll + 1);
1414   sbitmap_zero (wont_exit);
1415   opt_info_start_duplication (opt_info);
1416
1417   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1418                                       nunroll, wont_exit,
1419                                       NULL, NULL, NULL,
1420                                       DLTHE_FLAG_UPDATE_FREQ
1421                                       | (opt_info
1422                                          ? DLTHE_RECORD_COPY_NUMBER
1423                                            : 0));
1424   gcc_assert (ok);
1425
1426   if (opt_info)
1427     {
1428       apply_opt_in_copies (opt_info, nunroll, true, true);
1429       free_opt_info (opt_info);
1430     }
1431
1432   free (wont_exit);
1433
1434   if (desc->simple_p)
1435     {
1436       /* We indeed may get here provided that there are nontrivial assumptions
1437          for a loop to be really simple.  We could update the counts, but the
1438          problem is that we are unable to decide which exit will be taken
1439          (not really true in case the number of iterations is constant,
1440          but noone will do anything with this information, so we do not
1441          worry about it).  */
1442       desc->simple_p = false;
1443     }
1444
1445   if (dump_file)
1446     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1447              nunroll, num_loop_insns (loop));
1448 }
1449
1450 /* A hash function for information about insns to split.  */
1451
1452 static hashval_t
1453 si_info_hash (const void *ivts)
1454 {
1455   return (hashval_t) INSN_UID (((struct iv_to_split *) ivts)->insn);
1456 }
1457
1458 /* An equality functions for information about insns to split.  */
1459
1460 static int
1461 si_info_eq (const void *ivts1, const void *ivts2)
1462 {
1463   const struct iv_to_split *i1 = ivts1;
1464   const struct iv_to_split *i2 = ivts2;
1465
1466   return i1->insn == i2->insn;
1467 }
1468
1469 /* Return a hash for VES, which is really a "var_to_expand *".  */
1470
1471 static hashval_t
1472 ve_info_hash (const void *ves)
1473 {
1474   return (hashval_t) INSN_UID (((struct var_to_expand *) ves)->insn);
1475 }
1476
1477 /* Return true if IVTS1 and IVTS2 (which are really both of type
1478    "var_to_expand *") refer to the same instruction.  */
1479
1480 static int
1481 ve_info_eq (const void *ivts1, const void *ivts2)
1482 {
1483   const struct var_to_expand *i1 = ivts1;
1484   const struct var_to_expand *i2 = ivts2;
1485
1486   return i1->insn == i2->insn;
1487 }
1488
1489 /* Returns true if REG is referenced in one insn in LOOP.  */
1490
1491 bool
1492 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
1493 {
1494   basic_block *body, bb;
1495   unsigned i;
1496   int count_ref = 0;
1497   rtx insn;
1498
1499   body = get_loop_body (loop);
1500   for (i = 0; i < loop->num_nodes; i++)
1501     {
1502       bb = body[i];
1503
1504       FOR_BB_INSNS (bb, insn)
1505       {
1506         if (rtx_referenced_p (reg, insn))
1507           count_ref++;
1508       }
1509     }
1510   return (count_ref  == 1);
1511 }
1512
1513 /* Determine whether INSN contains an accumulator
1514    which can be expanded into separate copies,
1515    one for each copy of the LOOP body.
1516
1517    for (i = 0 ; i < n; i++)
1518      sum += a[i];
1519
1520    ==>
1521
1522    sum += a[i]
1523    ....
1524    i = i+1;
1525    sum1 += a[i]
1526    ....
1527    i = i+1
1528    sum2 += a[i];
1529    ....
1530
1531    Return NULL if INSN contains no opportunity for expansion of accumulator.
1532    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1533    information and return a pointer to it.
1534 */
1535
1536 static struct var_to_expand *
1537 analyze_insn_to_expand_var (struct loop *loop, rtx insn)
1538 {
1539   rtx set, dest, src, op1;
1540   struct var_to_expand *ves;
1541   enum machine_mode mode1, mode2;
1542
1543   set = single_set (insn);
1544   if (!set)
1545     return NULL;
1546
1547   dest = SET_DEST (set);
1548   src = SET_SRC (set);
1549
1550   if (GET_CODE (src) != PLUS
1551       && GET_CODE (src) != MINUS
1552       && GET_CODE (src) != MULT)
1553     return NULL;
1554
1555   /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
1556      in MD.  But if there is no optab to generate the insn, we can not
1557      perform the variable expansion.  This can happen if an MD provides
1558      an insn but not a named pattern to generate it, for example to avoid
1559      producing code that needs additional mode switches like for x87/mmx.
1560
1561      So we check have_insn_for which looks for an optab for the operation
1562      in SRC.  If it doesn't exist, we can't perform the expansion even
1563      though INSN is valid.  */
1564   if (!have_insn_for (GET_CODE (src), GET_MODE (src)))
1565     return NULL;
1566
1567   if (!XEXP (src, 0))
1568     return NULL;
1569
1570   op1 = XEXP (src, 0);
1571
1572   if (!REG_P (dest)
1573       && !(GET_CODE (dest) == SUBREG
1574            && REG_P (SUBREG_REG (dest))))
1575     return NULL;
1576
1577   if (!rtx_equal_p (dest, op1))
1578     return NULL;
1579
1580   if (!referenced_in_one_insn_in_loop_p (loop, dest))
1581     return NULL;
1582
1583   if (rtx_referenced_p (dest, XEXP (src, 1)))
1584     return NULL;
1585
1586   mode1 = GET_MODE (dest);
1587   mode2 = GET_MODE (XEXP (src, 1));
1588   if ((FLOAT_MODE_P (mode1)
1589        || FLOAT_MODE_P (mode2))
1590       && !flag_unsafe_math_optimizations)
1591     return NULL;
1592
1593   /* Record the accumulator to expand.  */
1594   ves = XNEW (struct var_to_expand);
1595   ves->insn = insn;
1596   ves->var_expansions = VEC_alloc (rtx, heap, 1);
1597   ves->reg = copy_rtx (dest);
1598   ves->op = GET_CODE (src);
1599   ves->expansion_count = 0;
1600   ves->reuse_expansion = 0;
1601   return ves;
1602 }
1603
1604 /* Determine whether there is an induction variable in INSN that
1605    we would like to split during unrolling.
1606
1607    I.e. replace
1608
1609    i = i + 1;
1610    ...
1611    i = i + 1;
1612    ...
1613    i = i + 1;
1614    ...
1615
1616    type chains by
1617
1618    i0 = i + 1
1619    ...
1620    i = i0 + 1
1621    ...
1622    i = i0 + 2
1623    ...
1624
1625    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1626    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1627    pointer to it.  */
1628
1629 static struct iv_to_split *
1630 analyze_iv_to_split_insn (rtx insn)
1631 {
1632   rtx set, dest;
1633   struct rtx_iv iv;
1634   struct iv_to_split *ivts;
1635   bool ok;
1636
1637   /* For now we just split the basic induction variables.  Later this may be
1638      extended for example by selecting also addresses of memory references.  */
1639   set = single_set (insn);
1640   if (!set)
1641     return NULL;
1642
1643   dest = SET_DEST (set);
1644   if (!REG_P (dest))
1645     return NULL;
1646
1647   if (!biv_p (insn, dest))
1648     return NULL;
1649
1650   ok = iv_analyze_result (insn, dest, &iv);
1651
1652   /* This used to be an assert under the assumption that if biv_p returns
1653      true that iv_analyze_result must also return true.  However, that
1654      assumption is not strictly correct as evidenced by pr25569.
1655
1656      Returning NULL when iv_analyze_result returns false is safe and
1657      avoids the problems in pr25569 until the iv_analyze_* routines
1658      can be fixed, which is apparently hard and time consuming
1659      according to their author.  */
1660   if (! ok)
1661     return NULL;
1662
1663   if (iv.step == const0_rtx
1664       || iv.mode != iv.extend_mode)
1665     return NULL;
1666
1667   /* Record the insn to split.  */
1668   ivts = XNEW (struct iv_to_split);
1669   ivts->insn = insn;
1670   ivts->base_var = NULL_RTX;
1671   ivts->step = iv.step;
1672   ivts->n_loc = 1;
1673   ivts->loc[0] = 1;
1674
1675   return ivts;
1676 }
1677
1678 /* Determines which of insns in LOOP can be optimized.
1679    Return a OPT_INFO struct with the relevant hash tables filled
1680    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1681    is undefined for the return value.  */
1682
1683 static struct opt_info *
1684 analyze_insns_in_loop (struct loop *loop)
1685 {
1686   basic_block *body, bb;
1687   unsigned i;
1688   struct opt_info *opt_info = XCNEW (struct opt_info);
1689   rtx insn;
1690   struct iv_to_split *ivts = NULL;
1691   struct var_to_expand *ves = NULL;
1692   PTR *slot1;
1693   PTR *slot2;
1694   VEC (edge, heap) *edges = get_loop_exit_edges (loop);
1695   edge exit;
1696   bool can_apply = false;
1697
1698   iv_analysis_loop_init (loop);
1699
1700   body = get_loop_body (loop);
1701
1702   if (flag_split_ivs_in_unroller)
1703     opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1704                                             si_info_hash, si_info_eq, free);
1705
1706   /* Record the loop exit bb and loop preheader before the unrolling.  */
1707   opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1708
1709   if (VEC_length (edge, edges) == 1)
1710     {
1711       exit = VEC_index (edge, edges, 0);
1712       if (!(exit->flags & EDGE_COMPLEX))
1713         {
1714           opt_info->loop_exit = split_edge (exit);
1715           can_apply = true;
1716         }
1717     }
1718
1719   if (flag_variable_expansion_in_unroller
1720       && can_apply)
1721     opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1722                                                       ve_info_hash, ve_info_eq, free);
1723
1724   for (i = 0; i < loop->num_nodes; i++)
1725     {
1726       bb = body[i];
1727       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1728         continue;
1729
1730       FOR_BB_INSNS (bb, insn)
1731       {
1732         if (!INSN_P (insn))
1733           continue;
1734
1735         if (opt_info->insns_to_split)
1736           ivts = analyze_iv_to_split_insn (insn);
1737
1738         if (ivts)
1739           {
1740             slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1741             *slot1 = ivts;
1742             continue;
1743           }
1744
1745         if (opt_info->insns_with_var_to_expand)
1746           ves = analyze_insn_to_expand_var (loop, insn);
1747
1748         if (ves)
1749           {
1750             slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1751             *slot2 = ves;
1752           }
1753       }
1754     }
1755
1756   VEC_free (edge, heap, edges);
1757   free (body);
1758   return opt_info;
1759 }
1760
1761 /* Called just before loop duplication.  Records start of duplicated area
1762    to OPT_INFO.  */
1763
1764 static void
1765 opt_info_start_duplication (struct opt_info *opt_info)
1766 {
1767   if (opt_info)
1768     opt_info->first_new_block = last_basic_block;
1769 }
1770
1771 /* Determine the number of iterations between initialization of the base
1772    variable and the current copy (N_COPY).  N_COPIES is the total number
1773    of newly created copies.  UNROLLING is true if we are unrolling
1774    (not peeling) the loop.  */
1775
1776 static unsigned
1777 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1778 {
1779   if (unrolling)
1780     {
1781       /* If we are unrolling, initialization is done in the original loop
1782          body (number 0).  */
1783       return n_copy;
1784     }
1785   else
1786     {
1787       /* If we are peeling, the copy in that the initialization occurs has
1788          number 1.  The original loop (number 0) is the last.  */
1789       if (n_copy)
1790         return n_copy - 1;
1791       else
1792         return n_copies;
1793     }
1794 }
1795
1796 /* Locate in EXPR the expression corresponding to the location recorded
1797    in IVTS, and return a pointer to the RTX for this location.  */
1798
1799 static rtx *
1800 get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1801 {
1802   unsigned i;
1803   rtx *ret = &expr;
1804
1805   for (i = 0; i < ivts->n_loc; i++)
1806     ret = &XEXP (*ret, ivts->loc[i]);
1807
1808   return ret;
1809 }
1810
1811 /* Allocate basic variable for the induction variable chain.  Callback for
1812    htab_traverse.  */
1813
1814 static int
1815 allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
1816 {
1817   struct iv_to_split *ivts = *slot;
1818   rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1819
1820   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1821
1822   return 1;
1823 }
1824
1825 /* Insert initialization of basic variable of IVTS before INSN, taking
1826    the initial value from INSN.  */
1827
1828 static void
1829 insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1830 {
1831   rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1832   rtx seq;
1833
1834   start_sequence ();
1835   expr = force_operand (expr, ivts->base_var);
1836   if (expr != ivts->base_var)
1837     emit_move_insn (ivts->base_var, expr);
1838   seq = get_insns ();
1839   end_sequence ();
1840
1841   emit_insn_before (seq, insn);
1842 }
1843
1844 /* Replace the use of induction variable described in IVTS in INSN
1845    by base variable + DELTA * step.  */
1846
1847 static void
1848 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1849 {
1850   rtx expr, *loc, seq, incr, var;
1851   enum machine_mode mode = GET_MODE (ivts->base_var);
1852   rtx src, dest, set;
1853
1854   /* Construct base + DELTA * step.  */
1855   if (!delta)
1856     expr = ivts->base_var;
1857   else
1858     {
1859       incr = simplify_gen_binary (MULT, mode,
1860                                   ivts->step, gen_int_mode (delta, mode));
1861       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1862                                   ivts->base_var, incr);
1863     }
1864
1865   /* Figure out where to do the replacement.  */
1866   loc = get_ivts_expr (single_set (insn), ivts);
1867
1868   /* If we can make the replacement right away, we're done.  */
1869   if (validate_change (insn, loc, expr, 0))
1870     return;
1871
1872   /* Otherwise, force EXPR into a register and try again.  */
1873   start_sequence ();
1874   var = gen_reg_rtx (mode);
1875   expr = force_operand (expr, var);
1876   if (expr != var)
1877     emit_move_insn (var, expr);
1878   seq = get_insns ();
1879   end_sequence ();
1880   emit_insn_before (seq, insn);
1881
1882   if (validate_change (insn, loc, var, 0))
1883     return;
1884
1885   /* The last chance.  Try recreating the assignment in insn
1886      completely from scratch.  */
1887   set = single_set (insn);
1888   gcc_assert (set);
1889
1890   start_sequence ();
1891   *loc = var;
1892   src = copy_rtx (SET_SRC (set));
1893   dest = copy_rtx (SET_DEST (set));
1894   src = force_operand (src, dest);
1895   if (src != dest)
1896     emit_move_insn (dest, src);
1897   seq = get_insns ();
1898   end_sequence ();
1899
1900   emit_insn_before (seq, insn);
1901   delete_insn (insn);
1902 }
1903
1904
1905 /* Return one expansion of the accumulator recorded in struct VE.  */
1906
1907 static rtx
1908 get_expansion (struct var_to_expand *ve)
1909 {
1910   rtx reg;
1911
1912   if (ve->reuse_expansion == 0)
1913     reg = ve->reg;
1914   else
1915     reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);
1916
1917   if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
1918     ve->reuse_expansion = 0;
1919   else
1920     ve->reuse_expansion++;
1921
1922   return reg;
1923 }
1924
1925
1926 /* Given INSN replace the uses of the accumulator recorded in VE
1927    with a new register.  */
1928
1929 static void
1930 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
1931 {
1932   rtx new_reg, set;
1933   bool really_new_expansion = false;
1934
1935   set = single_set (insn);
1936   gcc_assert (set);
1937
1938   /* Generate a new register only if the expansion limit has not been
1939      reached.  Else reuse an already existing expansion.  */
1940   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1941     {
1942       really_new_expansion = true;
1943       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1944     }
1945   else
1946     new_reg = get_expansion (ve);
1947
1948   validate_change (insn, &SET_DEST (set), new_reg, 1);
1949   validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
1950
1951   if (apply_change_group ())
1952     if (really_new_expansion)
1953       {
1954         VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
1955         ve->expansion_count++;
1956       }
1957 }
1958
1959 /* Initialize the variable expansions in loop preheader.
1960    Callbacks for htab_traverse.  PLACE_P is the loop-preheader
1961    basic block where the initialization of the expansions
1962    should take place.  */
1963
1964 static int
1965 insert_var_expansion_initialization (void **slot, void *place_p)
1966 {
1967   struct var_to_expand *ve = *slot;
1968   basic_block place = (basic_block)place_p;
1969   rtx seq, var, zero_init, insn;
1970   unsigned i;
1971
1972   if (VEC_length (rtx, ve->var_expansions) == 0)
1973     return 1;
1974
1975   start_sequence ();
1976   if (ve->op == PLUS || ve->op == MINUS)
1977     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
1978       {
1979         zero_init =  CONST0_RTX (GET_MODE (var));
1980         emit_move_insn (var, zero_init);
1981       }
1982   else if (ve->op == MULT)
1983     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
1984       {
1985         zero_init =  CONST1_RTX (GET_MODE (var));
1986         emit_move_insn (var, zero_init);
1987       }
1988
1989   seq = get_insns ();
1990   end_sequence ();
1991
1992   insn = BB_HEAD (place);
1993   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
1994     insn = NEXT_INSN (insn);
1995
1996   emit_insn_after (seq, insn);
1997   /* Continue traversing the hash table.  */
1998   return 1;
1999 }
2000
2001 /*  Combine the variable expansions at the loop exit.
2002     Callbacks for htab_traverse.  PLACE_P is the loop exit
2003     basic block where the summation of the expansions should
2004     take place.  */
2005
2006 static int
2007 combine_var_copies_in_loop_exit (void **slot, void *place_p)
2008 {
2009   struct var_to_expand *ve = *slot;
2010   basic_block place = (basic_block)place_p;
2011   rtx sum = ve->reg;
2012   rtx expr, seq, var, insn;
2013   unsigned i;
2014
2015   if (VEC_length (rtx, ve->var_expansions) == 0)
2016     return 1;
2017
2018   start_sequence ();
2019   if (ve->op == PLUS || ve->op == MINUS)
2020     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2021       {
2022         sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
2023                                    var, sum);
2024       }
2025   else if (ve->op == MULT)
2026     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2027       {
2028         sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
2029                                    var, sum);
2030       }
2031
2032   expr = force_operand (sum, ve->reg);
2033   if (expr != ve->reg)
2034     emit_move_insn (ve->reg, expr);
2035   seq = get_insns ();
2036   end_sequence ();
2037
2038   insn = BB_HEAD (place);
2039   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2040     insn = NEXT_INSN (insn);
2041
2042   emit_insn_after (seq, insn);
2043
2044   /* Continue traversing the hash table.  */
2045   return 1;
2046 }
2047
2048 /* Apply loop optimizations in loop copies using the
2049    data which gathered during the unrolling.  Structure
2050    OPT_INFO record that data.
2051
2052    UNROLLING is true if we unrolled (not peeled) the loop.
2053    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
2054    the loop (as it should happen in complete unrolling, but not in ordinary
2055    peeling of the loop).  */
2056
2057 static void
2058 apply_opt_in_copies (struct opt_info *opt_info,
2059                      unsigned n_copies, bool unrolling,
2060                      bool rewrite_original_loop)
2061 {
2062   unsigned i, delta;
2063   basic_block bb, orig_bb;
2064   rtx insn, orig_insn, next;
2065   struct iv_to_split ivts_templ, *ivts;
2066   struct var_to_expand ve_templ, *ves;
2067
2068   /* Sanity check -- we need to put initialization in the original loop
2069      body.  */
2070   gcc_assert (!unrolling || rewrite_original_loop);
2071
2072   /* Allocate the basic variables (i0).  */
2073   if (opt_info->insns_to_split)
2074     htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL);
2075
2076   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2077     {
2078       bb = BASIC_BLOCK (i);
2079       orig_bb = get_bb_original (bb);
2080
2081       /* bb->aux holds position in copy sequence initialized by
2082          duplicate_loop_to_header_edge.  */
2083       delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2084                                         unrolling);
2085       bb->aux = 0;
2086       orig_insn = BB_HEAD (orig_bb);
2087       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2088         {
2089           next = NEXT_INSN (insn);
2090           if (!INSN_P (insn))
2091             continue;
2092
2093           while (!INSN_P (orig_insn))
2094             orig_insn = NEXT_INSN (orig_insn);
2095
2096           ivts_templ.insn = orig_insn;
2097           ve_templ.insn = orig_insn;
2098
2099           /* Apply splitting iv optimization.  */
2100           if (opt_info->insns_to_split)
2101             {
2102               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2103
2104               if (ivts)
2105                 {
2106                   gcc_assert (GET_CODE (PATTERN (insn))
2107                               == GET_CODE (PATTERN (orig_insn)));
2108
2109                   if (!delta)
2110                     insert_base_initialization (ivts, insn);
2111                   split_iv (ivts, insn, delta);
2112                 }
2113             }
2114           /* Apply variable expansion optimization.  */
2115           if (unrolling && opt_info->insns_with_var_to_expand)
2116             {
2117               ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2118               if (ves)
2119                 {
2120                   gcc_assert (GET_CODE (PATTERN (insn))
2121                               == GET_CODE (PATTERN (orig_insn)));
2122                   expand_var_during_unrolling (ves, insn);
2123                 }
2124             }
2125           orig_insn = NEXT_INSN (orig_insn);
2126         }
2127     }
2128
2129   if (!rewrite_original_loop)
2130     return;
2131
2132   /* Initialize the variable expansions in the loop preheader
2133      and take care of combining them at the loop exit.  */
2134   if (opt_info->insns_with_var_to_expand)
2135     {
2136       htab_traverse (opt_info->insns_with_var_to_expand,
2137                      insert_var_expansion_initialization,
2138                      opt_info->loop_preheader);
2139       htab_traverse (opt_info->insns_with_var_to_expand,
2140                      combine_var_copies_in_loop_exit,
2141                      opt_info->loop_exit);
2142     }
2143
2144   /* Rewrite also the original loop body.  Find them as originals of the blocks
2145      in the last copied iteration, i.e. those that have
2146      get_bb_copy (get_bb_original (bb)) == bb.  */
2147   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2148     {
2149       bb = BASIC_BLOCK (i);
2150       orig_bb = get_bb_original (bb);
2151       if (get_bb_copy (orig_bb) != bb)
2152         continue;
2153
2154       delta = determine_split_iv_delta (0, n_copies, unrolling);
2155       for (orig_insn = BB_HEAD (orig_bb);
2156            orig_insn != NEXT_INSN (BB_END (bb));
2157            orig_insn = next)
2158         {
2159           next = NEXT_INSN (orig_insn);
2160
2161           if (!INSN_P (orig_insn))
2162             continue;
2163
2164           ivts_templ.insn = orig_insn;
2165           if (opt_info->insns_to_split)
2166             {
2167               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2168               if (ivts)
2169                 {
2170                   if (!delta)
2171                     insert_base_initialization (ivts, orig_insn);
2172                   split_iv (ivts, orig_insn, delta);
2173                   continue;
2174                 }
2175             }
2176
2177         }
2178     }
2179 }
2180
2181 /*  Release the data structures used for the variable expansion
2182     optimization.  Callbacks for htab_traverse.  */
2183
2184 static int
2185 release_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
2186 {
2187   struct var_to_expand *ve = *slot;
2188
2189   VEC_free (rtx, heap, ve->var_expansions);
2190
2191   /* Continue traversing the hash table.  */
2192   return 1;
2193 }
2194
2195 /* Release OPT_INFO.  */
2196
2197 static void
2198 free_opt_info (struct opt_info *opt_info)
2199 {
2200   if (opt_info->insns_to_split)
2201     htab_delete (opt_info->insns_to_split);
2202   if (opt_info->insns_with_var_to_expand)
2203     {
2204       htab_traverse (opt_info->insns_with_var_to_expand,
2205                      release_var_copies, NULL);
2206       htab_delete (opt_info->insns_with_var_to_expand);
2207     }
2208   free (opt_info);
2209 }