gcc/loop-unroll.c

   1 /* Loop unrolling and peeling.
   2    Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 2, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  19 02111-1307, USA.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "rtl.h"
  26 #include "hard-reg-set.h"
  27 #include "obstack.h"
  28 #include "basic-block.h"
  29 #include "cfgloop.h"
  30 #include "cfglayout.h"
  31 #include "params.h"
  32 #include "output.h"
  33 #include "expr.h"
  34 #include "hashtab.h"
  35 #include "recog.h"
  36 #include "varray.h"
  37
  38 /* This pass performs loop unrolling and peeling.  We only perform these
  39    optimizations on innermost loops (with single exception) because
  40    the impact on performance is greatest here, and we want to avoid
  41    unnecessary code size growth.  The gain is caused by greater sequentiality
  42    of code, better code to optimize for further passes and in some cases
  43    by fewer testings of exit conditions.  The main problem is code growth,
  44    that impacts performance negatively due to effect of caches.
  45
  46    What we do:
  47
  48    -- complete peeling of once-rolling loops; this is the above mentioned
  49       exception, as this causes loop to be cancelled completely and
  50       does not cause code growth
  51    -- complete peeling of loops that roll (small) constant times.
  52    -- simple peeling of first iterations of loops that do not roll much
  53       (according to profile feedback)
  54    -- unrolling of loops that roll constant times; this is almost always
  55       win, as we get rid of exit condition tests.
  56    -- unrolling of loops that roll number of times that we can compute
  57       in runtime; we also get rid of exit condition tests here, but there
  58       is the extra expense for calculating the number of iterations
  59    -- simple unrolling of remaining loops; this is performed only if we
  60       are asked to, as the gain is questionable in this case and often
  61       it may even slow down the code
  62    For more detailed descriptions of each of those, see comments at
  63    appropriate function below.
  64
  65    There is a lot of parameters (defined and described in params.def) that
  66    control how much we unroll/peel.
  67
  68    ??? A great problem is that we don't have a good way how to determine
  69    how many times we should unroll the loop; the experiments I have made
  70    showed that this choice may affect performance in order of several %.
  71    */
  72
  73 /* Information about induction variables to split.  */
  74
  75 struct iv_to_split
  76 {
  77   rtx insn;             /* The insn in that the induction variable occurs.  */
  78   rtx base_var;         /* The variable on that the values in the further
  79                            iterations are based.  */
  80   rtx step;             /* Step of the induction variable.  */
  81   unsigned n_loc;
  82   unsigned loc[3];      /* Location where the definition of the induction
  83                            variable occurs in the insn.  For example if
  84                            N_LOC is 2, the expression is located at
  85                            XEXP (XEXP (single_set, loc[0]), loc[1]).  */
  86 };
  87
  88 DEF_VEC_P(rtx);
  89 DEF_VEC_ALLOC_P(rtx,heap);
  90
  91 /* Information about accumulators to expand.  */
  92
  93 struct var_to_expand
  94 {
  95   rtx insn;                        /* The insn in that the variable expansion occurs.  */
  96   rtx reg;                         /* The accumulator which is expanded.  */
  97   VEC(rtx,heap) *var_expansions;   /* The copies of the accumulator which is expanded.  */
  98   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
  99                                       or multiplication.  */
 100   int expansion_count;             /* Count the number of expansions generated so far.  */
 101   int reuse_expansion;             /* The expansion we intend to reuse to expand
 102                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
 103                                       the original accumulator.  Else use
 104                                       var_expansions[REUSE_EXPANSION - 1].  */
 105 };
 106
 107 /* Information about optimization applied in
 108    the unrolled loop.  */
 109
 110 struct opt_info
 111 {
 112   htab_t insns_to_split;           /* A hashtable of insns to split.  */
 113   htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
 114                                       to expand.  */
 115   unsigned first_new_block;        /* The first basic block that was
 116                                       duplicated.  */
 117   basic_block loop_exit;           /* The loop exit basic block.  */
 118   basic_block loop_preheader;      /* The loop preheader basic block.  */
 119 };
 120
 121 static void decide_unrolling_and_peeling (struct loops *, int);
 122 static void peel_loops_completely (struct loops *, int);
 123 static void decide_peel_simple (struct loop *, int);
 124 static void decide_peel_once_rolling (struct loop *, int);
 125 static void decide_peel_completely (struct loop *, int);
 126 static void decide_unroll_stupid (struct loop *, int);
 127 static void decide_unroll_constant_iterations (struct loop *, int);
 128 static void decide_unroll_runtime_iterations (struct loop *, int);
 129 static void peel_loop_simple (struct loops *, struct loop *);
 130 static void peel_loop_completely (struct loops *, struct loop *);
 131 static void unroll_loop_stupid (struct loops *, struct loop *);
 132 static void unroll_loop_constant_iterations (struct loops *, struct loop *);
 133 static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
 134 static struct opt_info *analyze_insns_in_loop (struct loop *);
 135 static void opt_info_start_duplication (struct opt_info *);
 136 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
 137 static void free_opt_info (struct opt_info *);
 138 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
 139 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
 140 static struct iv_to_split *analyze_iv_to_split_insn (rtx);
 141 static void expand_var_during_unrolling (struct var_to_expand *, rtx);
 142 static int insert_var_expansion_initialization (void **, void *);
 143 static int combine_var_copies_in_loop_exit (void **, void *);
 144 static int release_var_copies (void **, void *);
 145 static rtx get_expansion (struct var_to_expand *);
 146
 147 /* Unroll and/or peel (depending on FLAGS) LOOPS.  */
 148 void
 149 unroll_and_peel_loops (struct loops *loops, int flags)
 150 {
 151   struct loop *loop, *next;
 152   bool check;
 153
 154   /* First perform complete loop peeling (it is almost surely a win,
 155      and affects parameters for further decision a lot).  */
 156   peel_loops_completely (loops, flags);
 157
 158   /* Now decide rest of unrolling and peeling.  */
 159   decide_unrolling_and_peeling (loops, flags);
 160
 161   loop = loops->tree_root;
 162   while (loop->inner)
 163     loop = loop->inner;
 164
 165   /* Scan the loops, inner ones first.  */
 166   while (loop != loops->tree_root)
 167     {
 168       if (loop->next)
 169         {
 170           next = loop->next;
 171           while (next->inner)
 172             next = next->inner;
 173         }
 174       else
 175         next = loop->outer;
 176
 177       check = true;
 178       /* And perform the appropriate transformations.  */
 179       switch (loop->lpt_decision.decision)
 180         {
 181         case LPT_PEEL_COMPLETELY:
 182           /* Already done.  */
 183           gcc_unreachable ();
 184         case LPT_PEEL_SIMPLE:
 185           peel_loop_simple (loops, loop);
 186           break;
 187         case LPT_UNROLL_CONSTANT:
 188           unroll_loop_constant_iterations (loops, loop);
 189           break;
 190         case LPT_UNROLL_RUNTIME:
 191           unroll_loop_runtime_iterations (loops, loop);
 192           break;
 193         case LPT_UNROLL_STUPID:
 194           unroll_loop_stupid (loops, loop);
 195           break;
 196         case LPT_NONE:
 197           check = false;
 198           break;
 199         default:
 200           gcc_unreachable ();
 201         }
 202       if (check)
 203         {
 204 #ifdef ENABLE_CHECKING
 205           verify_dominators (CDI_DOMINATORS);
 206           verify_loop_structure (loops);
 207 #endif
 208         }
 209       loop = next;
 210     }
 211
 212   iv_analysis_done ();
 213 }
 214
 215 /* Check whether exit of the LOOP is at the end of loop body.  */
 216
 217 static bool
 218 loop_exit_at_end_p (struct loop *loop)
 219 {
 220   struct niter_desc *desc = get_simple_loop_desc (loop);
 221   rtx insn;
 222
 223   if (desc->in_edge->dest != loop->latch)
 224     return false;
 225
 226   /* Check that the latch is empty.  */
 227   FOR_BB_INSNS (loop->latch, insn)
 228     {
 229       if (INSN_P (insn))
 230         return false;
 231     }
 232
 233   return true;
 234 }
 235
 236 /* Check whether to peel LOOPS (depending on FLAGS) completely and do so.  */
 237 static void
 238 peel_loops_completely (struct loops *loops, int flags)
 239 {
 240   struct loop *loop, *next;
 241
 242   loop = loops->tree_root;
 243   while (loop->inner)
 244     loop = loop->inner;
 245
 246   while (loop != loops->tree_root)
 247     {
 248       if (loop->next)
 249         {
 250           next = loop->next;
 251           while (next->inner)
 252             next = next->inner;
 253         }
 254       else
 255         next = loop->outer;
 256
 257       loop->lpt_decision.decision = LPT_NONE;
 258
 259       if (dump_file)
 260         fprintf (dump_file,
 261                  "\n;; *** Considering loop %d for complete peeling ***\n",
 262                  loop->num);
 263
 264       loop->ninsns = num_loop_insns (loop);
 265
 266       decide_peel_once_rolling (loop, flags);
 267       if (loop->lpt_decision.decision == LPT_NONE)
 268         decide_peel_completely (loop, flags);
 269
 270       if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
 271         {
 272           peel_loop_completely (loops, loop);
 273 #ifdef ENABLE_CHECKING
 274           verify_dominators (CDI_DOMINATORS);
 275           verify_loop_structure (loops);
 276 #endif
 277         }
 278       loop = next;
 279     }
 280 }
 281
 282 /* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much.  */
 283 static void
 284 decide_unrolling_and_peeling (struct loops *loops, int flags)
 285 {
 286   struct loop *loop = loops->tree_root, *next;
 287
 288   while (loop->inner)
 289     loop = loop->inner;
 290
 291   /* Scan the loops, inner ones first.  */
 292   while (loop != loops->tree_root)
 293     {
 294       if (loop->next)
 295         {
 296           next = loop->next;
 297           while (next->inner)
 298             next = next->inner;
 299         }
 300       else
 301         next = loop->outer;
 302
 303       loop->lpt_decision.decision = LPT_NONE;
 304
 305       if (dump_file)
 306         fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
 307
 308       /* Do not peel cold areas.  */
 309       if (!maybe_hot_bb_p (loop->header))
 310         {
 311           if (dump_file)
 312             fprintf (dump_file, ";; Not considering loop, cold area\n");
 313           loop = next;
 314           continue;
 315         }
 316
 317       /* Can the loop be manipulated?  */
 318       if (!can_duplicate_loop_p (loop))
 319         {
 320           if (dump_file)
 321             fprintf (dump_file,
 322                      ";; Not considering loop, cannot duplicate\n");
 323           loop = next;
 324           continue;
 325         }
 326
 327       /* Skip non-innermost loops.  */
 328       if (loop->inner)
 329         {
 330           if (dump_file)
 331             fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 332           loop = next;
 333           continue;
 334         }
 335
 336       loop->ninsns = num_loop_insns (loop);
 337       loop->av_ninsns = average_num_loop_insns (loop);
 338
 339       /* Try transformations one by one in decreasing order of
 340          priority.  */
 341
 342       decide_unroll_constant_iterations (loop, flags);
 343       if (loop->lpt_decision.decision == LPT_NONE)
 344         decide_unroll_runtime_iterations (loop, flags);
 345       if (loop->lpt_decision.decision == LPT_NONE)
 346         decide_unroll_stupid (loop, flags);
 347       if (loop->lpt_decision.decision == LPT_NONE)
 348         decide_peel_simple (loop, flags);
 349
 350       loop = next;
 351     }
 352 }
 353
 354 /* Decide whether the LOOP is once rolling and suitable for complete
 355    peeling.  */
 356 static void
 357 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 358 {
 359   struct niter_desc *desc;
 360
 361   if (dump_file)
 362     fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
 363
 364   /* Is the loop small enough?  */
 365   if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
 366     {
 367       if (dump_file)
 368         fprintf (dump_file, ";; Not considering loop, is too big\n");
 369       return;
 370     }
 371
 372   /* Check for simple loops.  */
 373   desc = get_simple_loop_desc (loop);
 374
 375   /* Check number of iterations.  */
 376   if (!desc->simple_p
 377       || desc->assumptions
 378       || desc->infinite
 379       || !desc->const_iter
 380       || desc->niter != 0)
 381     {
 382       if (dump_file)
 383         fprintf (dump_file,
 384                  ";; Unable to prove that the loop rolls exactly once\n");
 385       return;
 386     }
 387
 388   /* Success.  */
 389   if (dump_file)
 390     fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
 391   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 392 }
 393
 394 /* Decide whether the LOOP is suitable for complete peeling.  */
 395 static void
 396 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 397 {
 398   unsigned npeel;
 399   struct niter_desc *desc;
 400
 401   if (dump_file)
 402     fprintf (dump_file, "\n;; Considering peeling completely\n");
 403
 404   /* Skip non-innermost loops.  */
 405   if (loop->inner)
 406     {
 407       if (dump_file)
 408         fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 409       return;
 410     }
 411
 412   /* Do not peel cold areas.  */
 413   if (!maybe_hot_bb_p (loop->header))
 414     {
 415       if (dump_file)
 416         fprintf (dump_file, ";; Not considering loop, cold area\n");
 417       return;
 418     }
 419
 420   /* Can the loop be manipulated?  */
 421   if (!can_duplicate_loop_p (loop))
 422     {
 423       if (dump_file)
 424         fprintf (dump_file,
 425                  ";; Not considering loop, cannot duplicate\n");
 426       return;
 427     }
 428
 429   /* npeel = number of iterations to peel.  */
 430   npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
 431   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
 432     npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
 433
 434   /* Is the loop small enough?  */
 435   if (!npeel)
 436     {
 437       if (dump_file)
 438         fprintf (dump_file, ";; Not considering loop, is too big\n");
 439       return;
 440     }
 441
 442   /* Check for simple loops.  */
 443   desc = get_simple_loop_desc (loop);
 444
 445   /* Check number of iterations.  */
 446   if (!desc->simple_p
 447       || desc->assumptions
 448       || !desc->const_iter
 449       || desc->infinite)
 450     {
 451       if (dump_file)
 452         fprintf (dump_file,
 453                  ";; Unable to prove that the loop iterates constant times\n");
 454       return;
 455     }
 456
 457   if (desc->niter > npeel - 1)
 458     {
 459       if (dump_file)
 460         {
 461           fprintf (dump_file,
 462                    ";; Not peeling loop completely, rolls too much (");
 463           fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
 464           fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
 465         }
 466       return;
 467     }
 468
 469   /* Success.  */
 470   if (dump_file)
 471     fprintf (dump_file, ";; Decided to peel loop completely\n");
 472   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 473 }
 474
 475 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
 476    completely.  The transformation done:
 477
 478    for (i = 0; i < 4; i++)
 479      body;
 480
 481    ==>
 482
 483    i = 0;
 484    body; i++;
 485    body; i++;
 486    body; i++;
 487    body; i++;
 488    */
 489 static void
 490 peel_loop_completely (struct loops *loops, struct loop *loop)
 491 {
 492   sbitmap wont_exit;
 493   unsigned HOST_WIDE_INT npeel;
 494   unsigned n_remove_edges, i;
 495   edge *remove_edges, ein;
 496   struct niter_desc *desc = get_simple_loop_desc (loop);
 497   struct opt_info *opt_info = NULL;
 498
 499   npeel = desc->niter;
 500
 501   if (npeel)
 502     {
 503       bool ok;
 504
 505       wont_exit = sbitmap_alloc (npeel + 1);
 506       sbitmap_ones (wont_exit);
 507       RESET_BIT (wont_exit, 0);
 508       if (desc->noloop_assumptions)
 509         RESET_BIT (wont_exit, 1);
 510
 511       remove_edges = xcalloc (npeel, sizeof (edge));
 512       n_remove_edges = 0;
 513
 514       if (flag_split_ivs_in_unroller)
 515         opt_info = analyze_insns_in_loop (loop);
 516
 517       opt_info_start_duplication (opt_info);
 518       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 519                                           loops, npeel,
 520                                           wont_exit, desc->out_edge,
 521                                           remove_edges, &n_remove_edges,
 522                                           DLTHE_FLAG_UPDATE_FREQ);
 523       gcc_assert (ok);
 524
 525       free (wont_exit);
 526
 527       if (opt_info)
 528         {
 529           apply_opt_in_copies (opt_info, npeel, false, true);
 530           free_opt_info (opt_info);
 531         }
 532
 533       /* Remove the exit edges.  */
 534       for (i = 0; i < n_remove_edges; i++)
 535         remove_path (loops, remove_edges[i]);
 536       free (remove_edges);
 537     }
 538
 539   ein = desc->in_edge;
 540   free_simple_loop_desc (loop);
 541
 542   /* Now remove the unreachable part of the last iteration and cancel
 543      the loop.  */
 544   remove_path (loops, ein);
 545
 546   if (dump_file)
 547     fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
 548 }
 549
 550 /* Decide whether to unroll LOOP iterating constant number of times
 551    and how much.  */
 552
 553 static void
 554 decide_unroll_constant_iterations (struct loop *loop, int flags)
 555 {
 556   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
 557   struct niter_desc *desc;
 558
 559   if (!(flags & UAP_UNROLL))
 560     {
 561       /* We were not asked to, just return back silently.  */
 562       return;
 563     }
 564
 565   if (dump_file)
 566     fprintf (dump_file,
 567              "\n;; Considering unrolling loop with constant "
 568              "number of iterations\n");
 569
 570   /* nunroll = total number of copies of the original loop body in
 571      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 572   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 573   nunroll_by_av
 574     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 575   if (nunroll > nunroll_by_av)
 576     nunroll = nunroll_by_av;
 577   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 578     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 579
 580   /* Skip big loops.  */
 581   if (nunroll <= 1)
 582     {
 583       if (dump_file)
 584         fprintf (dump_file, ";; Not considering loop, is too big\n");
 585       return;
 586     }
 587
 588   /* Check for simple loops.  */
 589   desc = get_simple_loop_desc (loop);
 590
 591   /* Check number of iterations.  */
 592   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
 593     {
 594       if (dump_file)
 595         fprintf (dump_file,
 596                  ";; Unable to prove that the loop iterates constant times\n");
 597       return;
 598     }
 599
 600   /* Check whether the loop rolls enough to consider.  */
 601   if (desc->niter < 2 * nunroll)
 602     {
 603       if (dump_file)
 604         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 605       return;
 606     }
 607
 608   /* Success; now compute number of iterations to unroll.  We alter
 609      nunroll so that as few as possible copies of loop body are
 610      necessary, while still not decreasing the number of unrollings
 611      too much (at most by 1).  */
 612   best_copies = 2 * nunroll + 10;
 613
 614   i = 2 * nunroll + 2;
 615   if (i - 1 >= desc->niter)
 616     i = desc->niter - 2;
 617
 618   for (; i >= nunroll - 1; i--)
 619     {
 620       unsigned exit_mod = desc->niter % (i + 1);
 621
 622       if (!loop_exit_at_end_p (loop))
 623         n_copies = exit_mod + i + 1;
 624       else if (exit_mod != (unsigned) i
 625                || desc->noloop_assumptions != NULL_RTX)
 626         n_copies = exit_mod + i + 2;
 627       else
 628         n_copies = i + 1;
 629
 630       if (n_copies < best_copies)
 631         {
 632           best_copies = n_copies;
 633           best_unroll = i;
 634         }
 635     }
 636
 637   if (dump_file)
 638     fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
 639              best_unroll + 1, best_copies, nunroll);
 640
 641   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
 642   loop->lpt_decision.times = best_unroll;
 643
 644   if (dump_file)
 645     fprintf (dump_file,
 646              ";; Decided to unroll the constant times rolling loop, %d times.\n",
 647              loop->lpt_decision.times);
 648 }
 649
 650 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
 651    times.  The transformation does this:
 652
 653    for (i = 0; i < 102; i++)
 654      body;
 655
 656    ==>
 657
 658    i = 0;
 659    body; i++;
 660    body; i++;
 661    while (i < 102)
 662      {
 663        body; i++;
 664        body; i++;
 665        body; i++;
 666        body; i++;
 667      }
 668   */
 669 static void
 670 unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
 671 {
 672   unsigned HOST_WIDE_INT niter;
 673   unsigned exit_mod;
 674   sbitmap wont_exit;
 675   unsigned n_remove_edges, i;
 676   edge *remove_edges;
 677   unsigned max_unroll = loop->lpt_decision.times;
 678   struct niter_desc *desc = get_simple_loop_desc (loop);
 679   bool exit_at_end = loop_exit_at_end_p (loop);
 680   struct opt_info *opt_info = NULL;
 681   bool ok;
 682
 683   niter = desc->niter;
 684
 685   /* Should not get here (such loop should be peeled instead).  */
 686   gcc_assert (niter > max_unroll + 1);
 687
 688   exit_mod = niter % (max_unroll + 1);
 689
 690   wont_exit = sbitmap_alloc (max_unroll + 1);
 691   sbitmap_ones (wont_exit);
 692
 693   remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
 694   n_remove_edges = 0;
 695   if (flag_split_ivs_in_unroller
 696       || flag_variable_expansion_in_unroller)
 697     opt_info = analyze_insns_in_loop (loop);
 698
 699   if (!exit_at_end)
 700     {
 701       /* The exit is not at the end of the loop; leave exit test
 702          in the first copy, so that the loops that start with test
 703          of exit condition have continuous body after unrolling.  */
 704
 705       if (dump_file)
 706         fprintf (dump_file, ";; Condition on beginning of loop.\n");
 707
 708       /* Peel exit_mod iterations.  */
 709       RESET_BIT (wont_exit, 0);
 710       if (desc->noloop_assumptions)
 711         RESET_BIT (wont_exit, 1);
 712
 713       if (exit_mod)
 714         {
 715           opt_info_start_duplication (opt_info);
 716           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 717                                               loops, exit_mod,
 718                                               wont_exit, desc->out_edge,
 719                                               remove_edges, &n_remove_edges,
 720                                               DLTHE_FLAG_UPDATE_FREQ);
 721           gcc_assert (ok);
 722
 723           if (opt_info && exit_mod > 1)
 724             apply_opt_in_copies (opt_info, exit_mod, false, false);
 725
 726           desc->noloop_assumptions = NULL_RTX;
 727           desc->niter -= exit_mod;
 728           desc->niter_max -= exit_mod;
 729         }
 730
 731       SET_BIT (wont_exit, 1);
 732     }
 733   else
 734     {
 735       /* Leave exit test in last copy, for the same reason as above if
 736          the loop tests the condition at the end of loop body.  */
 737
 738       if (dump_file)
 739         fprintf (dump_file, ";; Condition on end of loop.\n");
 740
 741       /* We know that niter >= max_unroll + 2; so we do not need to care of
 742          case when we would exit before reaching the loop.  So just peel
 743          exit_mod + 1 iterations.  */
 744       if (exit_mod != max_unroll
 745           || desc->noloop_assumptions)
 746         {
 747           RESET_BIT (wont_exit, 0);
 748           if (desc->noloop_assumptions)
 749             RESET_BIT (wont_exit, 1);
 750
 751           opt_info_start_duplication (opt_info);
 752           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 753                                               loops, exit_mod + 1,
 754                                               wont_exit, desc->out_edge,
 755                                               remove_edges, &n_remove_edges,
 756                                               DLTHE_FLAG_UPDATE_FREQ);
 757           gcc_assert (ok);
 758
 759           if (opt_info && exit_mod > 0)
 760             apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
 761
 762           desc->niter -= exit_mod + 1;
 763           desc->niter_max -= exit_mod + 1;
 764           desc->noloop_assumptions = NULL_RTX;
 765
 766           SET_BIT (wont_exit, 0);
 767           SET_BIT (wont_exit, 1);
 768         }
 769
 770       RESET_BIT (wont_exit, max_unroll);
 771     }
 772
 773   /* Now unroll the loop.  */
 774
 775   opt_info_start_duplication (opt_info);
 776   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 777                                       loops, max_unroll,
 778                                       wont_exit, desc->out_edge,
 779                                       remove_edges, &n_remove_edges,
 780                                       DLTHE_FLAG_UPDATE_FREQ);
 781   gcc_assert (ok);
 782
 783   if (opt_info)
 784     {
 785       apply_opt_in_copies (opt_info, max_unroll, true, true);
 786       free_opt_info (opt_info);
 787     }
 788
 789   free (wont_exit);
 790
 791   if (exit_at_end)
 792     {
 793       basic_block exit_block = desc->in_edge->src->rbi->copy;
 794       /* Find a new in and out edge; they are in the last copy we have made.  */
 795
 796       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
 797         {
 798           desc->out_edge = EDGE_SUCC (exit_block, 0);
 799           desc->in_edge = EDGE_SUCC (exit_block, 1);
 800         }
 801       else
 802         {
 803           desc->out_edge = EDGE_SUCC (exit_block, 1);
 804           desc->in_edge = EDGE_SUCC (exit_block, 0);
 805         }
 806     }
 807
 808   desc->niter /= max_unroll + 1;
 809   desc->niter_max /= max_unroll + 1;
 810   desc->niter_expr = GEN_INT (desc->niter);
 811
 812   /* Remove the edges.  */
 813   for (i = 0; i < n_remove_edges; i++)
 814     remove_path (loops, remove_edges[i]);
 815   free (remove_edges);
 816
 817   if (dump_file)
 818     fprintf (dump_file,
 819              ";; Unrolled loop %d times, constant # of iterations %i insns\n",
 820              max_unroll, num_loop_insns (loop));
 821 }
 822
 823 /* Decide whether to unroll LOOP iterating runtime computable number of times
 824    and how much.  */
 825 static void
 826 decide_unroll_runtime_iterations (struct loop *loop, int flags)
 827 {
 828   unsigned nunroll, nunroll_by_av, i;
 829   struct niter_desc *desc;
 830
 831   if (!(flags & UAP_UNROLL))
 832     {
 833       /* We were not asked to, just return back silently.  */
 834       return;
 835     }
 836
 837   if (dump_file)
 838     fprintf (dump_file,
 839              "\n;; Considering unrolling loop with runtime "
 840              "computable number of iterations\n");
 841
 842   /* nunroll = total number of copies of the original loop body in
 843      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 844   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 845   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 846   if (nunroll > nunroll_by_av)
 847     nunroll = nunroll_by_av;
 848   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 849     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 850
 851   /* Skip big loops.  */
 852   if (nunroll <= 1)
 853     {
 854       if (dump_file)
 855         fprintf (dump_file, ";; Not considering loop, is too big\n");
 856       return;
 857     }
 858
 859   /* Check for simple loops.  */
 860   desc = get_simple_loop_desc (loop);
 861
 862   /* Check simpleness.  */
 863   if (!desc->simple_p || desc->assumptions)
 864     {
 865       if (dump_file)
 866         fprintf (dump_file,
 867                  ";; Unable to prove that the number of iterations "
 868                  "can be counted in runtime\n");
 869       return;
 870     }
 871
 872   if (desc->const_iter)
 873     {
 874       if (dump_file)
 875         fprintf (dump_file, ";; Loop iterates constant times\n");
 876       return;
 877     }
 878
 879   /* If we have profile feedback, check whether the loop rolls.  */
 880   if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
 881     {
 882       if (dump_file)
 883         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 884       return;
 885     }
 886
 887   /* Success; now force nunroll to be power of 2, as we are unable to
 888      cope with overflows in computation of number of iterations.  */
 889   for (i = 1; 2 * i <= nunroll; i *= 2)
 890     continue;
 891
 892   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
 893   loop->lpt_decision.times = i - 1;
 894
 895   if (dump_file)
 896     fprintf (dump_file,
 897              ";; Decided to unroll the runtime computable "
 898              "times rolling loop, %d times.\n",
 899              loop->lpt_decision.times);
 900 }
 901
 902 /* Unroll LOOP for that we are able to count number of iterations in runtime
 903    LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this (with some
 904    extra care for case n < 0):
 905
 906    for (i = 0; i < n; i++)
 907      body;
 908
 909    ==>
 910
 911    i = 0;
 912    mod = n % 4;
 913
 914    switch (mod)
 915      {
 916        case 3:
 917          body; i++;
 918        case 2:
 919          body; i++;
 920        case 1:
 921          body; i++;
 922        case 0: ;
 923      }
 924
 925    while (i < n)
 926      {
 927        body; i++;
 928        body; i++;
 929        body; i++;
 930        body; i++;
 931      }
 932    */
 933 static void
 934 unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
 935 {
 936   rtx old_niter, niter, init_code, branch_code, tmp;
 937   unsigned i, j, p;
 938   basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
 939   unsigned n_dom_bbs;
 940   sbitmap wont_exit;
 941   int may_exit_copy;
 942   unsigned n_peel, n_remove_edges;
 943   edge *remove_edges, e;
 944   bool extra_zero_check, last_may_exit;
 945   unsigned max_unroll = loop->lpt_decision.times;
 946   struct niter_desc *desc = get_simple_loop_desc (loop);
 947   bool exit_at_end = loop_exit_at_end_p (loop);
 948   struct opt_info *opt_info = NULL;
 949   bool ok;
 950
 951   if (flag_split_ivs_in_unroller
 952       || flag_variable_expansion_in_unroller)
 953     opt_info = analyze_insns_in_loop (loop);
 954
 955   /* Remember blocks whose dominators will have to be updated.  */
 956   dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
 957   n_dom_bbs = 0;
 958
 959   body = get_loop_body (loop);
 960   for (i = 0; i < loop->num_nodes; i++)
 961     {
 962       unsigned nldom;
 963       basic_block *ldom;
 964
 965       nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
 966       for (j = 0; j < nldom; j++)
 967         if (!flow_bb_inside_loop_p (loop, ldom[j]))
 968           dom_bbs[n_dom_bbs++] = ldom[j];
 969
 970       free (ldom);
 971     }
 972   free (body);
 973
 974   if (!exit_at_end)
 975     {
 976       /* Leave exit in first copy (for explanation why see comment in
 977          unroll_loop_constant_iterations).  */
 978       may_exit_copy = 0;
 979       n_peel = max_unroll - 1;
 980       extra_zero_check = true;
 981       last_may_exit = false;
 982     }
 983   else
 984     {
 985       /* Leave exit in last copy (for explanation why see comment in
 986          unroll_loop_constant_iterations).  */
 987       may_exit_copy = max_unroll;
 988       n_peel = max_unroll;
 989       extra_zero_check = false;
 990       last_may_exit = true;
 991     }
 992
 993   /* Get expression for number of iterations.  */
 994   start_sequence ();
 995   old_niter = niter = gen_reg_rtx (desc->mode);
 996   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
 997   if (tmp != niter)
 998     emit_move_insn (niter, tmp);
 999
1000   /* Count modulo by ANDing it with max_unroll; we use the fact that
1001      the number of unrollings is a power of two, and thus this is correct
1002      even if there is overflow in the computation.  */
1003   niter = expand_simple_binop (desc->mode, AND,
1004                                niter,
1005                                GEN_INT (max_unroll),
1006                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
1007
1008   init_code = get_insns ();
1009   end_sequence ();
1010
1011   /* Precondition the loop.  */
1012   loop_split_edge_with (loop_preheader_edge (loop), init_code);
1013
1014   remove_edges = xcalloc (max_unroll + n_peel + 1, sizeof (edge));
1015   n_remove_edges = 0;
1016
1017   wont_exit = sbitmap_alloc (max_unroll + 2);
1018
1019   /* Peel the first copy of loop body (almost always we must leave exit test
1020      here; the only exception is when we have extra zero check and the number
1021      of iterations is reliable.  Also record the place of (possible) extra
1022      zero check.  */
1023   sbitmap_zero (wont_exit);
1024   if (extra_zero_check
1025       && !desc->noloop_assumptions)
1026     SET_BIT (wont_exit, 1);
1027   ezc_swtch = loop_preheader_edge (loop)->src;
1028   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1029                                       loops, 1,
1030                                       wont_exit, desc->out_edge,
1031                                       remove_edges, &n_remove_edges,
1032                                       DLTHE_FLAG_UPDATE_FREQ);
1033   gcc_assert (ok);
1034
1035   /* Record the place where switch will be built for preconditioning.  */
1036   swtch = loop_split_edge_with (loop_preheader_edge (loop),
1037                                 NULL_RTX);
1038
1039   for (i = 0; i < n_peel; i++)
1040     {
1041       /* Peel the copy.  */
1042       sbitmap_zero (wont_exit);
1043       if (i != n_peel - 1 || !last_may_exit)
1044         SET_BIT (wont_exit, 1);
1045       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1046                                           loops, 1,
1047                                           wont_exit, desc->out_edge,
1048                                           remove_edges, &n_remove_edges,
1049                                           DLTHE_FLAG_UPDATE_FREQ);
1050       gcc_assert (ok);
1051
1052       /* Create item for switch.  */
1053       j = n_peel - i - (extra_zero_check ? 0 : 1);
1054       p = REG_BR_PROB_BASE / (i + 2);
1055
1056       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1057       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1058                                           block_label (preheader), p,
1059                                           NULL_RTX);
1060
1061       swtch = loop_split_edge_with (single_pred_edge (swtch), branch_code);
1062       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1063       single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1064       e = make_edge (swtch, preheader,
1065                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1066       e->probability = p;
1067     }
1068
1069   if (extra_zero_check)
1070     {
1071       /* Add branch for zero iterations.  */
1072       p = REG_BR_PROB_BASE / (max_unroll + 1);
1073       swtch = ezc_swtch;
1074       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1075       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1076                                           block_label (preheader), p,
1077                                           NULL_RTX);
1078
1079       swtch = loop_split_edge_with (single_succ_edge (swtch), branch_code);
1080       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1081       single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1082       e = make_edge (swtch, preheader,
1083                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1084       e->probability = p;
1085     }
1086
1087   /* Recount dominators for outer blocks.  */
1088   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
1089
1090   /* And unroll loop.  */
1091
1092   sbitmap_ones (wont_exit);
1093   RESET_BIT (wont_exit, may_exit_copy);
1094   opt_info_start_duplication (opt_info);
1095
1096   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1097                                       loops, max_unroll,
1098                                       wont_exit, desc->out_edge,
1099                                       remove_edges, &n_remove_edges,
1100                                       DLTHE_FLAG_UPDATE_FREQ);
1101   gcc_assert (ok);
1102
1103   if (opt_info)
1104     {
1105       apply_opt_in_copies (opt_info, max_unroll, true, true);
1106       free_opt_info (opt_info);
1107     }
1108
1109   free (wont_exit);
1110
1111   if (exit_at_end)
1112     {
1113       basic_block exit_block = desc->in_edge->src->rbi->copy;
1114       /* Find a new in and out edge; they are in the last copy we have
1115          made.  */
1116
1117       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1118         {
1119           desc->out_edge = EDGE_SUCC (exit_block, 0);
1120           desc->in_edge = EDGE_SUCC (exit_block, 1);
1121         }
1122       else
1123         {
1124           desc->out_edge = EDGE_SUCC (exit_block, 1);
1125           desc->in_edge = EDGE_SUCC (exit_block, 0);
1126         }
1127     }
1128
1129   /* Remove the edges.  */
1130   for (i = 0; i < n_remove_edges; i++)
1131     remove_path (loops, remove_edges[i]);
1132   free (remove_edges);
1133
1134   /* We must be careful when updating the number of iterations due to
1135      preconditioning and the fact that the value must be valid at entry
1136      of the loop.  After passing through the above code, we see that
1137      the correct new number of iterations is this:  */
1138   gcc_assert (!desc->const_iter);
1139   desc->niter_expr =
1140     simplify_gen_binary (UDIV, desc->mode, old_niter,
1141                          GEN_INT (max_unroll + 1));
1142   desc->niter_max /= max_unroll + 1;
1143   if (exit_at_end)
1144     {
1145       desc->niter_expr =
1146         simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1147       desc->noloop_assumptions = NULL_RTX;
1148       desc->niter_max--;
1149     }
1150
1151   if (dump_file)
1152     fprintf (dump_file,
1153              ";; Unrolled loop %d times, counting # of iterations "
1154              "in runtime, %i insns\n",
1155              max_unroll, num_loop_insns (loop));
1156 }
1157
1158 /* Decide whether to simply peel LOOP and how much.  */
1159 static void
1160 decide_peel_simple (struct loop *loop, int flags)
1161 {
1162   unsigned npeel;
1163   struct niter_desc *desc;
1164
1165   if (!(flags & UAP_PEEL))
1166     {
1167       /* We were not asked to, just return back silently.  */
1168       return;
1169     }
1170
1171   if (dump_file)
1172     fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1173
1174   /* npeel = number of iterations to peel.  */
1175   npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1176   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1177     npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1178
1179   /* Skip big loops.  */
1180   if (!npeel)
1181     {
1182       if (dump_file)
1183         fprintf (dump_file, ";; Not considering loop, is too big\n");
1184       return;
1185     }
1186
1187   /* Check for simple loops.  */
1188   desc = get_simple_loop_desc (loop);
1189
1190   /* Check number of iterations.  */
1191   if (desc->simple_p && !desc->assumptions && desc->const_iter)
1192     {
1193       if (dump_file)
1194         fprintf (dump_file, ";; Loop iterates constant times\n");
1195       return;
1196     }
1197
1198   /* Do not simply peel loops with branches inside -- it increases number
1199      of mispredicts.  */
1200   if (num_loop_branches (loop) > 1)
1201     {
1202       if (dump_file)
1203         fprintf (dump_file, ";; Not peeling, contains branches\n");
1204       return;
1205     }
1206
1207   if (loop->header->count)
1208     {
1209       unsigned niter = expected_loop_iterations (loop);
1210       if (niter + 1 > npeel)
1211         {
1212           if (dump_file)
1213             {
1214               fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1215               fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1216                        (HOST_WIDEST_INT) (niter + 1));
1217               fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1218                        npeel);
1219             }
1220           return;
1221         }
1222       npeel = niter + 1;
1223     }
1224   else
1225     {
1226       /* For now we have no good heuristics to decide whether loop peeling
1227          will be effective, so disable it.  */
1228       if (dump_file)
1229         fprintf (dump_file,
1230                  ";; Not peeling loop, no evidence it will be profitable\n");
1231       return;
1232     }
1233
1234   /* Success.  */
1235   loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1236   loop->lpt_decision.times = npeel;
1237
1238   if (dump_file)
1239     fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1240              loop->lpt_decision.times);
1241 }
1242
1243 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1244    while (cond)
1245      body;
1246
1247    ==>
1248
1249    if (!cond) goto end;
1250    body;
1251    if (!cond) goto end;
1252    body;
1253    while (cond)
1254      body;
1255    end: ;
1256    */
1257 static void
1258 peel_loop_simple (struct loops *loops, struct loop *loop)
1259 {
1260   sbitmap wont_exit;
1261   unsigned npeel = loop->lpt_decision.times;
1262   struct niter_desc *desc = get_simple_loop_desc (loop);
1263   struct opt_info *opt_info = NULL;
1264   bool ok;
1265
1266   if (flag_split_ivs_in_unroller && npeel > 1)
1267     opt_info = analyze_insns_in_loop (loop);
1268
1269   wont_exit = sbitmap_alloc (npeel + 1);
1270   sbitmap_zero (wont_exit);
1271
1272   opt_info_start_duplication (opt_info);
1273
1274   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1275                                       loops, npeel, wont_exit,
1276                                       NULL, NULL,
1277                                       NULL, DLTHE_FLAG_UPDATE_FREQ);
1278   gcc_assert (ok);
1279
1280   free (wont_exit);
1281
1282   if (opt_info)
1283     {
1284       apply_opt_in_copies (opt_info, npeel, false, false);
1285       free_opt_info (opt_info);
1286     }
1287
1288   if (desc->simple_p)
1289     {
1290       if (desc->const_iter)
1291         {
1292           desc->niter -= npeel;
1293           desc->niter_expr = GEN_INT (desc->niter);
1294           desc->noloop_assumptions = NULL_RTX;
1295         }
1296       else
1297         {
1298           /* We cannot just update niter_expr, as its value might be clobbered
1299              inside loop.  We could handle this by counting the number into
1300              temporary just like we do in runtime unrolling, but it does not
1301              seem worthwhile.  */
1302           free_simple_loop_desc (loop);
1303         }
1304     }
1305   if (dump_file)
1306     fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1307 }
1308
1309 /* Decide whether to unroll LOOP stupidly and how much.  */
1310 static void
1311 decide_unroll_stupid (struct loop *loop, int flags)
1312 {
1313   unsigned nunroll, nunroll_by_av, i;
1314   struct niter_desc *desc;
1315
1316   if (!(flags & UAP_UNROLL_ALL))
1317     {
1318       /* We were not asked to, just return back silently.  */
1319       return;
1320     }
1321
1322   if (dump_file)
1323     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1324
1325   /* nunroll = total number of copies of the original loop body in
1326      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1327   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1328   nunroll_by_av
1329     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1330   if (nunroll > nunroll_by_av)
1331     nunroll = nunroll_by_av;
1332   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1333     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1334
1335   /* Skip big loops.  */
1336   if (nunroll <= 1)
1337     {
1338       if (dump_file)
1339         fprintf (dump_file, ";; Not considering loop, is too big\n");
1340       return;
1341     }
1342
1343   /* Check for simple loops.  */
1344   desc = get_simple_loop_desc (loop);
1345
1346   /* Check simpleness.  */
1347   if (desc->simple_p && !desc->assumptions)
1348     {
1349       if (dump_file)
1350         fprintf (dump_file, ";; The loop is simple\n");
1351       return;
1352     }
1353
1354   /* Do not unroll loops with branches inside -- it increases number
1355      of mispredicts.  */
1356   if (num_loop_branches (loop) > 1)
1357     {
1358       if (dump_file)
1359         fprintf (dump_file, ";; Not unrolling, contains branches\n");
1360       return;
1361     }
1362
1363   /* If we have profile feedback, check whether the loop rolls.  */
1364   if (loop->header->count
1365       && expected_loop_iterations (loop) < 2 * nunroll)
1366     {
1367       if (dump_file)
1368         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1369       return;
1370     }
1371
1372   /* Success.  Now force nunroll to be power of 2, as it seems that this
1373      improves results (partially because of better alignments, partially
1374      because of some dark magic).  */
1375   for (i = 1; 2 * i <= nunroll; i *= 2)
1376     continue;
1377
1378   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1379   loop->lpt_decision.times = i - 1;
1380
1381   if (dump_file)
1382     fprintf (dump_file,
1383              ";; Decided to unroll the loop stupidly, %d times.\n",
1384              loop->lpt_decision.times);
1385 }
1386
1387 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1388    while (cond)
1389      body;
1390
1391    ==>
1392
1393    while (cond)
1394      {
1395        body;
1396        if (!cond) break;
1397        body;
1398        if (!cond) break;
1399        body;
1400        if (!cond) break;
1401        body;
1402      }
1403    */
1404 static void
1405 unroll_loop_stupid (struct loops *loops, struct loop *loop)
1406 {
1407   sbitmap wont_exit;
1408   unsigned nunroll = loop->lpt_decision.times;
1409   struct niter_desc *desc = get_simple_loop_desc (loop);
1410   struct opt_info *opt_info = NULL;
1411   bool ok;
1412
1413   if (flag_split_ivs_in_unroller
1414       || flag_variable_expansion_in_unroller)
1415     opt_info = analyze_insns_in_loop (loop);
1416
1417
1418   wont_exit = sbitmap_alloc (nunroll + 1);
1419   sbitmap_zero (wont_exit);
1420   opt_info_start_duplication (opt_info);
1421
1422   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1423                                       loops, nunroll, wont_exit,
1424                                       NULL, NULL, NULL,
1425                                       DLTHE_FLAG_UPDATE_FREQ);
1426   gcc_assert (ok);
1427
1428   if (opt_info)
1429     {
1430       apply_opt_in_copies (opt_info, nunroll, true, true);
1431       free_opt_info (opt_info);
1432     }
1433
1434   free (wont_exit);
1435
1436   if (desc->simple_p)
1437     {
1438       /* We indeed may get here provided that there are nontrivial assumptions
1439          for a loop to be really simple.  We could update the counts, but the
1440          problem is that we are unable to decide which exit will be taken
1441          (not really true in case the number of iterations is constant,
1442          but noone will do anything with this information, so we do not
1443          worry about it).  */
1444       desc->simple_p = false;
1445     }
1446
1447   if (dump_file)
1448     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1449              nunroll, num_loop_insns (loop));
1450 }
1451
1452 /* A hash function for information about insns to split.  */
1453
1454 static hashval_t
1455 si_info_hash (const void *ivts)
1456 {
1457   return htab_hash_pointer (((struct iv_to_split *) ivts)->insn);
1458 }
1459
1460 /* An equality functions for information about insns to split.  */
1461
1462 static int
1463 si_info_eq (const void *ivts1, const void *ivts2)
1464 {
1465   const struct iv_to_split *i1 = ivts1;
1466   const struct iv_to_split *i2 = ivts2;
1467
1468   return i1->insn == i2->insn;
1469 }
1470
1471 /* Return a hash for VES, which is really a "var_to_expand *".  */
1472
1473 static hashval_t
1474 ve_info_hash (const void *ves)
1475 {
1476   return htab_hash_pointer (((struct var_to_expand *) ves)->insn);
1477 }
1478
1479 /* Return true if IVTS1 and IVTS2 (which are really both of type
1480    "var_to_expand *") refer to the same instruction.  */
1481
1482 static int
1483 ve_info_eq (const void *ivts1, const void *ivts2)
1484 {
1485   const struct var_to_expand *i1 = ivts1;
1486   const struct var_to_expand *i2 = ivts2;
1487
1488   return i1->insn == i2->insn;
1489 }
1490
1491 /* Returns true if REG is referenced in one insn in LOOP.  */
1492
1493 bool
1494 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
1495 {
1496   basic_block *body, bb;
1497   unsigned i;
1498   int count_ref = 0;
1499   rtx insn;
1500
1501   body = get_loop_body (loop);
1502   for (i = 0; i < loop->num_nodes; i++)
1503     {
1504       bb = body[i];
1505
1506       FOR_BB_INSNS (bb, insn)
1507       {
1508         if (rtx_referenced_p (reg, insn))
1509           count_ref++;
1510       }
1511     }
1512   return (count_ref  == 1);
1513 }
1514
1515 /* Determine whether INSN contains an accumulator
1516    which can be expanded into separate copies,
1517    one for each copy of the LOOP body.
1518
1519    for (i = 0 ; i < n; i++)
1520      sum += a[i];
1521
1522    ==>
1523
1524    sum += a[i]
1525    ....
1526    i = i+1;
1527    sum1 += a[i]
1528    ....
1529    i = i+1
1530    sum2 += a[i];
1531    ....
1532
1533    Return NULL if INSN contains no opportunity for expansion of accumulator.
1534    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1535    information and return a pointer to it.
1536 */
1537
1538 static struct var_to_expand *
1539 analyze_insn_to_expand_var (struct loop *loop, rtx insn)
1540 {
1541   rtx set, dest, src, op1;
1542   struct var_to_expand *ves;
1543   enum machine_mode mode1, mode2;
1544
1545   set = single_set (insn);
1546   if (!set)
1547     return NULL;
1548
1549   dest = SET_DEST (set);
1550   src = SET_SRC (set);
1551
1552   if (GET_CODE (src) != PLUS
1553       && GET_CODE (src) != MINUS
1554       && GET_CODE (src) != MULT)
1555     return NULL;
1556
1557   if (!XEXP (src, 0))
1558     return NULL;
1559
1560   op1 = XEXP (src, 0);
1561
1562   if (!REG_P (dest)
1563       && !(GET_CODE (dest) == SUBREG
1564            && REG_P (SUBREG_REG (dest))))
1565     return NULL;
1566
1567   if (!rtx_equal_p (dest, op1))
1568     return NULL;
1569
1570   if (!referenced_in_one_insn_in_loop_p (loop, dest))
1571     return NULL;
1572
1573   if (rtx_referenced_p (dest, XEXP (src, 1)))
1574     return NULL;
1575
1576   mode1 = GET_MODE (dest);
1577   mode2 = GET_MODE (XEXP (src, 1));
1578   if ((FLOAT_MODE_P (mode1)
1579        || FLOAT_MODE_P (mode2))
1580       && !flag_unsafe_math_optimizations)
1581     return NULL;
1582
1583   /* Record the accumulator to expand.  */
1584   ves = xmalloc (sizeof (struct var_to_expand));
1585   ves->insn = insn;
1586   ves->var_expansions = VEC_alloc (rtx, heap, 1);
1587   ves->reg = copy_rtx (dest);
1588   ves->op = GET_CODE (src);
1589   ves->expansion_count = 0;
1590   ves->reuse_expansion = 0;
1591   return ves;
1592 }
1593
1594 /* Determine whether there is an induction variable in INSN that
1595    we would like to split during unrolling.
1596
1597    I.e. replace
1598
1599    i = i + 1;
1600    ...
1601    i = i + 1;
1602    ...
1603    i = i + 1;
1604    ...
1605
1606    type chains by
1607
1608    i0 = i + 1
1609    ...
1610    i = i0 + 1
1611    ...
1612    i = i0 + 2
1613    ...
1614
1615    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1616    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1617    pointer to it.  */
1618
1619 static struct iv_to_split *
1620 analyze_iv_to_split_insn (rtx insn)
1621 {
1622   rtx set, dest;
1623   struct rtx_iv iv;
1624   struct iv_to_split *ivts;
1625   bool ok;
1626
1627   /* For now we just split the basic induction variables.  Later this may be
1628      extended for example by selecting also addresses of memory references.  */
1629   set = single_set (insn);
1630   if (!set)
1631     return NULL;
1632
1633   dest = SET_DEST (set);
1634   if (!REG_P (dest))
1635     return NULL;
1636
1637   if (!biv_p (insn, dest))
1638     return NULL;
1639
1640   ok = iv_analyze (insn, dest, &iv);
1641   gcc_assert (ok);
1642
1643   if (iv.step == const0_rtx
1644       || iv.mode != iv.extend_mode)
1645     return NULL;
1646
1647   /* Record the insn to split.  */
1648   ivts = xmalloc (sizeof (struct iv_to_split));
1649   ivts->insn = insn;
1650   ivts->base_var = NULL_RTX;
1651   ivts->step = iv.step;
1652   ivts->n_loc = 1;
1653   ivts->loc[0] = 1;
1654
1655   return ivts;
1656 }
1657
1658 /* Determines which of insns in LOOP can be optimized.
1659    Return a OPT_INFO struct with the relevant hash tables filled
1660    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1661    is undefined for the return value.  */
1662
1663 static struct opt_info *
1664 analyze_insns_in_loop (struct loop *loop)
1665 {
1666   basic_block *body, bb;
1667   unsigned i, num_edges = 0;
1668   struct opt_info *opt_info = xcalloc (1, sizeof (struct opt_info));
1669   rtx insn;
1670   struct iv_to_split *ivts = NULL;
1671   struct var_to_expand *ves = NULL;
1672   PTR *slot1;
1673   PTR *slot2;
1674   edge *edges = get_loop_exit_edges (loop, &num_edges);
1675   bool can_apply = false;
1676
1677   iv_analysis_loop_init (loop);
1678
1679   body = get_loop_body (loop);
1680
1681   if (flag_split_ivs_in_unroller)
1682     opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1683                                             si_info_hash, si_info_eq, free);
1684
1685   /* Record the loop exit bb and loop preheader before the unrolling.  */
1686   if (!loop_preheader_edge (loop)->src)
1687     {
1688       loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1689       opt_info->loop_preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1690     }
1691   else
1692     opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1693
1694   if (num_edges == 1
1695       && !(edges[0]->flags & EDGE_COMPLEX))
1696     {
1697       opt_info->loop_exit = loop_split_edge_with (edges[0], NULL_RTX);
1698       can_apply = true;
1699     }
1700
1701   if (flag_variable_expansion_in_unroller
1702       && can_apply)
1703     opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1704                                                       ve_info_hash, ve_info_eq, free);
1705
1706   for (i = 0; i < loop->num_nodes; i++)
1707     {
1708       bb = body[i];
1709       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1710         continue;
1711
1712       FOR_BB_INSNS (bb, insn)
1713       {
1714         if (!INSN_P (insn))
1715           continue;
1716
1717         if (opt_info->insns_to_split)
1718           ivts = analyze_iv_to_split_insn (insn);
1719
1720         if (ivts)
1721           {
1722             slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1723             *slot1 = ivts;
1724             continue;
1725           }
1726
1727         if (opt_info->insns_with_var_to_expand)
1728           ves = analyze_insn_to_expand_var (loop, insn);
1729
1730         if (ves)
1731           {
1732             slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1733             *slot2 = ves;
1734           }
1735       }
1736     }
1737
1738   free (edges);
1739   free (body);
1740   return opt_info;
1741 }
1742
1743 /* Called just before loop duplication.  Records start of duplicated area
1744    to OPT_INFO.  */
1745
1746 static void
1747 opt_info_start_duplication (struct opt_info *opt_info)
1748 {
1749   if (opt_info)
1750     opt_info->first_new_block = last_basic_block;
1751 }
1752
1753 /* Determine the number of iterations between initialization of the base
1754    variable and the current copy (N_COPY).  N_COPIES is the total number
1755    of newly created copies.  UNROLLING is true if we are unrolling
1756    (not peeling) the loop.  */
1757
1758 static unsigned
1759 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1760 {
1761   if (unrolling)
1762     {
1763       /* If we are unrolling, initialization is done in the original loop
1764          body (number 0).  */
1765       return n_copy;
1766     }
1767   else
1768     {
1769       /* If we are peeling, the copy in that the initialization occurs has
1770          number 1.  The original loop (number 0) is the last.  */
1771       if (n_copy)
1772         return n_copy - 1;
1773       else
1774         return n_copies;
1775     }
1776 }
1777
1778 /* Locate in EXPR the expression corresponding to the location recorded
1779    in IVTS, and return a pointer to the RTX for this location.  */
1780
1781 static rtx *
1782 get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1783 {
1784   unsigned i;
1785   rtx *ret = &expr;
1786
1787   for (i = 0; i < ivts->n_loc; i++)
1788     ret = &XEXP (*ret, ivts->loc[i]);
1789
1790   return ret;
1791 }
1792
1793 /* Allocate basic variable for the induction variable chain.  Callback for
1794    htab_traverse.  */
1795
1796 static int
1797 allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
1798 {
1799   struct iv_to_split *ivts = *slot;
1800   rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1801
1802   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1803
1804   return 1;
1805 }
1806
1807 /* Insert initialization of basic variable of IVTS before INSN, taking
1808    the initial value from INSN.  */
1809
1810 static void
1811 insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1812 {
1813   rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1814   rtx seq;
1815
1816   start_sequence ();
1817   expr = force_operand (expr, ivts->base_var);
1818   if (expr != ivts->base_var)
1819     emit_move_insn (ivts->base_var, expr);
1820   seq = get_insns ();
1821   end_sequence ();
1822
1823   emit_insn_before (seq, insn);
1824 }
1825
1826 /* Replace the use of induction variable described in IVTS in INSN
1827    by base variable + DELTA * step.  */
1828
1829 static void
1830 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1831 {
1832   rtx expr, *loc, seq, incr, var;
1833   enum machine_mode mode = GET_MODE (ivts->base_var);
1834   rtx src, dest, set;
1835
1836   /* Construct base + DELTA * step.  */
1837   if (!delta)
1838     expr = ivts->base_var;
1839   else
1840     {
1841       incr = simplify_gen_binary (MULT, mode,
1842                                   ivts->step, gen_int_mode (delta, mode));
1843       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1844                                   ivts->base_var, incr);
1845     }
1846
1847   /* Figure out where to do the replacement.  */
1848   loc = get_ivts_expr (single_set (insn), ivts);
1849
1850   /* If we can make the replacement right away, we're done.  */
1851   if (validate_change (insn, loc, expr, 0))
1852     return;
1853
1854   /* Otherwise, force EXPR into a register and try again.  */
1855   start_sequence ();
1856   var = gen_reg_rtx (mode);
1857   expr = force_operand (expr, var);
1858   if (expr != var)
1859     emit_move_insn (var, expr);
1860   seq = get_insns ();
1861   end_sequence ();
1862   emit_insn_before (seq, insn);
1863
1864   if (validate_change (insn, loc, var, 0))
1865     return;
1866
1867   /* The last chance.  Try recreating the assignment in insn
1868      completely from scratch.  */
1869   set = single_set (insn);
1870   gcc_assert (set);
1871
1872   start_sequence ();
1873   *loc = var;
1874   src = copy_rtx (SET_SRC (set));
1875   dest = copy_rtx (SET_DEST (set));
1876   src = force_operand (src, dest);
1877   if (src != dest)
1878     emit_move_insn (dest, src);
1879   seq = get_insns ();
1880   end_sequence ();
1881
1882   emit_insn_before (seq, insn);
1883   delete_insn (insn);
1884 }
1885
1886
1887 /* Return one expansion of the accumulator recorded in struct VE.  */
1888
1889 static rtx
1890 get_expansion (struct var_to_expand *ve)
1891 {
1892   rtx reg;
1893
1894   if (ve->reuse_expansion == 0)
1895     reg = ve->reg;
1896   else
1897     reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);
1898
1899   if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
1900     ve->reuse_expansion = 0;
1901   else
1902     ve->reuse_expansion++;
1903
1904   return reg;
1905 }
1906
1907
1908 /* Given INSN replace the uses of the accumulator recorded in VE
1909    with a new register.  */
1910
1911 static void
1912 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
1913 {
1914   rtx new_reg, set;
1915   bool really_new_expansion = false;
1916
1917   set = single_set (insn);
1918   gcc_assert (set);
1919
1920   /* Generate a new register only if the expansion limit has not been
1921      reached.  Else reuse an already existing expansion.  */
1922   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1923     {
1924       really_new_expansion = true;
1925       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1926     }
1927   else
1928     new_reg = get_expansion (ve);
1929
1930   validate_change (insn, &SET_DEST (set), new_reg, 1);
1931   validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
1932
1933   if (apply_change_group ())
1934     if (really_new_expansion)
1935       {
1936         VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
1937         ve->expansion_count++;
1938       }
1939 }
1940
1941 /* Initialize the variable expansions in loop preheader.
1942    Callbacks for htab_traverse.  PLACE_P is the loop-preheader
1943    basic block where the initialization of the expansions
1944    should take place.  */
1945
1946 static int
1947 insert_var_expansion_initialization (void **slot, void *place_p)
1948 {
1949   struct var_to_expand *ve = *slot;
1950   basic_block place = (basic_block)place_p;
1951   rtx seq, var, zero_init, insn;
1952   unsigned i;
1953
1954   if (VEC_length (rtx, ve->var_expansions) == 0)
1955     return 1;
1956
1957   start_sequence ();
1958   if (ve->op == PLUS || ve->op == MINUS)
1959     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
1960       {
1961         zero_init =  CONST0_RTX (GET_MODE (var));
1962         emit_move_insn (var, zero_init);
1963       }
1964   else if (ve->op == MULT)
1965     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
1966       {
1967         zero_init =  CONST1_RTX (GET_MODE (var));
1968         emit_move_insn (var, zero_init);
1969       }
1970
1971   seq = get_insns ();
1972   end_sequence ();
1973
1974   insn = BB_HEAD (place);
1975   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
1976     insn = NEXT_INSN (insn);
1977
1978   emit_insn_after (seq, insn);
1979   /* Continue traversing the hash table.  */
1980   return 1;
1981 }
1982
1983 /*  Combine the variable expansions at the loop exit.
1984     Callbacks for htab_traverse.  PLACE_P is the loop exit
1985     basic block where the summation of the expansions should
1986     take place.  */
1987
1988 static int
1989 combine_var_copies_in_loop_exit (void **slot, void *place_p)
1990 {
1991   struct var_to_expand *ve = *slot;
1992   basic_block place = (basic_block)place_p;
1993   rtx sum = ve->reg;
1994   rtx expr, seq, var, insn;
1995   unsigned i;
1996
1997   if (VEC_length (rtx, ve->var_expansions) == 0)
1998     return 1;
1999
2000   start_sequence ();
2001   if (ve->op == PLUS || ve->op == MINUS)
2002     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2003       {
2004         sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
2005                                    var, sum);
2006       }
2007   else if (ve->op == MULT)
2008     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2009       {
2010         sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
2011                                    var, sum);
2012       }
2013
2014   expr = force_operand (sum, ve->reg);
2015   if (expr != ve->reg)
2016     emit_move_insn (ve->reg, expr);
2017   seq = get_insns ();
2018   end_sequence ();
2019
2020   insn = BB_HEAD (place);
2021   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2022     insn = NEXT_INSN (insn);
2023
2024   emit_insn_after (seq, insn);
2025
2026   /* Continue traversing the hash table.  */
2027   return 1;
2028 }
2029
2030 /* Apply loop optimizations in loop copies using the
2031    data which gathered during the unrolling.  Structure
2032    OPT_INFO record that data.
2033
2034    UNROLLING is true if we unrolled (not peeled) the loop.
2035    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
2036    the loop (as it should happen in complete unrolling, but not in ordinary
2037    peeling of the loop).  */
2038
2039 static void
2040 apply_opt_in_copies (struct opt_info *opt_info,
2041                      unsigned n_copies, bool unrolling,
2042                      bool rewrite_original_loop)
2043 {
2044   unsigned i, delta;
2045   basic_block bb, orig_bb;
2046   rtx insn, orig_insn, next;
2047   struct iv_to_split ivts_templ, *ivts;
2048   struct var_to_expand ve_templ, *ves;
2049
2050   /* Sanity check -- we need to put initialization in the original loop
2051      body.  */
2052   gcc_assert (!unrolling || rewrite_original_loop);
2053
2054   /* Allocate the basic variables (i0).  */
2055   if (opt_info->insns_to_split)
2056     htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL);
2057
2058   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2059     {
2060       bb = BASIC_BLOCK (i);
2061       orig_bb = bb->rbi->original;
2062
2063       delta = determine_split_iv_delta (bb->rbi->copy_number, n_copies,
2064                                         unrolling);
2065       orig_insn = BB_HEAD (orig_bb);
2066       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2067         {
2068           next = NEXT_INSN (insn);
2069           if (!INSN_P (insn))
2070             continue;
2071
2072           while (!INSN_P (orig_insn))
2073             orig_insn = NEXT_INSN (orig_insn);
2074
2075           ivts_templ.insn = orig_insn;
2076           ve_templ.insn = orig_insn;
2077
2078           /* Apply splitting iv optimization.  */
2079           if (opt_info->insns_to_split)
2080             {
2081               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2082
2083               if (ivts)
2084                 {
2085 #ifdef ENABLE_CHECKING
2086                   gcc_assert (rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)));
2087 #endif
2088
2089                   if (!delta)
2090                     insert_base_initialization (ivts, insn);
2091                   split_iv (ivts, insn, delta);
2092                 }
2093             }
2094           /* Apply variable expansion optimization.  */
2095           if (unrolling && opt_info->insns_with_var_to_expand)
2096             {
2097               ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2098               if (ves)
2099                 {
2100 #ifdef ENABLE_CHECKING
2101                   gcc_assert (rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)));
2102 #endif
2103                   expand_var_during_unrolling (ves, insn);
2104                 }
2105             }
2106           orig_insn = NEXT_INSN (orig_insn);
2107         }
2108     }
2109
2110   if (!rewrite_original_loop)
2111     return;
2112
2113   /* Initialize the variable expansions in the loop preheader
2114      and take care of combining them at the loop exit.  */
2115   if (opt_info->insns_with_var_to_expand)
2116     {
2117       htab_traverse (opt_info->insns_with_var_to_expand,
2118                      insert_var_expansion_initialization,
2119                      opt_info->loop_preheader);
2120       htab_traverse (opt_info->insns_with_var_to_expand,
2121                      combine_var_copies_in_loop_exit,
2122                      opt_info->loop_exit);
2123     }
2124
2125   /* Rewrite also the original loop body.  Find them as originals of the blocks
2126      in the last copied iteration, i.e. those that have
2127      bb->rbi->original->copy == bb.  */
2128   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2129     {
2130       bb = BASIC_BLOCK (i);
2131       orig_bb = bb->rbi->original;
2132       if (orig_bb->rbi->copy != bb)
2133         continue;
2134
2135       delta = determine_split_iv_delta (0, n_copies, unrolling);
2136       for (orig_insn = BB_HEAD (orig_bb);
2137            orig_insn != NEXT_INSN (BB_END (bb));
2138            orig_insn = next)
2139         {
2140           next = NEXT_INSN (orig_insn);
2141
2142           if (!INSN_P (orig_insn))
2143             continue;
2144
2145           ivts_templ.insn = orig_insn;
2146           if (opt_info->insns_to_split)
2147             {
2148               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2149               if (ivts)
2150                 {
2151                   if (!delta)
2152                     insert_base_initialization (ivts, orig_insn);
2153                   split_iv (ivts, orig_insn, delta);
2154                   continue;
2155                 }
2156             }
2157
2158         }
2159     }
2160 }
2161
2162 /*  Release the data structures used for the variable expansion
2163     optimization.  Callbacks for htab_traverse.  */
2164
2165 static int
2166 release_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
2167 {
2168   struct var_to_expand *ve = *slot;
2169
2170   VEC_free (rtx, heap, ve->var_expansions);
2171
2172   /* Continue traversing the hash table.  */
2173   return 1;
2174 }
2175
2176 /* Release OPT_INFO.  */
2177
2178 static void
2179 free_opt_info (struct opt_info *opt_info)
2180 {
2181   if (opt_info->insns_to_split)
2182     htab_delete (opt_info->insns_to_split);
2183   if (opt_info->insns_with_var_to_expand)
2184     {
2185       htab_traverse (opt_info->insns_with_var_to_expand,
2186                      release_var_copies, NULL);
2187       htab_delete (opt_info->insns_with_var_to_expand);
2188     }
2189   free (opt_info);
2190 }