gcc/loop-unroll.c

   1 /* Loop unrolling and peeling.
   2    Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 2, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  19 02111-1307, USA.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "rtl.h"
  26 #include "hard-reg-set.h"
  27 #include "obstack.h"
  28 #include "basic-block.h"
  29 #include "cfgloop.h"
  30 #include "cfglayout.h"
  31 #include "params.h"
  32 #include "output.h"
  33 #include "expr.h"
  34 #include "hashtab.h"
  35 #include "recog.h"
  36 #include "varray.h"
  37
  38 /* This pass performs loop unrolling and peeling.  We only perform these
  39    optimizations on innermost loops (with single exception) because
  40    the impact on performance is greatest here, and we want to avoid
  41    unnecessary code size growth.  The gain comes from more sequential
  42    code, from code that later passes can optimize better, and in some
  43    cases from fewer tests of exit conditions.  The main problem is code
  44    growth, which hurts performance because of cache effects.
  45
  46    What we do:
  47
  48    -- complete peeling of once-rolling loops; this is the above mentioned
  49       exception, as this causes loop to be cancelled completely and
  50       does not cause code growth
  51    -- complete peeling of loops that roll (small) constant times.
  52    -- simple peeling of first iterations of loops that do not roll much
  53       (according to profile feedback)
  54    -- unrolling of loops that roll constant times; this is almost always
  55       win, as we get rid of exit condition tests.
  56    -- unrolling of loops that roll number of times that we can compute
  57       in runtime; we also get rid of exit condition tests here, but there
  58       is the extra expense for calculating the number of iterations
  59    -- simple unrolling of remaining loops; this is performed only if we
  60       are asked to, as the gain is questionable in this case and often
  61       it may even slow down the code
  62    For more detailed descriptions of each of those, see comments at
  63    appropriate function below.
  64
  65    There are many parameters (defined and described in params.def) that
  66    control how much we unroll/peel.
  67
  68    ??? A great problem is that we don't have a good way to determine
  69    how many times we should unroll the loop; the experiments I have made
  70    showed that this choice may affect performance by several percent.
  71    */
  72
  73 /* Information about induction variables to split.  */
  74
  75 struct iv_to_split
  76 {
  77   rtx insn;             /* The insn in which the induction variable occurs.  */
  78   rtx base_var;         /* The variable on which the values in the further
  79                            iterations are based.  */
  80   rtx step;             /* Step of the induction variable.  */
  81   unsigned n_loc;       /* Number of entries of LOC in use; see the example below.  */
  82   unsigned loc[3];      /* Location where the definition of the induction
  83                            variable occurs in the insn.  For example if
  84                            N_LOC is 2, the expression is located at
  85                            XEXP (XEXP (single_set, loc[0]), loc[1]).  */
  86 };
  87
  88 /* Information about accumulators to expand.  */
  89
  90 struct var_to_expand
  91 {
  92   rtx insn;                        /* The insn in which the variable expansion occurs.  */
  93   rtx reg;                         /* The accumulator which is expanded.  */
  94   varray_type var_expansions;      /* The copies of the expanded accumulator.  */
  95   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
  96                                       or multiplication.  */
  97   int expansion_count;             /* The number of expansions generated so far.  */
  98   int reuse_expansion;             /* The expansion we intend to reuse to expand
  99                                       the accumulator.  If REUSE_EXPANSION is 0, reuse
 100                                       the original accumulator.  Otherwise use
 101                                       var_expansions[REUSE_EXPANSION - 1].  */
 102 };
 103
 104 /* Information about the optimizations applied in
 105    the unrolled loop.  */
 106
 107 struct opt_info
 108 {
 109   htab_t insns_to_split;           /* A hashtable of insns to split.  */
 110   htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
 111                                       to expand.  */
 112   unsigned first_new_block;        /* The first basic block that was
 113                                       duplicated.  */
 114   basic_block loop_exit;           /* The loop exit basic block.  */
 115   basic_block loop_preheader;      /* The loop preheader basic block.  */
 116 };
 117 /* Forward declarations of the file-local (static) functions.  */
 118 static void decide_unrolling_and_peeling (struct loops *, int);
 119 static void peel_loops_completely (struct loops *, int);
 120 static void decide_peel_simple (struct loop *, int);
 121 static void decide_peel_once_rolling (struct loop *, int);
 122 static void decide_peel_completely (struct loop *, int);
 123 static void decide_unroll_stupid (struct loop *, int);
 124 static void decide_unroll_constant_iterations (struct loop *, int);
 125 static void decide_unroll_runtime_iterations (struct loop *, int);
 126 static void peel_loop_simple (struct loops *, struct loop *);
 127 static void peel_loop_completely (struct loops *, struct loop *);
 128 static void unroll_loop_stupid (struct loops *, struct loop *);
 129 static void unroll_loop_constant_iterations (struct loops *, struct loop *);
 130 static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
 131 static struct opt_info *analyze_insns_in_loop (struct loop *);
 132 static void opt_info_start_duplication (struct opt_info *);
 133 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
 134 static void free_opt_info (struct opt_info *);
 135 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
 136 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
 137 static struct iv_to_split *analyze_iv_to_split_insn (rtx);
 138 static void expand_var_during_unrolling (struct var_to_expand *, rtx);
 139 static int insert_var_expansion_initialization (void **, void *);
 140 static int combine_var_copies_in_loop_exit (void **, void *);
 141 static int release_var_copies (void **, void *);
 142 static rtx get_expansion (struct var_to_expand *);
 143
 144 /* Unroll and/or peel (depending on FLAGS) LOOPS.  */
 145 void
 146 unroll_and_peel_loops (struct loops *loops, int flags)
 147 {
 148   struct loop *loop, *next;
 149   bool check;
 150
 151   /* First perform complete loop peeling (it is almost surely a win,
 152      and affects parameters for further decision a lot).  */
 153   peel_loops_completely (loops, flags);
 154
 155   /* Now decide rest of unrolling and peeling.  */
 156   decide_unrolling_and_peeling (loops, flags);
 157
 158   loop = loops->tree_root;
 159   while (loop->inner)
 160     loop = loop->inner;
 161
 162   /* Scan the loops, inner ones first.  */
 163   while (loop != loops->tree_root)
 164     {
 165       if (loop->next)
 166         {
 167           next = loop->next;
 168           while (next->inner)
 169             next = next->inner;
 170         }
 171       else
 172         next = loop->outer;
 173
 174       check = true;
 175       /* And perform the appropriate transformations.  */
 176       switch (loop->lpt_decision.decision)
 177         {
 178         case LPT_PEEL_COMPLETELY:
 179           /* Already done.  */
 180           gcc_unreachable ();
 181         case LPT_PEEL_SIMPLE:
 182           peel_loop_simple (loops, loop);
 183           break;
 184         case LPT_UNROLL_CONSTANT:
 185           unroll_loop_constant_iterations (loops, loop);
 186           break;
 187         case LPT_UNROLL_RUNTIME:
 188           unroll_loop_runtime_iterations (loops, loop);
 189           break;
 190         case LPT_UNROLL_STUPID:
 191           unroll_loop_stupid (loops, loop);
 192           break;
 193         case LPT_NONE:
 194           check = false;
 195           break;
 196         default:
 197           gcc_unreachable ();
 198         }
 199       if (check)
 200         {
 201 #ifdef ENABLE_CHECKING
 202           verify_dominators (CDI_DOMINATORS);
 203           verify_loop_structure (loops);
 204 #endif
 205         }
 206       loop = next;
 207     }
 208
 209   iv_analysis_done ();
 210 }
 211
 212 /* Check whether exit of the LOOP is at the end of loop body.  */
 213
 214 static bool
 215 loop_exit_at_end_p (struct loop *loop)
 216 {
 217   struct niter_desc *desc = get_simple_loop_desc (loop);
 218   rtx insn;
 219
 220   if (desc->in_edge->dest != loop->latch)
 221     return false;
 222
 223   /* Check that the latch is empty.  */
 224   FOR_BB_INSNS (loop->latch, insn)
 225     {
 226       if (INSN_P (insn))
 227         return false;
 228     }
 229
 230   return true;
 231 }
 232
 233 /* Check whether to peel LOOPS (depending on FLAGS) completely and do so.  */
 234 static void
 235 peel_loops_completely (struct loops *loops, int flags)
 236 {
 237   struct loop *loop;
 238   unsigned i;
 239
 240   /* Scan the loops, the inner ones first.  */
 241   for (i = loops->num - 1; i > 0; i--)
 242     {
 243       loop = loops->parray[i];
 244       if (!loop)
 245         continue;
 246
 247       loop->lpt_decision.decision = LPT_NONE;
 248
 249       if (dump_file)
 250         fprintf (dump_file,
 251                  "\n;; *** Considering loop %d for complete peeling ***\n",
 252                  loop->num);
 253
 254       loop->ninsns = num_loop_insns (loop);
 255
 256       decide_peel_once_rolling (loop, flags);
 257       if (loop->lpt_decision.decision == LPT_NONE)
 258         decide_peel_completely (loop, flags);
 259
 260       if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
 261         {
 262           peel_loop_completely (loops, loop);
 263 #ifdef ENABLE_CHECKING
 264           verify_dominators (CDI_DOMINATORS);
 265           verify_loop_structure (loops);
 266 #endif
 267         }
 268     }
 269 }
 270
 271 /* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much.  */
 272 static void
 273 decide_unrolling_and_peeling (struct loops *loops, int flags)
 274 {
 275   struct loop *loop = loops->tree_root, *next;
 276
 277   while (loop->inner)
 278     loop = loop->inner;
 279
 280   /* Scan the loops, inner ones first.  */
 281   while (loop != loops->tree_root)
 282     {
 283       if (loop->next)
 284         {
 285           next = loop->next;
 286           while (next->inner)
 287             next = next->inner;
 288         }
 289       else
 290         next = loop->outer;
 291
 292       loop->lpt_decision.decision = LPT_NONE;
 293
 294       if (dump_file)
 295         fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
 296
 297       /* Do not peel cold areas.  */
 298       if (!maybe_hot_bb_p (loop->header))
 299         {
 300           if (dump_file)
 301             fprintf (dump_file, ";; Not considering loop, cold area\n");
 302           loop = next;
 303           continue;
 304         }
 305
 306       /* Can the loop be manipulated?  */
 307       if (!can_duplicate_loop_p (loop))
 308         {
 309           if (dump_file)
 310             fprintf (dump_file,
 311                      ";; Not considering loop, cannot duplicate\n");
 312           loop = next;
 313           continue;
 314         }
 315
 316       /* Skip non-innermost loops.  */
 317       if (loop->inner)
 318         {
 319           if (dump_file)
 320             fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 321           loop = next;
 322           continue;
 323         }
 324
 325       loop->ninsns = num_loop_insns (loop);
 326       loop->av_ninsns = average_num_loop_insns (loop);
 327
 328       /* Try transformations one by one in decreasing order of
 329          priority.  */
 330
 331       decide_unroll_constant_iterations (loop, flags);
 332       if (loop->lpt_decision.decision == LPT_NONE)
 333         decide_unroll_runtime_iterations (loop, flags);
 334       if (loop->lpt_decision.decision == LPT_NONE)
 335         decide_unroll_stupid (loop, flags);
 336       if (loop->lpt_decision.decision == LPT_NONE)
 337         decide_peel_simple (loop, flags);
 338
 339       loop = next;
 340     }
 341 }
 342
 343 /* Decide whether the LOOP is once rolling and suitable for complete
 344    peeling.  */
 345 static void
 346 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 347 {
 348   struct niter_desc *desc;
 349
 350   if (dump_file)
 351     fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
 352
 353   /* Is the loop small enough?  */
 354   if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
 355     {
 356       if (dump_file)
 357         fprintf (dump_file, ";; Not considering loop, is too big\n");
 358       return;
 359     }
 360
 361   /* Check for simple loops.  */
 362   desc = get_simple_loop_desc (loop);
 363
 364   /* Check number of iterations.  */
 365   if (!desc->simple_p
 366       || desc->assumptions
 367       || desc->infinite
 368       || !desc->const_iter
 369       || desc->niter != 0)
 370     {
 371       if (dump_file)
 372         fprintf (dump_file,
 373                  ";; Unable to prove that the loop rolls exactly once\n");
 374       return;
 375     }
 376
 377   /* Success.  */
 378   if (dump_file)
 379     fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
 380   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 381 }
 382
 383 /* Decide whether the LOOP is suitable for complete peeling.  */
 384 static void
 385 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 386 {
 387   unsigned npeel;
 388   struct niter_desc *desc;
 389
 390   if (dump_file)
 391     fprintf (dump_file, "\n;; Considering peeling completely\n");
 392
 393   /* Skip non-innermost loops.  */
 394   if (loop->inner)
 395     {
 396       if (dump_file)
 397         fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 398       return;
 399     }
 400
 401   /* Do not peel cold areas.  */
 402   if (!maybe_hot_bb_p (loop->header))
 403     {
 404       if (dump_file)
 405         fprintf (dump_file, ";; Not considering loop, cold area\n");
 406       return;
 407     }
 408
 409   /* Can the loop be manipulated?  */
 410   if (!can_duplicate_loop_p (loop))
 411     {
 412       if (dump_file)
 413         fprintf (dump_file,
 414                  ";; Not considering loop, cannot duplicate\n");
 415       return;
 416     }
 417
 418   /* npeel = number of iterations to peel.  */
 419   npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
 420   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
 421     npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
 422
 423   /* Is the loop small enough?  */
 424   if (!npeel)
 425     {
 426       if (dump_file)
 427         fprintf (dump_file, ";; Not considering loop, is too big\n");
 428       return;
 429     }
 430
 431   /* Check for simple loops.  */
 432   desc = get_simple_loop_desc (loop);
 433
 434   /* Check number of iterations.  */
 435   if (!desc->simple_p
 436       || desc->assumptions
 437       || !desc->const_iter
 438       || desc->infinite)
 439     {
 440       if (dump_file)
 441         fprintf (dump_file,
 442                  ";; Unable to prove that the loop iterates constant times\n");
 443       return;
 444     }
 445
 446   if (desc->niter > npeel - 1)
 447     {
 448       if (dump_file)
 449         {
 450           fprintf (dump_file,
 451                    ";; Not peeling loop completely, rolls too much (");
 452           fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
 453           fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
 454         }
 455       return;
 456     }
 457
 458   /* Success.  */
 459   if (dump_file)
 460     fprintf (dump_file, ";; Decided to peel loop completely\n");
 461   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 462 }
 463
 464 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
 465    completely.  The transformation done:
 466
 467    for (i = 0; i < 4; i++)
 468      body;
 469
 470    ==>
 471
 472    i = 0;
 473    body; i++;
 474    body; i++;
 475    body; i++;
 476    body; i++;
 477    */
 478 static void
 479 peel_loop_completely (struct loops *loops, struct loop *loop)
 480 {
 481   sbitmap wont_exit;
 482   unsigned HOST_WIDE_INT npeel;
 483   unsigned n_remove_edges, i;
 484   edge *remove_edges, ein;
 485   struct niter_desc *desc = get_simple_loop_desc (loop);
 486   struct opt_info *opt_info = NULL;
 487
 488   npeel = desc->niter;
 489
 490   if (npeel)
 491     {
 492       wont_exit = sbitmap_alloc (npeel + 1);
 493       sbitmap_ones (wont_exit);
 494       RESET_BIT (wont_exit, 0);
 495       if (desc->noloop_assumptions)
 496         RESET_BIT (wont_exit, 1);
 497
 498       remove_edges = xcalloc (npeel, sizeof (edge));
 499       n_remove_edges = 0;
 500
 501       if (flag_split_ivs_in_unroller)
 502         opt_info = analyze_insns_in_loop (loop);
 503
 504       opt_info_start_duplication (opt_info);
 505       if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 506                 loops, npeel,
 507                 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
 508                 DLTHE_FLAG_UPDATE_FREQ))
 509         abort ();
 510
 511       free (wont_exit);
 512
 513       if (opt_info)
 514         {
 515           apply_opt_in_copies (opt_info, npeel, false, true);
 516           free_opt_info (opt_info);
 517         }
 518
 519       /* Remove the exit edges.  */
 520       for (i = 0; i < n_remove_edges; i++)
 521         remove_path (loops, remove_edges[i]);
 522       free (remove_edges);
 523     }
 524
 525   ein = desc->in_edge;
 526   free_simple_loop_desc (loop);
 527
 528   /* Now remove the unreachable part of the last iteration and cancel
 529      the loop.  */
 530   remove_path (loops, ein);
 531
 532   if (dump_file)
 533     fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
 534 }
 535
 536 /* Decide whether to unroll LOOP iterating constant number of times
 537    and how much.  */
 538
 539 static void
 540 decide_unroll_constant_iterations (struct loop *loop, int flags)
 541 {
 542   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
 543   struct niter_desc *desc;
 544
 545   if (!(flags & UAP_UNROLL))
 546     {
 547       /* We were not asked to, just return back silently.  */
 548       return;
 549     }
 550
 551   if (dump_file)
 552     fprintf (dump_file,
 553              "\n;; Considering unrolling loop with constant "
 554              "number of iterations\n");
 555
 556   /* nunroll = total number of copies of the original loop body in
 557      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 558   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 559   nunroll_by_av
 560     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 561   if (nunroll > nunroll_by_av)
 562     nunroll = nunroll_by_av;
 563   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 564     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 565
 566   /* Skip big loops.  */
 567   if (nunroll <= 1)
 568     {
 569       if (dump_file)
 570         fprintf (dump_file, ";; Not considering loop, is too big\n");
 571       return;
 572     }
 573
 574   /* Check for simple loops.  */
 575   desc = get_simple_loop_desc (loop);
 576
 577   /* Check number of iterations.  */
 578   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
 579     {
 580       if (dump_file)
 581         fprintf (dump_file,
 582                  ";; Unable to prove that the loop iterates constant times\n");
 583       return;
 584     }
 585
 586   /* Check whether the loop rolls enough to consider.  */
 587   if (desc->niter < 2 * nunroll)
 588     {
 589       if (dump_file)
 590         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 591       return;
 592     }
 593
 594   /* Success; now compute number of iterations to unroll.  We alter
 595      nunroll so that as few as possible copies of loop body are
 596      necessary, while still not decreasing the number of unrollings
 597      too much (at most by 1).  */
 598   best_copies = 2 * nunroll + 10;
 599
 600   i = 2 * nunroll + 2;
 601   if (i - 1 >= desc->niter)
 602     i = desc->niter - 2;
 603
 604   for (; i >= nunroll - 1; i--)
 605     {
 606       unsigned exit_mod = desc->niter % (i + 1);
 607
 608       if (!loop_exit_at_end_p (loop))
 609         n_copies = exit_mod + i + 1;
 610       else if (exit_mod != (unsigned) i
 611                || desc->noloop_assumptions != NULL_RTX)
 612         n_copies = exit_mod + i + 2;
 613       else
 614         n_copies = i + 1;
 615
 616       if (n_copies < best_copies)
 617         {
 618           best_copies = n_copies;
 619           best_unroll = i;
 620         }
 621     }
 622
 623   if (dump_file)
 624     fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
 625              best_unroll + 1, best_copies, nunroll);
 626
 627   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
 628   loop->lpt_decision.times = best_unroll;
 629
 630   if (dump_file)
 631     fprintf (dump_file,
 632              ";; Decided to unroll the constant times rolling loop, %d times.\n",
 633              loop->lpt_decision.times);
 634 }
 635
 636 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
 637    times, peeling the leftover iterations first.  The transformation does this:
 638
 639    for (i = 0; i < 102; i++)
 640      body;
 641
 642    ==>
 643
 644    i = 0;
 645    body; i++;
 646    body; i++;
 647    while (i < 102)
 648      {
 649        body; i++;
 650        body; i++;
 651        body; i++;
 652        body; i++;
 653      }
 654   */
 655 static void
 656 unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
 657 {
 658   unsigned HOST_WIDE_INT niter;
 659   unsigned exit_mod;
 660   sbitmap wont_exit;
 661   unsigned n_remove_edges, i;
 662   edge *remove_edges;
 663   unsigned max_unroll = loop->lpt_decision.times;
 664   struct niter_desc *desc = get_simple_loop_desc (loop);
 665   bool exit_at_end = loop_exit_at_end_p (loop);
 666   struct opt_info *opt_info = NULL;
 667
 668   niter = desc->niter;
 669
 670   /* Should not get here (such loop should be peeled instead).  */
 671   gcc_assert (niter > max_unroll + 1);
 672   /* Iterations left over when NITER is cut into groups of MAX_UNROLL + 1.  */
 673   exit_mod = niter % (max_unroll + 1);
 674
 675   wont_exit = sbitmap_alloc (max_unroll + 1);
 676   sbitmap_ones (wont_exit);
 677
 678   remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
 679   n_remove_edges = 0;
 680   if (flag_split_ivs_in_unroller
 681       || flag_variable_expansion_in_unroller)
 682     opt_info = analyze_insns_in_loop (loop);
 683
 684   if (!exit_at_end)
 685     {
 686       /* The exit is not at the end of the loop; leave the exit test
 687          in the first copy, so that loops that start with a test
 688          of the exit condition have a continuous body after unrolling.  */
 689
 690       if (dump_file)
 691         fprintf (dump_file, ";; Condition on beginning of loop.\n");
 692
 693       /* Peel exit_mod iterations in front of the loop.  */
 694       RESET_BIT (wont_exit, 0);
 695       if (desc->noloop_assumptions)
 696         RESET_BIT (wont_exit, 1);
 697
 698       if (exit_mod)
 699         {
 700           opt_info_start_duplication (opt_info);
 701           if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 702                                               loops, exit_mod,
 703                                               wont_exit, desc->out_edge,
 704                                               remove_edges, &n_remove_edges,
 705                                               DLTHE_FLAG_UPDATE_FREQ))
 706             abort ();
 707
 708           if (opt_info && exit_mod > 1)
 709             apply_opt_in_copies (opt_info, exit_mod, false, false);
 710           /* The peeled iterations no longer belong to the loop.  */
 711           desc->noloop_assumptions = NULL_RTX;
 712           desc->niter -= exit_mod;
 713           desc->niter_max -= exit_mod;
 714         }
 715
 716       SET_BIT (wont_exit, 1);
 717     }
 718   else
 719     {
 720       /* Leave the exit test in the last copy, for the same reason as above,
 721          if the loop tests the condition at the end of the loop body.  */
 722
 723       if (dump_file)
 724         fprintf (dump_file, ";; Condition on end of loop.\n");
 725
 726       /* We know that niter >= max_unroll + 2; so we need not care about the
 727          case when we would exit before reaching the loop.  So just peel
 728          exit_mod + 1 iterations.  */
 729       if (exit_mod != max_unroll
 730           || desc->noloop_assumptions)
 731         {
 732           RESET_BIT (wont_exit, 0);
 733           if (desc->noloop_assumptions)
 734             RESET_BIT (wont_exit, 1);
 735
 736           opt_info_start_duplication (opt_info);
 737           if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 738                 loops, exit_mod + 1,
 739                 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
 740                 DLTHE_FLAG_UPDATE_FREQ))
 741             abort ();
 742
 743           if (opt_info && exit_mod > 0)
 744             apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
 745           /* The peeled iterations no longer belong to the loop.  */
 746           desc->niter -= exit_mod + 1;
 747           desc->niter_max -= exit_mod + 1;
 748           desc->noloop_assumptions = NULL_RTX;
 749
 750           SET_BIT (wont_exit, 0);
 751           SET_BIT (wont_exit, 1);
 752         }
 753
 754       RESET_BIT (wont_exit, max_unroll);
 755     }
 756
 757   /* Now unroll the loop: put MAX_UNROLL copies of it on the latch edge.  */
 758
 759   opt_info_start_duplication (opt_info);
 760   if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 761                 loops, max_unroll,
 762                 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
 763                 DLTHE_FLAG_UPDATE_FREQ))
 764     abort ();
 765
 766   if (opt_info)
 767     {
 768       apply_opt_in_copies (opt_info, max_unroll, true, true);
 769       free_opt_info (opt_info);
 770     }
 771
 772   free (wont_exit);
 773
 774   if (exit_at_end)
 775     {
 776       basic_block exit_block = desc->in_edge->src->rbi->copy;
 777       /* Find a new in and out edge; they are in the last copy we have made.  */
 778
 779       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
 780         {
 781           desc->out_edge = EDGE_SUCC (exit_block, 0);
 782           desc->in_edge = EDGE_SUCC (exit_block, 1);
 783         }
 784       else
 785         {
 786           desc->out_edge = EDGE_SUCC (exit_block, 1);
 787           desc->in_edge = EDGE_SUCC (exit_block, 0);
 788         }
 789     }
 790   /* Rescale the iteration counts by the MAX_UNROLL + 1 bodies per pass.  */
 791   desc->niter /= max_unroll + 1;
 792   desc->niter_max /= max_unroll + 1;
 793   desc->niter_expr = GEN_INT (desc->niter);
 794
 795   /* Remove the edges collected in REMOVE_EDGES by the duplications.  */
 796   for (i = 0; i < n_remove_edges; i++)
 797     remove_path (loops, remove_edges[i]);
 798   free (remove_edges);
 799
 800   if (dump_file)
 801     fprintf (dump_file,
 802              ";; Unrolled loop %d times, constant # of iterations %i insns\n",
 803              max_unroll, num_loop_insns (loop));
 804 }
 805
 806 /* Decide whether to unroll LOOP iterating runtime computable number of times
 807    and how much.  */
 808 static void
 809 decide_unroll_runtime_iterations (struct loop *loop, int flags)
 810 {
 811   unsigned nunroll, nunroll_by_av, i;
 812   struct niter_desc *desc;
 813
 814   if (!(flags & UAP_UNROLL))
 815     {
 816       /* We were not asked to, just return back silently.  */
 817       return;
 818     }
 819
 820   if (dump_file)
 821     fprintf (dump_file,
 822              "\n;; Considering unrolling loop with runtime "
 823              "computable number of iterations\n");
 824
 825   /* nunroll = total number of copies of the original loop body in
 826      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 827   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 828   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 829   if (nunroll > nunroll_by_av)
 830     nunroll = nunroll_by_av;
 831   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 832     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 833
 834   /* Skip big loops.  */
 835   if (nunroll <= 1)
 836     {
 837       if (dump_file)
 838         fprintf (dump_file, ";; Not considering loop, is too big\n");
 839       return;
 840     }
 841
 842   /* Check for simple loops.  */
 843   desc = get_simple_loop_desc (loop);
 844
 845   /* Check simpleness.  */
 846   if (!desc->simple_p || desc->assumptions)
 847     {
 848       if (dump_file)
 849         fprintf (dump_file,
 850                  ";; Unable to prove that the number of iterations "
 851                  "can be counted in runtime\n");
 852       return;
 853     }
 854
 855   if (desc->const_iter)
 856     {
 857       if (dump_file)
 858         fprintf (dump_file, ";; Loop iterates constant times\n");
 859       return;
 860     }
 861
 862   /* If we have profile feedback, check whether the loop rolls.  */
 863   if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
 864     {
 865       if (dump_file)
 866         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 867       return;
 868     }
 869
 870   /* Success; now force nunroll to be power of 2, as we are unable to
 871      cope with overflows in computation of number of iterations.  */
 872   for (i = 1; 2 * i <= nunroll; i *= 2)
 873     continue;
 874
 875   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
 876   loop->lpt_decision.times = i - 1;
 877
 878   if (dump_file)
 879     fprintf (dump_file,
 880              ";; Decided to unroll the runtime computable "
 881              "times rolling loop, %d times.\n",
 882              loop->lpt_decision.times);
 883 }
 884
 885 /* Unroll LOOP, whose number of iterations we are able to compute at runtime,
 886    LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this (with some
 887    extra care for the case n < 0):
 888
 889    for (i = 0; i < n; i++)
 890      body;
 891
 892    ==>
 893
 894    i = 0;
 895    mod = n % 4;
 896
 897    switch (mod)
 898      {
 899        case 3:
 900          body; i++;
 901        case 2:
 902          body; i++;
 903        case 1:
 904          body; i++;
 905        case 0: ;
 906      }
 907
 908    while (i < n)
 909      {
 910        body; i++;
 911        body; i++;
 912        body; i++;
 913        body; i++;
 914      }
 915    */
 916 static void
 917 unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
 918 {
 919   rtx old_niter, niter, init_code, branch_code, tmp;
 920   unsigned i, j, p;
 921   basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
 922   unsigned n_dom_bbs;
 923   sbitmap wont_exit;
 924   int may_exit_copy;
 925   unsigned n_peel, n_remove_edges;
 926   edge *remove_edges, e;
 927   bool extra_zero_check, last_may_exit;
 928   unsigned max_unroll = loop->lpt_decision.times;
 929   struct niter_desc *desc = get_simple_loop_desc (loop);
 930   bool exit_at_end = loop_exit_at_end_p (loop);
 931   struct opt_info *opt_info = NULL;
 932
 933   if (flag_split_ivs_in_unroller
 934       || flag_variable_expansion_in_unroller)
 935     opt_info = analyze_insns_in_loop (loop);
 936
 937   /* Remember blocks whose dominators will have to be updated.  */
 938   dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
 939   n_dom_bbs = 0;
 940
 941   body = get_loop_body (loop);
 942   for (i = 0; i < loop->num_nodes; i++)
 943     {
 944       unsigned nldom;
 945       basic_block *ldom;
 946
 947       nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
 948       for (j = 0; j < nldom; j++)
 949         if (!flow_bb_inside_loop_p (loop, ldom[j]))
 950           dom_bbs[n_dom_bbs++] = ldom[j];
 951
 952       free (ldom);
 953     }
 954   free (body);
 955
 956   if (!exit_at_end)
 957     {
 958       /* Leave exit in first copy (for explanation why see comment in
 959          unroll_loop_constant_iterations).  */
 960       may_exit_copy = 0;
 961       n_peel = max_unroll - 1;
 962       extra_zero_check = true;
 963       last_may_exit = false;
 964     }
 965   else
 966     {
 967       /* Leave exit in last copy (for explanation why see comment in
 968          unroll_loop_constant_iterations).  */
 969       may_exit_copy = max_unroll;
 970       n_peel = max_unroll;
 971       extra_zero_check = false;
 972       last_may_exit = true;
 973     }
 974
 975   /* Get expression for number of iterations.  */
 976   start_sequence ();
 977   old_niter = niter = gen_reg_rtx (desc->mode);
 978   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
 979   if (tmp != niter)
 980     emit_move_insn (niter, tmp);
 981
 982   /* Count modulo by ANDing it with max_unroll; we use the fact that
 983      the number of unrollings is a power of two, and thus this is correct
 984      even if there is overflow in the computation.  */
 985   niter = expand_simple_binop (desc->mode, AND,
 986                                niter,
 987                                GEN_INT (max_unroll),
 988                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
 989
 990   init_code = get_insns ();
 991   end_sequence ();
 992
 993   /* Precondition the loop.  */
 994   loop_split_edge_with (loop_preheader_edge (loop), init_code);
 995
 996   remove_edges = xcalloc (max_unroll + n_peel + 1, sizeof (edge));
 997   n_remove_edges = 0;
 998
 999   wont_exit = sbitmap_alloc (max_unroll + 2);
1000
1001   /* Peel the first copy of loop body (almost always we must leave exit test
1002      here; the only exception is when we have extra zero check and the number
1003      of iterations is reliable.  Also record the place of (possible) extra
1004      zero check.  */
1005   sbitmap_zero (wont_exit);
1006   if (extra_zero_check
1007       && !desc->noloop_assumptions)
1008     SET_BIT (wont_exit, 1);
1009   ezc_swtch = loop_preheader_edge (loop)->src;
1010   if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1011                 loops, 1,
1012                 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
1013                 DLTHE_FLAG_UPDATE_FREQ))
1014     abort ();
1015
1016   /* Record the place where switch will be built for preconditioning.  */
1017   swtch = loop_split_edge_with (loop_preheader_edge (loop),
1018                                 NULL_RTX);
1019
1020   for (i = 0; i < n_peel; i++)
1021     {
1022       /* Peel the copy.  */
1023       sbitmap_zero (wont_exit);
1024       if (i != n_peel - 1 || !last_may_exit)
1025         SET_BIT (wont_exit, 1);
1026       if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1027                 loops, 1,
1028                 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
1029                 DLTHE_FLAG_UPDATE_FREQ))
1030         abort ();
1031
1032       /* Create item for switch.  */
1033       j = n_peel - i - (extra_zero_check ? 0 : 1);
1034       p = REG_BR_PROB_BASE / (i + 2);
1035
1036       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1037       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1038                                           block_label (preheader), p, NULL_RTX);
1039
1040       swtch = loop_split_edge_with (EDGE_PRED (swtch, 0), branch_code);
1041       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1042       EDGE_SUCC (swtch, 0)->probability = REG_BR_PROB_BASE - p;
1043       e = make_edge (swtch, preheader,
1044                      EDGE_SUCC (swtch, 0)->flags & EDGE_IRREDUCIBLE_LOOP);
1045       e->probability = p;
1046     }
1047
1048   if (extra_zero_check)
1049     {
1050       /* Add branch for zero iterations.  */
1051       p = REG_BR_PROB_BASE / (max_unroll + 1);
1052       swtch = ezc_swtch;
1053       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1054       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1055                                           block_label (preheader), p, NULL_RTX);
1056
1057       swtch = loop_split_edge_with (EDGE_SUCC (swtch, 0), branch_code);
1058       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1059       EDGE_SUCC (swtch, 0)->probability = REG_BR_PROB_BASE - p;
1060       e = make_edge (swtch, preheader,
1061                      EDGE_SUCC (swtch, 0)->flags & EDGE_IRREDUCIBLE_LOOP);
1062       e->probability = p;
1063     }
1064
1065   /* Recount dominators for outer blocks.  */
1066   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
1067
1068   /* And unroll loop.  */
1069
1070   sbitmap_ones (wont_exit);
1071   RESET_BIT (wont_exit, may_exit_copy);
1072   opt_info_start_duplication (opt_info);
1073
1074   if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1075                 loops, max_unroll,
1076                 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
1077                 DLTHE_FLAG_UPDATE_FREQ))
1078     abort ();
1079
1080   if (opt_info)
1081     {
1082       apply_opt_in_copies (opt_info, max_unroll, true, true);
1083       free_opt_info (opt_info);
1084     }
1085
1086   free (wont_exit);
1087
1088   if (exit_at_end)
1089     {
1090       basic_block exit_block = desc->in_edge->src->rbi->copy;
1091       /* Find a new in and out edge; they are in the last copy we have made.  */
1092
1093       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1094         {
1095           desc->out_edge = EDGE_SUCC (exit_block, 0);
1096           desc->in_edge = EDGE_SUCC (exit_block, 1);
1097         }
1098       else
1099         {
1100           desc->out_edge = EDGE_SUCC (exit_block, 1);
1101           desc->in_edge = EDGE_SUCC (exit_block, 0);
1102         }
1103     }
1104
1105   /* Remove the edges.  */
1106   for (i = 0; i < n_remove_edges; i++)
1107     remove_path (loops, remove_edges[i]);
1108   free (remove_edges);
1109
1110   /* We must be careful when updating the number of iterations due to
1111      preconditioning and the fact that the value must be valid at entry
1112      of the loop.  After passing through the above code, we see that
1113      the correct new number of iterations is this:  */
1114   gcc_assert (!desc->const_iter);
1115   desc->niter_expr =
1116     simplify_gen_binary (UDIV, desc->mode, old_niter, GEN_INT (max_unroll + 1));
1117   desc->niter_max /= max_unroll + 1;
1118   if (exit_at_end)
1119     {
1120       desc->niter_expr =
1121         simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1122       desc->noloop_assumptions = NULL_RTX;
1123       desc->niter_max--;
1124     }
1125
1126   if (dump_file)
1127     fprintf (dump_file,
1128              ";; Unrolled loop %d times, counting # of iterations "
1129              "in runtime, %i insns\n",
1130              max_unroll, num_loop_insns (loop));
1131 }
1132
1133 /* Decide whether to simply peel LOOP and how much.  */
1134 static void
1135 decide_peel_simple (struct loop *loop, int flags)
1136 {
1137   unsigned npeel;
1138   struct niter_desc *desc;
1139
1140   if (!(flags & UAP_PEEL))
1141     {
1142       /* We were not asked to, just return back silently.  */
1143       return;
1144     }
1145
1146   if (dump_file)
1147     fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1148
1149   /* npeel = number of iterations to peel.  */
1150   npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1151   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1152     npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1153
1154   /* Skip big loops.  */
1155   if (!npeel)
1156     {
1157       if (dump_file)
1158         fprintf (dump_file, ";; Not considering loop, is too big\n");
1159       return;
1160     }
1161
1162   /* Check for simple loops.  */
1163   desc = get_simple_loop_desc (loop);
1164
1165   /* Check number of iterations.  */
1166   if (desc->simple_p && !desc->assumptions && desc->const_iter)
1167     {
1168       if (dump_file)
1169         fprintf (dump_file, ";; Loop iterates constant times\n");
1170       return;
1171     }
1172
1173   /* Do not simply peel loops with branches inside -- it increases number
1174      of mispredicts.  */
1175   if (num_loop_branches (loop) > 1)
1176     {
1177       if (dump_file)
1178         fprintf (dump_file, ";; Not peeling, contains branches\n");
1179       return;
1180     }
1181
1182   if (loop->header->count)
1183     {
1184       unsigned niter = expected_loop_iterations (loop);
1185       if (niter + 1 > npeel)
1186         {
1187           if (dump_file)
1188             {
1189               fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1190               fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1191                        (HOST_WIDEST_INT) (niter + 1));
1192               fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1193                        npeel);
1194             }
1195           return;
1196         }
1197       npeel = niter + 1;
1198     }
1199   else
1200     {
1201       /* For now we have no good heuristics to decide whether loop peeling
1202          will be effective, so disable it.  */
1203       if (dump_file)
1204         fprintf (dump_file,
1205                  ";; Not peeling loop, no evidence it will be profitable\n");
1206       return;
1207     }
1208
1209   /* Success.  */
1210   loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1211   loop->lpt_decision.times = npeel;
1212
1213   if (dump_file)
1214     fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1215              loop->lpt_decision.times);
1216 }
1217
1218 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1219    while (cond)
1220      body;
1221
1222    ==>
1223
1224    if (!cond) goto end;
1225    body;
1226    if (!cond) goto end;
1227    body;
1228    while (cond)
1229      body;
1230    end: ;
1231    */
1232 static void
1233 peel_loop_simple (struct loops *loops, struct loop *loop)
1234 {
1235   sbitmap wont_exit;
1236   unsigned npeel = loop->lpt_decision.times;
1237   struct niter_desc *desc = get_simple_loop_desc (loop);
1238   struct opt_info *opt_info = NULL;
1239
1240   if (flag_split_ivs_in_unroller && npeel > 1)
1241     opt_info = analyze_insns_in_loop (loop);
1242
1243   wont_exit = sbitmap_alloc (npeel + 1);
1244   sbitmap_zero (wont_exit);
1245
1246   opt_info_start_duplication (opt_info);
1247
1248   if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1249                 loops, npeel, wont_exit, NULL, NULL, NULL,
1250                 DLTHE_FLAG_UPDATE_FREQ))
1251     abort ();
1252
1253   free (wont_exit);
1254
1255   if (opt_info)
1256     {
1257       apply_opt_in_copies (opt_info, npeel, false, false);
1258       free_opt_info (opt_info);
1259     }
1260
1261   if (desc->simple_p)
1262     {
1263       if (desc->const_iter)
1264         {
1265           desc->niter -= npeel;
1266           desc->niter_expr = GEN_INT (desc->niter);
1267           desc->noloop_assumptions = NULL_RTX;
1268         }
1269       else
1270         {
1271           /* We cannot just update niter_expr, as its value might be clobbered
1272              inside loop.  We could handle this by counting the number into
1273              temporary just like we do in runtime unrolling, but it does not
1274              seem worthwhile.  */
1275           free_simple_loop_desc (loop);
1276         }
1277     }
1278   if (dump_file)
1279     fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1280 }
1281
1282 /* Decide whether to unroll LOOP stupidly and how much.  */
1283 static void
1284 decide_unroll_stupid (struct loop *loop, int flags)
1285 {
1286   unsigned nunroll, nunroll_by_av, i;
1287   struct niter_desc *desc;
1288
1289   if (!(flags & UAP_UNROLL_ALL))
1290     {
1291       /* We were not asked to, just return back silently.  */
1292       return;
1293     }
1294
1295   if (dump_file)
1296     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1297
1298   /* nunroll = total number of copies of the original loop body in
1299      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1300   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1301   nunroll_by_av
1302     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1303   if (nunroll > nunroll_by_av)
1304     nunroll = nunroll_by_av;
1305   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1306     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1307
1308   /* Skip big loops.  */
1309   if (nunroll <= 1)
1310     {
1311       if (dump_file)
1312         fprintf (dump_file, ";; Not considering loop, is too big\n");
1313       return;
1314     }
1315
1316   /* Check for simple loops.  */
1317   desc = get_simple_loop_desc (loop);
1318
1319   /* Check simpleness.  */
1320   if (desc->simple_p && !desc->assumptions)
1321     {
1322       if (dump_file)
1323         fprintf (dump_file, ";; The loop is simple\n");
1324       return;
1325     }
1326
1327   /* Do not unroll loops with branches inside -- it increases number
1328      of mispredicts.  */
1329   if (num_loop_branches (loop) > 1)
1330     {
1331       if (dump_file)
1332         fprintf (dump_file, ";; Not unrolling, contains branches\n");
1333       return;
1334     }
1335
1336   /* If we have profile feedback, check whether the loop rolls.  */
1337   if (loop->header->count
1338       && expected_loop_iterations (loop) < 2 * nunroll)
1339     {
1340       if (dump_file)
1341         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1342       return;
1343     }
1344
1345   /* Success.  Now force nunroll to be power of 2, as it seems that this
1346      improves results (partially because of better alignments, partially
1347      because of some dark magic).  */
1348   for (i = 1; 2 * i <= nunroll; i *= 2)
1349     continue;
1350
1351   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1352   loop->lpt_decision.times = i - 1;
1353
1354   if (dump_file)
1355     fprintf (dump_file,
1356              ";; Decided to unroll the loop stupidly, %d times.\n",
1357              loop->lpt_decision.times);
1358 }
1359
1360 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1361    while (cond)
1362      body;
1363
1364    ==>
1365
1366    while (cond)
1367      {
1368        body;
1369        if (!cond) break;
1370        body;
1371        if (!cond) break;
1372        body;
1373        if (!cond) break;
1374        body;
1375      }
1376    */
1377 static void
1378 unroll_loop_stupid (struct loops *loops, struct loop *loop)
1379 {
1380   sbitmap wont_exit;
1381   unsigned nunroll = loop->lpt_decision.times;
1382   struct niter_desc *desc = get_simple_loop_desc (loop);
1383   struct opt_info *opt_info = NULL;
1384
1385   if (flag_split_ivs_in_unroller
1386       || flag_variable_expansion_in_unroller)
1387     opt_info = analyze_insns_in_loop (loop);
1388
1389
1390   wont_exit = sbitmap_alloc (nunroll + 1);
1391   sbitmap_zero (wont_exit);
1392   opt_info_start_duplication (opt_info);
1393
1394   if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1395                 loops, nunroll, wont_exit, NULL, NULL, NULL,
1396                 DLTHE_FLAG_UPDATE_FREQ))
1397     abort ();
1398
1399   if (opt_info)
1400     {
1401       apply_opt_in_copies (opt_info, nunroll, true, true);
1402       free_opt_info (opt_info);
1403     }
1404
1405   free (wont_exit);
1406
1407   if (desc->simple_p)
1408     {
1409       /* We indeed may get here provided that there are nontrivial assumptions
1410          for a loop to be really simple.  We could update the counts, but the
1411          problem is that we are unable to decide which exit will be taken
1412          (not really true in case the number of iterations is constant,
1413          but noone will do anything with this information, so we do not
1414          worry about it).  */
1415       desc->simple_p = false;
1416     }
1417
1418   if (dump_file)
1419     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1420              nunroll, num_loop_insns (loop));
1421 }
1422
1423 /* A hash function for information about insns to split.  */
1424
1425 static hashval_t
1426 si_info_hash (const void *ivts)
1427 {
1428   return htab_hash_pointer (((struct iv_to_split *) ivts)->insn);
1429 }
1430
1431 /* An equality functions for information about insns to split.  */
1432
1433 static int
1434 si_info_eq (const void *ivts1, const void *ivts2)
1435 {
1436   const struct iv_to_split *i1 = ivts1;
1437   const struct iv_to_split *i2 = ivts2;
1438
1439   return i1->insn == i2->insn;
1440 }
1441
1442 /* Return a hash for VES, which is really a "var_to_expand *".  */
1443
1444 static hashval_t
1445 ve_info_hash (const void *ves)
1446 {
1447   return htab_hash_pointer (((struct var_to_expand *) ves)->insn);
1448 }
1449
1450 /* Return true if IVTS1 and IVTS2 (which are really both of type
1451    "var_to_expand *") refer to the same instruction.  */
1452
1453 static int
1454 ve_info_eq (const void *ivts1, const void *ivts2)
1455 {
1456   const struct var_to_expand *i1 = ivts1;
1457   const struct var_to_expand *i2 = ivts2;
1458
1459   return i1->insn == i2->insn;
1460 }
1461
1462 /* Returns true if REG is referenced in one insn in LOOP.  */
1463
1464 bool
1465 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
1466 {
1467   basic_block *body, bb;
1468   unsigned i;
1469   int count_ref = 0;
1470   rtx insn;
1471
1472   body = get_loop_body (loop);
1473   for (i = 0; i < loop->num_nodes; i++)
1474     {
1475       bb = body[i];
1476
1477       FOR_BB_INSNS (bb, insn)
1478       {
1479         if (rtx_referenced_p (reg, insn))
1480           count_ref++;
1481       }
1482     }
1483   return (count_ref  == 1);
1484 }
1485
1486 /* Determine whether INSN contains an accumulator
1487    which can be expanded into separate copies,
1488    one for each copy of the LOOP body.
1489
1490    for (i = 0 ; i < n; i++)
1491      sum += a[i];
1492
1493    ==>
1494
1495    sum += a[i]
1496    ....
1497    i = i+1;
1498    sum1 += a[i]
1499    ....
1500    i = i+1
1501    sum2 += a[i];
1502    ....
1503
1504    Return NULL if INSN contains no opportunity for expansion of accumulator.
1505    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1506    information and return a pointer to it.
1507 */
1508
1509 static struct var_to_expand *
1510 analyze_insn_to_expand_var (struct loop *loop, rtx insn)
1511 {
1512   rtx set, dest, src, op1;
1513   struct var_to_expand *ves;
1514   enum machine_mode mode1, mode2;
1515
1516   set = single_set (insn);
1517   if (!set)
1518     return NULL;
1519
1520   dest = SET_DEST (set);
1521   src = SET_SRC (set);
1522
1523   if (GET_CODE (src) != PLUS
1524       && GET_CODE (src) != MINUS
1525       && GET_CODE (src) != MULT)
1526     return NULL;
1527
1528   if (!XEXP (src, 0))
1529     return NULL;
1530
1531   op1 = XEXP (src, 0);
1532
1533   if (!REG_P (dest)
1534       && !(GET_CODE (dest) == SUBREG
1535            && REG_P (SUBREG_REG (dest))))
1536     return NULL;
1537
1538   if (!rtx_equal_p (dest, op1))
1539     return NULL;
1540
1541   if (!referenced_in_one_insn_in_loop_p (loop, dest))
1542     return NULL;
1543
1544   if (rtx_referenced_p (dest, XEXP (src, 1)))
1545     return NULL;
1546
1547   mode1 = GET_MODE (dest);
1548   mode2 = GET_MODE (XEXP (src, 1));
1549   if ((FLOAT_MODE_P (mode1)
1550        || FLOAT_MODE_P (mode2))
1551       && !flag_unsafe_math_optimizations)
1552     return NULL;
1553
1554   /* Record the accumulator to expand.  */
1555   ves = xmalloc (sizeof (struct var_to_expand));
1556   ves->insn = insn;
1557   VARRAY_RTX_INIT (ves->var_expansions, 1, "var_expansions");
1558   ves->reg = copy_rtx (dest);
1559   ves->op = GET_CODE (src);
1560   ves->expansion_count = 0;
1561   ves->reuse_expansion = 0;
1562   return ves;
1563 }
1564
1565 /* Determine whether there is an induction variable in INSN that
1566    we would like to split during unrolling.
1567
1568    I.e. replace
1569
1570    i = i + 1;
1571    ...
1572    i = i + 1;
1573    ...
1574    i = i + 1;
1575    ...
1576
1577    type chains by
1578
1579    i0 = i + 1
1580    ...
1581    i = i0 + 1
1582    ...
1583    i = i0 + 2
1584    ...
1585
1586    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1587    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1588    pointer to it.  */
1589
1590 static struct iv_to_split *
1591 analyze_iv_to_split_insn (rtx insn)
1592 {
1593   rtx set, dest;
1594   struct rtx_iv iv;
1595   struct iv_to_split *ivts;
1596
1597   /* For now we just split the basic induction variables.  Later this may be
1598      extended for example by selecting also addresses of memory references.  */
1599   set = single_set (insn);
1600   if (!set)
1601     return NULL;
1602
1603   dest = SET_DEST (set);
1604   if (!REG_P (dest))
1605     return NULL;
1606
1607   if (!biv_p (insn, dest))
1608     return NULL;
1609
1610   if (!iv_analyze (insn, dest, &iv))
1611     abort ();
1612
1613   if (iv.step == const0_rtx
1614       || iv.mode != iv.extend_mode)
1615     return NULL;
1616
1617   /* Record the insn to split.  */
1618   ivts = xmalloc (sizeof (struct iv_to_split));
1619   ivts->insn = insn;
1620   ivts->base_var = NULL_RTX;
1621   ivts->step = iv.step;
1622   ivts->n_loc = 1;
1623   ivts->loc[0] = 1;
1624
1625   return ivts;
1626 }
1627
1628 /* Determines which of insns in LOOP can be optimized.
1629    Return a OPT_INFO struct with the relevant hash tables filled
1630    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1631    is undefined for the return value.  */
1632
1633 static struct opt_info *
1634 analyze_insns_in_loop (struct loop *loop)
1635 {
1636   basic_block *body, bb;
1637   unsigned i, n_edges = 0;
1638   struct opt_info *opt_info = xcalloc (1, sizeof (struct opt_info));
1639   rtx insn;
1640   struct iv_to_split *ivts = NULL;
1641   struct var_to_expand *ves = NULL;
1642   PTR *slot1;
1643   PTR *slot2;
1644   edge *edges = get_loop_exit_edges (loop, &n_edges);
1645   basic_block preheader;
1646   bool can_apply = false;
1647
1648   iv_analysis_loop_init (loop);
1649
1650   body = get_loop_body (loop);
1651
1652   if (flag_split_ivs_in_unroller)
1653     opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1654                                             si_info_hash, si_info_eq, free);
1655
1656   /* Record the loop exit bb and loop preheader before the unrolling.  */
1657   if (!loop_preheader_edge (loop)->src)
1658     {
1659       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1660       opt_info->loop_preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1661     }
1662   else
1663     opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1664
1665   if (n_edges == 1
1666       && !(edges[0]->flags & EDGE_COMPLEX)
1667       && (edges[0]->flags & EDGE_LOOP_EXIT))
1668     {
1669       opt_info->loop_exit = loop_split_edge_with (edges[0], NULL_RTX);
1670       can_apply = true;
1671     }
1672
1673   if (flag_variable_expansion_in_unroller
1674       && can_apply)
1675     opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1676                                                       ve_info_hash, ve_info_eq, free);
1677
1678   for (i = 0; i < loop->num_nodes; i++)
1679     {
1680       bb = body[i];
1681       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1682         continue;
1683
1684       FOR_BB_INSNS (bb, insn)
1685       {
1686         if (!INSN_P (insn))
1687           continue;
1688
1689         if (opt_info->insns_to_split)
1690           ivts = analyze_iv_to_split_insn (insn);
1691
1692         if (ivts)
1693           {
1694             slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1695             *slot1 = ivts;
1696             continue;
1697           }
1698
1699         if (opt_info->insns_with_var_to_expand)
1700           ves = analyze_insn_to_expand_var (loop, insn);
1701
1702         if (ves)
1703           {
1704             slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1705             *slot2 = ves;
1706           }
1707       }
1708     }
1709
1710   free (edges);
1711   free (body);
1712   return opt_info;
1713 }
1714
1715 /* Called just before loop duplication.  Records start of duplicated area
1716    to OPT_INFO.  */
1717
1718 static void
1719 opt_info_start_duplication (struct opt_info *opt_info)
1720 {
1721   if (opt_info)
1722     opt_info->first_new_block = last_basic_block;
1723 }
1724
/* Determine the number of iterations between initialization of the base
   variable and the current copy (N_COPY).  N_COPIES is the total number
   of newly created copies.  UNROLLING is true if we are unrolling
   (not peeling) the loop.

   When unrolling, the initialization is done in the original loop body
   (number 0), so the distance is simply N_COPY.  When peeling, the copy in
   which the initialization occurs has number 1 and the original loop
   (number 0) comes last, after all N_COPIES copies.  */

static unsigned
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
{
  if (unrolling)
    return n_copy;

  return n_copy != 0 ? n_copy - 1 : n_copies;
}
1749
1750 /* Locate in EXPR the expression corresponding to the location recorded
1751    in IVTS, and return a pointer to the RTX for this location.  */
1752
1753 static rtx *
1754 get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1755 {
1756   unsigned i;
1757   rtx *ret = &expr;
1758
1759   for (i = 0; i < ivts->n_loc; i++)
1760     ret = &XEXP (*ret, ivts->loc[i]);
1761
1762   return ret;
1763 }
1764
1765 /* Allocate basic variable for the induction variable chain.  Callback for
1766    htab_traverse.  */
1767
1768 static int
1769 allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
1770 {
1771   struct iv_to_split *ivts = *slot;
1772   rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1773
1774   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1775
1776   return 1;
1777 }
1778
1779 /* Insert initialization of basic variable of IVTS before INSN, taking
1780    the initial value from INSN.  */
1781
1782 static void
1783 insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1784 {
1785   rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1786   rtx seq;
1787
1788   start_sequence ();
1789   expr = force_operand (expr, ivts->base_var);
1790   if (expr != ivts->base_var)
1791     emit_move_insn (ivts->base_var, expr);
1792   seq = get_insns ();
1793   end_sequence ();
1794
1795   emit_insn_before (seq, insn);
1796 }
1797
1798 /* Replace the use of induction variable described in IVTS in INSN
1799    by base variable + DELTA * step.  */
1800
1801 static void
1802 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1803 {
1804   rtx expr, *loc, seq, incr, var;
1805   enum machine_mode mode = GET_MODE (ivts->base_var);
1806   rtx src, dest, set;
1807
1808   /* Construct base + DELTA * step.  */
1809   if (!delta)
1810     expr = ivts->base_var;
1811   else
1812     {
1813       incr = simplify_gen_binary (MULT, mode,
1814                                   ivts->step, gen_int_mode (delta, mode));
1815       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1816                                   ivts->base_var, incr);
1817     }
1818
1819   /* Figure out where to do the replacement.  */
1820   loc = get_ivts_expr (single_set (insn), ivts);
1821
1822   /* If we can make the replacement right away, we're done.  */
1823   if (validate_change (insn, loc, expr, 0))
1824     return;
1825
1826   /* Otherwise, force EXPR into a register and try again.  */
1827   start_sequence ();
1828   var = gen_reg_rtx (mode);
1829   expr = force_operand (expr, var);
1830   if (expr != var)
1831     emit_move_insn (var, expr);
1832   seq = get_insns ();
1833   end_sequence ();
1834   emit_insn_before (seq, insn);
1835
1836   if (validate_change (insn, loc, var, 0))
1837     return;
1838
1839   /* The last chance.  Try recreating the assignment in insn
1840      completely from scratch.  */
1841   set = single_set (insn);
1842   gcc_assert (set);
1843
1844   start_sequence ();
1845   *loc = var;
1846   src = copy_rtx (SET_SRC (set));
1847   dest = copy_rtx (SET_DEST (set));
1848   src = force_operand (src, dest);
1849   if (src != dest)
1850     emit_move_insn (dest, src);
1851   seq = get_insns ();
1852   end_sequence ();
1853
1854   emit_insn_before (seq, insn);
1855   delete_insn (insn);
1856 }
1857
1858
1859 /* Return one expansion of the accumulator recorded in struct VE.  */
1860
1861 static rtx
1862 get_expansion (struct var_to_expand *ve)
1863 {
1864   rtx reg;
1865
1866   if (ve->reuse_expansion == 0)
1867     reg = ve->reg;
1868   else
1869     reg = VARRAY_RTX (ve->var_expansions,  ve->reuse_expansion - 1);
1870
1871   if (VARRAY_ACTIVE_SIZE (ve->var_expansions) == (unsigned) ve->reuse_expansion)
1872     ve->reuse_expansion = 0;
1873   else
1874     ve->reuse_expansion++;
1875
1876   return reg;
1877 }
1878
1879
1880 /* Given INSN replace the uses of the accumulator recorded in VE
1881    with a new register.  */
1882
1883 static void
1884 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
1885 {
1886   rtx new_reg, set;
1887   bool really_new_expansion = false;
1888
1889   set = single_set (insn);
1890   if (!set)
1891     abort ();
1892
1893   /* Generate a new register only if the expansion limit has not been
1894      reached.  Else reuse an already existing expansion.  */
1895   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1896     {
1897       really_new_expansion = true;
1898       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1899     }
1900   else
1901     new_reg = get_expansion (ve);
1902
1903   validate_change (insn, &SET_DEST (set), new_reg, 1);
1904   validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
1905
1906   if (apply_change_group ())
1907     if (really_new_expansion)
1908       {
1909         VARRAY_PUSH_RTX (ve->var_expansions, new_reg);
1910         ve->expansion_count++;
1911       }
1912 }
1913
1914 /* Initialize the variable expansions in loop preheader.
1915    Callbacks for htab_traverse.  PLACE_P is the loop-preheader
1916    basic block where the initialization of the expansions
1917    should take place.  */
1918
1919 static int
1920 insert_var_expansion_initialization (void **slot, void *place_p)
1921 {
1922   struct var_to_expand *ve = *slot;
1923   basic_block place = (basic_block)place_p;
1924   rtx seq, var, zero_init, insn;
1925   unsigned i;
1926
1927   if (VARRAY_ACTIVE_SIZE (ve->var_expansions) == 0)
1928     return 1;
1929
1930   start_sequence ();
1931   if (ve->op == PLUS || ve->op == MINUS)
1932     for (i = 0; i < VARRAY_ACTIVE_SIZE (ve->var_expansions); i++)
1933       {
1934         var = VARRAY_RTX (ve->var_expansions, i);
1935         zero_init =  CONST0_RTX (GET_MODE (var));
1936         emit_move_insn (var, zero_init);
1937       }
1938   else if (ve->op == MULT)
1939     for (i = 0; i < VARRAY_ACTIVE_SIZE (ve->var_expansions); i++)
1940       {
1941         var = VARRAY_RTX (ve->var_expansions, i);
1942         zero_init =  CONST1_RTX (GET_MODE (var));
1943         emit_move_insn (var, zero_init);
1944       }
1945
1946   seq = get_insns ();
1947   end_sequence ();
1948
1949   insn = BB_HEAD (place);
1950   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
1951     insn = NEXT_INSN (insn);
1952
1953   emit_insn_after (seq, insn);
1954   /* Continue traversing the hash table.  */
1955   return 1;
1956 }
1957
1958 /*  Combine the variable expansions at the loop exit.
1959     Callbacks for htab_traverse.  PLACE_P is the loop exit
1960     basic block where the summation of the expansions should
1961     take place.  */
1962
1963 static int
1964 combine_var_copies_in_loop_exit (void **slot, void *place_p)
1965 {
1966   struct var_to_expand *ve = *slot;
1967   basic_block place = (basic_block)place_p;
1968   rtx sum = ve->reg;
1969   rtx expr, seq, var, insn;
1970   unsigned i;
1971
1972   if (VARRAY_ACTIVE_SIZE (ve->var_expansions) == 0)
1973     return 1;
1974
1975   start_sequence ();
1976   if (ve->op == PLUS || ve->op == MINUS)
1977     for (i = 0; i < VARRAY_ACTIVE_SIZE (ve->var_expansions); i++)
1978       {
1979         var = VARRAY_RTX (ve->var_expansions, i);
1980         sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
1981                                    var, sum);
1982       }
1983   else if (ve->op == MULT)
1984     for (i = 0; i < VARRAY_ACTIVE_SIZE (ve->var_expansions); i++)
1985       {
1986         var = VARRAY_RTX (ve->var_expansions, i);
1987         sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
1988                                    var, sum);
1989       }
1990
1991   expr = force_operand (sum, ve->reg);
1992   if (expr != ve->reg)
1993     emit_move_insn (ve->reg, expr);
1994   seq = get_insns ();
1995   end_sequence ();
1996
1997   insn = BB_HEAD (place);
1998   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
1999     insn = NEXT_INSN (insn);
2000
2001   emit_insn_after (seq, insn);
2002
2003   /* Continue traversing the hash table.  */
2004   return 1;
2005 }
2006
2007 /* Apply loop optimizations in loop copies using the
2008    data which gathered during the unrolling.  Structure
2009    OPT_INFO record that data.
2010
2011    UNROLLING is true if we unrolled (not peeled) the loop.
2012    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
2013    the loop (as it should happen in complete unrolling, but not in ordinary
2014    peeling of the loop).  */
2015
2016 static void
2017 apply_opt_in_copies (struct opt_info *opt_info,
2018                      unsigned n_copies, bool unrolling,
2019                      bool rewrite_original_loop)
2020 {
2021   unsigned i, delta;
2022   basic_block bb, orig_bb;
2023   rtx insn, orig_insn, next;
2024   struct iv_to_split ivts_templ, *ivts;
2025   struct var_to_expand ve_templ, *ves;
2026
2027   /* Sanity check -- we need to put initialization in the original loop
2028      body.  */
2029   gcc_assert (!unrolling || rewrite_original_loop);
2030
2031   /* Allocate the basic variables (i0).  */
2032   if (opt_info->insns_to_split)
2033     htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL);
2034
2035   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2036     {
2037       bb = BASIC_BLOCK (i);
2038       orig_bb = bb->rbi->original;
2039
2040       delta = determine_split_iv_delta (bb->rbi->copy_number, n_copies,
2041                                         unrolling);
2042       orig_insn = BB_HEAD (orig_bb);
2043       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2044         {
2045           next = NEXT_INSN (insn);
2046           if (!INSN_P (insn))
2047             continue;
2048
2049           while (!INSN_P (orig_insn))
2050             orig_insn = NEXT_INSN (orig_insn);
2051
2052           ivts_templ.insn = orig_insn;
2053           ve_templ.insn = orig_insn;
2054
2055           /* Apply splitting iv optimization.  */
2056           if (opt_info->insns_to_split)
2057             {
2058               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2059
2060               if (ivts)
2061                 {
2062 #ifdef ENABLE_CHECKING
2063                   gcc_assert (rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)));
2064 #endif
2065
2066                   if (!delta)
2067                     insert_base_initialization (ivts, insn);
2068                   split_iv (ivts, insn, delta);
2069                 }
2070             }
2071           /* Apply variable expansion optimization.  */
2072           if (unrolling && opt_info->insns_with_var_to_expand)
2073             {
2074               ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2075               if (ves)
2076                 {
2077 #ifdef ENABLE_CHECKING
2078                   gcc_assert (rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)));
2079 #endif
2080                   expand_var_during_unrolling (ves, insn);
2081                 }
2082             }
2083           orig_insn = NEXT_INSN (orig_insn);
2084         }
2085     }
2086
2087   if (!rewrite_original_loop)
2088     return;
2089
2090   /* Initialize the variable expansions in the loop preheader
2091      and take care of combining them at the loop exit.  */
2092   if (opt_info->insns_with_var_to_expand)
2093     {
2094       htab_traverse (opt_info->insns_with_var_to_expand,
2095                      insert_var_expansion_initialization,
2096                      opt_info->loop_preheader);
2097       htab_traverse (opt_info->insns_with_var_to_expand,
2098                      combine_var_copies_in_loop_exit,
2099                      opt_info->loop_exit);
2100     }
2101
2102   /* Rewrite also the original loop body.  Find them as originals of the blocks
2103      in the last copied iteration, i.e. those that have
2104      bb->rbi->original->copy == bb.  */
2105   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2106     {
2107       bb = BASIC_BLOCK (i);
2108       orig_bb = bb->rbi->original;
2109       if (orig_bb->rbi->copy != bb)
2110         continue;
2111
2112       delta = determine_split_iv_delta (0, n_copies, unrolling);
2113       for (orig_insn = BB_HEAD (orig_bb);
2114            orig_insn != NEXT_INSN (BB_END (bb));
2115            orig_insn = next)
2116         {
2117           next = NEXT_INSN (orig_insn);
2118
2119           if (!INSN_P (orig_insn))
2120             continue;
2121
2122           ivts_templ.insn = orig_insn;
2123           if (opt_info->insns_to_split)
2124             {
2125               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2126               if (ivts)
2127                 {
2128                   if (!delta)
2129                     insert_base_initialization (ivts, orig_insn);
2130                   split_iv (ivts, orig_insn, delta);
2131                   continue;
2132                 }
2133             }
2134
2135         }
2136     }
2137 }
2138
2139 /*  Release the data structures used for the variable expansion
2140     optimization.  Callbacks for htab_traverse.  */
2141
2142 static int
2143 release_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
2144 {
2145   struct var_to_expand *ve = *slot;
2146
2147   VARRAY_CLEAR (ve->var_expansions);
2148
2149   /* Continue traversing the hash table.  */
2150   return 1;
2151 }
2152
2153 /* Release OPT_INFO.  */
2154
2155 static void
2156 free_opt_info (struct opt_info *opt_info)
2157 {
2158   if (opt_info->insns_to_split)
2159     htab_delete (opt_info->insns_to_split);
2160   if (opt_info->insns_with_var_to_expand)
2161     {
2162       htab_traverse (opt_info->insns_with_var_to_expand,
2163                      release_var_copies, NULL);
2164       htab_delete (opt_info->insns_with_var_to_expand);
2165     }
2166   free (opt_info);
2167 }