gcc/loop-unroll.c

   1 /* Loop unrolling and peeling.
   2    Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 2, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  19 02111-1307, USA.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "rtl.h"
  26 #include "hard-reg-set.h"
  27 #include "obstack.h"
  28 #include "basic-block.h"
  29 #include "cfgloop.h"
  30 #include "cfglayout.h"
  31 #include "params.h"
  32 #include "output.h"
  33 #include "expr.h"
  34 #include "hashtab.h"
  35 #include "recog.h"
  36 #include "varray.h"
  37
  38 /* This pass performs loop unrolling and peeling.  We only perform these
  39    optimizations on innermost loops (with single exception) because
  40    the impact on performance is greatest here, and we want to avoid
  41    unnecessary code size growth.  The gain is caused by greater sequentiality
  42    of code, better code to optimize for further passes and in some cases
  43    by fewer testings of exit conditions.  The main problem is code growth,
  44    that impacts performance negatively due to effect of caches.
  45
  46    What we do:
  47
  48    -- complete peeling of once-rolling loops; this is the above mentioned
  49       exception, as this causes loop to be cancelled completely and
  50       does not cause code growth
  51    -- complete peeling of loops that roll (small) constant times.
  52    -- simple peeling of first iterations of loops that do not roll much
  53       (according to profile feedback)
  54    -- unrolling of loops that roll constant times; this is almost always
  55       win, as we get rid of exit condition tests.
  56    -- unrolling of loops that roll number of times that we can compute
  57       in runtime; we also get rid of exit condition tests here, but there
  58       is the extra expense for calculating the number of iterations
  59    -- simple unrolling of remaining loops; this is performed only if we
  60       are asked to, as the gain is questionable in this case and often
  61       it may even slow down the code
  62    For more detailed descriptions of each of those, see comments at
  63    appropriate function below.
  64
  65    There is a lot of parameters (defined and described in params.def) that
  66    control how much we unroll/peel.
  67
  68    ??? A great problem is that we don't have a good way how to determine
  69    how many times we should unroll the loop; the experiments I have made
  70    showed that this choice may affect performance in order of several %.
  71    */
  72
  73 /* Information about induction variables to split.  */
  74
  75 struct iv_to_split
  76 {
  77   rtx insn;             /* The insn in that the induction variable occurs.  */
  78   rtx base_var;         /* The variable on that the values in the further
  79                            iterations are based.  */
  80   rtx step;             /* Step of the induction variable.  */
  81   unsigned n_loc;
  82   unsigned loc[3];      /* Location where the definition of the induction
  83                            variable occurs in the insn.  For example if
  84                            N_LOC is 2, the expression is located at
  85                            XEXP (XEXP (single_set, loc[0]), loc[1]).  */
  86 };
  87
  88 /* Information about accumulators to expand.  */
  89
  90 struct var_to_expand
  91 {
  92   rtx insn;                        /* The insn in that the variable expansion occurs.  */
  93   rtx reg;                         /* The accumulator which is expanded.  */
  94   varray_type var_expansions;      /* The copies of the accumulator which is expanded.  */
  95   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
  96                                       or multiplication.  */
  97   int expansion_count;             /* Count the number of expansions generated so far.  */
  98   int reuse_expansion;             /* The expansion we intend to reuse to expand
  99                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
 100                                       the original accumulator.  Else use
 101                                       var_expansions[REUSE_EXPANSION - 1].  */
 102 };
 103
 104 /* Information about optimization applied in
 105    the unrolled loop.  */
 106
 107 struct opt_info
 108 {
 109   htab_t insns_to_split;           /* A hashtable of insns to split.  */
 110   htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
 111                                       to expand.  */
 112   unsigned first_new_block;        /* The first basic block that was
 113                                       duplicated.  */
 114   basic_block loop_exit;           /* The loop exit basic block.  */
 115   basic_block loop_preheader;      /* The loop preheader basic block.  */
 116 };
 117
 118 static void decide_unrolling_and_peeling (struct loops *, int);
 119 static void peel_loops_completely (struct loops *, int);
 120 static void decide_peel_simple (struct loop *, int);
 121 static void decide_peel_once_rolling (struct loop *, int);
 122 static void decide_peel_completely (struct loop *, int);
 123 static void decide_unroll_stupid (struct loop *, int);
 124 static void decide_unroll_constant_iterations (struct loop *, int);
 125 static void decide_unroll_runtime_iterations (struct loop *, int);
 126 static void peel_loop_simple (struct loops *, struct loop *);
 127 static void peel_loop_completely (struct loops *, struct loop *);
 128 static void unroll_loop_stupid (struct loops *, struct loop *);
 129 static void unroll_loop_constant_iterations (struct loops *, struct loop *);
 130 static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
 131 static struct opt_info *analyze_insns_in_loop (struct loop *);
 132 static void opt_info_start_duplication (struct opt_info *);
 133 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
 134 static void free_opt_info (struct opt_info *);
 135 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
 136 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
 137 static struct iv_to_split *analyze_iv_to_split_insn (rtx);
 138 static void expand_var_during_unrolling (struct var_to_expand *, rtx);
 139 static int insert_var_expansion_initialization (void **, void *);
 140 static int combine_var_copies_in_loop_exit (void **, void *);
 141 static int release_var_copies (void **, void *);
 142 static rtx get_expansion (struct var_to_expand *);
 143
 144 /* Unroll and/or peel (depending on FLAGS) LOOPS.  */
 145 void
 146 unroll_and_peel_loops (struct loops *loops, int flags)
 147 {
 148   struct loop *loop, *next;
 149   bool check;
 150
 151   /* First perform complete loop peeling (it is almost surely a win,
 152      and affects parameters for further decision a lot).  */
 153   peel_loops_completely (loops, flags);
 154
 155   /* Now decide rest of unrolling and peeling.  */
 156   decide_unrolling_and_peeling (loops, flags);
 157
 158   loop = loops->tree_root;
 159   while (loop->inner)
 160     loop = loop->inner;
 161
 162   /* Scan the loops, inner ones first.  */
 163   while (loop != loops->tree_root)
 164     {
 165       if (loop->next)
 166         {
 167           next = loop->next;
 168           while (next->inner)
 169             next = next->inner;
 170         }
 171       else
 172         next = loop->outer;
 173
 174       check = true;
 175       /* And perform the appropriate transformations.  */
 176       switch (loop->lpt_decision.decision)
 177         {
 178         case LPT_PEEL_COMPLETELY:
 179           /* Already done.  */
 180           gcc_unreachable ();
 181         case LPT_PEEL_SIMPLE:
 182           peel_loop_simple (loops, loop);
 183           break;
 184         case LPT_UNROLL_CONSTANT:
 185           unroll_loop_constant_iterations (loops, loop);
 186           break;
 187         case LPT_UNROLL_RUNTIME:
 188           unroll_loop_runtime_iterations (loops, loop);
 189           break;
 190         case LPT_UNROLL_STUPID:
 191           unroll_loop_stupid (loops, loop);
 192           break;
 193         case LPT_NONE:
 194           check = false;
 195           break;
 196         default:
 197           gcc_unreachable ();
 198         }
 199       if (check)
 200         {
 201 #ifdef ENABLE_CHECKING
 202           verify_dominators (CDI_DOMINATORS);
 203           verify_loop_structure (loops);
 204 #endif
 205         }
 206       loop = next;
 207     }
 208
 209   iv_analysis_done ();
 210 }
 211
 212 /* Check whether exit of the LOOP is at the end of loop body.  */
 213
 214 static bool
 215 loop_exit_at_end_p (struct loop *loop)
 216 {
 217   struct niter_desc *desc = get_simple_loop_desc (loop);
 218   rtx insn;
 219
 220   if (desc->in_edge->dest != loop->latch)
 221     return false;
 222
 223   /* Check that the latch is empty.  */
 224   FOR_BB_INSNS (loop->latch, insn)
 225     {
 226       if (INSN_P (insn))
 227         return false;
 228     }
 229
 230   return true;
 231 }
 232
 233 /* Check whether to peel LOOPS (depending on FLAGS) completely and do so.  */
 234 static void
 235 peel_loops_completely (struct loops *loops, int flags)
 236 {
 237   struct loop *loop, *next;
 238
 239   loop = loops->tree_root;
 240   while (loop->inner)
 241     loop = loop->inner;
 242
 243   while (loop != loops->tree_root)
 244     {
 245       if (loop->next)
 246         {
 247           next = loop->next;
 248           while (next->inner)
 249             next = next->inner;
 250         }
 251       else
 252         next = loop->outer;
 253
 254       loop->lpt_decision.decision = LPT_NONE;
 255
 256       if (dump_file)
 257         fprintf (dump_file,
 258                  "\n;; *** Considering loop %d for complete peeling ***\n",
 259                  loop->num);
 260
 261       loop->ninsns = num_loop_insns (loop);
 262
 263       decide_peel_once_rolling (loop, flags);
 264       if (loop->lpt_decision.decision == LPT_NONE)
 265         decide_peel_completely (loop, flags);
 266
 267       if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
 268         {
 269           peel_loop_completely (loops, loop);
 270 #ifdef ENABLE_CHECKING
 271           verify_dominators (CDI_DOMINATORS);
 272           verify_loop_structure (loops);
 273 #endif
 274         }
 275       loop = next;
 276     }
 277 }
 278
 279 /* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much.  */
 280 static void
 281 decide_unrolling_and_peeling (struct loops *loops, int flags)
 282 {
 283   struct loop *loop = loops->tree_root, *next;
 284
 285   while (loop->inner)
 286     loop = loop->inner;
 287
 288   /* Scan the loops, inner ones first.  */
 289   while (loop != loops->tree_root)
 290     {
 291       if (loop->next)
 292         {
 293           next = loop->next;
 294           while (next->inner)
 295             next = next->inner;
 296         }
 297       else
 298         next = loop->outer;
 299
 300       loop->lpt_decision.decision = LPT_NONE;
 301
 302       if (dump_file)
 303         fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
 304
 305       /* Do not peel cold areas.  */
 306       if (!maybe_hot_bb_p (loop->header))
 307         {
 308           if (dump_file)
 309             fprintf (dump_file, ";; Not considering loop, cold area\n");
 310           loop = next;
 311           continue;
 312         }
 313
 314       /* Can the loop be manipulated?  */
 315       if (!can_duplicate_loop_p (loop))
 316         {
 317           if (dump_file)
 318             fprintf (dump_file,
 319                      ";; Not considering loop, cannot duplicate\n");
 320           loop = next;
 321           continue;
 322         }
 323
 324       /* Skip non-innermost loops.  */
 325       if (loop->inner)
 326         {
 327           if (dump_file)
 328             fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 329           loop = next;
 330           continue;
 331         }
 332
 333       loop->ninsns = num_loop_insns (loop);
 334       loop->av_ninsns = average_num_loop_insns (loop);
 335
 336       /* Try transformations one by one in decreasing order of
 337          priority.  */
 338
 339       decide_unroll_constant_iterations (loop, flags);
 340       if (loop->lpt_decision.decision == LPT_NONE)
 341         decide_unroll_runtime_iterations (loop, flags);
 342       if (loop->lpt_decision.decision == LPT_NONE)
 343         decide_unroll_stupid (loop, flags);
 344       if (loop->lpt_decision.decision == LPT_NONE)
 345         decide_peel_simple (loop, flags);
 346
 347       loop = next;
 348     }
 349 }
 350
 351 /* Decide whether the LOOP is once rolling and suitable for complete
 352    peeling.  */
 353 static void
 354 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 355 {
 356   struct niter_desc *desc;
 357
 358   if (dump_file)
 359     fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
 360
 361   /* Is the loop small enough?  */
 362   if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
 363     {
 364       if (dump_file)
 365         fprintf (dump_file, ";; Not considering loop, is too big\n");
 366       return;
 367     }
 368
 369   /* Check for simple loops.  */
 370   desc = get_simple_loop_desc (loop);
 371
 372   /* Check number of iterations.  */
 373   if (!desc->simple_p
 374       || desc->assumptions
 375       || !desc->const_iter
 376       || desc->niter != 0)
 377     {
 378       if (dump_file)
 379         fprintf (dump_file,
 380                  ";; Unable to prove that the loop rolls exactly once\n");
 381       return;
 382     }
 383
 384   /* Success.  */
 385   if (dump_file)
 386     fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
 387   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 388 }
 389
 390 /* Decide whether the LOOP is suitable for complete peeling.  */
 391 static void
 392 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 393 {
 394   unsigned npeel;
 395   struct niter_desc *desc;
 396
 397   if (dump_file)
 398     fprintf (dump_file, "\n;; Considering peeling completely\n");
 399
 400   /* Skip non-innermost loops.  */
 401   if (loop->inner)
 402     {
 403       if (dump_file)
 404         fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 405       return;
 406     }
 407
 408   /* Do not peel cold areas.  */
 409   if (!maybe_hot_bb_p (loop->header))
 410     {
 411       if (dump_file)
 412         fprintf (dump_file, ";; Not considering loop, cold area\n");
 413       return;
 414     }
 415
 416   /* Can the loop be manipulated?  */
 417   if (!can_duplicate_loop_p (loop))
 418     {
 419       if (dump_file)
 420         fprintf (dump_file,
 421                  ";; Not considering loop, cannot duplicate\n");
 422       return;
 423     }
 424
 425   /* npeel = number of iterations to peel.  */
 426   npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
 427   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
 428     npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
 429
 430   /* Is the loop small enough?  */
 431   if (!npeel)
 432     {
 433       if (dump_file)
 434         fprintf (dump_file, ";; Not considering loop, is too big\n");
 435       return;
 436     }
 437
 438   /* Check for simple loops.  */
 439   desc = get_simple_loop_desc (loop);
 440
 441   /* Check number of iterations.  */
 442   if (!desc->simple_p
 443       || desc->assumptions
 444       || !desc->const_iter)
 445     {
 446       if (dump_file)
 447         fprintf (dump_file,
 448                  ";; Unable to prove that the loop iterates constant times\n");
 449       return;
 450     }
 451
 452   if (desc->niter > npeel - 1)
 453     {
 454       if (dump_file)
 455         {
 456           fprintf (dump_file,
 457                    ";; Not peeling loop completely, rolls too much (");
 458           fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
 459           fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
 460         }
 461       return;
 462     }
 463
 464   /* Success.  */
 465   if (dump_file)
 466     fprintf (dump_file, ";; Decided to peel loop completely\n");
 467   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 468 }
 469
 470 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
 471    completely.  The transformation done:
 472
 473    for (i = 0; i < 4; i++)
 474      body;
 475
 476    ==>
 477
 478    i = 0;
 479    body; i++;
 480    body; i++;
 481    body; i++;
 482    body; i++;
 483    */
 484 static void
 485 peel_loop_completely (struct loops *loops, struct loop *loop)
 486 {
 487   sbitmap wont_exit;
 488   unsigned HOST_WIDE_INT npeel;
 489   unsigned n_remove_edges, i;
 490   edge *remove_edges, ein;
 491   struct niter_desc *desc = get_simple_loop_desc (loop);
 492   struct opt_info *opt_info = NULL;
 493
 494   npeel = desc->niter;
 495
 496   if (npeel)
 497     {
 498       wont_exit = sbitmap_alloc (npeel + 1);
 499       sbitmap_ones (wont_exit);
 500       RESET_BIT (wont_exit, 0);
 501       if (desc->noloop_assumptions)
 502         RESET_BIT (wont_exit, 1);
 503
 504       remove_edges = xcalloc (npeel, sizeof (edge));
 505       n_remove_edges = 0;
 506
 507       if (flag_split_ivs_in_unroller)
 508         opt_info = analyze_insns_in_loop (loop);
 509
 510       opt_info_start_duplication (opt_info);
 511       if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 512                 loops, npeel,
 513                 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
 514                 DLTHE_FLAG_UPDATE_FREQ))
 515         abort ();
 516
 517       free (wont_exit);
 518
 519       if (opt_info)
 520         {
 521           apply_opt_in_copies (opt_info, npeel, false, true);
 522           free_opt_info (opt_info);
 523         }
 524
 525       /* Remove the exit edges.  */
 526       for (i = 0; i < n_remove_edges; i++)
 527         remove_path (loops, remove_edges[i]);
 528       free (remove_edges);
 529     }
 530
 531   ein = desc->in_edge;
 532   free_simple_loop_desc (loop);
 533
 534   /* Now remove the unreachable part of the last iteration and cancel
 535      the loop.  */
 536   remove_path (loops, ein);
 537
 538   if (dump_file)
 539     fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
 540 }
 541
 542 /* Decide whether to unroll LOOP iterating constant number of times
 543    and how much.  */
 544
 545 static void
 546 decide_unroll_constant_iterations (struct loop *loop, int flags)
 547 {
 548   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
 549   struct niter_desc *desc;
 550
 551   if (!(flags & UAP_UNROLL))
 552     {
 553       /* We were not asked to, just return back silently.  */
 554       return;
 555     }
 556
 557   if (dump_file)
 558     fprintf (dump_file,
 559              "\n;; Considering unrolling loop with constant "
 560              "number of iterations\n");
 561
 562   /* nunroll = total number of copies of the original loop body in
 563      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 564   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 565   nunroll_by_av
 566     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 567   if (nunroll > nunroll_by_av)
 568     nunroll = nunroll_by_av;
 569   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 570     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 571
 572   /* Skip big loops.  */
 573   if (nunroll <= 1)
 574     {
 575       if (dump_file)
 576         fprintf (dump_file, ";; Not considering loop, is too big\n");
 577       return;
 578     }
 579
 580   /* Check for simple loops.  */
 581   desc = get_simple_loop_desc (loop);
 582
 583   /* Check number of iterations.  */
 584   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
 585     {
 586       if (dump_file)
 587         fprintf (dump_file,
 588                  ";; Unable to prove that the loop iterates constant times\n");
 589       return;
 590     }
 591
 592   /* Check whether the loop rolls enough to consider.  */
 593   if (desc->niter < 2 * nunroll)
 594     {
 595       if (dump_file)
 596         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 597       return;
 598     }
 599
 600   /* Success; now compute number of iterations to unroll.  We alter
 601      nunroll so that as few as possible copies of loop body are
 602      necessary, while still not decreasing the number of unrollings
 603      too much (at most by 1).  */
 604   best_copies = 2 * nunroll + 10;
 605
 606   i = 2 * nunroll + 2;
 607   if (i - 1 >= desc->niter)
 608     i = desc->niter - 2;
 609
 610   for (; i >= nunroll - 1; i--)
 611     {
 612       unsigned exit_mod = desc->niter % (i + 1);
 613
 614       if (!loop_exit_at_end_p (loop))
 615         n_copies = exit_mod + i + 1;
 616       else if (exit_mod != (unsigned) i
 617                || desc->noloop_assumptions != NULL_RTX)
 618         n_copies = exit_mod + i + 2;
 619       else
 620         n_copies = i + 1;
 621
 622       if (n_copies < best_copies)
 623         {
 624           best_copies = n_copies;
 625           best_unroll = i;
 626         }
 627     }
 628
 629   if (dump_file)
 630     fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
 631              best_unroll + 1, best_copies, nunroll);
 632
 633   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
 634   loop->lpt_decision.times = best_unroll;
 635
 636   if (dump_file)
 637     fprintf (dump_file,
 638              ";; Decided to unroll the constant times rolling loop, %d times.\n",
 639              loop->lpt_decision.times);
 640 }
 641
 642 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
 643    times.  The transformation does this:
 644
 645    for (i = 0; i < 102; i++)
 646      body;
 647
 648    ==>
 649
 650    i = 0;
 651    body; i++;
 652    body; i++;
 653    while (i < 102)
 654      {
 655        body; i++;
 656        body; i++;
 657        body; i++;
 658        body; i++;
 659      }
 660   */
 661 static void
 662 unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
 663 {
 664   unsigned HOST_WIDE_INT niter;
 665   unsigned exit_mod;
 666   sbitmap wont_exit;
 667   unsigned n_remove_edges, i;
 668   edge *remove_edges;
 669   unsigned max_unroll = loop->lpt_decision.times;
 670   struct niter_desc *desc = get_simple_loop_desc (loop);
 671   bool exit_at_end = loop_exit_at_end_p (loop);
 672   struct opt_info *opt_info = NULL;
 673
 674   niter = desc->niter;
 675
 676   /* Should not get here (such loop should be peeled instead).  */
 677   gcc_assert (niter > max_unroll + 1);
 678
 679   exit_mod = niter % (max_unroll + 1);
 680
 681   wont_exit = sbitmap_alloc (max_unroll + 1);
 682   sbitmap_ones (wont_exit);
 683
 684   remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
 685   n_remove_edges = 0;
 686   if (flag_split_ivs_in_unroller
 687       || flag_variable_expansion_in_unroller)
 688     opt_info = analyze_insns_in_loop (loop);
 689
 690   if (!exit_at_end)
 691     {
 692       /* The exit is not at the end of the loop; leave exit test
 693          in the first copy, so that the loops that start with test
 694          of exit condition have continuous body after unrolling.  */
 695
 696       if (dump_file)
 697         fprintf (dump_file, ";; Condition on beginning of loop.\n");
 698
 699       /* Peel exit_mod iterations.  */
 700       RESET_BIT (wont_exit, 0);
 701       if (desc->noloop_assumptions)
 702         RESET_BIT (wont_exit, 1);
 703
 704       if (exit_mod)
 705         {
 706           opt_info_start_duplication (opt_info);
 707           if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 708                                               loops, exit_mod,
 709                                               wont_exit, desc->out_edge,
 710                                               remove_edges, &n_remove_edges,
 711                                               DLTHE_FLAG_UPDATE_FREQ))
 712             abort ();
 713
 714           if (opt_info && exit_mod > 1)
 715             apply_opt_in_copies (opt_info, exit_mod, false, false);
 716
 717           desc->noloop_assumptions = NULL_RTX;
 718           desc->niter -= exit_mod;
 719           desc->niter_max -= exit_mod;
 720         }
 721
 722       SET_BIT (wont_exit, 1);
 723     }
 724   else
 725     {
 726       /* Leave exit test in last copy, for the same reason as above if
 727          the loop tests the condition at the end of loop body.  */
 728
 729       if (dump_file)
 730         fprintf (dump_file, ";; Condition on end of loop.\n");
 731
 732       /* We know that niter >= max_unroll + 2; so we do not need to care of
 733          case when we would exit before reaching the loop.  So just peel
 734          exit_mod + 1 iterations.  */
 735       if (exit_mod != max_unroll
 736           || desc->noloop_assumptions)
 737         {
 738           RESET_BIT (wont_exit, 0);
 739           if (desc->noloop_assumptions)
 740             RESET_BIT (wont_exit, 1);
 741
 742           opt_info_start_duplication (opt_info);
 743           if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 744                 loops, exit_mod + 1,
 745                 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
 746                 DLTHE_FLAG_UPDATE_FREQ))
 747             abort ();
 748
 749           if (opt_info && exit_mod > 0)
 750             apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
 751
 752           desc->niter -= exit_mod + 1;
 753           desc->niter_max -= exit_mod + 1;
 754           desc->noloop_assumptions = NULL_RTX;
 755
 756           SET_BIT (wont_exit, 0);
 757           SET_BIT (wont_exit, 1);
 758         }
 759
 760       RESET_BIT (wont_exit, max_unroll);
 761     }
 762
 763   /* Now unroll the loop.  */
 764
 765   opt_info_start_duplication (opt_info);
 766   if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 767                 loops, max_unroll,
 768                 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
 769                 DLTHE_FLAG_UPDATE_FREQ))
 770     abort ();
 771
 772   if (opt_info)
 773     {
 774       apply_opt_in_copies (opt_info, max_unroll, true, true);
 775       free_opt_info (opt_info);
 776     }
 777
 778   free (wont_exit);
 779
 780   if (exit_at_end)
 781     {
 782       basic_block exit_block = desc->in_edge->src->rbi->copy;
 783       /* Find a new in and out edge; they are in the last copy we have made.  */
 784
 785       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
 786         {
 787           desc->out_edge = EDGE_SUCC (exit_block, 0);
 788           desc->in_edge = EDGE_SUCC (exit_block, 1);
 789         }
 790       else
 791         {
 792           desc->out_edge = EDGE_SUCC (exit_block, 1);
 793           desc->in_edge = EDGE_SUCC (exit_block, 0);
 794         }
 795     }
 796
 797   desc->niter /= max_unroll + 1;
 798   desc->niter_max /= max_unroll + 1;
 799   desc->niter_expr = GEN_INT (desc->niter);
 800
 801   /* Remove the edges.  */
 802   for (i = 0; i < n_remove_edges; i++)
 803     remove_path (loops, remove_edges[i]);
 804   free (remove_edges);
 805
 806   if (dump_file)
 807     fprintf (dump_file,
 808              ";; Unrolled loop %d times, constant # of iterations %i insns\n",
 809              max_unroll, num_loop_insns (loop));
 810 }
 811
 812 /* Decide whether to unroll LOOP iterating runtime computable number of times
 813    and how much.  */
 814 static void
 815 decide_unroll_runtime_iterations (struct loop *loop, int flags)
 816 {
 817   unsigned nunroll, nunroll_by_av, i;
 818   struct niter_desc *desc;
 819
 820   if (!(flags & UAP_UNROLL))
 821     {
 822       /* We were not asked to, just return back silently.  */
 823       return;
 824     }
 825
 826   if (dump_file)
 827     fprintf (dump_file,
 828              "\n;; Considering unrolling loop with runtime "
 829              "computable number of iterations\n");
 830
 831   /* nunroll = total number of copies of the original loop body in
 832      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 833   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 834   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 835   if (nunroll > nunroll_by_av)
 836     nunroll = nunroll_by_av;
 837   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 838     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 839
 840   /* Skip big loops.  */
 841   if (nunroll <= 1)
 842     {
 843       if (dump_file)
 844         fprintf (dump_file, ";; Not considering loop, is too big\n");
 845       return;
 846     }
 847
 848   /* Check for simple loops.  */
 849   desc = get_simple_loop_desc (loop);
 850
 851   /* Check simpleness.  */
 852   if (!desc->simple_p || desc->assumptions)
 853     {
 854       if (dump_file)
 855         fprintf (dump_file,
 856                  ";; Unable to prove that the number of iterations "
 857                  "can be counted in runtime\n");
 858       return;
 859     }
 860
 861   if (desc->const_iter)
 862     {
 863       if (dump_file)
 864         fprintf (dump_file, ";; Loop iterates constant times\n");
 865       return;
 866     }
 867
 868   /* If we have profile feedback, check whether the loop rolls.  */
 869   if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
 870     {
 871       if (dump_file)
 872         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 873       return;
 874     }
 875
 876   /* Success; now force nunroll to be power of 2, as we are unable to
 877      cope with overflows in computation of number of iterations.  */
 878   for (i = 1; 2 * i <= nunroll; i *= 2)
 879     continue;
 880
 881   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
 882   loop->lpt_decision.times = i - 1;
 883
 884   if (dump_file)
 885     fprintf (dump_file,
 886              ";; Decided to unroll the runtime computable "
 887              "times rolling loop, %d times.\n",
 888              loop->lpt_decision.times);
 889 }
 890
 891 /* Unroll LOOP for that we are able to count number of iterations in runtime
 892    LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this (with some
 893    extra care for case n < 0):
 894
 895    for (i = 0; i < n; i++)
 896      body;
 897
 898    ==>
 899
 900    i = 0;
 901    mod = n % 4;
 902
 903    switch (mod)
 904      {
 905        case 3:
 906          body; i++;
 907        case 2:
 908          body; i++;
 909        case 1:
 910          body; i++;
 911        case 0: ;
 912      }
 913
 914    while (i < n)
 915      {
 916        body; i++;
 917        body; i++;
 918        body; i++;
 919        body; i++;
 920      }
 921    */
 922 static void
 923 unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
 924 {
 925   rtx old_niter, niter, init_code, branch_code, tmp;
 926   unsigned i, j, p;
 927   basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
 928   unsigned n_dom_bbs;
 929   sbitmap wont_exit;
 930   int may_exit_copy;
 931   unsigned n_peel, n_remove_edges;
 932   edge *remove_edges, e;
 933   bool extra_zero_check, last_may_exit;
 934   unsigned max_unroll = loop->lpt_decision.times;
 935   struct niter_desc *desc = get_simple_loop_desc (loop);
 936   bool exit_at_end = loop_exit_at_end_p (loop);
 937   struct opt_info *opt_info = NULL;
 938
 939   if (flag_split_ivs_in_unroller
 940       || flag_variable_expansion_in_unroller)
 941     opt_info = analyze_insns_in_loop (loop);
 942
 943   /* Remember blocks whose dominators will have to be updated.  */
 944   dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
 945   n_dom_bbs = 0;
 946
 947   body = get_loop_body (loop);
 948   for (i = 0; i < loop->num_nodes; i++)
 949     {
 950       unsigned nldom;
 951       basic_block *ldom;
 952
 953       nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
 954       for (j = 0; j < nldom; j++)
 955         if (!flow_bb_inside_loop_p (loop, ldom[j]))
 956           dom_bbs[n_dom_bbs++] = ldom[j];
 957
 958       free (ldom);
 959     }
 960   free (body);
 961
 962   if (!exit_at_end)
 963     {
 964       /* Leave exit in first copy (for explanation why see comment in
 965          unroll_loop_constant_iterations).  */
 966       may_exit_copy = 0;
 967       n_peel = max_unroll - 1;
 968       extra_zero_check = true;
 969       last_may_exit = false;
 970     }
 971   else
 972     {
 973       /* Leave exit in last copy (for explanation why see comment in
 974          unroll_loop_constant_iterations).  */
 975       may_exit_copy = max_unroll;
 976       n_peel = max_unroll;
 977       extra_zero_check = false;
 978       last_may_exit = true;
 979     }
 980
 981   /* Get expression for number of iterations.  */
 982   start_sequence ();
 983   old_niter = niter = gen_reg_rtx (desc->mode);
 984   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
 985   if (tmp != niter)
 986     emit_move_insn (niter, tmp);
 987
 988   /* Count modulo by ANDing it with max_unroll; we use the fact that
 989      the number of unrollings is a power of two, and thus this is correct
 990      even if there is overflow in the computation.  */
 991   niter = expand_simple_binop (desc->mode, AND,
 992                                niter,
 993                                GEN_INT (max_unroll),
 994                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
 995
 996   init_code = get_insns ();
 997   end_sequence ();
 998
 999   /* Precondition the loop.  */
1000   loop_split_edge_with (loop_preheader_edge (loop), init_code);
1001
1002   remove_edges = xcalloc (max_unroll + n_peel + 1, sizeof (edge));
1003   n_remove_edges = 0;
1004
1005   wont_exit = sbitmap_alloc (max_unroll + 2);
1006
1007   /* Peel the first copy of loop body (almost always we must leave exit test
1008      here; the only exception is when we have extra zero check and the number
1009      of iterations is reliable.  Also record the place of (possible) extra
1010      zero check.  */
1011   sbitmap_zero (wont_exit);
1012   if (extra_zero_check
1013       && !desc->noloop_assumptions)
1014     SET_BIT (wont_exit, 1);
1015   ezc_swtch = loop_preheader_edge (loop)->src;
1016   if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1017                 loops, 1,
1018                 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
1019                 DLTHE_FLAG_UPDATE_FREQ))
1020     abort ();
1021
1022   /* Record the place where switch will be built for preconditioning.  */
1023   swtch = loop_split_edge_with (loop_preheader_edge (loop),
1024                                 NULL_RTX);
1025
1026   for (i = 0; i < n_peel; i++)
1027     {
1028       /* Peel the copy.  */
1029       sbitmap_zero (wont_exit);
1030       if (i != n_peel - 1 || !last_may_exit)
1031         SET_BIT (wont_exit, 1);
1032       if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1033                 loops, 1,
1034                 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
1035                 DLTHE_FLAG_UPDATE_FREQ))
1036         abort ();
1037
1038       /* Create item for switch.  */
1039       j = n_peel - i - (extra_zero_check ? 0 : 1);
1040       p = REG_BR_PROB_BASE / (i + 2);
1041
1042       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1043       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1044                                           block_label (preheader), p, NULL_RTX);
1045
1046       swtch = loop_split_edge_with (EDGE_PRED (swtch, 0), branch_code);
1047       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1048       EDGE_SUCC (swtch, 0)->probability = REG_BR_PROB_BASE - p;
1049       e = make_edge (swtch, preheader,
1050                      EDGE_SUCC (swtch, 0)->flags & EDGE_IRREDUCIBLE_LOOP);
1051       e->probability = p;
1052     }
1053
1054   if (extra_zero_check)
1055     {
1056       /* Add branch for zero iterations.  */
1057       p = REG_BR_PROB_BASE / (max_unroll + 1);
1058       swtch = ezc_swtch;
1059       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1060       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1061                                           block_label (preheader), p, NULL_RTX);
1062
1063       swtch = loop_split_edge_with (EDGE_SUCC (swtch, 0), branch_code);
1064       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1065       EDGE_SUCC (swtch, 0)->probability = REG_BR_PROB_BASE - p;
1066       e = make_edge (swtch, preheader,
1067                      EDGE_SUCC (swtch, 0)->flags & EDGE_IRREDUCIBLE_LOOP);
1068       e->probability = p;
1069     }
1070
1071   /* Recount dominators for outer blocks.  */
1072   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
1073
1074   /* And unroll loop.  */
1075
1076   sbitmap_ones (wont_exit);
1077   RESET_BIT (wont_exit, may_exit_copy);
1078   opt_info_start_duplication (opt_info);
1079
1080   if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1081                 loops, max_unroll,
1082                 wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
1083                 DLTHE_FLAG_UPDATE_FREQ))
1084     abort ();
1085
1086   if (opt_info)
1087     {
1088       apply_opt_in_copies (opt_info, max_unroll, true, true);
1089       free_opt_info (opt_info);
1090     }
1091
1092   free (wont_exit);
1093
1094   if (exit_at_end)
1095     {
1096       basic_block exit_block = desc->in_edge->src->rbi->copy;
1097       /* Find a new in and out edge; they are in the last copy we have made.  */
1098
1099       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1100         {
1101           desc->out_edge = EDGE_SUCC (exit_block, 0);
1102           desc->in_edge = EDGE_SUCC (exit_block, 1);
1103         }
1104       else
1105         {
1106           desc->out_edge = EDGE_SUCC (exit_block, 1);
1107           desc->in_edge = EDGE_SUCC (exit_block, 0);
1108         }
1109     }
1110
1111   /* Remove the edges.  */
1112   for (i = 0; i < n_remove_edges; i++)
1113     remove_path (loops, remove_edges[i]);
1114   free (remove_edges);
1115
1116   /* We must be careful when updating the number of iterations due to
1117      preconditioning and the fact that the value must be valid at entry
1118      of the loop.  After passing through the above code, we see that
1119      the correct new number of iterations is this:  */
1120   gcc_assert (!desc->const_iter);
1121   desc->niter_expr =
1122     simplify_gen_binary (UDIV, desc->mode, old_niter, GEN_INT (max_unroll + 1));
1123   desc->niter_max /= max_unroll + 1;
1124   if (exit_at_end)
1125     {
1126       desc->niter_expr =
1127         simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1128       desc->noloop_assumptions = NULL_RTX;
1129       desc->niter_max--;
1130     }
1131
1132   if (dump_file)
1133     fprintf (dump_file,
1134              ";; Unrolled loop %d times, counting # of iterations "
1135              "in runtime, %i insns\n",
1136              max_unroll, num_loop_insns (loop));
1137 }
1138
1139 /* Decide whether to simply peel LOOP and how much.  */
1140 static void
1141 decide_peel_simple (struct loop *loop, int flags)
1142 {
1143   unsigned npeel;
1144   struct niter_desc *desc;
1145
1146   if (!(flags & UAP_PEEL))
1147     {
1148       /* We were not asked to, just return back silently.  */
1149       return;
1150     }
1151
1152   if (dump_file)
1153     fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1154
1155   /* npeel = number of iterations to peel.  */
1156   npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1157   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1158     npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1159
1160   /* Skip big loops.  */
1161   if (!npeel)
1162     {
1163       if (dump_file)
1164         fprintf (dump_file, ";; Not considering loop, is too big\n");
1165       return;
1166     }
1167
1168   /* Check for simple loops.  */
1169   desc = get_simple_loop_desc (loop);
1170
1171   /* Check number of iterations.  */
1172   if (desc->simple_p && !desc->assumptions && desc->const_iter)
1173     {
1174       if (dump_file)
1175         fprintf (dump_file, ";; Loop iterates constant times\n");
1176       return;
1177     }
1178
1179   /* Do not simply peel loops with branches inside -- it increases number
1180      of mispredicts.  */
1181   if (num_loop_branches (loop) > 1)
1182     {
1183       if (dump_file)
1184         fprintf (dump_file, ";; Not peeling, contains branches\n");
1185       return;
1186     }
1187
1188   if (loop->header->count)
1189     {
1190       unsigned niter = expected_loop_iterations (loop);
1191       if (niter + 1 > npeel)
1192         {
1193           if (dump_file)
1194             {
1195               fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1196               fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1197                        (HOST_WIDEST_INT) (niter + 1));
1198               fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1199                        npeel);
1200             }
1201           return;
1202         }
1203       npeel = niter + 1;
1204     }
1205   else
1206     {
1207       /* For now we have no good heuristics to decide whether loop peeling
1208          will be effective, so disable it.  */
1209       if (dump_file)
1210         fprintf (dump_file,
1211                  ";; Not peeling loop, no evidence it will be profitable\n");
1212       return;
1213     }
1214
1215   /* Success.  */
1216   loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1217   loop->lpt_decision.times = npeel;
1218
1219   if (dump_file)
1220     fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1221              loop->lpt_decision.times);
1222 }
1223
1224 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1225    while (cond)
1226      body;
1227
1228    ==>
1229
1230    if (!cond) goto end;
1231    body;
1232    if (!cond) goto end;
1233    body;
1234    while (cond)
1235      body;
1236    end: ;
1237    */
1238 static void
1239 peel_loop_simple (struct loops *loops, struct loop *loop)
1240 {
1241   sbitmap wont_exit;
1242   unsigned npeel = loop->lpt_decision.times;
1243   struct niter_desc *desc = get_simple_loop_desc (loop);
1244   struct opt_info *opt_info = NULL;
1245
1246   if (flag_split_ivs_in_unroller && npeel > 1)
1247     opt_info = analyze_insns_in_loop (loop);
1248
1249   wont_exit = sbitmap_alloc (npeel + 1);
1250   sbitmap_zero (wont_exit);
1251
1252   opt_info_start_duplication (opt_info);
1253
1254   if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1255                 loops, npeel, wont_exit, NULL, NULL, NULL,
1256                 DLTHE_FLAG_UPDATE_FREQ))
1257     abort ();
1258
1259   free (wont_exit);
1260
1261   if (opt_info)
1262     {
1263       apply_opt_in_copies (opt_info, npeel, false, false);
1264       free_opt_info (opt_info);
1265     }
1266
1267   if (desc->simple_p)
1268     {
1269       if (desc->const_iter)
1270         {
1271           desc->niter -= npeel;
1272           desc->niter_expr = GEN_INT (desc->niter);
1273           desc->noloop_assumptions = NULL_RTX;
1274         }
1275       else
1276         {
1277           /* We cannot just update niter_expr, as its value might be clobbered
1278              inside loop.  We could handle this by counting the number into
1279              temporary just like we do in runtime unrolling, but it does not
1280              seem worthwhile.  */
1281           free_simple_loop_desc (loop);
1282         }
1283     }
1284   if (dump_file)
1285     fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1286 }
1287
1288 /* Decide whether to unroll LOOP stupidly and how much.  */
1289 static void
1290 decide_unroll_stupid (struct loop *loop, int flags)
1291 {
1292   unsigned nunroll, nunroll_by_av, i;
1293   struct niter_desc *desc;
1294
1295   if (!(flags & UAP_UNROLL_ALL))
1296     {
1297       /* We were not asked to, just return back silently.  */
1298       return;
1299     }
1300
1301   if (dump_file)
1302     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1303
1304   /* nunroll = total number of copies of the original loop body in
1305      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1306   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1307   nunroll_by_av
1308     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1309   if (nunroll > nunroll_by_av)
1310     nunroll = nunroll_by_av;
1311   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1312     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1313
1314   /* Skip big loops.  */
1315   if (nunroll <= 1)
1316     {
1317       if (dump_file)
1318         fprintf (dump_file, ";; Not considering loop, is too big\n");
1319       return;
1320     }
1321
1322   /* Check for simple loops.  */
1323   desc = get_simple_loop_desc (loop);
1324
1325   /* Check simpleness.  */
1326   if (desc->simple_p && !desc->assumptions)
1327     {
1328       if (dump_file)
1329         fprintf (dump_file, ";; The loop is simple\n");
1330       return;
1331     }
1332
1333   /* Do not unroll loops with branches inside -- it increases number
1334      of mispredicts.  */
1335   if (num_loop_branches (loop) > 1)
1336     {
1337       if (dump_file)
1338         fprintf (dump_file, ";; Not unrolling, contains branches\n");
1339       return;
1340     }
1341
1342   /* If we have profile feedback, check whether the loop rolls.  */
1343   if (loop->header->count
1344       && expected_loop_iterations (loop) < 2 * nunroll)
1345     {
1346       if (dump_file)
1347         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1348       return;
1349     }
1350
1351   /* Success.  Now force nunroll to be power of 2, as it seems that this
1352      improves results (partially because of better alignments, partially
1353      because of some dark magic).  */
1354   for (i = 1; 2 * i <= nunroll; i *= 2)
1355     continue;
1356
1357   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1358   loop->lpt_decision.times = i - 1;
1359
1360   if (dump_file)
1361     fprintf (dump_file,
1362              ";; Decided to unroll the loop stupidly, %d times.\n",
1363              loop->lpt_decision.times);
1364 }
1365
1366 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1367    while (cond)
1368      body;
1369
1370    ==>
1371
1372    while (cond)
1373      {
1374        body;
1375        if (!cond) break;
1376        body;
1377        if (!cond) break;
1378        body;
1379        if (!cond) break;
1380        body;
1381      }
1382    */
1383 static void
1384 unroll_loop_stupid (struct loops *loops, struct loop *loop)
1385 {
1386   sbitmap wont_exit;
1387   unsigned nunroll = loop->lpt_decision.times;
1388   struct niter_desc *desc = get_simple_loop_desc (loop);
1389   struct opt_info *opt_info = NULL;
1390
1391   if (flag_split_ivs_in_unroller
1392       || flag_variable_expansion_in_unroller)
1393     opt_info = analyze_insns_in_loop (loop);
1394
1395
1396   wont_exit = sbitmap_alloc (nunroll + 1);
1397   sbitmap_zero (wont_exit);
1398   opt_info_start_duplication (opt_info);
1399
1400   if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1401                 loops, nunroll, wont_exit, NULL, NULL, NULL,
1402                 DLTHE_FLAG_UPDATE_FREQ))
1403     abort ();
1404
1405   if (opt_info)
1406     {
1407       apply_opt_in_copies (opt_info, nunroll, true, true);
1408       free_opt_info (opt_info);
1409     }
1410
1411   free (wont_exit);
1412
1413   if (desc->simple_p)
1414     {
1415       /* We indeed may get here provided that there are nontrivial assumptions
1416          for a loop to be really simple.  We could update the counts, but the
1417          problem is that we are unable to decide which exit will be taken
1418          (not really true in case the number of iterations is constant,
1419          but noone will do anything with this information, so we do not
1420          worry about it).  */
1421       desc->simple_p = false;
1422     }
1423
1424   if (dump_file)
1425     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1426              nunroll, num_loop_insns (loop));
1427 }
1428
1429 /* A hash function for information about insns to split.  */
1430
1431 static hashval_t
1432 si_info_hash (const void *ivts)
1433 {
1434   return htab_hash_pointer (((struct iv_to_split *) ivts)->insn);
1435 }
1436
1437 /* An equality functions for information about insns to split.  */
1438
1439 static int
1440 si_info_eq (const void *ivts1, const void *ivts2)
1441 {
1442   const struct iv_to_split *i1 = ivts1;
1443   const struct iv_to_split *i2 = ivts2;
1444
1445   return i1->insn == i2->insn;
1446 }
1447
1448 /* Return a hash for VES, which is really a "var_to_expand *".  */
1449
1450 static hashval_t
1451 ve_info_hash (const void *ves)
1452 {
1453   return htab_hash_pointer (((struct var_to_expand *) ves)->insn);
1454 }
1455
1456 /* Return true if IVTS1 and IVTS2 (which are really both of type
1457    "var_to_expand *") refer to the same instruction.  */
1458
1459 static int
1460 ve_info_eq (const void *ivts1, const void *ivts2)
1461 {
1462   const struct var_to_expand *i1 = ivts1;
1463   const struct var_to_expand *i2 = ivts2;
1464
1465   return i1->insn == i2->insn;
1466 }
1467
1468 /* Returns true if REG is referenced in one insn in LOOP.  */
1469
1470 bool
1471 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
1472 {
1473   basic_block *body, bb;
1474   unsigned i;
1475   int count_ref = 0;
1476   rtx insn;
1477
1478   body = get_loop_body (loop);
1479   for (i = 0; i < loop->num_nodes; i++)
1480     {
1481       bb = body[i];
1482
1483       FOR_BB_INSNS (bb, insn)
1484       {
1485         if (rtx_referenced_p (reg, insn))
1486           count_ref++;
1487       }
1488     }
1489   return (count_ref  == 1);
1490 }
1491
1492 /* Determine whether INSN contains an accumulator
1493    which can be expanded into separate copies,
1494    one for each copy of the LOOP body.
1495
1496    for (i = 0 ; i < n; i++)
1497      sum += a[i];
1498
1499    ==>
1500
1501    sum += a[i]
1502    ....
1503    i = i+1;
1504    sum1 += a[i]
1505    ....
1506    i = i+1
1507    sum2 += a[i];
1508    ....
1509
1510    Return NULL if INSN contains no opportunity for expansion of accumulator.
1511    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1512    information and return a pointer to it.
1513 */
1514
1515 static struct var_to_expand *
1516 analyze_insn_to_expand_var (struct loop *loop, rtx insn)
1517 {
1518   rtx set, dest, src, op1;
1519   struct var_to_expand *ves;
1520   enum machine_mode mode1, mode2;
1521
1522   set = single_set (insn);
1523   if (!set)
1524     return NULL;
1525
1526   dest = SET_DEST (set);
1527   src = SET_SRC (set);
1528
1529   if (GET_CODE (src) != PLUS
1530       && GET_CODE (src) != MINUS
1531       && GET_CODE (src) != MULT)
1532     return NULL;
1533
1534   if (!XEXP (src, 0))
1535     return NULL;
1536
1537   op1 = XEXP (src, 0);
1538
1539   if (!REG_P (dest)
1540       && !(GET_CODE (dest) == SUBREG
1541            && REG_P (SUBREG_REG (dest))))
1542     return NULL;
1543
1544   if (!rtx_equal_p (dest, op1))
1545     return NULL;
1546
1547   if (!referenced_in_one_insn_in_loop_p (loop, dest))
1548     return NULL;
1549
1550   if (rtx_referenced_p (dest, XEXP (src, 1)))
1551     return NULL;
1552
1553   mode1 = GET_MODE (dest);
1554   mode2 = GET_MODE (XEXP (src, 1));
1555   if ((FLOAT_MODE_P (mode1)
1556        || FLOAT_MODE_P (mode2))
1557       && !flag_unsafe_math_optimizations)
1558     return NULL;
1559
1560   /* Record the accumulator to expand.  */
1561   ves = xmalloc (sizeof (struct var_to_expand));
1562   ves->insn = insn;
1563   VARRAY_RTX_INIT (ves->var_expansions, 1, "var_expansions");
1564   ves->reg = copy_rtx (dest);
1565   ves->op = GET_CODE (src);
1566   ves->expansion_count = 0;
1567   ves->reuse_expansion = 0;
1568   return ves;
1569 }
1570
1571 /* Determine whether there is an induction variable in INSN that
1572    we would like to split during unrolling.
1573
1574    I.e. replace
1575
1576    i = i + 1;
1577    ...
1578    i = i + 1;
1579    ...
1580    i = i + 1;
1581    ...
1582
1583    type chains by
1584
1585    i0 = i + 1
1586    ...
1587    i = i0 + 1
1588    ...
1589    i = i0 + 2
1590    ...
1591
1592    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1593    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1594    pointer to it.  */
1595
1596 static struct iv_to_split *
1597 analyze_iv_to_split_insn (rtx insn)
1598 {
1599   rtx set, dest;
1600   struct rtx_iv iv;
1601   struct iv_to_split *ivts;
1602
1603   /* For now we just split the basic induction variables.  Later this may be
1604      extended for example by selecting also addresses of memory references.  */
1605   set = single_set (insn);
1606   if (!set)
1607     return NULL;
1608
1609   dest = SET_DEST (set);
1610   if (!REG_P (dest))
1611     return NULL;
1612
1613   if (!biv_p (insn, dest))
1614     return NULL;
1615
1616   if (!iv_analyze (insn, dest, &iv))
1617     abort ();
1618
1619   if (iv.step == const0_rtx
1620       || iv.mode != iv.extend_mode)
1621     return NULL;
1622
1623   /* Record the insn to split.  */
1624   ivts = xmalloc (sizeof (struct iv_to_split));
1625   ivts->insn = insn;
1626   ivts->base_var = NULL_RTX;
1627   ivts->step = iv.step;
1628   ivts->n_loc = 1;
1629   ivts->loc[0] = 1;
1630
1631   return ivts;
1632 }
1633
1634 /* Determines which of insns in LOOP can be optimized.
1635    Return a OPT_INFO struct with the relevant hash tables filled
1636    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1637    is undefined for the return value.  */
1638
1639 static struct opt_info *
1640 analyze_insns_in_loop (struct loop *loop)
1641 {
1642   basic_block *body, bb;
1643   unsigned i, n_edges = 0;
1644   struct opt_info *opt_info = xcalloc (1, sizeof (struct opt_info));
1645   rtx insn;
1646   struct iv_to_split *ivts = NULL;
1647   struct var_to_expand *ves = NULL;
1648   PTR *slot1;
1649   PTR *slot2;
1650   edge *edges = get_loop_exit_edges (loop, &n_edges);
1651   basic_block preheader;
1652   bool can_apply = false;
1653
1654   iv_analysis_loop_init (loop);
1655
1656   body = get_loop_body (loop);
1657
1658   if (flag_split_ivs_in_unroller)
1659     opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1660                                             si_info_hash, si_info_eq, free);
1661
1662   /* Record the loop exit bb and loop preheader before the unrolling.  */
1663   if (!loop_preheader_edge (loop)->src)
1664     {
1665       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1666       opt_info->loop_preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1667     }
1668   else
1669     opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1670
1671   if (n_edges == 1
1672       && !(edges[0]->flags & EDGE_COMPLEX)
1673       && (edges[0]->flags & EDGE_LOOP_EXIT))
1674     {
1675       opt_info->loop_exit = loop_split_edge_with (edges[0], NULL_RTX);
1676       can_apply = true;
1677     }
1678
1679   if (flag_variable_expansion_in_unroller
1680       && can_apply)
1681     opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1682                                                       ve_info_hash, ve_info_eq, free);
1683
1684   for (i = 0; i < loop->num_nodes; i++)
1685     {
1686       bb = body[i];
1687       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1688         continue;
1689
1690       FOR_BB_INSNS (bb, insn)
1691       {
1692         if (!INSN_P (insn))
1693           continue;
1694
1695         if (opt_info->insns_to_split)
1696           ivts = analyze_iv_to_split_insn (insn);
1697
1698         if (ivts)
1699           {
1700             slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1701             *slot1 = ivts;
1702             continue;
1703           }
1704
1705         if (opt_info->insns_with_var_to_expand)
1706           ves = analyze_insn_to_expand_var (loop, insn);
1707
1708         if (ves)
1709           {
1710             slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1711             *slot2 = ves;
1712           }
1713       }
1714     }
1715
1716   free (edges);
1717   free (body);
1718   return opt_info;
1719 }
1720
1721 /* Called just before loop duplication.  Records start of duplicated area
1722    to OPT_INFO.  */
1723
1724 static void
1725 opt_info_start_duplication (struct opt_info *opt_info)
1726 {
1727   if (opt_info)
1728     opt_info->first_new_block = last_basic_block;
1729 }
1730
1731 /* Determine the number of iterations between initialization of the base
1732    variable and the current copy (N_COPY).  N_COPIES is the total number
1733    of newly created copies.  UNROLLING is true if we are unrolling
1734    (not peeling) the loop.  */
1735
1736 static unsigned
1737 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1738 {
1739   if (unrolling)
1740     {
1741       /* If we are unrolling, initialization is done in the original loop
1742          body (number 0).  */
1743       return n_copy;
1744     }
1745   else
1746     {
1747       /* If we are peeling, the copy in that the initialization occurs has
1748          number 1.  The original loop (number 0) is the last.  */
1749       if (n_copy)
1750         return n_copy - 1;
1751       else
1752         return n_copies;
1753     }
1754 }
1755
1756 /* Locate in EXPR the expression corresponding to the location recorded
1757    in IVTS, and return a pointer to the RTX for this location.  */
1758
1759 static rtx *
1760 get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1761 {
1762   unsigned i;
1763   rtx *ret = &expr;
1764
1765   for (i = 0; i < ivts->n_loc; i++)
1766     ret = &XEXP (*ret, ivts->loc[i]);
1767
1768   return ret;
1769 }
1770
1771 /* Allocate basic variable for the induction variable chain.  Callback for
1772    htab_traverse.  */
1773
1774 static int
1775 allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
1776 {
1777   struct iv_to_split *ivts = *slot;
1778   rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1779
1780   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1781
1782   return 1;
1783 }
1784
1785 /* Insert initialization of basic variable of IVTS before INSN, taking
1786    the initial value from INSN.  */
1787
1788 static void
1789 insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1790 {
1791   rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1792   rtx seq;
1793
1794   start_sequence ();
1795   expr = force_operand (expr, ivts->base_var);
1796   if (expr != ivts->base_var)
1797     emit_move_insn (ivts->base_var, expr);
1798   seq = get_insns ();
1799   end_sequence ();
1800
1801   emit_insn_before (seq, insn);
1802 }
1803
1804 /* Replace the use of induction variable described in IVTS in INSN
1805    by base variable + DELTA * step.  */
1806
1807 static void
1808 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1809 {
1810   rtx expr, *loc, seq, incr, var;
1811   enum machine_mode mode = GET_MODE (ivts->base_var);
1812   rtx src, dest, set;
1813
1814   /* Construct base + DELTA * step.  */
1815   if (!delta)
1816     expr = ivts->base_var;
1817   else
1818     {
1819       incr = simplify_gen_binary (MULT, mode,
1820                                   ivts->step, gen_int_mode (delta, mode));
1821       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1822                                   ivts->base_var, incr);
1823     }
1824
1825   /* Figure out where to do the replacement.  */
1826   loc = get_ivts_expr (single_set (insn), ivts);
1827
1828   /* If we can make the replacement right away, we're done.  */
1829   if (validate_change (insn, loc, expr, 0))
1830     return;
1831
1832   /* Otherwise, force EXPR into a register and try again.  */
1833   start_sequence ();
1834   var = gen_reg_rtx (mode);
1835   expr = force_operand (expr, var);
1836   if (expr != var)
1837     emit_move_insn (var, expr);
1838   seq = get_insns ();
1839   end_sequence ();
1840   emit_insn_before (seq, insn);
1841
1842   if (validate_change (insn, loc, var, 0))
1843     return;
1844
1845   /* The last chance.  Try recreating the assignment in insn
1846      completely from scratch.  */
1847   set = single_set (insn);
1848   gcc_assert (set);
1849
1850   start_sequence ();
1851   *loc = var;
1852   src = copy_rtx (SET_SRC (set));
1853   dest = copy_rtx (SET_DEST (set));
1854   src = force_operand (src, dest);
1855   if (src != dest)
1856     emit_move_insn (dest, src);
1857   seq = get_insns ();
1858   end_sequence ();
1859
1860   emit_insn_before (seq, insn);
1861   delete_insn (insn);
1862 }
1863
1864
1865 /* Return one expansion of the accumulator recorded in struct VE.  */
1866
1867 static rtx
1868 get_expansion (struct var_to_expand *ve)
1869 {
1870   rtx reg;
1871
1872   if (ve->reuse_expansion == 0)
1873     reg = ve->reg;
1874   else
1875     reg = VARRAY_RTX (ve->var_expansions,  ve->reuse_expansion - 1);
1876
1877   if (VARRAY_ACTIVE_SIZE (ve->var_expansions) == (unsigned) ve->reuse_expansion)
1878     ve->reuse_expansion = 0;
1879   else
1880     ve->reuse_expansion++;
1881
1882   return reg;
1883 }
1884
1885
1886 /* Given INSN replace the uses of the accumulator recorded in VE
1887    with a new register.  */
1888
1889 static void
1890 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
1891 {
1892   rtx new_reg, set;
1893   bool really_new_expansion = false;
1894
1895   set = single_set (insn);
1896   if (!set)
1897     abort ();
1898
1899   /* Generate a new register only if the expansion limit has not been
1900      reached.  Else reuse an already existing expansion.  */
1901   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1902     {
1903       really_new_expansion = true;
1904       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1905     }
1906   else
1907     new_reg = get_expansion (ve);
1908
1909   validate_change (insn, &SET_DEST (set), new_reg, 1);
1910   validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
1911
1912   if (apply_change_group ())
1913     if (really_new_expansion)
1914       {
1915         VARRAY_PUSH_RTX (ve->var_expansions, new_reg);
1916         ve->expansion_count++;
1917       }
1918 }
1919
1920 /* Initialize the variable expansions in loop preheader.
1921    Callbacks for htab_traverse.  PLACE_P is the loop-preheader
1922    basic block where the initialization of the expansions
1923    should take place.  */
1924
1925 static int
1926 insert_var_expansion_initialization (void **slot, void *place_p)
1927 {
1928   struct var_to_expand *ve = *slot;
1929   basic_block place = (basic_block)place_p;
1930   rtx seq, var, zero_init, insn;
1931   unsigned i;
1932
1933   if (VARRAY_ACTIVE_SIZE (ve->var_expansions) == 0)
1934     return 1;
1935
1936   start_sequence ();
1937   if (ve->op == PLUS || ve->op == MINUS)
1938     for (i = 0; i < VARRAY_ACTIVE_SIZE (ve->var_expansions); i++)
1939       {
1940         var = VARRAY_RTX (ve->var_expansions, i);
1941         zero_init =  CONST0_RTX (GET_MODE (var));
1942         emit_move_insn (var, zero_init);
1943       }
1944   else if (ve->op == MULT)
1945     for (i = 0; i < VARRAY_ACTIVE_SIZE (ve->var_expansions); i++)
1946       {
1947         var = VARRAY_RTX (ve->var_expansions, i);
1948         zero_init =  CONST1_RTX (GET_MODE (var));
1949         emit_move_insn (var, zero_init);
1950       }
1951
1952   seq = get_insns ();
1953   end_sequence ();
1954
1955   insn = BB_HEAD (place);
1956   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
1957     insn = NEXT_INSN (insn);
1958
1959   emit_insn_after (seq, insn);
1960   /* Continue traversing the hash table.  */
1961   return 1;
1962 }
1963
1964 /*  Combine the variable expansions at the loop exit.
1965     Callbacks for htab_traverse.  PLACE_P is the loop exit
1966     basic block where the summation of the expansions should
1967     take place.  */
1968
1969 static int
1970 combine_var_copies_in_loop_exit (void **slot, void *place_p)
1971 {
1972   struct var_to_expand *ve = *slot;
1973   basic_block place = (basic_block)place_p;
1974   rtx sum = ve->reg;
1975   rtx expr, seq, var, insn;
1976   unsigned i;
1977
1978   if (VARRAY_ACTIVE_SIZE (ve->var_expansions) == 0)
1979     return 1;
1980
1981   start_sequence ();
1982   if (ve->op == PLUS || ve->op == MINUS)
1983     for (i = 0; i < VARRAY_ACTIVE_SIZE (ve->var_expansions); i++)
1984       {
1985         var = VARRAY_RTX (ve->var_expansions, i);
1986         sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
1987                                    var, sum);
1988       }
1989   else if (ve->op == MULT)
1990     for (i = 0; i < VARRAY_ACTIVE_SIZE (ve->var_expansions); i++)
1991       {
1992         var = VARRAY_RTX (ve->var_expansions, i);
1993         sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
1994                                    var, sum);
1995       }
1996
1997   expr = force_operand (sum, ve->reg);
1998   if (expr != ve->reg)
1999     emit_move_insn (ve->reg, expr);
2000   seq = get_insns ();
2001   end_sequence ();
2002
2003   insn = BB_HEAD (place);
2004   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2005     insn = NEXT_INSN (insn);
2006
2007   emit_insn_after (seq, insn);
2008
2009   /* Continue traversing the hash table.  */
2010   return 1;
2011 }
2012
2013 /* Apply loop optimizations in loop copies using the
2014    data which gathered during the unrolling.  Structure
2015    OPT_INFO record that data.
2016
2017    UNROLLING is true if we unrolled (not peeled) the loop.
2018    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
2019    the loop (as it should happen in complete unrolling, but not in ordinary
2020    peeling of the loop).  */
2021
2022 static void
2023 apply_opt_in_copies (struct opt_info *opt_info,
2024                      unsigned n_copies, bool unrolling,
2025                      bool rewrite_original_loop)
2026 {
2027   unsigned i, delta;
2028   basic_block bb, orig_bb;
2029   rtx insn, orig_insn, next;
2030   struct iv_to_split ivts_templ, *ivts;
2031   struct var_to_expand ve_templ, *ves;
2032
2033   /* Sanity check -- we need to put initialization in the original loop
2034      body.  */
2035   gcc_assert (!unrolling || rewrite_original_loop);
2036
2037   /* Allocate the basic variables (i0).  */
2038   if (opt_info->insns_to_split)
2039     htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL);
2040
2041   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2042     {
2043       bb = BASIC_BLOCK (i);
2044       orig_bb = bb->rbi->original;
2045
2046       delta = determine_split_iv_delta (bb->rbi->copy_number, n_copies,
2047                                         unrolling);
2048       orig_insn = BB_HEAD (orig_bb);
2049       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2050         {
2051           next = NEXT_INSN (insn);
2052           if (!INSN_P (insn))
2053             continue;
2054
2055           while (!INSN_P (orig_insn))
2056             orig_insn = NEXT_INSN (orig_insn);
2057
2058           ivts_templ.insn = orig_insn;
2059           ve_templ.insn = orig_insn;
2060
2061           /* Apply splitting iv optimization.  */
2062           if (opt_info->insns_to_split)
2063             {
2064               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2065
2066               if (ivts)
2067                 {
2068 #ifdef ENABLE_CHECKING
2069                   gcc_assert (rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)));
2070 #endif
2071
2072                   if (!delta)
2073                     insert_base_initialization (ivts, insn);
2074                   split_iv (ivts, insn, delta);
2075                 }
2076             }
2077           /* Apply variable expansion optimization.  */
2078           if (unrolling && opt_info->insns_with_var_to_expand)
2079             {
2080               ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2081               if (ves)
2082                 {
2083 #ifdef ENABLE_CHECKING
2084                   gcc_assert (rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)));
2085 #endif
2086                   expand_var_during_unrolling (ves, insn);
2087                 }
2088             }
2089           orig_insn = NEXT_INSN (orig_insn);
2090         }
2091     }
2092
2093   if (!rewrite_original_loop)
2094     return;
2095
2096   /* Initialize the variable expansions in the loop preheader
2097      and take care of combining them at the loop exit.  */
2098   if (opt_info->insns_with_var_to_expand)
2099     {
2100       htab_traverse (opt_info->insns_with_var_to_expand,
2101                      insert_var_expansion_initialization,
2102                      opt_info->loop_preheader);
2103       htab_traverse (opt_info->insns_with_var_to_expand,
2104                      combine_var_copies_in_loop_exit,
2105                      opt_info->loop_exit);
2106     }
2107
2108   /* Rewrite also the original loop body.  Find them as originals of the blocks
2109      in the last copied iteration, i.e. those that have
2110      bb->rbi->original->copy == bb.  */
2111   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2112     {
2113       bb = BASIC_BLOCK (i);
2114       orig_bb = bb->rbi->original;
2115       if (orig_bb->rbi->copy != bb)
2116         continue;
2117
2118       delta = determine_split_iv_delta (0, n_copies, unrolling);
2119       for (orig_insn = BB_HEAD (orig_bb);
2120            orig_insn != NEXT_INSN (BB_END (bb));
2121            orig_insn = next)
2122         {
2123           next = NEXT_INSN (orig_insn);
2124
2125           if (!INSN_P (orig_insn))
2126             continue;
2127
2128           ivts_templ.insn = orig_insn;
2129           if (opt_info->insns_to_split)
2130             {
2131               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2132               if (ivts)
2133                 {
2134                   if (!delta)
2135                     insert_base_initialization (ivts, orig_insn);
2136                   split_iv (ivts, orig_insn, delta);
2137                   continue;
2138                 }
2139             }
2140
2141         }
2142     }
2143 }
2144
2145 /*  Release the data structures used for the variable expansion
2146     optimization.  Callbacks for htab_traverse.  */
2147
2148 static int
2149 release_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
2150 {
2151   struct var_to_expand *ve = *slot;
2152
2153   VARRAY_CLEAR (ve->var_expansions);
2154
2155   /* Continue traversing the hash table.  */
2156   return 1;
2157 }
2158
2159 /* Release OPT_INFO.  */
2160
2161 static void
2162 free_opt_info (struct opt_info *opt_info)
2163 {
2164   if (opt_info->insns_to_split)
2165     htab_delete (opt_info->insns_to_split);
2166   if (opt_info->insns_with_var_to_expand)
2167     {
2168       htab_traverse (opt_info->insns_with_var_to_expand,
2169                      release_var_copies, NULL);
2170       htab_delete (opt_info->insns_with_var_to_expand);
2171     }
2172   free (opt_info);
2173 }