gcc/loop-unroll.c

   1 /* Loop unrolling and peeling.
   2    Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 2, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "rtl.h"
  26 #include "hard-reg-set.h"
  27 #include "obstack.h"
  28 #include "basic-block.h"
  29 #include "cfgloop.h"
  30 #include "cfglayout.h"
  31 #include "params.h"
  32 #include "output.h"
  33 #include "expr.h"
  34 #include "hashtab.h"
  35 #include "recog.h"
  36 #include "varray.h"
  37
  38 /* This pass performs loop unrolling and peeling.  We only perform these
  39    optimizations on innermost loops (with single exception) because
  40    the impact on performance is greatest here, and we want to avoid
  41    unnecessary code size growth.  The gain is caused by greater sequentiality
  42    of code, better code to optimize for further passes and in some cases
  43    by fewer testings of exit conditions.  The main problem is code growth,
  44    that impacts performance negatively due to effect of caches.
  45
  46    What we do:
  47
  48    -- complete peeling of once-rolling loops; this is the above mentioned
  49       exception, as this causes loop to be cancelled completely and
  50       does not cause code growth
  51    -- complete peeling of loops that roll (small) constant times.
  52    -- simple peeling of first iterations of loops that do not roll much
  53       (according to profile feedback)
  54    -- unrolling of loops that roll constant times; this is almost always
  55       win, as we get rid of exit condition tests.
  56    -- unrolling of loops that roll number of times that we can compute
  57       in runtime; we also get rid of exit condition tests here, but there
  58       is the extra expense for calculating the number of iterations
  59    -- simple unrolling of remaining loops; this is performed only if we
  60       are asked to, as the gain is questionable in this case and often
  61       it may even slow down the code
  62    For more detailed descriptions of each of those, see comments at
  63    appropriate function below.
  64
  65    There is a lot of parameters (defined and described in params.def) that
  66    control how much we unroll/peel.
  67
  68    ??? A great problem is that we don't have a good way how to determine
  69    how many times we should unroll the loop; the experiments I have made
  70    showed that this choice may affect performance in order of several %.
  71    */
  72
  73 /* Information about induction variables to split.  */
  74
  75 struct iv_to_split
  76 {
  77   rtx insn;             /* The insn in that the induction variable occurs.  */
  78   rtx base_var;         /* The variable on that the values in the further
  79                            iterations are based.  */
  80   rtx step;             /* Step of the induction variable.  */
  81   unsigned n_loc;
  82   unsigned loc[3];      /* Location where the definition of the induction
  83                            variable occurs in the insn.  For example if
  84                            N_LOC is 2, the expression is located at
  85                            XEXP (XEXP (single_set, loc[0]), loc[1]).  */
  86 };
  87
  88 DEF_VEC_P(rtx);
  89 DEF_VEC_ALLOC_P(rtx,heap);
  90
  91 /* Information about accumulators to expand.  */
  92
  93 struct var_to_expand
  94 {
  95   rtx insn;                        /* The insn in that the variable expansion occurs.  */
  96   rtx reg;                         /* The accumulator which is expanded.  */
  97   VEC(rtx,heap) *var_expansions;   /* The copies of the accumulator which is expanded.  */
  98   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
  99                                       or multiplication.  */
 100   int expansion_count;             /* Count the number of expansions generated so far.  */
 101   int reuse_expansion;             /* The expansion we intend to reuse to expand
 102                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
 103                                       the original accumulator.  Else use
 104                                       var_expansions[REUSE_EXPANSION - 1].  */
 105 };
 106
 107 /* Information about optimization applied in
 108    the unrolled loop.  */
 109
 110 struct opt_info
 111 {
 112   htab_t insns_to_split;           /* A hashtable of insns to split.  */
 113   htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
 114                                       to expand.  */
 115   unsigned first_new_block;        /* The first basic block that was
 116                                       duplicated.  */
 117   basic_block loop_exit;           /* The loop exit basic block.  */
 118   basic_block loop_preheader;      /* The loop preheader basic block.  */
 119 };
 120
 121 static void decide_unrolling_and_peeling (struct loops *, int);
 122 static void peel_loops_completely (struct loops *, int);
 123 static void decide_peel_simple (struct loop *, int);
 124 static void decide_peel_once_rolling (struct loop *, int);
 125 static void decide_peel_completely (struct loop *, int);
 126 static void decide_unroll_stupid (struct loop *, int);
 127 static void decide_unroll_constant_iterations (struct loop *, int);
 128 static void decide_unroll_runtime_iterations (struct loop *, int);
 129 static void peel_loop_simple (struct loops *, struct loop *);
 130 static void peel_loop_completely (struct loops *, struct loop *);
 131 static void unroll_loop_stupid (struct loops *, struct loop *);
 132 static void unroll_loop_constant_iterations (struct loops *, struct loop *);
 133 static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
 134 static struct opt_info *analyze_insns_in_loop (struct loop *);
 135 static void opt_info_start_duplication (struct opt_info *);
 136 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
 137 static void free_opt_info (struct opt_info *);
 138 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
 139 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
 140 static struct iv_to_split *analyze_iv_to_split_insn (rtx);
 141 static void expand_var_during_unrolling (struct var_to_expand *, rtx);
 142 static int insert_var_expansion_initialization (void **, void *);
 143 static int combine_var_copies_in_loop_exit (void **, void *);
 144 static int release_var_copies (void **, void *);
 145 static rtx get_expansion (struct var_to_expand *);
 146
 147 /* Unroll and/or peel (depending on FLAGS) LOOPS.  */
 148 void
 149 unroll_and_peel_loops (struct loops *loops, int flags)
 150 {
 151   struct loop *loop, *next;
 152   bool check;
 153
 154   /* First perform complete loop peeling (it is almost surely a win,
 155      and affects parameters for further decision a lot).  */
 156   peel_loops_completely (loops, flags);
 157
 158   /* Now decide rest of unrolling and peeling.  */
 159   decide_unrolling_and_peeling (loops, flags);
 160
 161   loop = loops->tree_root;
 162   while (loop->inner)
 163     loop = loop->inner;
 164
 165   /* Scan the loops, inner ones first.  */
 166   while (loop != loops->tree_root)
 167     {
 168       if (loop->next)
 169         {
 170           next = loop->next;
 171           while (next->inner)
 172             next = next->inner;
 173         }
 174       else
 175         next = loop->outer;
 176
 177       check = true;
 178       /* And perform the appropriate transformations.  */
 179       switch (loop->lpt_decision.decision)
 180         {
 181         case LPT_PEEL_COMPLETELY:
 182           /* Already done.  */
 183           gcc_unreachable ();
 184         case LPT_PEEL_SIMPLE:
 185           peel_loop_simple (loops, loop);
 186           break;
 187         case LPT_UNROLL_CONSTANT:
 188           unroll_loop_constant_iterations (loops, loop);
 189           break;
 190         case LPT_UNROLL_RUNTIME:
 191           unroll_loop_runtime_iterations (loops, loop);
 192           break;
 193         case LPT_UNROLL_STUPID:
 194           unroll_loop_stupid (loops, loop);
 195           break;
 196         case LPT_NONE:
 197           check = false;
 198           break;
 199         default:
 200           gcc_unreachable ();
 201         }
 202       if (check)
 203         {
 204 #ifdef ENABLE_CHECKING
 205           verify_dominators (CDI_DOMINATORS);
 206           verify_loop_structure (loops);
 207 #endif
 208         }
 209       loop = next;
 210     }
 211
 212   iv_analysis_done ();
 213 }
 214
 215 /* Check whether exit of the LOOP is at the end of loop body.  */
 216
 217 static bool
 218 loop_exit_at_end_p (struct loop *loop)
 219 {
 220   struct niter_desc *desc = get_simple_loop_desc (loop);
 221   rtx insn;
 222
 223   if (desc->in_edge->dest != loop->latch)
 224     return false;
 225
 226   /* Check that the latch is empty.  */
 227   FOR_BB_INSNS (loop->latch, insn)
 228     {
 229       if (INSN_P (insn))
 230         return false;
 231     }
 232
 233   return true;
 234 }
 235
 236 /* Check whether to peel LOOPS (depending on FLAGS) completely and do so.  */
 237 static void
 238 peel_loops_completely (struct loops *loops, int flags)
 239 {
 240   struct loop *loop, *next;
 241
 242   loop = loops->tree_root;
 243   while (loop->inner)
 244     loop = loop->inner;
 245
 246   while (loop != loops->tree_root)
 247     {
 248       if (loop->next)
 249         {
 250           next = loop->next;
 251           while (next->inner)
 252             next = next->inner;
 253         }
 254       else
 255         next = loop->outer;
 256
 257       loop->lpt_decision.decision = LPT_NONE;
 258
 259       if (dump_file)
 260         fprintf (dump_file,
 261                  "\n;; *** Considering loop %d for complete peeling ***\n",
 262                  loop->num);
 263
 264       loop->ninsns = num_loop_insns (loop);
 265
 266       decide_peel_once_rolling (loop, flags);
 267       if (loop->lpt_decision.decision == LPT_NONE)
 268         decide_peel_completely (loop, flags);
 269
 270       if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
 271         {
 272           peel_loop_completely (loops, loop);
 273 #ifdef ENABLE_CHECKING
 274           verify_dominators (CDI_DOMINATORS);
 275           verify_loop_structure (loops);
 276 #endif
 277         }
 278       loop = next;
 279     }
 280 }
 281
 282 /* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much.  */
 283 static void
 284 decide_unrolling_and_peeling (struct loops *loops, int flags)
 285 {
 286   struct loop *loop = loops->tree_root, *next;
 287
 288   while (loop->inner)
 289     loop = loop->inner;
 290
 291   /* Scan the loops, inner ones first.  */
 292   while (loop != loops->tree_root)
 293     {
 294       if (loop->next)
 295         {
 296           next = loop->next;
 297           while (next->inner)
 298             next = next->inner;
 299         }
 300       else
 301         next = loop->outer;
 302
 303       loop->lpt_decision.decision = LPT_NONE;
 304
 305       if (dump_file)
 306         fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
 307
 308       /* Do not peel cold areas.  */
 309       if (!maybe_hot_bb_p (loop->header))
 310         {
 311           if (dump_file)
 312             fprintf (dump_file, ";; Not considering loop, cold area\n");
 313           loop = next;
 314           continue;
 315         }
 316
 317       /* Can the loop be manipulated?  */
 318       if (!can_duplicate_loop_p (loop))
 319         {
 320           if (dump_file)
 321             fprintf (dump_file,
 322                      ";; Not considering loop, cannot duplicate\n");
 323           loop = next;
 324           continue;
 325         }
 326
 327       /* Skip non-innermost loops.  */
 328       if (loop->inner)
 329         {
 330           if (dump_file)
 331             fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 332           loop = next;
 333           continue;
 334         }
 335
 336       loop->ninsns = num_loop_insns (loop);
 337       loop->av_ninsns = average_num_loop_insns (loop);
 338
 339       /* Try transformations one by one in decreasing order of
 340          priority.  */
 341
 342       decide_unroll_constant_iterations (loop, flags);
 343       if (loop->lpt_decision.decision == LPT_NONE)
 344         decide_unroll_runtime_iterations (loop, flags);
 345       if (loop->lpt_decision.decision == LPT_NONE)
 346         decide_unroll_stupid (loop, flags);
 347       if (loop->lpt_decision.decision == LPT_NONE)
 348         decide_peel_simple (loop, flags);
 349
 350       loop = next;
 351     }
 352 }
 353
 354 /* Decide whether the LOOP is once rolling and suitable for complete
 355    peeling.  */
 356 static void
 357 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 358 {
 359   struct niter_desc *desc;
 360
 361   if (dump_file)
 362     fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
 363
 364   /* Is the loop small enough?  */
 365   if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
 366     {
 367       if (dump_file)
 368         fprintf (dump_file, ";; Not considering loop, is too big\n");
 369       return;
 370     }
 371
 372   /* Check for simple loops.  */
 373   desc = get_simple_loop_desc (loop);
 374
 375   /* Check number of iterations.  */
 376   if (!desc->simple_p
 377       || desc->assumptions
 378       || desc->infinite
 379       || !desc->const_iter
 380       || desc->niter != 0)
 381     {
 382       if (dump_file)
 383         fprintf (dump_file,
 384                  ";; Unable to prove that the loop rolls exactly once\n");
 385       return;
 386     }
 387
 388   /* Success.  */
 389   if (dump_file)
 390     fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
 391   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 392 }
 393
 394 /* Decide whether the LOOP is suitable for complete peeling.  */
 395 static void
 396 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 397 {
 398   unsigned npeel;
 399   struct niter_desc *desc;
 400
 401   if (dump_file)
 402     fprintf (dump_file, "\n;; Considering peeling completely\n");
 403
 404   /* Skip non-innermost loops.  */
 405   if (loop->inner)
 406     {
 407       if (dump_file)
 408         fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 409       return;
 410     }
 411
 412   /* Do not peel cold areas.  */
 413   if (!maybe_hot_bb_p (loop->header))
 414     {
 415       if (dump_file)
 416         fprintf (dump_file, ";; Not considering loop, cold area\n");
 417       return;
 418     }
 419
 420   /* Can the loop be manipulated?  */
 421   if (!can_duplicate_loop_p (loop))
 422     {
 423       if (dump_file)
 424         fprintf (dump_file,
 425                  ";; Not considering loop, cannot duplicate\n");
 426       return;
 427     }
 428
 429   /* npeel = number of iterations to peel.  */
 430   npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
 431   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
 432     npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
 433
 434   /* Is the loop small enough?  */
 435   if (!npeel)
 436     {
 437       if (dump_file)
 438         fprintf (dump_file, ";; Not considering loop, is too big\n");
 439       return;
 440     }
 441
 442   /* Check for simple loops.  */
 443   desc = get_simple_loop_desc (loop);
 444
 445   /* Check number of iterations.  */
 446   if (!desc->simple_p
 447       || desc->assumptions
 448       || !desc->const_iter
 449       || desc->infinite)
 450     {
 451       if (dump_file)
 452         fprintf (dump_file,
 453                  ";; Unable to prove that the loop iterates constant times\n");
 454       return;
 455     }
 456
 457   if (desc->niter > npeel - 1)
 458     {
 459       if (dump_file)
 460         {
 461           fprintf (dump_file,
 462                    ";; Not peeling loop completely, rolls too much (");
 463           fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
 464           fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
 465         }
 466       return;
 467     }
 468
 469   /* Success.  */
 470   if (dump_file)
 471     fprintf (dump_file, ";; Decided to peel loop completely\n");
 472   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 473 }
 474
 475 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
 476    completely.  The transformation done:
 477
 478    for (i = 0; i < 4; i++)
 479      body;
 480
 481    ==>
 482
 483    i = 0;
 484    body; i++;
 485    body; i++;
 486    body; i++;
 487    body; i++;
 488    */
 489 static void
 490 peel_loop_completely (struct loops *loops, struct loop *loop)
 491 {
 492   sbitmap wont_exit;
 493   unsigned HOST_WIDE_INT npeel;
 494   unsigned n_remove_edges, i;
 495   edge *remove_edges, ein;
 496   struct niter_desc *desc = get_simple_loop_desc (loop);
 497   struct opt_info *opt_info = NULL;
 498
 499   npeel = desc->niter;
 500
 501   if (npeel)
 502     {
 503       bool ok;
 504
 505       wont_exit = sbitmap_alloc (npeel + 1);
 506       sbitmap_ones (wont_exit);
 507       RESET_BIT (wont_exit, 0);
 508       if (desc->noloop_assumptions)
 509         RESET_BIT (wont_exit, 1);
 510
 511       remove_edges = xcalloc (npeel, sizeof (edge));
 512       n_remove_edges = 0;
 513
 514       if (flag_split_ivs_in_unroller)
 515         opt_info = analyze_insns_in_loop (loop);
 516
 517       opt_info_start_duplication (opt_info);
 518       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 519                                           loops, npeel,
 520                                           wont_exit, desc->out_edge,
 521                                           remove_edges, &n_remove_edges,
 522                                           DLTHE_FLAG_UPDATE_FREQ
 523                                           | DLTHE_FLAG_COMPLETTE_PEEL
 524                                           | (opt_info
 525                                              ? DLTHE_RECORD_COPY_NUMBER : 0));
 526       gcc_assert (ok);
 527
 528       free (wont_exit);
 529
 530       if (opt_info)
 531         {
 532           apply_opt_in_copies (opt_info, npeel, false, true);
 533           free_opt_info (opt_info);
 534         }
 535
 536       /* Remove the exit edges.  */
 537       for (i = 0; i < n_remove_edges; i++)
 538         remove_path (loops, remove_edges[i]);
 539       free (remove_edges);
 540     }
 541
 542   ein = desc->in_edge;
 543   free_simple_loop_desc (loop);
 544
 545   /* Now remove the unreachable part of the last iteration and cancel
 546      the loop.  */
 547   remove_path (loops, ein);
 548
 549   if (dump_file)
 550     fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
 551 }
 552
 553 /* Decide whether to unroll LOOP iterating constant number of times
 554    and how much.  */
 555
 556 static void
 557 decide_unroll_constant_iterations (struct loop *loop, int flags)
 558 {
 559   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
 560   struct niter_desc *desc;
 561
 562   if (!(flags & UAP_UNROLL))
 563     {
 564       /* We were not asked to, just return back silently.  */
 565       return;
 566     }
 567
 568   if (dump_file)
 569     fprintf (dump_file,
 570              "\n;; Considering unrolling loop with constant "
 571              "number of iterations\n");
 572
 573   /* nunroll = total number of copies of the original loop body in
 574      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 575   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 576   nunroll_by_av
 577     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 578   if (nunroll > nunroll_by_av)
 579     nunroll = nunroll_by_av;
 580   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 581     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 582
 583   /* Skip big loops.  */
 584   if (nunroll <= 1)
 585     {
 586       if (dump_file)
 587         fprintf (dump_file, ";; Not considering loop, is too big\n");
 588       return;
 589     }
 590
 591   /* Check for simple loops.  */
 592   desc = get_simple_loop_desc (loop);
 593
 594   /* Check number of iterations.  */
 595   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
 596     {
 597       if (dump_file)
 598         fprintf (dump_file,
 599                  ";; Unable to prove that the loop iterates constant times\n");
 600       return;
 601     }
 602
 603   /* Check whether the loop rolls enough to consider.  */
 604   if (desc->niter < 2 * nunroll)
 605     {
 606       if (dump_file)
 607         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 608       return;
 609     }
 610
 611   /* Success; now compute number of iterations to unroll.  We alter
 612      nunroll so that as few as possible copies of loop body are
 613      necessary, while still not decreasing the number of unrollings
 614      too much (at most by 1).  */
 615   best_copies = 2 * nunroll + 10;
 616
 617   i = 2 * nunroll + 2;
 618   if (i - 1 >= desc->niter)
 619     i = desc->niter - 2;
 620
 621   for (; i >= nunroll - 1; i--)
 622     {
 623       unsigned exit_mod = desc->niter % (i + 1);
 624
 625       if (!loop_exit_at_end_p (loop))
 626         n_copies = exit_mod + i + 1;
 627       else if (exit_mod != (unsigned) i
 628                || desc->noloop_assumptions != NULL_RTX)
 629         n_copies = exit_mod + i + 2;
 630       else
 631         n_copies = i + 1;
 632
 633       if (n_copies < best_copies)
 634         {
 635           best_copies = n_copies;
 636           best_unroll = i;
 637         }
 638     }
 639
 640   if (dump_file)
 641     fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
 642              best_unroll + 1, best_copies, nunroll);
 643
 644   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
 645   loop->lpt_decision.times = best_unroll;
 646
 647   if (dump_file)
 648     fprintf (dump_file,
 649              ";; Decided to unroll the constant times rolling loop, %d times.\n",
 650              loop->lpt_decision.times);
 651 }
 652
 653 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
 654    times.  The transformation does this:
 655
 656    for (i = 0; i < 102; i++)
 657      body;
 658
 659    ==>
 660
 661    i = 0;
 662    body; i++;
 663    body; i++;
 664    while (i < 102)
 665      {
 666        body; i++;
 667        body; i++;
 668        body; i++;
 669        body; i++;
 670      }
 671   */
 672 static void
 673 unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
 674 {
 675   unsigned HOST_WIDE_INT niter;
 676   unsigned exit_mod;
 677   sbitmap wont_exit;
 678   unsigned n_remove_edges, i;
 679   edge *remove_edges;
 680   unsigned max_unroll = loop->lpt_decision.times;
 681   struct niter_desc *desc = get_simple_loop_desc (loop);
 682   bool exit_at_end = loop_exit_at_end_p (loop);
 683   struct opt_info *opt_info = NULL;
 684   bool ok;
 685
 686   niter = desc->niter;
 687
 688   /* Should not get here (such loop should be peeled instead).  */
 689   gcc_assert (niter > max_unroll + 1);
 690
 691   exit_mod = niter % (max_unroll + 1);
 692
 693   wont_exit = sbitmap_alloc (max_unroll + 1);
 694   sbitmap_ones (wont_exit);
 695
 696   remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
 697   n_remove_edges = 0;
 698   if (flag_split_ivs_in_unroller
 699       || flag_variable_expansion_in_unroller)
 700     opt_info = analyze_insns_in_loop (loop);
 701
 702   if (!exit_at_end)
 703     {
 704       /* The exit is not at the end of the loop; leave exit test
 705          in the first copy, so that the loops that start with test
 706          of exit condition have continuous body after unrolling.  */
 707
 708       if (dump_file)
 709         fprintf (dump_file, ";; Condition on beginning of loop.\n");
 710
 711       /* Peel exit_mod iterations.  */
 712       RESET_BIT (wont_exit, 0);
 713       if (desc->noloop_assumptions)
 714         RESET_BIT (wont_exit, 1);
 715
 716       if (exit_mod)
 717         {
 718           opt_info_start_duplication (opt_info);
 719           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 720                                               loops, exit_mod,
 721                                               wont_exit, desc->out_edge,
 722                                               remove_edges, &n_remove_edges,
 723                                               DLTHE_FLAG_UPDATE_FREQ
 724                                               | (opt_info && exit_mod > 1
 725                                                  ? DLTHE_RECORD_COPY_NUMBER
 726                                                    : 0));
 727           gcc_assert (ok);
 728
 729           if (opt_info && exit_mod > 1)
 730             apply_opt_in_copies (opt_info, exit_mod, false, false);
 731
 732           desc->noloop_assumptions = NULL_RTX;
 733           desc->niter -= exit_mod;
 734           desc->niter_max -= exit_mod;
 735         }
 736
 737       SET_BIT (wont_exit, 1);
 738     }
 739   else
 740     {
 741       /* Leave exit test in last copy, for the same reason as above if
 742          the loop tests the condition at the end of loop body.  */
 743
 744       if (dump_file)
 745         fprintf (dump_file, ";; Condition on end of loop.\n");
 746
 747       /* We know that niter >= max_unroll + 2; so we do not need to care of
 748          case when we would exit before reaching the loop.  So just peel
 749          exit_mod + 1 iterations.  */
 750       if (exit_mod != max_unroll
 751           || desc->noloop_assumptions)
 752         {
 753           RESET_BIT (wont_exit, 0);
 754           if (desc->noloop_assumptions)
 755             RESET_BIT (wont_exit, 1);
 756
 757           opt_info_start_duplication (opt_info);
 758           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 759                                               loops, exit_mod + 1,
 760                                               wont_exit, desc->out_edge,
 761                                               remove_edges, &n_remove_edges,
 762                                               DLTHE_FLAG_UPDATE_FREQ
 763                                               | (opt_info && exit_mod > 0
 764                                                  ? DLTHE_RECORD_COPY_NUMBER
 765                                                    : 0));
 766           gcc_assert (ok);
 767
 768           if (opt_info && exit_mod > 0)
 769             apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
 770
 771           desc->niter -= exit_mod + 1;
 772           desc->niter_max -= exit_mod + 1;
 773           desc->noloop_assumptions = NULL_RTX;
 774
 775           SET_BIT (wont_exit, 0);
 776           SET_BIT (wont_exit, 1);
 777         }
 778
 779       RESET_BIT (wont_exit, max_unroll);
 780     }
 781
 782   /* Now unroll the loop.  */
 783
 784   opt_info_start_duplication (opt_info);
 785   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 786                                       loops, max_unroll,
 787                                       wont_exit, desc->out_edge,
 788                                       remove_edges, &n_remove_edges,
 789                                       DLTHE_FLAG_UPDATE_FREQ
 790                                       | (opt_info
 791                                          ? DLTHE_RECORD_COPY_NUMBER
 792                                            : 0));
 793   gcc_assert (ok);
 794
 795   if (opt_info)
 796     {
 797       apply_opt_in_copies (opt_info, max_unroll, true, true);
 798       free_opt_info (opt_info);
 799     }
 800
 801   free (wont_exit);
 802
 803   if (exit_at_end)
 804     {
 805       basic_block exit_block = get_bb_copy (desc->in_edge->src);
 806       /* Find a new in and out edge; they are in the last copy we have made.  */
 807
 808       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
 809         {
 810           desc->out_edge = EDGE_SUCC (exit_block, 0);
 811           desc->in_edge = EDGE_SUCC (exit_block, 1);
 812         }
 813       else
 814         {
 815           desc->out_edge = EDGE_SUCC (exit_block, 1);
 816           desc->in_edge = EDGE_SUCC (exit_block, 0);
 817         }
 818     }
 819
 820   desc->niter /= max_unroll + 1;
 821   desc->niter_max /= max_unroll + 1;
 822   desc->niter_expr = GEN_INT (desc->niter);
 823
 824   /* Remove the edges.  */
 825   for (i = 0; i < n_remove_edges; i++)
 826     remove_path (loops, remove_edges[i]);
 827   free (remove_edges);
 828
 829   if (dump_file)
 830     fprintf (dump_file,
 831              ";; Unrolled loop %d times, constant # of iterations %i insns\n",
 832              max_unroll, num_loop_insns (loop));
 833 }
 834
 835 /* Decide whether to unroll LOOP iterating runtime computable number of times
 836    and how much.  */
 837 static void
 838 decide_unroll_runtime_iterations (struct loop *loop, int flags)
 839 {
 840   unsigned nunroll, nunroll_by_av, i;
 841   struct niter_desc *desc;
 842
 843   if (!(flags & UAP_UNROLL))
 844     {
 845       /* We were not asked to, just return back silently.  */
 846       return;
 847     }
 848
 849   if (dump_file)
 850     fprintf (dump_file,
 851              "\n;; Considering unrolling loop with runtime "
 852              "computable number of iterations\n");
 853
 854   /* nunroll = total number of copies of the original loop body in
 855      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 856   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 857   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 858   if (nunroll > nunroll_by_av)
 859     nunroll = nunroll_by_av;
 860   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 861     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 862
 863   /* Skip big loops.  */
 864   if (nunroll <= 1)
 865     {
 866       if (dump_file)
 867         fprintf (dump_file, ";; Not considering loop, is too big\n");
 868       return;
 869     }
 870
 871   /* Check for simple loops.  */
 872   desc = get_simple_loop_desc (loop);
 873
 874   /* Check simpleness.  */
 875   if (!desc->simple_p || desc->assumptions)
 876     {
 877       if (dump_file)
 878         fprintf (dump_file,
 879                  ";; Unable to prove that the number of iterations "
 880                  "can be counted in runtime\n");
 881       return;
 882     }
 883
 884   if (desc->const_iter)
 885     {
 886       if (dump_file)
 887         fprintf (dump_file, ";; Loop iterates constant times\n");
 888       return;
 889     }
 890
 891   /* If we have profile feedback, check whether the loop rolls.  */
 892   if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
 893     {
 894       if (dump_file)
 895         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 896       return;
 897     }
 898
 899   /* Success; now force nunroll to be power of 2, as we are unable to
 900      cope with overflows in computation of number of iterations.  */
 901   for (i = 1; 2 * i <= nunroll; i *= 2)
 902     continue;
 903
 904   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
 905   loop->lpt_decision.times = i - 1;
 906
 907   if (dump_file)
 908     fprintf (dump_file,
 909              ";; Decided to unroll the runtime computable "
 910              "times rolling loop, %d times.\n",
 911              loop->lpt_decision.times);
 912 }
 913
 914 /* Unroll LOOP for that we are able to count number of iterations in runtime
 915    LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this (with some
 916    extra care for case n < 0):
 917
 918    for (i = 0; i < n; i++)
 919      body;
 920
 921    ==>
 922
 923    i = 0;
 924    mod = n % 4;
 925
 926    switch (mod)
 927      {
 928        case 3:
 929          body; i++;
 930        case 2:
 931          body; i++;
 932        case 1:
 933          body; i++;
 934        case 0: ;
 935      }
 936
 937    while (i < n)
 938      {
 939        body; i++;
 940        body; i++;
 941        body; i++;
 942        body; i++;
 943      }
 944    */
 945 static void
 946 unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
 947 {
 948   rtx old_niter, niter, init_code, branch_code, tmp;
 949   unsigned i, j, p;
 950   basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
 951   unsigned n_dom_bbs;
 952   sbitmap wont_exit;
 953   int may_exit_copy;
 954   unsigned n_peel, n_remove_edges;
 955   edge *remove_edges, e;
 956   bool extra_zero_check, last_may_exit;
 957   unsigned max_unroll = loop->lpt_decision.times;
 958   struct niter_desc *desc = get_simple_loop_desc (loop);
 959   bool exit_at_end = loop_exit_at_end_p (loop);
 960   struct opt_info *opt_info = NULL;
 961   bool ok;
 962
 963   if (flag_split_ivs_in_unroller
 964       || flag_variable_expansion_in_unroller)
 965     opt_info = analyze_insns_in_loop (loop);
 966
 967   /* Remember blocks whose dominators will have to be updated.  */
 968   dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
 969   n_dom_bbs = 0;
 970
 971   body = get_loop_body (loop);
 972   for (i = 0; i < loop->num_nodes; i++)
 973     {
 974       unsigned nldom;
 975       basic_block *ldom;
 976
 977       nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
 978       for (j = 0; j < nldom; j++)
 979         if (!flow_bb_inside_loop_p (loop, ldom[j]))
 980           dom_bbs[n_dom_bbs++] = ldom[j];
 981
 982       free (ldom);
 983     }
 984   free (body);
 985
 986   if (!exit_at_end)
 987     {
 988       /* Leave exit in first copy (for explanation why see comment in
 989          unroll_loop_constant_iterations).  */
 990       may_exit_copy = 0;
 991       n_peel = max_unroll - 1;
 992       extra_zero_check = true;
 993       last_may_exit = false;
 994     }
 995   else
 996     {
 997       /* Leave exit in last copy (for explanation why see comment in
 998          unroll_loop_constant_iterations).  */
 999       may_exit_copy = max_unroll;
1000       n_peel = max_unroll;
1001       extra_zero_check = false;
1002       last_may_exit = true;
1003     }
1004
1005   /* Get expression for number of iterations.  */
1006   start_sequence ();
1007   old_niter = niter = gen_reg_rtx (desc->mode);
1008   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
1009   if (tmp != niter)
1010     emit_move_insn (niter, tmp);
1011
1012   /* Count modulo by ANDing it with max_unroll; we use the fact that
1013      the number of unrollings is a power of two, and thus this is correct
1014      even if there is overflow in the computation.  */
1015   niter = expand_simple_binop (desc->mode, AND,
1016                                niter,
1017                                GEN_INT (max_unroll),
1018                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
1019
1020   init_code = get_insns ();
1021   end_sequence ();
1022
1023   /* Precondition the loop.  */
1024   loop_split_edge_with (loop_preheader_edge (loop), init_code);
1025
1026   remove_edges = xcalloc (max_unroll + n_peel + 1, sizeof (edge));
1027   n_remove_edges = 0;
1028
1029   wont_exit = sbitmap_alloc (max_unroll + 2);
1030
1031   /* Peel the first copy of loop body (almost always we must leave exit test
1032      here; the only exception is when we have extra zero check and the number
1033      of iterations is reliable.  Also record the place of (possible) extra
1034      zero check.  */
1035   sbitmap_zero (wont_exit);
1036   if (extra_zero_check
1037       && !desc->noloop_assumptions)
1038     SET_BIT (wont_exit, 1);
1039   ezc_swtch = loop_preheader_edge (loop)->src;
1040   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1041                                       loops, 1,
1042                                       wont_exit, desc->out_edge,
1043                                       remove_edges, &n_remove_edges,
1044                                       DLTHE_FLAG_UPDATE_FREQ);
1045   gcc_assert (ok);
1046
1047   /* Record the place where switch will be built for preconditioning.  */
1048   swtch = loop_split_edge_with (loop_preheader_edge (loop),
1049                                 NULL_RTX);
1050
1051   for (i = 0; i < n_peel; i++)
1052     {
1053       /* Peel the copy.  */
1054       sbitmap_zero (wont_exit);
1055       if (i != n_peel - 1 || !last_may_exit)
1056         SET_BIT (wont_exit, 1);
1057       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1058                                           loops, 1,
1059                                           wont_exit, desc->out_edge,
1060                                           remove_edges, &n_remove_edges,
1061                                           DLTHE_FLAG_UPDATE_FREQ);
1062       gcc_assert (ok);
1063
1064       /* Create item for switch.  */
1065       j = n_peel - i - (extra_zero_check ? 0 : 1);
1066       p = REG_BR_PROB_BASE / (i + 2);
1067
1068       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1069       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1070                                           block_label (preheader), p,
1071                                           NULL_RTX);
1072
1073       swtch = loop_split_edge_with (single_pred_edge (swtch), branch_code);
1074       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1075       single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1076       e = make_edge (swtch, preheader,
1077                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1078       e->probability = p;
1079     }
1080
1081   if (extra_zero_check)
1082     {
1083       /* Add branch for zero iterations.  */
1084       p = REG_BR_PROB_BASE / (max_unroll + 1);
1085       swtch = ezc_swtch;
1086       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1087       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1088                                           block_label (preheader), p,
1089                                           NULL_RTX);
1090
1091       swtch = loop_split_edge_with (single_succ_edge (swtch), branch_code);
1092       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1093       single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1094       e = make_edge (swtch, preheader,
1095                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1096       e->probability = p;
1097     }
1098
1099   /* Recount dominators for outer blocks.  */
1100   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
1101
1102   /* And unroll loop.  */
1103
1104   sbitmap_ones (wont_exit);
1105   RESET_BIT (wont_exit, may_exit_copy);
1106   opt_info_start_duplication (opt_info);
1107
1108   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1109                                       loops, max_unroll,
1110                                       wont_exit, desc->out_edge,
1111                                       remove_edges, &n_remove_edges,
1112                                       DLTHE_FLAG_UPDATE_FREQ
1113                                       | (opt_info
1114                                          ? DLTHE_RECORD_COPY_NUMBER
1115                                            : 0));
1116   gcc_assert (ok);
1117
1118   if (opt_info)
1119     {
1120       apply_opt_in_copies (opt_info, max_unroll, true, true);
1121       free_opt_info (opt_info);
1122     }
1123
1124   free (wont_exit);
1125
1126   if (exit_at_end)
1127     {
1128       basic_block exit_block = get_bb_copy (desc->in_edge->src);
1129       /* Find a new in and out edge; they are in the last copy we have
1130          made.  */
1131
1132       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1133         {
1134           desc->out_edge = EDGE_SUCC (exit_block, 0);
1135           desc->in_edge = EDGE_SUCC (exit_block, 1);
1136         }
1137       else
1138         {
1139           desc->out_edge = EDGE_SUCC (exit_block, 1);
1140           desc->in_edge = EDGE_SUCC (exit_block, 0);
1141         }
1142     }
1143
1144   /* Remove the edges.  */
1145   for (i = 0; i < n_remove_edges; i++)
1146     remove_path (loops, remove_edges[i]);
1147   free (remove_edges);
1148
1149   /* We must be careful when updating the number of iterations due to
1150      preconditioning and the fact that the value must be valid at entry
1151      of the loop.  After passing through the above code, we see that
1152      the correct new number of iterations is this:  */
1153   gcc_assert (!desc->const_iter);
1154   desc->niter_expr =
1155     simplify_gen_binary (UDIV, desc->mode, old_niter,
1156                          GEN_INT (max_unroll + 1));
1157   desc->niter_max /= max_unroll + 1;
1158   if (exit_at_end)
1159     {
1160       desc->niter_expr =
1161         simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1162       desc->noloop_assumptions = NULL_RTX;
1163       desc->niter_max--;
1164     }
1165
1166   if (dump_file)
1167     fprintf (dump_file,
1168              ";; Unrolled loop %d times, counting # of iterations "
1169              "in runtime, %i insns\n",
1170              max_unroll, num_loop_insns (loop));
1171 }
1172
1173 /* Decide whether to simply peel LOOP and how much.  */
1174 static void
1175 decide_peel_simple (struct loop *loop, int flags)
1176 {
1177   unsigned npeel;
1178   struct niter_desc *desc;
1179
1180   if (!(flags & UAP_PEEL))
1181     {
1182       /* We were not asked to, just return back silently.  */
1183       return;
1184     }
1185
1186   if (dump_file)
1187     fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1188
1189   /* npeel = number of iterations to peel.  */
1190   npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1191   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1192     npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1193
1194   /* Skip big loops.  */
1195   if (!npeel)
1196     {
1197       if (dump_file)
1198         fprintf (dump_file, ";; Not considering loop, is too big\n");
1199       return;
1200     }
1201
1202   /* Check for simple loops.  */
1203   desc = get_simple_loop_desc (loop);
1204
1205   /* Check number of iterations.  */
1206   if (desc->simple_p && !desc->assumptions && desc->const_iter)
1207     {
1208       if (dump_file)
1209         fprintf (dump_file, ";; Loop iterates constant times\n");
1210       return;
1211     }
1212
1213   /* Do not simply peel loops with branches inside -- it increases number
1214      of mispredicts.  */
1215   if (num_loop_branches (loop) > 1)
1216     {
1217       if (dump_file)
1218         fprintf (dump_file, ";; Not peeling, contains branches\n");
1219       return;
1220     }
1221
1222   if (loop->header->count)
1223     {
1224       unsigned niter = expected_loop_iterations (loop);
1225       if (niter + 1 > npeel)
1226         {
1227           if (dump_file)
1228             {
1229               fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1230               fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1231                        (HOST_WIDEST_INT) (niter + 1));
1232               fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1233                        npeel);
1234             }
1235           return;
1236         }
1237       npeel = niter + 1;
1238     }
1239   else
1240     {
1241       /* For now we have no good heuristics to decide whether loop peeling
1242          will be effective, so disable it.  */
1243       if (dump_file)
1244         fprintf (dump_file,
1245                  ";; Not peeling loop, no evidence it will be profitable\n");
1246       return;
1247     }
1248
1249   /* Success.  */
1250   loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1251   loop->lpt_decision.times = npeel;
1252
1253   if (dump_file)
1254     fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1255              loop->lpt_decision.times);
1256 }
1257
1258 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1259    while (cond)
1260      body;
1261
1262    ==>
1263
1264    if (!cond) goto end;
1265    body;
1266    if (!cond) goto end;
1267    body;
1268    while (cond)
1269      body;
1270    end: ;
1271    */
1272 static void
1273 peel_loop_simple (struct loops *loops, struct loop *loop)
1274 {
1275   sbitmap wont_exit;
1276   unsigned npeel = loop->lpt_decision.times;
1277   struct niter_desc *desc = get_simple_loop_desc (loop);
1278   struct opt_info *opt_info = NULL;
1279   bool ok;
1280
1281   if (flag_split_ivs_in_unroller && npeel > 1)
1282     opt_info = analyze_insns_in_loop (loop);
1283
1284   wont_exit = sbitmap_alloc (npeel + 1);
1285   sbitmap_zero (wont_exit);
1286
1287   opt_info_start_duplication (opt_info);
1288
1289   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1290                                       loops, npeel, wont_exit,
1291                                       NULL, NULL,
1292                                       NULL, DLTHE_FLAG_UPDATE_FREQ
1293                                       | (opt_info
1294                                          ? DLTHE_RECORD_COPY_NUMBER
1295                                            : 0));
1296   gcc_assert (ok);
1297
1298   free (wont_exit);
1299
1300   if (opt_info)
1301     {
1302       apply_opt_in_copies (opt_info, npeel, false, false);
1303       free_opt_info (opt_info);
1304     }
1305
1306   if (desc->simple_p)
1307     {
1308       if (desc->const_iter)
1309         {
1310           desc->niter -= npeel;
1311           desc->niter_expr = GEN_INT (desc->niter);
1312           desc->noloop_assumptions = NULL_RTX;
1313         }
1314       else
1315         {
1316           /* We cannot just update niter_expr, as its value might be clobbered
1317              inside loop.  We could handle this by counting the number into
1318              temporary just like we do in runtime unrolling, but it does not
1319              seem worthwhile.  */
1320           free_simple_loop_desc (loop);
1321         }
1322     }
1323   if (dump_file)
1324     fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1325 }
1326
1327 /* Decide whether to unroll LOOP stupidly and how much.  */
1328 static void
1329 decide_unroll_stupid (struct loop *loop, int flags)
1330 {
1331   unsigned nunroll, nunroll_by_av, i;
1332   struct niter_desc *desc;
1333
1334   if (!(flags & UAP_UNROLL_ALL))
1335     {
1336       /* We were not asked to, just return back silently.  */
1337       return;
1338     }
1339
1340   if (dump_file)
1341     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1342
1343   /* nunroll = total number of copies of the original loop body in
1344      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1345   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1346   nunroll_by_av
1347     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1348   if (nunroll > nunroll_by_av)
1349     nunroll = nunroll_by_av;
1350   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1351     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1352
1353   /* Skip big loops.  */
1354   if (nunroll <= 1)
1355     {
1356       if (dump_file)
1357         fprintf (dump_file, ";; Not considering loop, is too big\n");
1358       return;
1359     }
1360
1361   /* Check for simple loops.  */
1362   desc = get_simple_loop_desc (loop);
1363
1364   /* Check simpleness.  */
1365   if (desc->simple_p && !desc->assumptions)
1366     {
1367       if (dump_file)
1368         fprintf (dump_file, ";; The loop is simple\n");
1369       return;
1370     }
1371
1372   /* Do not unroll loops with branches inside -- it increases number
1373      of mispredicts.  */
1374   if (num_loop_branches (loop) > 1)
1375     {
1376       if (dump_file)
1377         fprintf (dump_file, ";; Not unrolling, contains branches\n");
1378       return;
1379     }
1380
1381   /* If we have profile feedback, check whether the loop rolls.  */
1382   if (loop->header->count
1383       && expected_loop_iterations (loop) < 2 * nunroll)
1384     {
1385       if (dump_file)
1386         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1387       return;
1388     }
1389
1390   /* Success.  Now force nunroll to be power of 2, as it seems that this
1391      improves results (partially because of better alignments, partially
1392      because of some dark magic).  */
1393   for (i = 1; 2 * i <= nunroll; i *= 2)
1394     continue;
1395
1396   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1397   loop->lpt_decision.times = i - 1;
1398
1399   if (dump_file)
1400     fprintf (dump_file,
1401              ";; Decided to unroll the loop stupidly, %d times.\n",
1402              loop->lpt_decision.times);
1403 }
1404
1405 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1406    while (cond)
1407      body;
1408
1409    ==>
1410
1411    while (cond)
1412      {
1413        body;
1414        if (!cond) break;
1415        body;
1416        if (!cond) break;
1417        body;
1418        if (!cond) break;
1419        body;
1420      }
1421    */
1422 static void
1423 unroll_loop_stupid (struct loops *loops, struct loop *loop)
1424 {
1425   sbitmap wont_exit;
1426   unsigned nunroll = loop->lpt_decision.times;
1427   struct niter_desc *desc = get_simple_loop_desc (loop);
1428   struct opt_info *opt_info = NULL;
1429   bool ok;
1430
1431   if (flag_split_ivs_in_unroller
1432       || flag_variable_expansion_in_unroller)
1433     opt_info = analyze_insns_in_loop (loop);
1434
1435
1436   wont_exit = sbitmap_alloc (nunroll + 1);
1437   sbitmap_zero (wont_exit);
1438   opt_info_start_duplication (opt_info);
1439
1440   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1441                                       loops, nunroll, wont_exit,
1442                                       NULL, NULL, NULL,
1443                                       DLTHE_FLAG_UPDATE_FREQ
1444                                       | (opt_info
1445                                          ? DLTHE_RECORD_COPY_NUMBER
1446                                            : 0));
1447   gcc_assert (ok);
1448
1449   if (opt_info)
1450     {
1451       apply_opt_in_copies (opt_info, nunroll, true, true);
1452       free_opt_info (opt_info);
1453     }
1454
1455   free (wont_exit);
1456
1457   if (desc->simple_p)
1458     {
1459       /* We indeed may get here provided that there are nontrivial assumptions
1460          for a loop to be really simple.  We could update the counts, but the
1461          problem is that we are unable to decide which exit will be taken
1462          (not really true in case the number of iterations is constant,
1463          but noone will do anything with this information, so we do not
1464          worry about it).  */
1465       desc->simple_p = false;
1466     }
1467
1468   if (dump_file)
1469     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1470              nunroll, num_loop_insns (loop));
1471 }
1472
1473 /* A hash function for information about insns to split.  */
1474
1475 static hashval_t
1476 si_info_hash (const void *ivts)
1477 {
1478   return htab_hash_pointer (((struct iv_to_split *) ivts)->insn);
1479 }
1480
1481 /* An equality functions for information about insns to split.  */
1482
1483 static int
1484 si_info_eq (const void *ivts1, const void *ivts2)
1485 {
1486   const struct iv_to_split *i1 = ivts1;
1487   const struct iv_to_split *i2 = ivts2;
1488
1489   return i1->insn == i2->insn;
1490 }
1491
1492 /* Return a hash for VES, which is really a "var_to_expand *".  */
1493
1494 static hashval_t
1495 ve_info_hash (const void *ves)
1496 {
1497   return htab_hash_pointer (((struct var_to_expand *) ves)->insn);
1498 }
1499
1500 /* Return true if IVTS1 and IVTS2 (which are really both of type
1501    "var_to_expand *") refer to the same instruction.  */
1502
1503 static int
1504 ve_info_eq (const void *ivts1, const void *ivts2)
1505 {
1506   const struct var_to_expand *i1 = ivts1;
1507   const struct var_to_expand *i2 = ivts2;
1508
1509   return i1->insn == i2->insn;
1510 }
1511
1512 /* Returns true if REG is referenced in one insn in LOOP.  */
1513
1514 bool
1515 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
1516 {
1517   basic_block *body, bb;
1518   unsigned i;
1519   int count_ref = 0;
1520   rtx insn;
1521
1522   body = get_loop_body (loop);
1523   for (i = 0; i < loop->num_nodes; i++)
1524     {
1525       bb = body[i];
1526
1527       FOR_BB_INSNS (bb, insn)
1528       {
1529         if (rtx_referenced_p (reg, insn))
1530           count_ref++;
1531       }
1532     }
1533   return (count_ref  == 1);
1534 }
1535
1536 /* Determine whether INSN contains an accumulator
1537    which can be expanded into separate copies,
1538    one for each copy of the LOOP body.
1539
1540    for (i = 0 ; i < n; i++)
1541      sum += a[i];
1542
1543    ==>
1544
1545    sum += a[i]
1546    ....
1547    i = i+1;
1548    sum1 += a[i]
1549    ....
1550    i = i+1
1551    sum2 += a[i];
1552    ....
1553
1554    Return NULL if INSN contains no opportunity for expansion of accumulator.
1555    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1556    information and return a pointer to it.
1557 */
1558
1559 static struct var_to_expand *
1560 analyze_insn_to_expand_var (struct loop *loop, rtx insn)
1561 {
1562   rtx set, dest, src, op1;
1563   struct var_to_expand *ves;
1564   enum machine_mode mode1, mode2;
1565
1566   set = single_set (insn);
1567   if (!set)
1568     return NULL;
1569
1570   dest = SET_DEST (set);
1571   src = SET_SRC (set);
1572
1573   if (GET_CODE (src) != PLUS
1574       && GET_CODE (src) != MINUS
1575       && GET_CODE (src) != MULT)
1576     return NULL;
1577
1578   /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
1579      in MD.  But if there is no optab to generate the insn, we can not
1580      perform the variable expansion.  This can happen if an MD provides
1581      an insn but not a named pattern to generate it, for example to avoid
1582      producing code that needs additional mode switches like for x87/mmx.
1583
1584      So we check have_insn_for which looks for an optab for the operation
1585      in SRC.  If it doesn't exist, we can't perform the expansion even
1586      though INSN is valid.  */
1587   if (!have_insn_for (GET_CODE (src), GET_MODE (src)))
1588     return NULL;
1589
1590   if (!XEXP (src, 0))
1591     return NULL;
1592
1593   op1 = XEXP (src, 0);
1594
1595   if (!REG_P (dest)
1596       && !(GET_CODE (dest) == SUBREG
1597            && REG_P (SUBREG_REG (dest))))
1598     return NULL;
1599
1600   if (!rtx_equal_p (dest, op1))
1601     return NULL;
1602
1603   if (!referenced_in_one_insn_in_loop_p (loop, dest))
1604     return NULL;
1605
1606   if (rtx_referenced_p (dest, XEXP (src, 1)))
1607     return NULL;
1608
1609   mode1 = GET_MODE (dest);
1610   mode2 = GET_MODE (XEXP (src, 1));
1611   if ((FLOAT_MODE_P (mode1)
1612        || FLOAT_MODE_P (mode2))
1613       && !flag_unsafe_math_optimizations)
1614     return NULL;
1615
1616   /* Record the accumulator to expand.  */
1617   ves = xmalloc (sizeof (struct var_to_expand));
1618   ves->insn = insn;
1619   ves->var_expansions = VEC_alloc (rtx, heap, 1);
1620   ves->reg = copy_rtx (dest);
1621   ves->op = GET_CODE (src);
1622   ves->expansion_count = 0;
1623   ves->reuse_expansion = 0;
1624   return ves;
1625 }
1626
1627 /* Determine whether there is an induction variable in INSN that
1628    we would like to split during unrolling.
1629
1630    I.e. replace
1631
1632    i = i + 1;
1633    ...
1634    i = i + 1;
1635    ...
1636    i = i + 1;
1637    ...
1638
1639    type chains by
1640
1641    i0 = i + 1
1642    ...
1643    i = i0 + 1
1644    ...
1645    i = i0 + 2
1646    ...
1647
1648    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1649    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1650    pointer to it.  */
1651
1652 static struct iv_to_split *
1653 analyze_iv_to_split_insn (rtx insn)
1654 {
1655   rtx set, dest;
1656   struct rtx_iv iv;
1657   struct iv_to_split *ivts;
1658   bool ok;
1659
1660   /* For now we just split the basic induction variables.  Later this may be
1661      extended for example by selecting also addresses of memory references.  */
1662   set = single_set (insn);
1663   if (!set)
1664     return NULL;
1665
1666   dest = SET_DEST (set);
1667   if (!REG_P (dest))
1668     return NULL;
1669
1670   if (!biv_p (insn, dest))
1671     return NULL;
1672
1673   ok = iv_analyze (insn, dest, &iv);
1674   gcc_assert (ok);
1675
1676   if (iv.step == const0_rtx
1677       || iv.mode != iv.extend_mode)
1678     return NULL;
1679
1680   /* Record the insn to split.  */
1681   ivts = xmalloc (sizeof (struct iv_to_split));
1682   ivts->insn = insn;
1683   ivts->base_var = NULL_RTX;
1684   ivts->step = iv.step;
1685   ivts->n_loc = 1;
1686   ivts->loc[0] = 1;
1687
1688   return ivts;
1689 }
1690
1691 /* Determines which of insns in LOOP can be optimized.
1692    Return a OPT_INFO struct with the relevant hash tables filled
1693    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1694    is undefined for the return value.  */
1695
1696 static struct opt_info *
1697 analyze_insns_in_loop (struct loop *loop)
1698 {
1699   basic_block *body, bb;
1700   unsigned i, num_edges = 0;
1701   struct opt_info *opt_info = xcalloc (1, sizeof (struct opt_info));
1702   rtx insn;
1703   struct iv_to_split *ivts = NULL;
1704   struct var_to_expand *ves = NULL;
1705   PTR *slot1;
1706   PTR *slot2;
1707   edge *edges = get_loop_exit_edges (loop, &num_edges);
1708   bool can_apply = false;
1709
1710   iv_analysis_loop_init (loop);
1711
1712   body = get_loop_body (loop);
1713
1714   if (flag_split_ivs_in_unroller)
1715     opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1716                                             si_info_hash, si_info_eq, free);
1717
1718   /* Record the loop exit bb and loop preheader before the unrolling.  */
1719   if (!loop_preheader_edge (loop)->src)
1720     {
1721       loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1722       opt_info->loop_preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1723     }
1724   else
1725     opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1726
1727   if (num_edges == 1
1728       && !(edges[0]->flags & EDGE_COMPLEX))
1729     {
1730       opt_info->loop_exit = loop_split_edge_with (edges[0], NULL_RTX);
1731       can_apply = true;
1732     }
1733
1734   if (flag_variable_expansion_in_unroller
1735       && can_apply)
1736     opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1737                                                       ve_info_hash, ve_info_eq, free);
1738
1739   for (i = 0; i < loop->num_nodes; i++)
1740     {
1741       bb = body[i];
1742       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1743         continue;
1744
1745       FOR_BB_INSNS (bb, insn)
1746       {
1747         if (!INSN_P (insn))
1748           continue;
1749
1750         if (opt_info->insns_to_split)
1751           ivts = analyze_iv_to_split_insn (insn);
1752
1753         if (ivts)
1754           {
1755             slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1756             *slot1 = ivts;
1757             continue;
1758           }
1759
1760         if (opt_info->insns_with_var_to_expand)
1761           ves = analyze_insn_to_expand_var (loop, insn);
1762
1763         if (ves)
1764           {
1765             slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1766             *slot2 = ves;
1767           }
1768       }
1769     }
1770
1771   free (edges);
1772   free (body);
1773   return opt_info;
1774 }
1775
1776 /* Called just before loop duplication.  Records start of duplicated area
1777    to OPT_INFO.  */
1778
1779 static void
1780 opt_info_start_duplication (struct opt_info *opt_info)
1781 {
1782   if (opt_info)
1783     opt_info->first_new_block = last_basic_block;
1784 }
1785
1786 /* Determine the number of iterations between initialization of the base
1787    variable and the current copy (N_COPY).  N_COPIES is the total number
1788    of newly created copies.  UNROLLING is true if we are unrolling
1789    (not peeling) the loop.  */
1790
1791 static unsigned
1792 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1793 {
1794   if (unrolling)
1795     {
1796       /* If we are unrolling, initialization is done in the original loop
1797          body (number 0).  */
1798       return n_copy;
1799     }
1800   else
1801     {
1802       /* If we are peeling, the copy in that the initialization occurs has
1803          number 1.  The original loop (number 0) is the last.  */
1804       if (n_copy)
1805         return n_copy - 1;
1806       else
1807         return n_copies;
1808     }
1809 }
1810
1811 /* Locate in EXPR the expression corresponding to the location recorded
1812    in IVTS, and return a pointer to the RTX for this location.  */
1813
1814 static rtx *
1815 get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1816 {
1817   unsigned i;
1818   rtx *ret = &expr;
1819
1820   for (i = 0; i < ivts->n_loc; i++)
1821     ret = &XEXP (*ret, ivts->loc[i]);
1822
1823   return ret;
1824 }
1825
1826 /* Allocate basic variable for the induction variable chain.  Callback for
1827    htab_traverse.  */
1828
1829 static int
1830 allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
1831 {
1832   struct iv_to_split *ivts = *slot;
1833   rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1834
1835   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1836
1837   return 1;
1838 }
1839
1840 /* Insert initialization of basic variable of IVTS before INSN, taking
1841    the initial value from INSN.  */
1842
1843 static void
1844 insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1845 {
1846   rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1847   rtx seq;
1848
1849   start_sequence ();
1850   expr = force_operand (expr, ivts->base_var);
1851   if (expr != ivts->base_var)
1852     emit_move_insn (ivts->base_var, expr);
1853   seq = get_insns ();
1854   end_sequence ();
1855
1856   emit_insn_before (seq, insn);
1857 }
1858
1859 /* Replace the use of induction variable described in IVTS in INSN
1860    by base variable + DELTA * step.  */
1861
1862 static void
1863 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1864 {
1865   rtx expr, *loc, seq, incr, var;
1866   enum machine_mode mode = GET_MODE (ivts->base_var);
1867   rtx src, dest, set;
1868
1869   /* Construct base + DELTA * step.  */
1870   if (!delta)
1871     expr = ivts->base_var;
1872   else
1873     {
1874       incr = simplify_gen_binary (MULT, mode,
1875                                   ivts->step, gen_int_mode (delta, mode));
1876       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1877                                   ivts->base_var, incr);
1878     }
1879
1880   /* Figure out where to do the replacement.  */
1881   loc = get_ivts_expr (single_set (insn), ivts);
1882
1883   /* If we can make the replacement right away, we're done.  */
1884   if (validate_change (insn, loc, expr, 0))
1885     return;
1886
1887   /* Otherwise, force EXPR into a register and try again.  */
1888   start_sequence ();
1889   var = gen_reg_rtx (mode);
1890   expr = force_operand (expr, var);
1891   if (expr != var)
1892     emit_move_insn (var, expr);
1893   seq = get_insns ();
1894   end_sequence ();
1895   emit_insn_before (seq, insn);
1896
1897   if (validate_change (insn, loc, var, 0))
1898     return;
1899
1900   /* The last chance.  Try recreating the assignment in insn
1901      completely from scratch.  */
1902   set = single_set (insn);
1903   gcc_assert (set);
1904
1905   start_sequence ();
1906   *loc = var;
1907   src = copy_rtx (SET_SRC (set));
1908   dest = copy_rtx (SET_DEST (set));
1909   src = force_operand (src, dest);
1910   if (src != dest)
1911     emit_move_insn (dest, src);
1912   seq = get_insns ();
1913   end_sequence ();
1914
1915   emit_insn_before (seq, insn);
1916   delete_insn (insn);
1917 }
1918
1919
1920 /* Return one expansion of the accumulator recorded in struct VE.  */
1921
1922 static rtx
1923 get_expansion (struct var_to_expand *ve)
1924 {
1925   rtx reg;
1926
1927   if (ve->reuse_expansion == 0)
1928     reg = ve->reg;
1929   else
1930     reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);
1931
1932   if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
1933     ve->reuse_expansion = 0;
1934   else
1935     ve->reuse_expansion++;
1936
1937   return reg;
1938 }
1939
1940
1941 /* Given INSN replace the uses of the accumulator recorded in VE
1942    with a new register.  */
1943
1944 static void
1945 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
1946 {
1947   rtx new_reg, set;
1948   bool really_new_expansion = false;
1949
1950   set = single_set (insn);
1951   gcc_assert (set);
1952
1953   /* Generate a new register only if the expansion limit has not been
1954      reached.  Else reuse an already existing expansion.  */
1955   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1956     {
1957       really_new_expansion = true;
1958       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1959     }
1960   else
1961     new_reg = get_expansion (ve);
1962
1963   validate_change (insn, &SET_DEST (set), new_reg, 1);
1964   validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
1965
1966   if (apply_change_group ())
1967     if (really_new_expansion)
1968       {
1969         VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
1970         ve->expansion_count++;
1971       }
1972 }
1973
1974 /* Initialize the variable expansions in loop preheader.
1975    Callbacks for htab_traverse.  PLACE_P is the loop-preheader
1976    basic block where the initialization of the expansions
1977    should take place.  */
1978
1979 static int
1980 insert_var_expansion_initialization (void **slot, void *place_p)
1981 {
1982   struct var_to_expand *ve = *slot;
1983   basic_block place = (basic_block)place_p;
1984   rtx seq, var, zero_init, insn;
1985   unsigned i;
1986
1987   if (VEC_length (rtx, ve->var_expansions) == 0)
1988     return 1;
1989
1990   start_sequence ();
1991   if (ve->op == PLUS || ve->op == MINUS)
1992     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
1993       {
1994         zero_init =  CONST0_RTX (GET_MODE (var));
1995         emit_move_insn (var, zero_init);
1996       }
1997   else if (ve->op == MULT)
1998     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
1999       {
2000         zero_init =  CONST1_RTX (GET_MODE (var));
2001         emit_move_insn (var, zero_init);
2002       }
2003
2004   seq = get_insns ();
2005   end_sequence ();
2006
2007   insn = BB_HEAD (place);
2008   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2009     insn = NEXT_INSN (insn);
2010
2011   emit_insn_after (seq, insn);
2012   /* Continue traversing the hash table.  */
2013   return 1;
2014 }
2015
2016 /*  Combine the variable expansions at the loop exit.
2017     Callbacks for htab_traverse.  PLACE_P is the loop exit
2018     basic block where the summation of the expansions should
2019     take place.  */
2020
2021 static int
2022 combine_var_copies_in_loop_exit (void **slot, void *place_p)
2023 {
2024   struct var_to_expand *ve = *slot;
2025   basic_block place = (basic_block)place_p;
2026   rtx sum = ve->reg;
2027   rtx expr, seq, var, insn;
2028   unsigned i;
2029
2030   if (VEC_length (rtx, ve->var_expansions) == 0)
2031     return 1;
2032
2033   start_sequence ();
2034   if (ve->op == PLUS || ve->op == MINUS)
2035     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2036       {
2037         sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
2038                                    var, sum);
2039       }
2040   else if (ve->op == MULT)
2041     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2042       {
2043         sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
2044                                    var, sum);
2045       }
2046
2047   expr = force_operand (sum, ve->reg);
2048   if (expr != ve->reg)
2049     emit_move_insn (ve->reg, expr);
2050   seq = get_insns ();
2051   end_sequence ();
2052
2053   insn = BB_HEAD (place);
2054   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2055     insn = NEXT_INSN (insn);
2056
2057   emit_insn_after (seq, insn);
2058
2059   /* Continue traversing the hash table.  */
2060   return 1;
2061 }
2062
2063 /* Apply loop optimizations in loop copies using the
2064    data which gathered during the unrolling.  Structure
2065    OPT_INFO record that data.
2066
2067    UNROLLING is true if we unrolled (not peeled) the loop.
2068    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
2069    the loop (as it should happen in complete unrolling, but not in ordinary
2070    peeling of the loop).  */
2071
2072 static void
2073 apply_opt_in_copies (struct opt_info *opt_info,
2074                      unsigned n_copies, bool unrolling,
2075                      bool rewrite_original_loop)
2076 {
2077   unsigned i, delta;
2078   basic_block bb, orig_bb;
2079   rtx insn, orig_insn, next;
2080   struct iv_to_split ivts_templ, *ivts;
2081   struct var_to_expand ve_templ, *ves;
2082
2083   /* Sanity check -- we need to put initialization in the original loop
2084      body.  */
2085   gcc_assert (!unrolling || rewrite_original_loop);
2086
2087   /* Allocate the basic variables (i0).  */
2088   if (opt_info->insns_to_split)
2089     htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL);
2090
2091   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2092     {
2093       bb = BASIC_BLOCK (i);
2094       orig_bb = get_bb_original (bb);
2095
2096       /* bb->aux holds position in copy sequence initialized by
2097          duplicate_loop_to_header_edge.  */
2098       delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2099                                         unrolling);
2100       bb->aux = 0;
2101       orig_insn = BB_HEAD (orig_bb);
2102       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2103         {
2104           next = NEXT_INSN (insn);
2105           if (!INSN_P (insn))
2106             continue;
2107
2108           while (!INSN_P (orig_insn))
2109             orig_insn = NEXT_INSN (orig_insn);
2110
2111           ivts_templ.insn = orig_insn;
2112           ve_templ.insn = orig_insn;
2113
2114           /* Apply splitting iv optimization.  */
2115           if (opt_info->insns_to_split)
2116             {
2117               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2118
2119               if (ivts)
2120                 {
2121 #ifdef ENABLE_CHECKING
2122                   gcc_assert (rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)));
2123 #endif
2124
2125                   if (!delta)
2126                     insert_base_initialization (ivts, insn);
2127                   split_iv (ivts, insn, delta);
2128                 }
2129             }
2130           /* Apply variable expansion optimization.  */
2131           if (unrolling && opt_info->insns_with_var_to_expand)
2132             {
2133               ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2134               if (ves)
2135                 {
2136 #ifdef ENABLE_CHECKING
2137                   gcc_assert (rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)));
2138 #endif
2139                   expand_var_during_unrolling (ves, insn);
2140                 }
2141             }
2142           orig_insn = NEXT_INSN (orig_insn);
2143         }
2144     }
2145
2146   if (!rewrite_original_loop)
2147     return;
2148
2149   /* Initialize the variable expansions in the loop preheader
2150      and take care of combining them at the loop exit.  */
2151   if (opt_info->insns_with_var_to_expand)
2152     {
2153       htab_traverse (opt_info->insns_with_var_to_expand,
2154                      insert_var_expansion_initialization,
2155                      opt_info->loop_preheader);
2156       htab_traverse (opt_info->insns_with_var_to_expand,
2157                      combine_var_copies_in_loop_exit,
2158                      opt_info->loop_exit);
2159     }
2160
2161   /* Rewrite also the original loop body.  Find them as originals of the blocks
2162      in the last copied iteration, i.e. those that have
2163      get_bb_copy (get_bb_original (bb)) == bb.  */
2164   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2165     {
2166       bb = BASIC_BLOCK (i);
2167       orig_bb = get_bb_original (bb);
2168       if (get_bb_copy (orig_bb) != bb)
2169         continue;
2170
2171       delta = determine_split_iv_delta (0, n_copies, unrolling);
2172       for (orig_insn = BB_HEAD (orig_bb);
2173            orig_insn != NEXT_INSN (BB_END (bb));
2174            orig_insn = next)
2175         {
2176           next = NEXT_INSN (orig_insn);
2177
2178           if (!INSN_P (orig_insn))
2179             continue;
2180
2181           ivts_templ.insn = orig_insn;
2182           if (opt_info->insns_to_split)
2183             {
2184               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2185               if (ivts)
2186                 {
2187                   if (!delta)
2188                     insert_base_initialization (ivts, orig_insn);
2189                   split_iv (ivts, orig_insn, delta);
2190                   continue;
2191                 }
2192             }
2193
2194         }
2195     }
2196 }
2197
2198 /*  Release the data structures used for the variable expansion
2199     optimization.  Callbacks for htab_traverse.  */
2200
2201 static int
2202 release_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
2203 {
2204   struct var_to_expand *ve = *slot;
2205
2206   VEC_free (rtx, heap, ve->var_expansions);
2207
2208   /* Continue traversing the hash table.  */
2209   return 1;
2210 }
2211
2212 /* Release OPT_INFO.  */
2213
2214 static void
2215 free_opt_info (struct opt_info *opt_info)
2216 {
2217   if (opt_info->insns_to_split)
2218     htab_delete (opt_info->insns_to_split);
2219   if (opt_info->insns_with_var_to_expand)
2220     {
2221       htab_traverse (opt_info->insns_with_var_to_expand,
2222                      release_var_copies, NULL);
2223       htab_delete (opt_info->insns_with_var_to_expand);
2224     }
2225   free (opt_info);
2226 }