gcc/loop-unroll.c

   1 /* Loop unrolling and peeling.
   2    Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 2, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  19 02111-1307, USA.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "rtl.h"
  26 #include "hard-reg-set.h"
  27 #include "obstack.h"
  28 #include "basic-block.h"
  29 #include "cfgloop.h"
  30 #include "cfglayout.h"
  31 #include "params.h"
  32 #include "output.h"
  33 #include "expr.h"
  34 #include "hashtab.h"
  35 #include "recog.h"
  36 #include "varray.h"
  37
  38 /* This pass performs loop unrolling and peeling.  We only perform these
  39    optimizations on innermost loops (with single exception) because
  40    the impact on performance is greatest here, and we want to avoid
  41    unnecessary code size growth.  The gain is caused by greater sequentiality
  42    of code, better code to optimize for further passes and in some cases
  43    by fewer testings of exit conditions.  The main problem is code growth,
  44    that impacts performance negatively due to effect of caches.
  45
  46    What we do:
  47
  48    -- complete peeling of once-rolling loops; this is the above mentioned
  49       exception, as this causes loop to be cancelled completely and
  50       does not cause code growth
  51    -- complete peeling of loops that roll (small) constant times.
  52    -- simple peeling of first iterations of loops that do not roll much
  53       (according to profile feedback)
  54    -- unrolling of loops that roll constant times; this is almost always
  55       win, as we get rid of exit condition tests.
  56    -- unrolling of loops that roll number of times that we can compute
  57       in runtime; we also get rid of exit condition tests here, but there
  58       is the extra expense for calculating the number of iterations
  59    -- simple unrolling of remaining loops; this is performed only if we
  60       are asked to, as the gain is questionable in this case and often
  61       it may even slow down the code
  62    For more detailed descriptions of each of those, see comments at
  63    appropriate function below.
  64
  65    There is a lot of parameters (defined and described in params.def) that
  66    control how much we unroll/peel.
  67
  68    ??? A great problem is that we don't have a good way how to determine
  69    how many times we should unroll the loop; the experiments I have made
  70    showed that this choice may affect performance in order of several %.
  71    */
  72
  73 /* Information about induction variables to split.  */
  74
  75 struct iv_to_split
  76 {
  77   rtx insn;             /* The insn in that the induction variable occurs.  */
  78   rtx base_var;         /* The variable on that the values in the further
  79                            iterations are based.  */
  80   rtx step;             /* Step of the induction variable.  */
  81   unsigned n_loc;
  82   unsigned loc[3];      /* Location where the definition of the induction
  83                            variable occurs in the insn.  For example if
  84                            N_LOC is 2, the expression is located at
  85                            XEXP (XEXP (single_set, loc[0]), loc[1]).  */
  86 };
  87
  88 DEF_VEC_P(rtx);
  89 DEF_VEC_ALLOC_P(rtx,heap);
  90
  91 /* Information about accumulators to expand.  */
  92
  93 struct var_to_expand
  94 {
  95   rtx insn;                        /* The insn in that the variable expansion occurs.  */
  96   rtx reg;                         /* The accumulator which is expanded.  */
  97   VEC(rtx,heap) *var_expansions;   /* The copies of the accumulator which is expanded.  */
  98   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
  99                                       or multiplication.  */
 100   int expansion_count;             /* Count the number of expansions generated so far.  */
 101   int reuse_expansion;             /* The expansion we intend to reuse to expand
 102                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
 103                                       the original accumulator.  Else use
 104                                       var_expansions[REUSE_EXPANSION - 1].  */
 105 };
 106
 107 /* Information about optimization applied in
 108    the unrolled loop.  */
 109
 110 struct opt_info
 111 {
 112   htab_t insns_to_split;           /* A hashtable of insns to split.  */
 113   htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
 114                                       to expand.  */
 115   unsigned first_new_block;        /* The first basic block that was
 116                                       duplicated.  */
 117   basic_block loop_exit;           /* The loop exit basic block.  */
 118   basic_block loop_preheader;      /* The loop preheader basic block.  */
 119 };
 120
 121 static void decide_unrolling_and_peeling (struct loops *, int);
 122 static void peel_loops_completely (struct loops *, int);
 123 static void decide_peel_simple (struct loop *, int);
 124 static void decide_peel_once_rolling (struct loop *, int);
 125 static void decide_peel_completely (struct loop *, int);
 126 static void decide_unroll_stupid (struct loop *, int);
 127 static void decide_unroll_constant_iterations (struct loop *, int);
 128 static void decide_unroll_runtime_iterations (struct loop *, int);
 129 static void peel_loop_simple (struct loops *, struct loop *);
 130 static void peel_loop_completely (struct loops *, struct loop *);
 131 static void unroll_loop_stupid (struct loops *, struct loop *);
 132 static void unroll_loop_constant_iterations (struct loops *, struct loop *);
 133 static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
 134 static struct opt_info *analyze_insns_in_loop (struct loop *);
 135 static void opt_info_start_duplication (struct opt_info *);
 136 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
 137 static void free_opt_info (struct opt_info *);
 138 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
 139 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
 140 static struct iv_to_split *analyze_iv_to_split_insn (rtx);
 141 static void expand_var_during_unrolling (struct var_to_expand *, rtx);
 142 static int insert_var_expansion_initialization (void **, void *);
 143 static int combine_var_copies_in_loop_exit (void **, void *);
 144 static int release_var_copies (void **, void *);
 145 static rtx get_expansion (struct var_to_expand *);
 146
 147 /* Unroll and/or peel (depending on FLAGS) LOOPS.  */
 148 void
 149 unroll_and_peel_loops (struct loops *loops, int flags)
 150 {
 151   struct loop *loop, *next;
 152   bool check;
 153
 154   /* First perform complete loop peeling (it is almost surely a win,
 155      and affects parameters for further decision a lot).  */
 156   peel_loops_completely (loops, flags);
 157
 158   /* Now decide rest of unrolling and peeling.  */
 159   decide_unrolling_and_peeling (loops, flags);
 160
 161   loop = loops->tree_root;
 162   while (loop->inner)
 163     loop = loop->inner;
 164
 165   /* Scan the loops, inner ones first.  */
 166   while (loop != loops->tree_root)
 167     {
 168       if (loop->next)
 169         {
 170           next = loop->next;
 171           while (next->inner)
 172             next = next->inner;
 173         }
 174       else
 175         next = loop->outer;
 176
 177       check = true;
 178       /* And perform the appropriate transformations.  */
 179       switch (loop->lpt_decision.decision)
 180         {
 181         case LPT_PEEL_COMPLETELY:
 182           /* Already done.  */
 183           gcc_unreachable ();
 184         case LPT_PEEL_SIMPLE:
 185           peel_loop_simple (loops, loop);
 186           break;
 187         case LPT_UNROLL_CONSTANT:
 188           unroll_loop_constant_iterations (loops, loop);
 189           break;
 190         case LPT_UNROLL_RUNTIME:
 191           unroll_loop_runtime_iterations (loops, loop);
 192           break;
 193         case LPT_UNROLL_STUPID:
 194           unroll_loop_stupid (loops, loop);
 195           break;
 196         case LPT_NONE:
 197           check = false;
 198           break;
 199         default:
 200           gcc_unreachable ();
 201         }
 202       if (check)
 203         {
 204 #ifdef ENABLE_CHECKING
 205           verify_dominators (CDI_DOMINATORS);
 206           verify_loop_structure (loops);
 207 #endif
 208         }
 209       loop = next;
 210     }
 211
 212   iv_analysis_done ();
 213 }
 214
 215 /* Check whether exit of the LOOP is at the end of loop body.  */
 216
 217 static bool
 218 loop_exit_at_end_p (struct loop *loop)
 219 {
 220   struct niter_desc *desc = get_simple_loop_desc (loop);
 221   rtx insn;
 222
 223   if (desc->in_edge->dest != loop->latch)
 224     return false;
 225
 226   /* Check that the latch is empty.  */
 227   FOR_BB_INSNS (loop->latch, insn)
 228     {
 229       if (INSN_P (insn))
 230         return false;
 231     }
 232
 233   return true;
 234 }
 235
 236 /* Check whether to peel LOOPS (depending on FLAGS) completely and do so.  */
 237 static void
 238 peel_loops_completely (struct loops *loops, int flags)
 239 {
 240   struct loop *loop, *next;
 241
 242   loop = loops->tree_root;
 243   while (loop->inner)
 244     loop = loop->inner;
 245
 246   while (loop != loops->tree_root)
 247     {
 248       if (loop->next)
 249         {
 250           next = loop->next;
 251           while (next->inner)
 252             next = next->inner;
 253         }
 254       else
 255         next = loop->outer;
 256
 257       loop->lpt_decision.decision = LPT_NONE;
 258
 259       if (dump_file)
 260         fprintf (dump_file,
 261                  "\n;; *** Considering loop %d for complete peeling ***\n",
 262                  loop->num);
 263
 264       loop->ninsns = num_loop_insns (loop);
 265
 266       decide_peel_once_rolling (loop, flags);
 267       if (loop->lpt_decision.decision == LPT_NONE)
 268         decide_peel_completely (loop, flags);
 269
 270       if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
 271         {
 272           peel_loop_completely (loops, loop);
 273 #ifdef ENABLE_CHECKING
 274           verify_dominators (CDI_DOMINATORS);
 275           verify_loop_structure (loops);
 276 #endif
 277         }
 278       loop = next;
 279     }
 280 }
 281
 282 /* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much.  */
 283 static void
 284 decide_unrolling_and_peeling (struct loops *loops, int flags)
 285 {
 286   struct loop *loop = loops->tree_root, *next;
 287
 288   while (loop->inner)
 289     loop = loop->inner;
 290
 291   /* Scan the loops, inner ones first.  */
 292   while (loop != loops->tree_root)
 293     {
 294       if (loop->next)
 295         {
 296           next = loop->next;
 297           while (next->inner)
 298             next = next->inner;
 299         }
 300       else
 301         next = loop->outer;
 302
 303       loop->lpt_decision.decision = LPT_NONE;
 304
 305       if (dump_file)
 306         fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
 307
 308       /* Do not peel cold areas.  */
 309       if (!maybe_hot_bb_p (loop->header))
 310         {
 311           if (dump_file)
 312             fprintf (dump_file, ";; Not considering loop, cold area\n");
 313           loop = next;
 314           continue;
 315         }
 316
 317       /* Can the loop be manipulated?  */
 318       if (!can_duplicate_loop_p (loop))
 319         {
 320           if (dump_file)
 321             fprintf (dump_file,
 322                      ";; Not considering loop, cannot duplicate\n");
 323           loop = next;
 324           continue;
 325         }
 326
 327       /* Skip non-innermost loops.  */
 328       if (loop->inner)
 329         {
 330           if (dump_file)
 331             fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 332           loop = next;
 333           continue;
 334         }
 335
 336       loop->ninsns = num_loop_insns (loop);
 337       loop->av_ninsns = average_num_loop_insns (loop);
 338
 339       /* Try transformations one by one in decreasing order of
 340          priority.  */
 341
 342       decide_unroll_constant_iterations (loop, flags);
 343       if (loop->lpt_decision.decision == LPT_NONE)
 344         decide_unroll_runtime_iterations (loop, flags);
 345       if (loop->lpt_decision.decision == LPT_NONE)
 346         decide_unroll_stupid (loop, flags);
 347       if (loop->lpt_decision.decision == LPT_NONE)
 348         decide_peel_simple (loop, flags);
 349
 350       loop = next;
 351     }
 352 }
 353
 354 /* Decide whether the LOOP is once rolling and suitable for complete
 355    peeling.  */
 356 static void
 357 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 358 {
 359   struct niter_desc *desc;
 360
 361   if (dump_file)
 362     fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
 363
 364   /* Is the loop small enough?  */
 365   if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
 366     {
 367       if (dump_file)
 368         fprintf (dump_file, ";; Not considering loop, is too big\n");
 369       return;
 370     }
 371
 372   /* Check for simple loops.  */
 373   desc = get_simple_loop_desc (loop);
 374
 375   /* Check number of iterations.  */
 376   if (!desc->simple_p
 377       || desc->assumptions
 378       || !desc->const_iter
 379       || desc->niter != 0)
 380     {
 381       if (dump_file)
 382         fprintf (dump_file,
 383                  ";; Unable to prove that the loop rolls exactly once\n");
 384       return;
 385     }
 386
 387   /* Success.  */
 388   if (dump_file)
 389     fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
 390   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 391 }
 392
 393 /* Decide whether the LOOP is suitable for complete peeling.  */
 394 static void
 395 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 396 {
 397   unsigned npeel;
 398   struct niter_desc *desc;
 399
 400   if (dump_file)
 401     fprintf (dump_file, "\n;; Considering peeling completely\n");
 402
 403   /* Skip non-innermost loops.  */
 404   if (loop->inner)
 405     {
 406       if (dump_file)
 407         fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 408       return;
 409     }
 410
 411   /* Do not peel cold areas.  */
 412   if (!maybe_hot_bb_p (loop->header))
 413     {
 414       if (dump_file)
 415         fprintf (dump_file, ";; Not considering loop, cold area\n");
 416       return;
 417     }
 418
 419   /* Can the loop be manipulated?  */
 420   if (!can_duplicate_loop_p (loop))
 421     {
 422       if (dump_file)
 423         fprintf (dump_file,
 424                  ";; Not considering loop, cannot duplicate\n");
 425       return;
 426     }
 427
 428   /* npeel = number of iterations to peel.  */
 429   npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
 430   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
 431     npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
 432
 433   /* Is the loop small enough?  */
 434   if (!npeel)
 435     {
 436       if (dump_file)
 437         fprintf (dump_file, ";; Not considering loop, is too big\n");
 438       return;
 439     }
 440
 441   /* Check for simple loops.  */
 442   desc = get_simple_loop_desc (loop);
 443
 444   /* Check number of iterations.  */
 445   if (!desc->simple_p
 446       || desc->assumptions
 447       || !desc->const_iter)
 448     {
 449       if (dump_file)
 450         fprintf (dump_file,
 451                  ";; Unable to prove that the loop iterates constant times\n");
 452       return;
 453     }
 454
 455   if (desc->niter > npeel - 1)
 456     {
 457       if (dump_file)
 458         {
 459           fprintf (dump_file,
 460                    ";; Not peeling loop completely, rolls too much (");
 461           fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
 462           fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
 463         }
 464       return;
 465     }
 466
 467   /* Success.  */
 468   if (dump_file)
 469     fprintf (dump_file, ";; Decided to peel loop completely\n");
 470   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 471 }
 472
 473 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
 474    completely.  The transformation done:
 475
 476    for (i = 0; i < 4; i++)
 477      body;
 478
 479    ==>
 480
 481    i = 0;
 482    body; i++;
 483    body; i++;
 484    body; i++;
 485    body; i++;
 486    */
 487 static void
 488 peel_loop_completely (struct loops *loops, struct loop *loop)
 489 {
 490   sbitmap wont_exit;
 491   unsigned HOST_WIDE_INT npeel;
 492   unsigned n_remove_edges, i;
 493   edge *remove_edges, ein;
 494   struct niter_desc *desc = get_simple_loop_desc (loop);
 495   struct opt_info *opt_info = NULL;
 496
 497   npeel = desc->niter;
 498
 499   if (npeel)
 500     {
 501       bool ok;
 502
 503       wont_exit = sbitmap_alloc (npeel + 1);
 504       sbitmap_ones (wont_exit);
 505       RESET_BIT (wont_exit, 0);
 506       if (desc->noloop_assumptions)
 507         RESET_BIT (wont_exit, 1);
 508
 509       remove_edges = xcalloc (npeel, sizeof (edge));
 510       n_remove_edges = 0;
 511
 512       if (flag_split_ivs_in_unroller)
 513         opt_info = analyze_insns_in_loop (loop);
 514
 515       opt_info_start_duplication (opt_info);
 516       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 517                                           loops, npeel,
 518                                           wont_exit, desc->out_edge,
 519                                           remove_edges, &n_remove_edges,
 520                                           DLTHE_FLAG_UPDATE_FREQ);
 521       gcc_assert (ok);
 522
 523       free (wont_exit);
 524
 525       if (opt_info)
 526         {
 527           apply_opt_in_copies (opt_info, npeel, false, true);
 528           free_opt_info (opt_info);
 529         }
 530
 531       /* Remove the exit edges.  */
 532       for (i = 0; i < n_remove_edges; i++)
 533         remove_path (loops, remove_edges[i]);
 534       free (remove_edges);
 535     }
 536
 537   ein = desc->in_edge;
 538   free_simple_loop_desc (loop);
 539
 540   /* Now remove the unreachable part of the last iteration and cancel
 541      the loop.  */
 542   remove_path (loops, ein);
 543
 544   if (dump_file)
 545     fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
 546 }
 547
 548 /* Decide whether to unroll LOOP iterating constant number of times
 549    and how much.  */
 550
 551 static void
 552 decide_unroll_constant_iterations (struct loop *loop, int flags)
 553 {
 554   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
 555   struct niter_desc *desc;
 556
 557   if (!(flags & UAP_UNROLL))
 558     {
 559       /* We were not asked to, just return back silently.  */
 560       return;
 561     }
 562
 563   if (dump_file)
 564     fprintf (dump_file,
 565              "\n;; Considering unrolling loop with constant "
 566              "number of iterations\n");
 567
 568   /* nunroll = total number of copies of the original loop body in
 569      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 570   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 571   nunroll_by_av
 572     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 573   if (nunroll > nunroll_by_av)
 574     nunroll = nunroll_by_av;
 575   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 576     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 577
 578   /* Skip big loops.  */
 579   if (nunroll <= 1)
 580     {
 581       if (dump_file)
 582         fprintf (dump_file, ";; Not considering loop, is too big\n");
 583       return;
 584     }
 585
 586   /* Check for simple loops.  */
 587   desc = get_simple_loop_desc (loop);
 588
 589   /* Check number of iterations.  */
 590   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
 591     {
 592       if (dump_file)
 593         fprintf (dump_file,
 594                  ";; Unable to prove that the loop iterates constant times\n");
 595       return;
 596     }
 597
 598   /* Check whether the loop rolls enough to consider.  */
 599   if (desc->niter < 2 * nunroll)
 600     {
 601       if (dump_file)
 602         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 603       return;
 604     }
 605
 606   /* Success; now compute number of iterations to unroll.  We alter
 607      nunroll so that as few as possible copies of loop body are
 608      necessary, while still not decreasing the number of unrollings
 609      too much (at most by 1).  */
 610   best_copies = 2 * nunroll + 10;
 611
 612   i = 2 * nunroll + 2;
 613   if (i - 1 >= desc->niter)
 614     i = desc->niter - 2;
 615
 616   for (; i >= nunroll - 1; i--)
 617     {
 618       unsigned exit_mod = desc->niter % (i + 1);
 619
 620       if (!loop_exit_at_end_p (loop))
 621         n_copies = exit_mod + i + 1;
 622       else if (exit_mod != (unsigned) i
 623                || desc->noloop_assumptions != NULL_RTX)
 624         n_copies = exit_mod + i + 2;
 625       else
 626         n_copies = i + 1;
 627
 628       if (n_copies < best_copies)
 629         {
 630           best_copies = n_copies;
 631           best_unroll = i;
 632         }
 633     }
 634
 635   if (dump_file)
 636     fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
 637              best_unroll + 1, best_copies, nunroll);
 638
 639   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
 640   loop->lpt_decision.times = best_unroll;
 641
 642   if (dump_file)
 643     fprintf (dump_file,
 644              ";; Decided to unroll the constant times rolling loop, %d times.\n",
 645              loop->lpt_decision.times);
 646 }
 647
 648 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
 649    times.  The transformation does this:
 650
 651    for (i = 0; i < 102; i++)
 652      body;
 653
 654    ==>
 655
 656    i = 0;
 657    body; i++;
 658    body; i++;
 659    while (i < 102)
 660      {
 661        body; i++;
 662        body; i++;
 663        body; i++;
 664        body; i++;
 665      }
 666   */
 667 static void
 668 unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
 669 {
 670   unsigned HOST_WIDE_INT niter;
 671   unsigned exit_mod;
 672   sbitmap wont_exit;
 673   unsigned n_remove_edges, i;
 674   edge *remove_edges;
 675   unsigned max_unroll = loop->lpt_decision.times;
 676   struct niter_desc *desc = get_simple_loop_desc (loop);
 677   bool exit_at_end = loop_exit_at_end_p (loop);
 678   struct opt_info *opt_info = NULL;
 679   bool ok;
 680
 681   niter = desc->niter;
 682
 683   /* Should not get here (such loop should be peeled instead).  */
 684   gcc_assert (niter > max_unroll + 1);
 685
 686   exit_mod = niter % (max_unroll + 1);
 687
 688   wont_exit = sbitmap_alloc (max_unroll + 1);
 689   sbitmap_ones (wont_exit);
 690
 691   remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
 692   n_remove_edges = 0;
 693   if (flag_split_ivs_in_unroller
 694       || flag_variable_expansion_in_unroller)
 695     opt_info = analyze_insns_in_loop (loop);
 696
 697   if (!exit_at_end)
 698     {
 699       /* The exit is not at the end of the loop; leave exit test
 700          in the first copy, so that the loops that start with test
 701          of exit condition have continuous body after unrolling.  */
 702
 703       if (dump_file)
 704         fprintf (dump_file, ";; Condition on beginning of loop.\n");
 705
 706       /* Peel exit_mod iterations.  */
 707       RESET_BIT (wont_exit, 0);
 708       if (desc->noloop_assumptions)
 709         RESET_BIT (wont_exit, 1);
 710
 711       if (exit_mod)
 712         {
 713           opt_info_start_duplication (opt_info);
 714           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 715                                               loops, exit_mod,
 716                                               wont_exit, desc->out_edge,
 717                                               remove_edges, &n_remove_edges,
 718                                               DLTHE_FLAG_UPDATE_FREQ);
 719           gcc_assert (ok);
 720
 721           if (opt_info && exit_mod > 1)
 722             apply_opt_in_copies (opt_info, exit_mod, false, false);
 723
 724           desc->noloop_assumptions = NULL_RTX;
 725           desc->niter -= exit_mod;
 726           desc->niter_max -= exit_mod;
 727         }
 728
 729       SET_BIT (wont_exit, 1);
 730     }
 731   else
 732     {
 733       /* Leave exit test in last copy, for the same reason as above if
 734          the loop tests the condition at the end of loop body.  */
 735
 736       if (dump_file)
 737         fprintf (dump_file, ";; Condition on end of loop.\n");
 738
 739       /* We know that niter >= max_unroll + 2; so we do not need to care of
 740          case when we would exit before reaching the loop.  So just peel
 741          exit_mod + 1 iterations.  */
 742       if (exit_mod != max_unroll
 743           || desc->noloop_assumptions)
 744         {
 745           RESET_BIT (wont_exit, 0);
 746           if (desc->noloop_assumptions)
 747             RESET_BIT (wont_exit, 1);
 748
 749           opt_info_start_duplication (opt_info);
 750           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 751                                               loops, exit_mod + 1,
 752                                               wont_exit, desc->out_edge,
 753                                               remove_edges, &n_remove_edges,
 754                                               DLTHE_FLAG_UPDATE_FREQ);
 755           gcc_assert (ok);
 756
 757           if (opt_info && exit_mod > 0)
 758             apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
 759
 760           desc->niter -= exit_mod + 1;
 761           desc->niter_max -= exit_mod + 1;
 762           desc->noloop_assumptions = NULL_RTX;
 763
 764           SET_BIT (wont_exit, 0);
 765           SET_BIT (wont_exit, 1);
 766         }
 767
 768       RESET_BIT (wont_exit, max_unroll);
 769     }
 770
 771   /* Now unroll the loop.  */
 772
 773   opt_info_start_duplication (opt_info);
 774   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 775                                       loops, max_unroll,
 776                                       wont_exit, desc->out_edge,
 777                                       remove_edges, &n_remove_edges,
 778                                       DLTHE_FLAG_UPDATE_FREQ);
 779   gcc_assert (ok);
 780
 781   if (opt_info)
 782     {
 783       apply_opt_in_copies (opt_info, max_unroll, true, true);
 784       free_opt_info (opt_info);
 785     }
 786
 787   free (wont_exit);
 788
 789   if (exit_at_end)
 790     {
 791       basic_block exit_block = desc->in_edge->src->rbi->copy;
 792       /* Find a new in and out edge; they are in the last copy we have made.  */
 793
 794       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
 795         {
 796           desc->out_edge = EDGE_SUCC (exit_block, 0);
 797           desc->in_edge = EDGE_SUCC (exit_block, 1);
 798         }
 799       else
 800         {
 801           desc->out_edge = EDGE_SUCC (exit_block, 1);
 802           desc->in_edge = EDGE_SUCC (exit_block, 0);
 803         }
 804     }
 805
 806   desc->niter /= max_unroll + 1;
 807   desc->niter_max /= max_unroll + 1;
 808   desc->niter_expr = GEN_INT (desc->niter);
 809
 810   /* Remove the edges.  */
 811   for (i = 0; i < n_remove_edges; i++)
 812     remove_path (loops, remove_edges[i]);
 813   free (remove_edges);
 814
 815   if (dump_file)
 816     fprintf (dump_file,
 817              ";; Unrolled loop %d times, constant # of iterations %i insns\n",
 818              max_unroll, num_loop_insns (loop));
 819 }
 820
 821 /* Decide whether to unroll LOOP iterating runtime computable number of times
 822    and how much.  */
 823 static void
 824 decide_unroll_runtime_iterations (struct loop *loop, int flags)
 825 {
 826   unsigned nunroll, nunroll_by_av, i;
 827   struct niter_desc *desc;
 828
 829   if (!(flags & UAP_UNROLL))
 830     {
 831       /* We were not asked to, just return back silently.  */
 832       return;
 833     }
 834
 835   if (dump_file)
 836     fprintf (dump_file,
 837              "\n;; Considering unrolling loop with runtime "
 838              "computable number of iterations\n");
 839
 840   /* nunroll = total number of copies of the original loop body in
 841      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 842   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 843   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 844   if (nunroll > nunroll_by_av)
 845     nunroll = nunroll_by_av;
 846   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 847     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 848
 849   /* Skip big loops.  */
 850   if (nunroll <= 1)
 851     {
 852       if (dump_file)
 853         fprintf (dump_file, ";; Not considering loop, is too big\n");
 854       return;
 855     }
 856
 857   /* Check for simple loops.  */
 858   desc = get_simple_loop_desc (loop);
 859
 860   /* Check simpleness.  */
 861   if (!desc->simple_p || desc->assumptions)
 862     {
 863       if (dump_file)
 864         fprintf (dump_file,
 865                  ";; Unable to prove that the number of iterations "
 866                  "can be counted in runtime\n");
 867       return;
 868     }
 869
 870   if (desc->const_iter)
 871     {
 872       if (dump_file)
 873         fprintf (dump_file, ";; Loop iterates constant times\n");
 874       return;
 875     }
 876
 877   /* If we have profile feedback, check whether the loop rolls.  */
 878   if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
 879     {
 880       if (dump_file)
 881         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 882       return;
 883     }
 884
 885   /* Success; now force nunroll to be power of 2, as we are unable to
 886      cope with overflows in computation of number of iterations.  */
 887   for (i = 1; 2 * i <= nunroll; i *= 2)
 888     continue;
 889
 890   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
 891   loop->lpt_decision.times = i - 1;
 892
 893   if (dump_file)
 894     fprintf (dump_file,
 895              ";; Decided to unroll the runtime computable "
 896              "times rolling loop, %d times.\n",
 897              loop->lpt_decision.times);
 898 }
 899
 900 /* Unroll LOOP for that we are able to count number of iterations in runtime
 901    LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this (with some
 902    extra care for case n < 0):
 903
 904    for (i = 0; i < n; i++)
 905      body;
 906
 907    ==>
 908
 909    i = 0;
 910    mod = n % 4;
 911
 912    switch (mod)
 913      {
 914        case 3:
 915          body; i++;
 916        case 2:
 917          body; i++;
 918        case 1:
 919          body; i++;
 920        case 0: ;
 921      }
 922
 923    while (i < n)
 924      {
 925        body; i++;
 926        body; i++;
 927        body; i++;
 928        body; i++;
 929      }
 930    */
 931 static void
 932 unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
 933 {
 934   rtx old_niter, niter, init_code, branch_code, tmp;
 935   unsigned i, j, p;
 936   basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
 937   unsigned n_dom_bbs;
 938   sbitmap wont_exit;
 939   int may_exit_copy;
 940   unsigned n_peel, n_remove_edges;
 941   edge *remove_edges, e;
 942   bool extra_zero_check, last_may_exit;
 943   unsigned max_unroll = loop->lpt_decision.times;
 944   struct niter_desc *desc = get_simple_loop_desc (loop);
 945   bool exit_at_end = loop_exit_at_end_p (loop);
 946   struct opt_info *opt_info = NULL;
 947   bool ok;
 948
 949   if (flag_split_ivs_in_unroller
 950       || flag_variable_expansion_in_unroller)
 951     opt_info = analyze_insns_in_loop (loop);
 952
 953   /* Remember blocks whose dominators will have to be updated.  */
 954   dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
 955   n_dom_bbs = 0;
 956
 957   body = get_loop_body (loop);
 958   for (i = 0; i < loop->num_nodes; i++)
 959     {
 960       unsigned nldom;
 961       basic_block *ldom;
 962
 963       nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
 964       for (j = 0; j < nldom; j++)
 965         if (!flow_bb_inside_loop_p (loop, ldom[j]))
 966           dom_bbs[n_dom_bbs++] = ldom[j];
 967
 968       free (ldom);
 969     }
 970   free (body);
 971
 972   if (!exit_at_end)
 973     {
 974       /* Leave exit in first copy (for explanation why see comment in
 975          unroll_loop_constant_iterations).  */
 976       may_exit_copy = 0;
 977       n_peel = max_unroll - 1;
 978       extra_zero_check = true;
 979       last_may_exit = false;
 980     }
 981   else
 982     {
 983       /* Leave exit in last copy (for explanation why see comment in
 984          unroll_loop_constant_iterations).  */
 985       may_exit_copy = max_unroll;
 986       n_peel = max_unroll;
 987       extra_zero_check = false;
 988       last_may_exit = true;
 989     }
 990
 991   /* Get expression for number of iterations.  */
 992   start_sequence ();
 993   old_niter = niter = gen_reg_rtx (desc->mode);
 994   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
 995   if (tmp != niter)
 996     emit_move_insn (niter, tmp);
 997
 998   /* Count modulo by ANDing it with max_unroll; we use the fact that
 999      the number of unrollings is a power of two, and thus this is correct
1000      even if there is overflow in the computation.  */
1001   niter = expand_simple_binop (desc->mode, AND,
1002                                niter,
1003                                GEN_INT (max_unroll),
1004                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
1005
1006   init_code = get_insns ();
1007   end_sequence ();
1008
1009   /* Precondition the loop.  */
1010   loop_split_edge_with (loop_preheader_edge (loop), init_code);
1011
1012   remove_edges = xcalloc (max_unroll + n_peel + 1, sizeof (edge));
1013   n_remove_edges = 0;
1014
1015   wont_exit = sbitmap_alloc (max_unroll + 2);
1016
1017   /* Peel the first copy of loop body (almost always we must leave exit test
1018      here; the only exception is when we have extra zero check and the number
1019      of iterations is reliable.  Also record the place of (possible) extra
1020      zero check.  */
1021   sbitmap_zero (wont_exit);
1022   if (extra_zero_check
1023       && !desc->noloop_assumptions)
1024     SET_BIT (wont_exit, 1);
1025   ezc_swtch = loop_preheader_edge (loop)->src;
1026   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1027                                       loops, 1,
1028                                       wont_exit, desc->out_edge,
1029                                       remove_edges, &n_remove_edges,
1030                                       DLTHE_FLAG_UPDATE_FREQ);
1031   gcc_assert (ok);
1032
1033   /* Record the place where switch will be built for preconditioning.  */
1034   swtch = loop_split_edge_with (loop_preheader_edge (loop),
1035                                 NULL_RTX);
1036
1037   for (i = 0; i < n_peel; i++)
1038     {
1039       /* Peel the copy.  */
1040       sbitmap_zero (wont_exit);
1041       if (i != n_peel - 1 || !last_may_exit)
1042         SET_BIT (wont_exit, 1);
1043       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1044                                           loops, 1,
1045                                           wont_exit, desc->out_edge,
1046                                           remove_edges, &n_remove_edges,
1047                                           DLTHE_FLAG_UPDATE_FREQ);
1048       gcc_assert (ok);
1049
1050       /* Create item for switch.  */
1051       j = n_peel - i - (extra_zero_check ? 0 : 1);
1052       p = REG_BR_PROB_BASE / (i + 2);
1053
1054       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1055       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1056                                           block_label (preheader), p,
1057                                           NULL_RTX);
1058
1059       swtch = loop_split_edge_with (single_pred_edge (swtch), branch_code);
1060       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1061       single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1062       e = make_edge (swtch, preheader,
1063                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1064       e->probability = p;
1065     }
1066
1067   if (extra_zero_check)
1068     {
1069       /* Add branch for zero iterations.  */
1070       p = REG_BR_PROB_BASE / (max_unroll + 1);
1071       swtch = ezc_swtch;
1072       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1073       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1074                                           block_label (preheader), p,
1075                                           NULL_RTX);
1076
1077       swtch = loop_split_edge_with (single_succ_edge (swtch), branch_code);
1078       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1079       single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1080       e = make_edge (swtch, preheader,
1081                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1082       e->probability = p;
1083     }
1084
1085   /* Recount dominators for outer blocks.  */
1086   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
1087
1088   /* And unroll loop.  */
1089
1090   sbitmap_ones (wont_exit);
1091   RESET_BIT (wont_exit, may_exit_copy);
1092   opt_info_start_duplication (opt_info);
1093
1094   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1095                                       loops, max_unroll,
1096                                       wont_exit, desc->out_edge,
1097                                       remove_edges, &n_remove_edges,
1098                                       DLTHE_FLAG_UPDATE_FREQ);
1099   gcc_assert (ok);
1100
1101   if (opt_info)
1102     {
1103       apply_opt_in_copies (opt_info, max_unroll, true, true);
1104       free_opt_info (opt_info);
1105     }
1106
1107   free (wont_exit);
1108
1109   if (exit_at_end)
1110     {
1111       basic_block exit_block = desc->in_edge->src->rbi->copy;
1112       /* Find a new in and out edge; they are in the last copy we have
1113          made.  */
1114
1115       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1116         {
1117           desc->out_edge = EDGE_SUCC (exit_block, 0);
1118           desc->in_edge = EDGE_SUCC (exit_block, 1);
1119         }
1120       else
1121         {
1122           desc->out_edge = EDGE_SUCC (exit_block, 1);
1123           desc->in_edge = EDGE_SUCC (exit_block, 0);
1124         }
1125     }
1126
1127   /* Remove the edges.  */
1128   for (i = 0; i < n_remove_edges; i++)
1129     remove_path (loops, remove_edges[i]);
1130   free (remove_edges);
1131
1132   /* We must be careful when updating the number of iterations due to
1133      preconditioning and the fact that the value must be valid at entry
1134      of the loop.  After passing through the above code, we see that
1135      the correct new number of iterations is this:  */
1136   gcc_assert (!desc->const_iter);
1137   desc->niter_expr =
1138     simplify_gen_binary (UDIV, desc->mode, old_niter,
1139                          GEN_INT (max_unroll + 1));
1140   desc->niter_max /= max_unroll + 1;
1141   if (exit_at_end)
1142     {
1143       desc->niter_expr =
1144         simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1145       desc->noloop_assumptions = NULL_RTX;
1146       desc->niter_max--;
1147     }
1148
1149   if (dump_file)
1150     fprintf (dump_file,
1151              ";; Unrolled loop %d times, counting # of iterations "
1152              "in runtime, %i insns\n",
1153              max_unroll, num_loop_insns (loop));
1154 }
1155
1156 /* Decide whether to simply peel LOOP and how much.  */
1157 static void
1158 decide_peel_simple (struct loop *loop, int flags)
1159 {
1160   unsigned npeel;
1161   struct niter_desc *desc;
1162
1163   if (!(flags & UAP_PEEL))
1164     {
1165       /* We were not asked to, just return back silently.  */
1166       return;
1167     }
1168
1169   if (dump_file)
1170     fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1171
1172   /* npeel = number of iterations to peel.  */
1173   npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1174   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1175     npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1176
1177   /* Skip big loops.  */
1178   if (!npeel)
1179     {
1180       if (dump_file)
1181         fprintf (dump_file, ";; Not considering loop, is too big\n");
1182       return;
1183     }
1184
1185   /* Check for simple loops.  */
1186   desc = get_simple_loop_desc (loop);
1187
1188   /* Check number of iterations.  */
1189   if (desc->simple_p && !desc->assumptions && desc->const_iter)
1190     {
1191       if (dump_file)
1192         fprintf (dump_file, ";; Loop iterates constant times\n");
1193       return;
1194     }
1195
1196   /* Do not simply peel loops with branches inside -- it increases number
1197      of mispredicts.  */
1198   if (num_loop_branches (loop) > 1)
1199     {
1200       if (dump_file)
1201         fprintf (dump_file, ";; Not peeling, contains branches\n");
1202       return;
1203     }
1204
1205   if (loop->header->count)
1206     {
1207       unsigned niter = expected_loop_iterations (loop);
1208       if (niter + 1 > npeel)
1209         {
1210           if (dump_file)
1211             {
1212               fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1213               fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1214                        (HOST_WIDEST_INT) (niter + 1));
1215               fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1216                        npeel);
1217             }
1218           return;
1219         }
1220       npeel = niter + 1;
1221     }
1222   else
1223     {
1224       /* For now we have no good heuristics to decide whether loop peeling
1225          will be effective, so disable it.  */
1226       if (dump_file)
1227         fprintf (dump_file,
1228                  ";; Not peeling loop, no evidence it will be profitable\n");
1229       return;
1230     }
1231
1232   /* Success.  */
1233   loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1234   loop->lpt_decision.times = npeel;
1235
1236   if (dump_file)
1237     fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1238              loop->lpt_decision.times);
1239 }
1240
1241 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1242    while (cond)
1243      body;
1244
1245    ==>
1246
1247    if (!cond) goto end;
1248    body;
1249    if (!cond) goto end;
1250    body;
1251    while (cond)
1252      body;
1253    end: ;
1254    */
1255 static void
1256 peel_loop_simple (struct loops *loops, struct loop *loop)
1257 {
1258   sbitmap wont_exit;
1259   unsigned npeel = loop->lpt_decision.times;
1260   struct niter_desc *desc = get_simple_loop_desc (loop);
1261   struct opt_info *opt_info = NULL;
1262   bool ok;
1263
1264   if (flag_split_ivs_in_unroller && npeel > 1)
1265     opt_info = analyze_insns_in_loop (loop);
1266
1267   wont_exit = sbitmap_alloc (npeel + 1);
1268   sbitmap_zero (wont_exit);
1269
1270   opt_info_start_duplication (opt_info);
1271
1272   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1273                                       loops, npeel, wont_exit,
1274                                       NULL, NULL,
1275                                       NULL, DLTHE_FLAG_UPDATE_FREQ);
1276   gcc_assert (ok);
1277
1278   free (wont_exit);
1279
1280   if (opt_info)
1281     {
1282       apply_opt_in_copies (opt_info, npeel, false, false);
1283       free_opt_info (opt_info);
1284     }
1285
1286   if (desc->simple_p)
1287     {
1288       if (desc->const_iter)
1289         {
1290           desc->niter -= npeel;
1291           desc->niter_expr = GEN_INT (desc->niter);
1292           desc->noloop_assumptions = NULL_RTX;
1293         }
1294       else
1295         {
1296           /* We cannot just update niter_expr, as its value might be clobbered
1297              inside loop.  We could handle this by counting the number into
1298              temporary just like we do in runtime unrolling, but it does not
1299              seem worthwhile.  */
1300           free_simple_loop_desc (loop);
1301         }
1302     }
1303   if (dump_file)
1304     fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1305 }
1306
1307 /* Decide whether to unroll LOOP stupidly and how much.  */
1308 static void
1309 decide_unroll_stupid (struct loop *loop, int flags)
1310 {
1311   unsigned nunroll, nunroll_by_av, i;
1312   struct niter_desc *desc;
1313
1314   if (!(flags & UAP_UNROLL_ALL))
1315     {
1316       /* We were not asked to, just return back silently.  */
1317       return;
1318     }
1319
1320   if (dump_file)
1321     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1322
1323   /* nunroll = total number of copies of the original loop body in
1324      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1325   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1326   nunroll_by_av
1327     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1328   if (nunroll > nunroll_by_av)
1329     nunroll = nunroll_by_av;
1330   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1331     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1332
1333   /* Skip big loops.  */
1334   if (nunroll <= 1)
1335     {
1336       if (dump_file)
1337         fprintf (dump_file, ";; Not considering loop, is too big\n");
1338       return;
1339     }
1340
1341   /* Check for simple loops.  */
1342   desc = get_simple_loop_desc (loop);
1343
1344   /* Check simpleness.  */
1345   if (desc->simple_p && !desc->assumptions)
1346     {
1347       if (dump_file)
1348         fprintf (dump_file, ";; The loop is simple\n");
1349       return;
1350     }
1351
1352   /* Do not unroll loops with branches inside -- it increases number
1353      of mispredicts.  */
1354   if (num_loop_branches (loop) > 1)
1355     {
1356       if (dump_file)
1357         fprintf (dump_file, ";; Not unrolling, contains branches\n");
1358       return;
1359     }
1360
1361   /* If we have profile feedback, check whether the loop rolls.  */
1362   if (loop->header->count
1363       && expected_loop_iterations (loop) < 2 * nunroll)
1364     {
1365       if (dump_file)
1366         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1367       return;
1368     }
1369
1370   /* Success.  Now force nunroll to be power of 2, as it seems that this
1371      improves results (partially because of better alignments, partially
1372      because of some dark magic).  */
1373   for (i = 1; 2 * i <= nunroll; i *= 2)
1374     continue;
1375
1376   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1377   loop->lpt_decision.times = i - 1;
1378
1379   if (dump_file)
1380     fprintf (dump_file,
1381              ";; Decided to unroll the loop stupidly, %d times.\n",
1382              loop->lpt_decision.times);
1383 }
1384
1385 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1386    while (cond)
1387      body;
1388
1389    ==>
1390
1391    while (cond)
1392      {
1393        body;
1394        if (!cond) break;
1395        body;
1396        if (!cond) break;
1397        body;
1398        if (!cond) break;
1399        body;
1400      }
1401    */
1402 static void
1403 unroll_loop_stupid (struct loops *loops, struct loop *loop)
1404 {
1405   sbitmap wont_exit;
1406   unsigned nunroll = loop->lpt_decision.times;
1407   struct niter_desc *desc = get_simple_loop_desc (loop);
1408   struct opt_info *opt_info = NULL;
1409   bool ok;
1410
1411   if (flag_split_ivs_in_unroller
1412       || flag_variable_expansion_in_unroller)
1413     opt_info = analyze_insns_in_loop (loop);
1414
1415
1416   wont_exit = sbitmap_alloc (nunroll + 1);
1417   sbitmap_zero (wont_exit);
1418   opt_info_start_duplication (opt_info);
1419
1420   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1421                                       loops, nunroll, wont_exit,
1422                                       NULL, NULL, NULL,
1423                                       DLTHE_FLAG_UPDATE_FREQ);
1424   gcc_assert (ok);
1425
1426   if (opt_info)
1427     {
1428       apply_opt_in_copies (opt_info, nunroll, true, true);
1429       free_opt_info (opt_info);
1430     }
1431
1432   free (wont_exit);
1433
1434   if (desc->simple_p)
1435     {
1436       /* We indeed may get here provided that there are nontrivial assumptions
1437          for a loop to be really simple.  We could update the counts, but the
1438          problem is that we are unable to decide which exit will be taken
1439          (not really true in case the number of iterations is constant,
1440          but noone will do anything with this information, so we do not
1441          worry about it).  */
1442       desc->simple_p = false;
1443     }
1444
1445   if (dump_file)
1446     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1447              nunroll, num_loop_insns (loop));
1448 }
1449
1450 /* A hash function for information about insns to split.  */
1451
1452 static hashval_t
1453 si_info_hash (const void *ivts)
1454 {
1455   return htab_hash_pointer (((struct iv_to_split *) ivts)->insn);
1456 }
1457
1458 /* An equality functions for information about insns to split.  */
1459
1460 static int
1461 si_info_eq (const void *ivts1, const void *ivts2)
1462 {
1463   const struct iv_to_split *i1 = ivts1;
1464   const struct iv_to_split *i2 = ivts2;
1465
1466   return i1->insn == i2->insn;
1467 }
1468
1469 /* Return a hash for VES, which is really a "var_to_expand *".  */
1470
1471 static hashval_t
1472 ve_info_hash (const void *ves)
1473 {
1474   return htab_hash_pointer (((struct var_to_expand *) ves)->insn);
1475 }
1476
1477 /* Return true if IVTS1 and IVTS2 (which are really both of type
1478    "var_to_expand *") refer to the same instruction.  */
1479
1480 static int
1481 ve_info_eq (const void *ivts1, const void *ivts2)
1482 {
1483   const struct var_to_expand *i1 = ivts1;
1484   const struct var_to_expand *i2 = ivts2;
1485
1486   return i1->insn == i2->insn;
1487 }
1488
1489 /* Returns true if REG is referenced in one insn in LOOP.  */
1490
1491 bool
1492 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
1493 {
1494   basic_block *body, bb;
1495   unsigned i;
1496   int count_ref = 0;
1497   rtx insn;
1498
1499   body = get_loop_body (loop);
1500   for (i = 0; i < loop->num_nodes; i++)
1501     {
1502       bb = body[i];
1503
1504       FOR_BB_INSNS (bb, insn)
1505       {
1506         if (rtx_referenced_p (reg, insn))
1507           count_ref++;
1508       }
1509     }
1510   return (count_ref  == 1);
1511 }
1512
1513 /* Determine whether INSN contains an accumulator
1514    which can be expanded into separate copies,
1515    one for each copy of the LOOP body.
1516
1517    for (i = 0 ; i < n; i++)
1518      sum += a[i];
1519
1520    ==>
1521
1522    sum += a[i]
1523    ....
1524    i = i+1;
1525    sum1 += a[i]
1526    ....
1527    i = i+1
1528    sum2 += a[i];
1529    ....
1530
1531    Return NULL if INSN contains no opportunity for expansion of accumulator.
1532    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1533    information and return a pointer to it.
1534 */
1535
1536 static struct var_to_expand *
1537 analyze_insn_to_expand_var (struct loop *loop, rtx insn)
1538 {
1539   rtx set, dest, src, op1;
1540   struct var_to_expand *ves;
1541   enum machine_mode mode1, mode2;
1542
1543   set = single_set (insn);
1544   if (!set)
1545     return NULL;
1546
1547   dest = SET_DEST (set);
1548   src = SET_SRC (set);
1549
1550   if (GET_CODE (src) != PLUS
1551       && GET_CODE (src) != MINUS
1552       && GET_CODE (src) != MULT)
1553     return NULL;
1554
1555   if (!XEXP (src, 0))
1556     return NULL;
1557
1558   op1 = XEXP (src, 0);
1559
1560   if (!REG_P (dest)
1561       && !(GET_CODE (dest) == SUBREG
1562            && REG_P (SUBREG_REG (dest))))
1563     return NULL;
1564
1565   if (!rtx_equal_p (dest, op1))
1566     return NULL;
1567
1568   if (!referenced_in_one_insn_in_loop_p (loop, dest))
1569     return NULL;
1570
1571   if (rtx_referenced_p (dest, XEXP (src, 1)))
1572     return NULL;
1573
1574   mode1 = GET_MODE (dest);
1575   mode2 = GET_MODE (XEXP (src, 1));
1576   if ((FLOAT_MODE_P (mode1)
1577        || FLOAT_MODE_P (mode2))
1578       && !flag_unsafe_math_optimizations)
1579     return NULL;
1580
1581   /* Record the accumulator to expand.  */
1582   ves = xmalloc (sizeof (struct var_to_expand));
1583   ves->insn = insn;
1584   ves->var_expansions = VEC_alloc (rtx, heap, 1);
1585   ves->reg = copy_rtx (dest);
1586   ves->op = GET_CODE (src);
1587   ves->expansion_count = 0;
1588   ves->reuse_expansion = 0;
1589   return ves;
1590 }
1591
1592 /* Determine whether there is an induction variable in INSN that
1593    we would like to split during unrolling.
1594
1595    I.e. replace
1596
1597    i = i + 1;
1598    ...
1599    i = i + 1;
1600    ...
1601    i = i + 1;
1602    ...
1603
1604    type chains by
1605
1606    i0 = i + 1
1607    ...
1608    i = i0 + 1
1609    ...
1610    i = i0 + 2
1611    ...
1612
1613    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1614    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1615    pointer to it.  */
1616
1617 static struct iv_to_split *
1618 analyze_iv_to_split_insn (rtx insn)
1619 {
1620   rtx set, dest;
1621   struct rtx_iv iv;
1622   struct iv_to_split *ivts;
1623   bool ok;
1624
1625   /* For now we just split the basic induction variables.  Later this may be
1626      extended for example by selecting also addresses of memory references.  */
1627   set = single_set (insn);
1628   if (!set)
1629     return NULL;
1630
1631   dest = SET_DEST (set);
1632   if (!REG_P (dest))
1633     return NULL;
1634
1635   if (!biv_p (insn, dest))
1636     return NULL;
1637
1638   ok = iv_analyze (insn, dest, &iv);
1639   gcc_assert (ok);
1640
1641   if (iv.step == const0_rtx
1642       || iv.mode != iv.extend_mode)
1643     return NULL;
1644
1645   /* Record the insn to split.  */
1646   ivts = xmalloc (sizeof (struct iv_to_split));
1647   ivts->insn = insn;
1648   ivts->base_var = NULL_RTX;
1649   ivts->step = iv.step;
1650   ivts->n_loc = 1;
1651   ivts->loc[0] = 1;
1652
1653   return ivts;
1654 }
1655
1656 /* Determines which of insns in LOOP can be optimized.
1657    Return a OPT_INFO struct with the relevant hash tables filled
1658    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1659    is undefined for the return value.  */
1660
1661 static struct opt_info *
1662 analyze_insns_in_loop (struct loop *loop)
1663 {
1664   basic_block *body, bb;
1665   unsigned i, num_edges = 0;
1666   struct opt_info *opt_info = xcalloc (1, sizeof (struct opt_info));
1667   rtx insn;
1668   struct iv_to_split *ivts = NULL;
1669   struct var_to_expand *ves = NULL;
1670   PTR *slot1;
1671   PTR *slot2;
1672   edge *edges = get_loop_exit_edges (loop, &num_edges);
1673   bool can_apply = false;
1674
1675   iv_analysis_loop_init (loop);
1676
1677   body = get_loop_body (loop);
1678
1679   if (flag_split_ivs_in_unroller)
1680     opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1681                                             si_info_hash, si_info_eq, free);
1682
1683   /* Record the loop exit bb and loop preheader before the unrolling.  */
1684   if (!loop_preheader_edge (loop)->src)
1685     {
1686       loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1687       opt_info->loop_preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1688     }
1689   else
1690     opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1691
1692   if (num_edges == 1
1693       && !(edges[0]->flags & EDGE_COMPLEX))
1694     {
1695       opt_info->loop_exit = loop_split_edge_with (edges[0], NULL_RTX);
1696       can_apply = true;
1697     }
1698
1699   if (flag_variable_expansion_in_unroller
1700       && can_apply)
1701     opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1702                                                       ve_info_hash, ve_info_eq, free);
1703
1704   for (i = 0; i < loop->num_nodes; i++)
1705     {
1706       bb = body[i];
1707       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1708         continue;
1709
1710       FOR_BB_INSNS (bb, insn)
1711       {
1712         if (!INSN_P (insn))
1713           continue;
1714
1715         if (opt_info->insns_to_split)
1716           ivts = analyze_iv_to_split_insn (insn);
1717
1718         if (ivts)
1719           {
1720             slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1721             *slot1 = ivts;
1722             continue;
1723           }
1724
1725         if (opt_info->insns_with_var_to_expand)
1726           ves = analyze_insn_to_expand_var (loop, insn);
1727
1728         if (ves)
1729           {
1730             slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1731             *slot2 = ves;
1732           }
1733       }
1734     }
1735
1736   free (edges);
1737   free (body);
1738   return opt_info;
1739 }
1740
1741 /* Called just before loop duplication.  Records start of duplicated area
1742    to OPT_INFO.  */
1743
1744 static void
1745 opt_info_start_duplication (struct opt_info *opt_info)
1746 {
1747   if (opt_info)
1748     opt_info->first_new_block = last_basic_block;
1749 }
1750
1751 /* Determine the number of iterations between initialization of the base
1752    variable and the current copy (N_COPY).  N_COPIES is the total number
1753    of newly created copies.  UNROLLING is true if we are unrolling
1754    (not peeling) the loop.  */
1755
1756 static unsigned
1757 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1758 {
1759   if (unrolling)
1760     {
1761       /* If we are unrolling, initialization is done in the original loop
1762          body (number 0).  */
1763       return n_copy;
1764     }
1765   else
1766     {
1767       /* If we are peeling, the copy in that the initialization occurs has
1768          number 1.  The original loop (number 0) is the last.  */
1769       if (n_copy)
1770         return n_copy - 1;
1771       else
1772         return n_copies;
1773     }
1774 }
1775
1776 /* Locate in EXPR the expression corresponding to the location recorded
1777    in IVTS, and return a pointer to the RTX for this location.  */
1778
1779 static rtx *
1780 get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1781 {
1782   unsigned i;
1783   rtx *ret = &expr;
1784
1785   for (i = 0; i < ivts->n_loc; i++)
1786     ret = &XEXP (*ret, ivts->loc[i]);
1787
1788   return ret;
1789 }
1790
1791 /* Allocate basic variable for the induction variable chain.  Callback for
1792    htab_traverse.  */
1793
1794 static int
1795 allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
1796 {
1797   struct iv_to_split *ivts = *slot;
1798   rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1799
1800   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1801
1802   return 1;
1803 }
1804
1805 /* Insert initialization of basic variable of IVTS before INSN, taking
1806    the initial value from INSN.  */
1807
1808 static void
1809 insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1810 {
1811   rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1812   rtx seq;
1813
1814   start_sequence ();
1815   expr = force_operand (expr, ivts->base_var);
1816   if (expr != ivts->base_var)
1817     emit_move_insn (ivts->base_var, expr);
1818   seq = get_insns ();
1819   end_sequence ();
1820
1821   emit_insn_before (seq, insn);
1822 }
1823
1824 /* Replace the use of induction variable described in IVTS in INSN
1825    by base variable + DELTA * step.  */
1826
1827 static void
1828 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1829 {
1830   rtx expr, *loc, seq, incr, var;
1831   enum machine_mode mode = GET_MODE (ivts->base_var);
1832   rtx src, dest, set;
1833
1834   /* Construct base + DELTA * step.  */
1835   if (!delta)
1836     expr = ivts->base_var;
1837   else
1838     {
1839       incr = simplify_gen_binary (MULT, mode,
1840                                   ivts->step, gen_int_mode (delta, mode));
1841       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1842                                   ivts->base_var, incr);
1843     }
1844
1845   /* Figure out where to do the replacement.  */
1846   loc = get_ivts_expr (single_set (insn), ivts);
1847
1848   /* If we can make the replacement right away, we're done.  */
1849   if (validate_change (insn, loc, expr, 0))
1850     return;
1851
1852   /* Otherwise, force EXPR into a register and try again.  */
1853   start_sequence ();
1854   var = gen_reg_rtx (mode);
1855   expr = force_operand (expr, var);
1856   if (expr != var)
1857     emit_move_insn (var, expr);
1858   seq = get_insns ();
1859   end_sequence ();
1860   emit_insn_before (seq, insn);
1861
1862   if (validate_change (insn, loc, var, 0))
1863     return;
1864
1865   /* The last chance.  Try recreating the assignment in insn
1866      completely from scratch.  */
1867   set = single_set (insn);
1868   gcc_assert (set);
1869
1870   start_sequence ();
1871   *loc = var;
1872   src = copy_rtx (SET_SRC (set));
1873   dest = copy_rtx (SET_DEST (set));
1874   src = force_operand (src, dest);
1875   if (src != dest)
1876     emit_move_insn (dest, src);
1877   seq = get_insns ();
1878   end_sequence ();
1879
1880   emit_insn_before (seq, insn);
1881   delete_insn (insn);
1882 }
1883
1884
1885 /* Return one expansion of the accumulator recorded in struct VE.  */
1886
1887 static rtx
1888 get_expansion (struct var_to_expand *ve)
1889 {
1890   rtx reg;
1891
1892   if (ve->reuse_expansion == 0)
1893     reg = ve->reg;
1894   else
1895     reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);
1896
1897   if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
1898     ve->reuse_expansion = 0;
1899   else
1900     ve->reuse_expansion++;
1901
1902   return reg;
1903 }
1904
1905
1906 /* Given INSN replace the uses of the accumulator recorded in VE
1907    with a new register.  */
1908
1909 static void
1910 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
1911 {
1912   rtx new_reg, set;
1913   bool really_new_expansion = false;
1914
1915   set = single_set (insn);
1916   gcc_assert (set);
1917
1918   /* Generate a new register only if the expansion limit has not been
1919      reached.  Else reuse an already existing expansion.  */
1920   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1921     {
1922       really_new_expansion = true;
1923       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1924     }
1925   else
1926     new_reg = get_expansion (ve);
1927
1928   validate_change (insn, &SET_DEST (set), new_reg, 1);
1929   validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
1930
1931   if (apply_change_group ())
1932     if (really_new_expansion)
1933       {
1934         VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
1935         ve->expansion_count++;
1936       }
1937 }
1938
1939 /* Initialize the variable expansions in loop preheader.
1940    Callbacks for htab_traverse.  PLACE_P is the loop-preheader
1941    basic block where the initialization of the expansions
1942    should take place.  */
1943
1944 static int
1945 insert_var_expansion_initialization (void **slot, void *place_p)
1946 {
1947   struct var_to_expand *ve = *slot;
1948   basic_block place = (basic_block)place_p;
1949   rtx seq, var, zero_init, insn;
1950   unsigned i;
1951
1952   if (VEC_length (rtx, ve->var_expansions) == 0)
1953     return 1;
1954
1955   start_sequence ();
1956   if (ve->op == PLUS || ve->op == MINUS)
1957     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
1958       {
1959         zero_init =  CONST0_RTX (GET_MODE (var));
1960         emit_move_insn (var, zero_init);
1961       }
1962   else if (ve->op == MULT)
1963     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
1964       {
1965         zero_init =  CONST1_RTX (GET_MODE (var));
1966         emit_move_insn (var, zero_init);
1967       }
1968
1969   seq = get_insns ();
1970   end_sequence ();
1971
1972   insn = BB_HEAD (place);
1973   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
1974     insn = NEXT_INSN (insn);
1975
1976   emit_insn_after (seq, insn);
1977   /* Continue traversing the hash table.  */
1978   return 1;
1979 }
1980
1981 /*  Combine the variable expansions at the loop exit.
1982     Callbacks for htab_traverse.  PLACE_P is the loop exit
1983     basic block where the summation of the expansions should
1984     take place.  */
1985
1986 static int
1987 combine_var_copies_in_loop_exit (void **slot, void *place_p)
1988 {
1989   struct var_to_expand *ve = *slot;
1990   basic_block place = (basic_block)place_p;
1991   rtx sum = ve->reg;
1992   rtx expr, seq, var, insn;
1993   unsigned i;
1994
1995   if (VEC_length (rtx, ve->var_expansions) == 0)
1996     return 1;
1997
1998   start_sequence ();
1999   if (ve->op == PLUS || ve->op == MINUS)
2000     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2001       {
2002         sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
2003                                    var, sum);
2004       }
2005   else if (ve->op == MULT)
2006     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2007       {
2008         sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
2009                                    var, sum);
2010       }
2011
2012   expr = force_operand (sum, ve->reg);
2013   if (expr != ve->reg)
2014     emit_move_insn (ve->reg, expr);
2015   seq = get_insns ();
2016   end_sequence ();
2017
2018   insn = BB_HEAD (place);
2019   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2020     insn = NEXT_INSN (insn);
2021
2022   emit_insn_after (seq, insn);
2023
2024   /* Continue traversing the hash table.  */
2025   return 1;
2026 }
2027
2028 /* Apply loop optimizations in loop copies using the
2029    data which gathered during the unrolling.  Structure
2030    OPT_INFO record that data.
2031
2032    UNROLLING is true if we unrolled (not peeled) the loop.
2033    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
2034    the loop (as it should happen in complete unrolling, but not in ordinary
2035    peeling of the loop).  */
2036
2037 static void
2038 apply_opt_in_copies (struct opt_info *opt_info,
2039                      unsigned n_copies, bool unrolling,
2040                      bool rewrite_original_loop)
2041 {
2042   unsigned i, delta;
2043   basic_block bb, orig_bb;
2044   rtx insn, orig_insn, next;
2045   struct iv_to_split ivts_templ, *ivts;
2046   struct var_to_expand ve_templ, *ves;
2047
2048   /* Sanity check -- we need to put initialization in the original loop
2049      body.  */
2050   gcc_assert (!unrolling || rewrite_original_loop);
2051
2052   /* Allocate the basic variables (i0).  */
2053   if (opt_info->insns_to_split)
2054     htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL);
2055
2056   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2057     {
2058       bb = BASIC_BLOCK (i);
2059       orig_bb = bb->rbi->original;
2060
2061       delta = determine_split_iv_delta (bb->rbi->copy_number, n_copies,
2062                                         unrolling);
2063       orig_insn = BB_HEAD (orig_bb);
2064       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2065         {
2066           next = NEXT_INSN (insn);
2067           if (!INSN_P (insn))
2068             continue;
2069
2070           while (!INSN_P (orig_insn))
2071             orig_insn = NEXT_INSN (orig_insn);
2072
2073           ivts_templ.insn = orig_insn;
2074           ve_templ.insn = orig_insn;
2075
2076           /* Apply splitting iv optimization.  */
2077           if (opt_info->insns_to_split)
2078             {
2079               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2080
2081               if (ivts)
2082                 {
2083 #ifdef ENABLE_CHECKING
2084                   gcc_assert (rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)));
2085 #endif
2086
2087                   if (!delta)
2088                     insert_base_initialization (ivts, insn);
2089                   split_iv (ivts, insn, delta);
2090                 }
2091             }
2092           /* Apply variable expansion optimization.  */
2093           if (unrolling && opt_info->insns_with_var_to_expand)
2094             {
2095               ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2096               if (ves)
2097                 {
2098 #ifdef ENABLE_CHECKING
2099                   gcc_assert (rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)));
2100 #endif
2101                   expand_var_during_unrolling (ves, insn);
2102                 }
2103             }
2104           orig_insn = NEXT_INSN (orig_insn);
2105         }
2106     }
2107
2108   if (!rewrite_original_loop)
2109     return;
2110
2111   /* Initialize the variable expansions in the loop preheader
2112      and take care of combining them at the loop exit.  */
2113   if (opt_info->insns_with_var_to_expand)
2114     {
2115       htab_traverse (opt_info->insns_with_var_to_expand,
2116                      insert_var_expansion_initialization,
2117                      opt_info->loop_preheader);
2118       htab_traverse (opt_info->insns_with_var_to_expand,
2119                      combine_var_copies_in_loop_exit,
2120                      opt_info->loop_exit);
2121     }
2122
2123   /* Rewrite also the original loop body.  Find them as originals of the blocks
2124      in the last copied iteration, i.e. those that have
2125      bb->rbi->original->copy == bb.  */
2126   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2127     {
2128       bb = BASIC_BLOCK (i);
2129       orig_bb = bb->rbi->original;
2130       if (orig_bb->rbi->copy != bb)
2131         continue;
2132
2133       delta = determine_split_iv_delta (0, n_copies, unrolling);
2134       for (orig_insn = BB_HEAD (orig_bb);
2135            orig_insn != NEXT_INSN (BB_END (bb));
2136            orig_insn = next)
2137         {
2138           next = NEXT_INSN (orig_insn);
2139
2140           if (!INSN_P (orig_insn))
2141             continue;
2142
2143           ivts_templ.insn = orig_insn;
2144           if (opt_info->insns_to_split)
2145             {
2146               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2147               if (ivts)
2148                 {
2149                   if (!delta)
2150                     insert_base_initialization (ivts, orig_insn);
2151                   split_iv (ivts, orig_insn, delta);
2152                   continue;
2153                 }
2154             }
2155
2156         }
2157     }
2158 }
2159
2160 /*  Release the data structures used for the variable expansion
2161     optimization.  Callbacks for htab_traverse.  */
2162
2163 static int
2164 release_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
2165 {
2166   struct var_to_expand *ve = *slot;
2167
2168   VEC_free (rtx, heap, ve->var_expansions);
2169
2170   /* Continue traversing the hash table.  */
2171   return 1;
2172 }
2173
2174 /* Release OPT_INFO.  */
2175
2176 static void
2177 free_opt_info (struct opt_info *opt_info)
2178 {
2179   if (opt_info->insns_to_split)
2180     htab_delete (opt_info->insns_to_split);
2181   if (opt_info->insns_with_var_to_expand)
2182     {
2183       htab_traverse (opt_info->insns_with_var_to_expand,
2184                      release_var_copies, NULL);
2185       htab_delete (opt_info->insns_with_var_to_expand);
2186     }
2187   free (opt_info);
2188 }