gcc/loop-unroll.c

   1 /* Loop unrolling.
   2    Copyright (C) 2002-2014 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "tm.h"
  24 #include "rtl.h"
  25 #include "tree.h"
  26 #include "hard-reg-set.h"
  27 #include "obstack.h"
  28 #include "profile.h"
  29 #include "basic-block.h"
  30 #include "cfgloop.h"
  31 #include "params.h"
  32 #include "expr.h"
  33 #include "hash-table.h"
  34 #include "recog.h"
  35 #include "target.h"
  36 #include "dumpfile.h"
  37
  38 /* This pass performs loop unrolling.  We only perform this
  39    optimization on innermost loops (with single exception) because
  40    the impact on performance is greatest here, and we want to avoid
  41    unnecessary code size growth.  The gain is caused by greater sequentiality
  42    of code, better code to optimize for further passes and in some cases
  43    by fewer testings of exit conditions.  The main problem is code growth,
  44    that impacts performance negatively due to effect of caches.
  45
  46    What we do:
  47
  48    -- unrolling of loops that roll constant times; this is almost always
  49       win, as we get rid of exit condition tests.
  50    -- unrolling of loops that roll number of times that we can compute
  51       in runtime; we also get rid of exit condition tests here, but there
  52       is the extra expense for calculating the number of iterations
  53    -- simple unrolling of remaining loops; this is performed only if we
  54       are asked to, as the gain is questionable in this case and often
  55       it may even slow down the code
  56    For more detailed descriptions of each of those, see comments at
  57    appropriate function below.
  58
  59    There is a lot of parameters (defined and described in params.def) that
  60    control how much we unroll.
  61
  62    ??? A great problem is that we don't have a good way how to determine
  63    how many times we should unroll the loop; the experiments I have made
  64    showed that this choice may affect performance in order of several %.
  65    */
  66
  67 /* Information about induction variables to split.  */
  68
  69 struct iv_to_split
  70 {
  71   rtx_insn *insn;       /* The insn in that the induction variable occurs.  */
  72   rtx orig_var;         /* The variable (register) for the IV before split.  */
  73   rtx base_var;         /* The variable on that the values in the further
  74                            iterations are based.  */
  75   rtx step;             /* Step of the induction variable.  */
  76   struct iv_to_split *next; /* Next entry in walking order.  */
  77 };
  78
  79 /* Information about accumulators to expand.  */
  80
  81 struct var_to_expand
  82 {
  83   rtx_insn *insn;                  /* The insn in that the variable expansion occurs.  */
  84   rtx reg;                         /* The accumulator which is expanded.  */
  85   vec<rtx> var_expansions;   /* The copies of the accumulator which is expanded.  */
  86   struct var_to_expand *next;      /* Next entry in walking order.  */
  87   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
  88                                       or multiplication.  */
  89   int expansion_count;             /* Count the number of expansions generated so far.  */
  90   int reuse_expansion;             /* The expansion we intend to reuse to expand
  91                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
  92                                       the original accumulator.  Else use
  93                                       var_expansions[REUSE_EXPANSION - 1].  */
  94 };
  95
  96 /* Hashtable helper for iv_to_split.  */
  97
  98 struct iv_split_hasher : typed_free_remove <iv_to_split>
  99 {
 100   typedef iv_to_split value_type;
 101   typedef iv_to_split compare_type;
 102   static inline hashval_t hash (const value_type *);
 103   static inline bool equal (const value_type *, const compare_type *);
 104 };
 105
 106
 107 /* A hash function for information about insns to split.  */
 108
 109 inline hashval_t
 110 iv_split_hasher::hash (const value_type *ivts)
 111 {
 112   return (hashval_t) INSN_UID (ivts->insn);
 113 }
 114
 115 /* An equality functions for information about insns to split.  */
 116
 117 inline bool
 118 iv_split_hasher::equal (const value_type *i1, const compare_type *i2)
 119 {
 120   return i1->insn == i2->insn;
 121 }
 122
 123 /* Hashtable helper for iv_to_split.  */
 124
 125 struct var_expand_hasher : typed_free_remove <var_to_expand>
 126 {
 127   typedef var_to_expand value_type;
 128   typedef var_to_expand compare_type;
 129   static inline hashval_t hash (const value_type *);
 130   static inline bool equal (const value_type *, const compare_type *);
 131 };
 132
 133 /* Return a hash for VES.  */
 134
 135 inline hashval_t
 136 var_expand_hasher::hash (const value_type *ves)
 137 {
 138   return (hashval_t) INSN_UID (ves->insn);
 139 }
 140
 141 /* Return true if I1 and I2 refer to the same instruction.  */
 142
 143 inline bool
 144 var_expand_hasher::equal (const value_type *i1, const compare_type *i2)
 145 {
 146   return i1->insn == i2->insn;
 147 }
 148
 149 /* Information about optimization applied in
 150    the unrolled loop.  */
 151
 152 struct opt_info
 153 {
 154   hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
 155                                                   split.  */
 156   struct iv_to_split *iv_to_split_head; /* The first iv to split.  */
 157   struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list.  */
 158   hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
 159                                         insns with accumulators to expand.  */
 160   struct var_to_expand *var_to_expand_head; /* The first var to expand.  */
 161   struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list.  */
 162   unsigned first_new_block;        /* The first basic block that was
 163                                       duplicated.  */
 164   basic_block loop_exit;           /* The loop exit basic block.  */
 165   basic_block loop_preheader;      /* The loop preheader basic block.  */
 166 };
 167
 168 static void decide_unroll_stupid (struct loop *, int);
 169 static void decide_unroll_constant_iterations (struct loop *, int);
 170 static void decide_unroll_runtime_iterations (struct loop *, int);
 171 static void unroll_loop_stupid (struct loop *);
 172 static void decide_unrolling (int);
 173 static void unroll_loop_constant_iterations (struct loop *);
 174 static void unroll_loop_runtime_iterations (struct loop *);
 175 static struct opt_info *analyze_insns_in_loop (struct loop *);
 176 static void opt_info_start_duplication (struct opt_info *);
 177 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
 178 static void free_opt_info (struct opt_info *);
 179 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx_insn *);
 180 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
 181 static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
 182 static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
 183 static void insert_var_expansion_initialization (struct var_to_expand *,
 184                                                  basic_block);
 185 static void combine_var_copies_in_loop_exit (struct var_to_expand *,
 186                                              basic_block);
 187 static rtx get_expansion (struct var_to_expand *);
 188
 189 /* Emit a message summarizing the unroll that will be
 190    performed for LOOP, along with the loop's location LOCUS, if
 191    appropriate given the dump or -fopt-info settings.  */
 192
 193 static void
 194 report_unroll (struct loop *loop, location_t locus)
 195 {
 196   int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;
 197
 198   if (loop->lpt_decision.decision == LPT_NONE)
 199     return;
 200
 201   if (!dump_enabled_p ())
 202     return;
 203
 204   dump_printf_loc (report_flags, locus,
 205                    "loop unrolled %d times",
 206                    loop->lpt_decision.times);
 207   if (profile_info)
 208     dump_printf (report_flags,
 209                  " (header execution count %d)",
 210                  (int)loop->header->count);
 211
 212   dump_printf (report_flags, "\n");
 213 }
 214
 215 /* Decide whether unroll loops and how much.  */
 216 static void
 217 decide_unrolling (int flags)
 218 {
 219   struct loop *loop;
 220
 221   /* Scan the loops, inner ones first.  */
 222   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
 223     {
 224       loop->lpt_decision.decision = LPT_NONE;
 225       location_t locus = get_loop_location (loop);
 226
 227       if (dump_enabled_p ())
 228         dump_printf_loc (TDF_RTL, locus,
 229                          ";; *** Considering loop %d at BB %d for "
 230                          "unrolling ***\n",
 231                          loop->num, loop->header->index);
 232
 233       /* Do not peel cold areas.  */
 234       if (optimize_loop_for_size_p (loop))
 235         {
 236           if (dump_file)
 237             fprintf (dump_file, ";; Not considering loop, cold area\n");
 238           continue;
 239         }
 240
 241       /* Can the loop be manipulated?  */
 242       if (!can_duplicate_loop_p (loop))
 243         {
 244           if (dump_file)
 245             fprintf (dump_file,
 246                      ";; Not considering loop, cannot duplicate\n");
 247           continue;
 248         }
 249
 250       /* Skip non-innermost loops.  */
 251       if (loop->inner)
 252         {
 253           if (dump_file)
 254             fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 255           continue;
 256         }
 257
 258       loop->ninsns = num_loop_insns (loop);
 259       loop->av_ninsns = average_num_loop_insns (loop);
 260
 261       /* Try transformations one by one in decreasing order of
 262          priority.  */
 263
 264       decide_unroll_constant_iterations (loop, flags);
 265       if (loop->lpt_decision.decision == LPT_NONE)
 266         decide_unroll_runtime_iterations (loop, flags);
 267       if (loop->lpt_decision.decision == LPT_NONE)
 268         decide_unroll_stupid (loop, flags);
 269
 270       report_unroll (loop, locus);
 271     }
 272 }
 273
 274 /* Unroll LOOPS.  */
 275 void
 276 unroll_loops (int flags)
 277 {
 278   struct loop *loop;
 279   bool changed = false;
 280
 281   /* Now decide rest of unrolling.  */
 282   decide_unrolling (flags);
 283
 284   /* Scan the loops, inner ones first.  */
 285   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
 286     {
 287       /* And perform the appropriate transformations.  */
 288       switch (loop->lpt_decision.decision)
 289         {
 290         case LPT_UNROLL_CONSTANT:
 291           unroll_loop_constant_iterations (loop);
 292           changed = true;
 293           break;
 294         case LPT_UNROLL_RUNTIME:
 295           unroll_loop_runtime_iterations (loop);
 296           changed = true;
 297           break;
 298         case LPT_UNROLL_STUPID:
 299           unroll_loop_stupid (loop);
 300           changed = true;
 301           break;
 302         case LPT_NONE:
 303           break;
 304         default:
 305           gcc_unreachable ();
 306         }
 307     }
 308
 309     if (changed)
 310       {
 311         calculate_dominance_info (CDI_DOMINATORS);
 312         fix_loop_structure (NULL);
 313       }
 314
 315   iv_analysis_done ();
 316 }
 317
 318 /* Check whether exit of the LOOP is at the end of loop body.  */
 319
 320 static bool
 321 loop_exit_at_end_p (struct loop *loop)
 322 {
 323   struct niter_desc *desc = get_simple_loop_desc (loop);
 324   rtx_insn *insn;
 325
 326   /* We should never have conditional in latch block.  */
 327   gcc_assert (desc->in_edge->dest != loop->header);
 328
 329   if (desc->in_edge->dest != loop->latch)
 330     return false;
 331
 332   /* Check that the latch is empty.  */
 333   FOR_BB_INSNS (loop->latch, insn)
 334     {
 335       if (INSN_P (insn) && active_insn_p (insn))
 336         return false;
 337     }
 338
 339   return true;
 340 }
 341
 342 /* Decide whether to unroll LOOP iterating constant number of times
 343    and how much.  */
 344
 345 static void
 346 decide_unroll_constant_iterations (struct loop *loop, int flags)
 347 {
 348   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
 349   struct niter_desc *desc;
 350   widest_int iterations;
 351
 352   if (!(flags & UAP_UNROLL))
 353     {
 354       /* We were not asked to, just return back silently.  */
 355       return;
 356     }
 357
 358   if (dump_file)
 359     fprintf (dump_file,
 360              "\n;; Considering unrolling loop with constant "
 361              "number of iterations\n");
 362
 363   /* nunroll = total number of copies of the original loop body in
 364      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 365   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 366   nunroll_by_av
 367     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 368   if (nunroll > nunroll_by_av)
 369     nunroll = nunroll_by_av;
 370   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 371     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 372
 373   if (targetm.loop_unroll_adjust)
 374     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 375
 376   /* Skip big loops.  */
 377   if (nunroll <= 1)
 378     {
 379       if (dump_file)
 380         fprintf (dump_file, ";; Not considering loop, is too big\n");
 381       return;
 382     }
 383
 384   /* Check for simple loops.  */
 385   desc = get_simple_loop_desc (loop);
 386
 387   /* Check number of iterations.  */
 388   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
 389     {
 390       if (dump_file)
 391         fprintf (dump_file,
 392                  ";; Unable to prove that the loop iterates constant times\n");
 393       return;
 394     }
 395
 396   /* Check whether the loop rolls enough to consider.
 397      Consult also loop bounds and profile; in the case the loop has more
 398      than one exit it may well loop less than determined maximal number
 399      of iterations.  */
 400   if (desc->niter < 2 * nunroll
 401       || ((get_estimated_loop_iterations (loop, &iterations)
 402            || get_max_loop_iterations (loop, &iterations))
 403           && wi::ltu_p (iterations, 2 * nunroll)))
 404     {
 405       if (dump_file)
 406         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 407       return;
 408     }
 409
 410   /* Success; now compute number of iterations to unroll.  We alter
 411      nunroll so that as few as possible copies of loop body are
 412      necessary, while still not decreasing the number of unrollings
 413      too much (at most by 1).  */
 414   best_copies = 2 * nunroll + 10;
 415
 416   i = 2 * nunroll + 2;
 417   if (i - 1 >= desc->niter)
 418     i = desc->niter - 2;
 419
 420   for (; i >= nunroll - 1; i--)
 421     {
 422       unsigned exit_mod = desc->niter % (i + 1);
 423
 424       if (!loop_exit_at_end_p (loop))
 425         n_copies = exit_mod + i + 1;
 426       else if (exit_mod != (unsigned) i
 427                || desc->noloop_assumptions != NULL_RTX)
 428         n_copies = exit_mod + i + 2;
 429       else
 430         n_copies = i + 1;
 431
 432       if (n_copies < best_copies)
 433         {
 434           best_copies = n_copies;
 435           best_unroll = i;
 436         }
 437     }
 438
 439   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
 440   loop->lpt_decision.times = best_unroll;
 441 }
 442
 443 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES times.
 444    The transformation does this:
 445
 446    for (i = 0; i < 102; i++)
 447      body;
 448
 449    ==>  (LOOP->LPT_DECISION.TIMES == 3)
 450
 451    i = 0;
 452    body; i++;
 453    body; i++;
 454    while (i < 102)
 455      {
 456        body; i++;
 457        body; i++;
 458        body; i++;
 459        body; i++;
 460      }
 461   */
 462 static void
 463 unroll_loop_constant_iterations (struct loop *loop)
 464 {
 465   unsigned HOST_WIDE_INT niter;
 466   unsigned exit_mod;
 467   sbitmap wont_exit;
 468   unsigned i;
 469   edge e;
 470   unsigned max_unroll = loop->lpt_decision.times;
 471   struct niter_desc *desc = get_simple_loop_desc (loop);
 472   bool exit_at_end = loop_exit_at_end_p (loop);
 473   struct opt_info *opt_info = NULL;
 474   bool ok;
 475
 476   niter = desc->niter;
 477
 478   /* Should not get here (such loop should be peeled instead).  */
 479   gcc_assert (niter > max_unroll + 1);
 480
 481   exit_mod = niter % (max_unroll + 1);
 482
 483   wont_exit = sbitmap_alloc (max_unroll + 1);
 484   bitmap_ones (wont_exit);
 485
 486   auto_vec<edge> remove_edges;
 487   if (flag_split_ivs_in_unroller
 488       || flag_variable_expansion_in_unroller)
 489     opt_info = analyze_insns_in_loop (loop);
 490
 491   if (!exit_at_end)
 492     {
 493       /* The exit is not at the end of the loop; leave exit test
 494          in the first copy, so that the loops that start with test
 495          of exit condition have continuous body after unrolling.  */
 496
 497       if (dump_file)
 498         fprintf (dump_file, ";; Condition at beginning of loop.\n");
 499
 500       /* Peel exit_mod iterations.  */
 501       bitmap_clear_bit (wont_exit, 0);
 502       if (desc->noloop_assumptions)
 503         bitmap_clear_bit (wont_exit, 1);
 504
 505       if (exit_mod)
 506         {
 507           opt_info_start_duplication (opt_info);
 508           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 509                                               exit_mod,
 510                                               wont_exit, desc->out_edge,
 511                                               &remove_edges,
 512                                               DLTHE_FLAG_UPDATE_FREQ
 513                                               | (opt_info && exit_mod > 1
 514                                                  ? DLTHE_RECORD_COPY_NUMBER
 515                                                    : 0));
 516           gcc_assert (ok);
 517
 518           if (opt_info && exit_mod > 1)
 519             apply_opt_in_copies (opt_info, exit_mod, false, false);
 520
 521           desc->noloop_assumptions = NULL_RTX;
 522           desc->niter -= exit_mod;
 523           loop->nb_iterations_upper_bound -= exit_mod;
 524           if (loop->any_estimate
 525               && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
 526             loop->nb_iterations_estimate -= exit_mod;
 527           else
 528             loop->any_estimate = false;
 529         }
 530
 531       bitmap_set_bit (wont_exit, 1);
 532     }
 533   else
 534     {
 535       /* Leave exit test in last copy, for the same reason as above if
 536          the loop tests the condition at the end of loop body.  */
 537
 538       if (dump_file)
 539         fprintf (dump_file, ";; Condition at end of loop.\n");
 540
 541       /* We know that niter >= max_unroll + 2; so we do not need to care of
 542          case when we would exit before reaching the loop.  So just peel
 543          exit_mod + 1 iterations.  */
 544       if (exit_mod != max_unroll
 545           || desc->noloop_assumptions)
 546         {
 547           bitmap_clear_bit (wont_exit, 0);
 548           if (desc->noloop_assumptions)
 549             bitmap_clear_bit (wont_exit, 1);
 550
 551           opt_info_start_duplication (opt_info);
 552           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 553                                               exit_mod + 1,
 554                                               wont_exit, desc->out_edge,
 555                                               &remove_edges,
 556                                               DLTHE_FLAG_UPDATE_FREQ
 557                                               | (opt_info && exit_mod > 0
 558                                                  ? DLTHE_RECORD_COPY_NUMBER
 559                                                    : 0));
 560           gcc_assert (ok);
 561
 562           if (opt_info && exit_mod > 0)
 563             apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
 564
 565           desc->niter -= exit_mod + 1;
 566           loop->nb_iterations_upper_bound -= exit_mod + 1;
 567           if (loop->any_estimate
 568               && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
 569             loop->nb_iterations_estimate -= exit_mod + 1;
 570           else
 571             loop->any_estimate = false;
 572           desc->noloop_assumptions = NULL_RTX;
 573
 574           bitmap_set_bit (wont_exit, 0);
 575           bitmap_set_bit (wont_exit, 1);
 576         }
 577
 578       bitmap_clear_bit (wont_exit, max_unroll);
 579     }
 580
 581   /* Now unroll the loop.  */
 582
 583   opt_info_start_duplication (opt_info);
 584   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 585                                       max_unroll,
 586                                       wont_exit, desc->out_edge,
 587                                       &remove_edges,
 588                                       DLTHE_FLAG_UPDATE_FREQ
 589                                       | (opt_info
 590                                          ? DLTHE_RECORD_COPY_NUMBER
 591                                            : 0));
 592   gcc_assert (ok);
 593
 594   if (opt_info)
 595     {
 596       apply_opt_in_copies (opt_info, max_unroll, true, true);
 597       free_opt_info (opt_info);
 598     }
 599
 600   free (wont_exit);
 601
 602   if (exit_at_end)
 603     {
 604       basic_block exit_block = get_bb_copy (desc->in_edge->src);
 605       /* Find a new in and out edge; they are in the last copy we have made.  */
 606
 607       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
 608         {
 609           desc->out_edge = EDGE_SUCC (exit_block, 0);
 610           desc->in_edge = EDGE_SUCC (exit_block, 1);
 611         }
 612       else
 613         {
 614           desc->out_edge = EDGE_SUCC (exit_block, 1);
 615           desc->in_edge = EDGE_SUCC (exit_block, 0);
 616         }
 617     }
 618
 619   desc->niter /= max_unroll + 1;
 620   loop->nb_iterations_upper_bound
 621     = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
 622   if (loop->any_estimate)
 623     loop->nb_iterations_estimate
 624       = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
 625   desc->niter_expr = GEN_INT (desc->niter);
 626
 627   /* Remove the edges.  */
 628   FOR_EACH_VEC_ELT (remove_edges, i, e)
 629     remove_path (e);
 630
 631   if (dump_file)
 632     fprintf (dump_file,
 633              ";; Unrolled loop %d times, constant # of iterations %i insns\n",
 634              max_unroll, num_loop_insns (loop));
 635 }
 636
 637 /* Decide whether to unroll LOOP iterating runtime computable number of times
 638    and how much.  */
 639 static void
 640 decide_unroll_runtime_iterations (struct loop *loop, int flags)
 641 {
 642   unsigned nunroll, nunroll_by_av, i;
 643   struct niter_desc *desc;
 644   widest_int iterations;
 645
 646   if (!(flags & UAP_UNROLL))
 647     {
 648       /* We were not asked to, just return back silently.  */
 649       return;
 650     }
 651
 652   if (dump_file)
 653     fprintf (dump_file,
 654              "\n;; Considering unrolling loop with runtime "
 655              "computable number of iterations\n");
 656
 657   /* nunroll = total number of copies of the original loop body in
 658      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 659   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 660   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 661   if (nunroll > nunroll_by_av)
 662     nunroll = nunroll_by_av;
 663   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 664     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 665
 666   if (targetm.loop_unroll_adjust)
 667     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 668
 669   /* Skip big loops.  */
 670   if (nunroll <= 1)
 671     {
 672       if (dump_file)
 673         fprintf (dump_file, ";; Not considering loop, is too big\n");
 674       return;
 675     }
 676
 677   /* Check for simple loops.  */
 678   desc = get_simple_loop_desc (loop);
 679
 680   /* Check simpleness.  */
 681   if (!desc->simple_p || desc->assumptions)
 682     {
 683       if (dump_file)
 684         fprintf (dump_file,
 685                  ";; Unable to prove that the number of iterations "
 686                  "can be counted in runtime\n");
 687       return;
 688     }
 689
 690   if (desc->const_iter)
 691     {
 692       if (dump_file)
 693         fprintf (dump_file, ";; Loop iterates constant times\n");
 694       return;
 695     }
 696
 697   /* Check whether the loop rolls.  */
 698   if ((get_estimated_loop_iterations (loop, &iterations)
 699        || get_max_loop_iterations (loop, &iterations))
 700       && wi::ltu_p (iterations, 2 * nunroll))
 701     {
 702       if (dump_file)
 703         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 704       return;
 705     }
 706
 707   /* Success; now force nunroll to be power of 2, as we are unable to
 708      cope with overflows in computation of number of iterations.  */
 709   for (i = 1; 2 * i <= nunroll; i *= 2)
 710     continue;
 711
 712   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
 713   loop->lpt_decision.times = i - 1;
 714 }
 715
 716 /* Splits edge E and inserts the sequence of instructions INSNS on it, and
 717    returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
 718    and NULL is returned instead.  */
 719
 720 basic_block
 721 split_edge_and_insert (edge e, rtx_insn *insns)
 722 {
 723   basic_block bb;
 724
 725   if (!insns)
 726     return NULL;
 727   bb = split_edge (e);
 728   emit_insn_after (insns, BB_END (bb));
 729
 730   /* ??? We used to assume that INSNS can contain control flow insns, and
 731      that we had to try to find sub basic blocks in BB to maintain a valid
 732      CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
 733      and call break_superblocks when going out of cfglayout mode.  But it
 734      turns out that this never happens; and that if it does ever happen,
 735      the verify_flow_info at the end of the RTL loop passes would fail.
 736
 737      There are two reasons why we expected we could have control flow insns
 738      in INSNS.  The first is when a comparison has to be done in parts, and
 739      the second is when the number of iterations is computed for loops with
 740      the number of iterations known at runtime.  In both cases, test cases
 741      to get control flow in INSNS appear to be impossible to construct:
 742
 743       * If do_compare_rtx_and_jump needs several branches to do comparison
 744         in a mode that needs comparison by parts, we cannot analyze the
 745         number of iterations of the loop, and we never get to unrolling it.
 746
 747       * The code in expand_divmod that was suspected to cause creation of
 748         branching code seems to be only accessed for signed division.  The
 749         divisions used by # of iterations analysis are always unsigned.
 750         Problems might arise on architectures that emits branching code
 751         for some operations that may appear in the unroller (especially
 752         for division), but we have no such architectures.
 753
 754      Considering all this, it was decided that we should for now assume
 755      that INSNS can in theory contain control flow insns, but in practice
 756      it never does.  So we don't handle the theoretical case, and should
 757      a real failure ever show up, we have a pretty good clue for how to
 758      fix it.  */
 759
 760   return bb;
 761 }
 762
 763 /* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
 764    true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
 765    in order to create a jump.  */
 766
 767 static rtx_insn *
 768 compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label, int prob,
 769                       rtx_insn *cinsn)
 770 {
 771   rtx_insn *seq, *jump;
 772   rtx cond;
 773   enum machine_mode mode;
 774
 775   mode = GET_MODE (op0);
 776   if (mode == VOIDmode)
 777     mode = GET_MODE (op1);
 778
 779   start_sequence ();
 780   if (GET_MODE_CLASS (mode) == MODE_CC)
 781     {
 782       /* A hack -- there seems to be no easy generic way how to make a
 783          conditional jump from a ccmode comparison.  */
 784       gcc_assert (cinsn);
 785       cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
 786       gcc_assert (GET_CODE (cond) == comp);
 787       gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
 788       gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
 789       emit_jump_insn (copy_insn (PATTERN (cinsn)));
 790       jump = get_last_insn ();
 791       gcc_assert (JUMP_P (jump));
 792       JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
 793       LABEL_NUSES (JUMP_LABEL (jump))++;
 794       redirect_jump (jump, label, 0);
 795     }
 796   else
 797     {
 798       gcc_assert (!cinsn);
 799
 800       op0 = force_operand (op0, NULL_RTX);
 801       op1 = force_operand (op1, NULL_RTX);
 802       do_compare_rtx_and_jump (op0, op1, comp, 0,
 803                                mode, NULL_RTX, NULL_RTX, label, -1);
 804       jump = get_last_insn ();
 805       gcc_assert (JUMP_P (jump));
 806       JUMP_LABEL (jump) = label;
 807       LABEL_NUSES (label)++;
 808     }
 809   add_int_reg_note (jump, REG_BR_PROB, prob);
 810
 811   seq = get_insns ();
 812   end_sequence ();
 813
 814   return seq;
 815 }
 816
 817 /* Unroll LOOP for which we are able to count number of iterations in runtime
 818    LOOP->LPT_DECISION.TIMES times.  The transformation does this (with some
 819    extra care for case n < 0):
 820
 821    for (i = 0; i < n; i++)
 822      body;
 823
 824    ==>  (LOOP->LPT_DECISION.TIMES == 3)
 825
 826    i = 0;
 827    mod = n % 4;
 828
 829    switch (mod)
 830      {
 831        case 3:
 832          body; i++;
 833        case 2:
 834          body; i++;
 835        case 1:
 836          body; i++;
 837        case 0: ;
 838      }
 839
 840    while (i < n)
 841      {
 842        body; i++;
 843        body; i++;
 844        body; i++;
 845        body; i++;
 846      }
 847    */
 848 static void
 849 unroll_loop_runtime_iterations (struct loop *loop)
 850 {
 851   rtx old_niter, niter, tmp;
 852   rtx_insn *init_code, *branch_code;
 853   unsigned i, j, p;
 854   basic_block preheader, *body, swtch, ezc_swtch;
 855   sbitmap wont_exit;
 856   int may_exit_copy;
 857   unsigned n_peel;
 858   edge e;
 859   bool extra_zero_check, last_may_exit;
 860   unsigned max_unroll = loop->lpt_decision.times;
 861   struct niter_desc *desc = get_simple_loop_desc (loop);
 862   bool exit_at_end = loop_exit_at_end_p (loop);
 863   struct opt_info *opt_info = NULL;
 864   bool ok;
 865
 866   if (flag_split_ivs_in_unroller
 867       || flag_variable_expansion_in_unroller)
 868     opt_info = analyze_insns_in_loop (loop);
 869
 870   /* Remember blocks whose dominators will have to be updated.  */
 871   auto_vec<basic_block> dom_bbs;
 872
 873   body = get_loop_body (loop);
 874   for (i = 0; i < loop->num_nodes; i++)
 875     {
 876       vec<basic_block> ldom;
 877       basic_block bb;
 878
 879       ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
 880       FOR_EACH_VEC_ELT (ldom, j, bb)
 881         if (!flow_bb_inside_loop_p (loop, bb))
 882           dom_bbs.safe_push (bb);
 883
 884       ldom.release ();
 885     }
 886   free (body);
 887
 888   if (!exit_at_end)
 889     {
 890       /* Leave exit in first copy (for explanation why see comment in
 891          unroll_loop_constant_iterations).  */
 892       may_exit_copy = 0;
 893       n_peel = max_unroll - 1;
 894       extra_zero_check = true;
 895       last_may_exit = false;
 896     }
 897   else
 898     {
 899       /* Leave exit in last copy (for explanation why see comment in
 900          unroll_loop_constant_iterations).  */
 901       may_exit_copy = max_unroll;
 902       n_peel = max_unroll;
 903       extra_zero_check = false;
 904       last_may_exit = true;
 905     }
 906
 907   /* Get expression for number of iterations.  */
 908   start_sequence ();
 909   old_niter = niter = gen_reg_rtx (desc->mode);
 910   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
 911   if (tmp != niter)
 912     emit_move_insn (niter, tmp);
 913
 914   /* Count modulo by ANDing it with max_unroll; we use the fact that
 915      the number of unrollings is a power of two, and thus this is correct
 916      even if there is overflow in the computation.  */
 917   niter = expand_simple_binop (desc->mode, AND,
 918                                niter, gen_int_mode (max_unroll, desc->mode),
 919                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
 920
 921   init_code = get_insns ();
 922   end_sequence ();
 923   unshare_all_rtl_in_chain (init_code);
 924
 925   /* Precondition the loop.  */
 926   split_edge_and_insert (loop_preheader_edge (loop), init_code);
 927
 928   auto_vec<edge> remove_edges;
 929
 930   wont_exit = sbitmap_alloc (max_unroll + 2);
 931
 932   /* Peel the first copy of loop body (almost always we must leave exit test
 933      here; the only exception is when we have extra zero check and the number
 934      of iterations is reliable.  Also record the place of (possible) extra
 935      zero check.  */
 936   bitmap_clear (wont_exit);
 937   if (extra_zero_check
 938       && !desc->noloop_assumptions)
 939     bitmap_set_bit (wont_exit, 1);
 940   ezc_swtch = loop_preheader_edge (loop)->src;
 941   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 942                                       1, wont_exit, desc->out_edge,
 943                                       &remove_edges,
 944                                       DLTHE_FLAG_UPDATE_FREQ);
 945   gcc_assert (ok);
 946
 947   /* Record the place where switch will be built for preconditioning.  */
 948   swtch = split_edge (loop_preheader_edge (loop));
 949
 950   for (i = 0; i < n_peel; i++)
 951     {
 952       /* Peel the copy.  */
 953       bitmap_clear (wont_exit);
 954       if (i != n_peel - 1 || !last_may_exit)
 955         bitmap_set_bit (wont_exit, 1);
 956       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 957                                           1, wont_exit, desc->out_edge,
 958                                           &remove_edges,
 959                                           DLTHE_FLAG_UPDATE_FREQ);
 960       gcc_assert (ok);
 961
 962       /* Create item for switch.  */
 963       j = n_peel - i - (extra_zero_check ? 0 : 1);
 964       p = REG_BR_PROB_BASE / (i + 2);
 965
 966       preheader = split_edge (loop_preheader_edge (loop));
 967       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
 968                                           block_label (preheader), p,
 969                                           NULL);
 970
 971       /* We rely on the fact that the compare and jump cannot be optimized out,
 972          and hence the cfg we create is correct.  */
 973       gcc_assert (branch_code != NULL_RTX);
 974
 975       swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
 976       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
 977       single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
 978       e = make_edge (swtch, preheader,
 979                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
 980       e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
 981       e->probability = p;
 982     }
 983
 984   if (extra_zero_check)
 985     {
 986       /* Add branch for zero iterations.  */
 987       p = REG_BR_PROB_BASE / (max_unroll + 1);
 988       swtch = ezc_swtch;
 989       preheader = split_edge (loop_preheader_edge (loop));
 990       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
 991                                           block_label (preheader), p,
 992                                           NULL);
 993       gcc_assert (branch_code != NULL_RTX);
 994
 995       swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
 996       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
 997       single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
 998       e = make_edge (swtch, preheader,
 999                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1000       e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
1001       e->probability = p;
1002     }
1003
1004   /* Recount dominators for outer blocks.  */
1005   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
1006
1007   /* And unroll loop.  */
1008
1009   bitmap_ones (wont_exit);
1010   bitmap_clear_bit (wont_exit, may_exit_copy);
1011   opt_info_start_duplication (opt_info);
1012
1013   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1014                                       max_unroll,
1015                                       wont_exit, desc->out_edge,
1016                                       &remove_edges,
1017                                       DLTHE_FLAG_UPDATE_FREQ
1018                                       | (opt_info
1019                                          ? DLTHE_RECORD_COPY_NUMBER
1020                                            : 0));
1021   gcc_assert (ok);
1022
1023   if (opt_info)
1024     {
1025       apply_opt_in_copies (opt_info, max_unroll, true, true);
1026       free_opt_info (opt_info);
1027     }
1028
1029   free (wont_exit);
1030
1031   if (exit_at_end)
1032     {
1033       basic_block exit_block = get_bb_copy (desc->in_edge->src);
1034       /* Find a new in and out edge; they are in the last copy we have
1035          made.  */
1036
1037       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1038         {
1039           desc->out_edge = EDGE_SUCC (exit_block, 0);
1040           desc->in_edge = EDGE_SUCC (exit_block, 1);
1041         }
1042       else
1043         {
1044           desc->out_edge = EDGE_SUCC (exit_block, 1);
1045           desc->in_edge = EDGE_SUCC (exit_block, 0);
1046         }
1047     }
1048
1049   /* Remove the edges.  */
1050   FOR_EACH_VEC_ELT (remove_edges, i, e)
1051     remove_path (e);
1052
1053   /* We must be careful when updating the number of iterations due to
1054      preconditioning and the fact that the value must be valid at entry
1055      of the loop.  After passing through the above code, we see that
1056      the correct new number of iterations is this:  */
1057   gcc_assert (!desc->const_iter);
1058   desc->niter_expr =
1059     simplify_gen_binary (UDIV, desc->mode, old_niter,
1060                          gen_int_mode (max_unroll + 1, desc->mode));
1061   loop->nb_iterations_upper_bound
1062     = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
1063   if (loop->any_estimate)
1064     loop->nb_iterations_estimate
1065       = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
1066   if (exit_at_end)
1067     {
1068       desc->niter_expr =
1069         simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1070       desc->noloop_assumptions = NULL_RTX;
1071       --loop->nb_iterations_upper_bound;
1072       if (loop->any_estimate
1073           && loop->nb_iterations_estimate != 0)
1074         --loop->nb_iterations_estimate;
1075       else
1076         loop->any_estimate = false;
1077     }
1078
1079   if (dump_file)
1080     fprintf (dump_file,
1081              ";; Unrolled loop %d times, counting # of iterations "
1082              "in runtime, %i insns\n",
1083              max_unroll, num_loop_insns (loop));
1084 }
1085
1086 /* Decide whether to unroll LOOP stupidly and how much.  */
1087 static void
1088 decide_unroll_stupid (struct loop *loop, int flags)
1089 {
1090   unsigned nunroll, nunroll_by_av, i;
1091   struct niter_desc *desc;
1092   widest_int iterations;
1093
1094   if (!(flags & UAP_UNROLL_ALL))
1095     {
1096       /* We were not asked to, just return back silently.  */
1097       return;
1098     }
1099
1100   if (dump_file)
1101     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1102
1103   /* nunroll = total number of copies of the original loop body in
1104      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1105   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1106   nunroll_by_av
1107     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1108   if (nunroll > nunroll_by_av)
1109     nunroll = nunroll_by_av;
1110   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1111     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1112
1113   if (targetm.loop_unroll_adjust)
1114     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
1115
1116   /* Skip big loops.  */
1117   if (nunroll <= 1)
1118     {
1119       if (dump_file)
1120         fprintf (dump_file, ";; Not considering loop, is too big\n");
1121       return;
1122     }
1123
1124   /* Check for simple loops.  */
1125   desc = get_simple_loop_desc (loop);
1126
1127   /* Check simpleness.  */
1128   if (desc->simple_p && !desc->assumptions)
1129     {
1130       if (dump_file)
1131         fprintf (dump_file, ";; The loop is simple\n");
1132       return;
1133     }
1134
1135   /* Do not unroll loops with branches inside -- it increases number
1136      of mispredicts.
1137      TODO: this heuristic needs tunning; call inside the loop body
1138      is also relatively good reason to not unroll.  */
1139   if (num_loop_branches (loop) > 1)
1140     {
1141       if (dump_file)
1142         fprintf (dump_file, ";; Not unrolling, contains branches\n");
1143       return;
1144     }
1145
1146   /* Check whether the loop rolls.  */
1147   if ((get_estimated_loop_iterations (loop, &iterations)
1148        || get_max_loop_iterations (loop, &iterations))
1149       && wi::ltu_p (iterations, 2 * nunroll))
1150     {
1151       if (dump_file)
1152         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1153       return;
1154     }
1155
1156   /* Success.  Now force nunroll to be power of 2, as it seems that this
1157      improves results (partially because of better alignments, partially
1158      because of some dark magic).  */
1159   for (i = 1; 2 * i <= nunroll; i *= 2)
1160     continue;
1161
1162   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1163   loop->lpt_decision.times = i - 1;
1164 }
1165
1166 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation does this:
1167
1168    while (cond)
1169      body;
1170
1171    ==>  (LOOP->LPT_DECISION.TIMES == 3)
1172
1173    while (cond)
1174      {
1175        body;
1176        if (!cond) break;
1177        body;
1178        if (!cond) break;
1179        body;
1180        if (!cond) break;
1181        body;
1182      }
1183    */
1184 static void
1185 unroll_loop_stupid (struct loop *loop)
1186 {
1187   sbitmap wont_exit;
1188   unsigned nunroll = loop->lpt_decision.times;
1189   struct niter_desc *desc = get_simple_loop_desc (loop);
1190   struct opt_info *opt_info = NULL;
1191   bool ok;
1192
1193   if (flag_split_ivs_in_unroller
1194       || flag_variable_expansion_in_unroller)
1195     opt_info = analyze_insns_in_loop (loop);
1196
1197
1198   wont_exit = sbitmap_alloc (nunroll + 1);
1199   bitmap_clear (wont_exit);
1200   opt_info_start_duplication (opt_info);
1201
1202   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1203                                       nunroll, wont_exit,
1204                                       NULL, NULL,
1205                                       DLTHE_FLAG_UPDATE_FREQ
1206                                       | (opt_info
1207                                          ? DLTHE_RECORD_COPY_NUMBER
1208                                            : 0));
1209   gcc_assert (ok);
1210
1211   if (opt_info)
1212     {
1213       apply_opt_in_copies (opt_info, nunroll, true, true);
1214       free_opt_info (opt_info);
1215     }
1216
1217   free (wont_exit);
1218
1219   if (desc->simple_p)
1220     {
1221       /* We indeed may get here provided that there are nontrivial assumptions
1222          for a loop to be really simple.  We could update the counts, but the
1223          problem is that we are unable to decide which exit will be taken
1224          (not really true in case the number of iterations is constant,
1225          but no one will do anything with this information, so we do not
1226          worry about it).  */
1227       desc->simple_p = false;
1228     }
1229
1230   if (dump_file)
1231     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1232              nunroll, num_loop_insns (loop));
1233 }
1234
1235 /* Returns true if REG is referenced in one nondebug insn in LOOP.
1236    Set *DEBUG_USES to the number of debug insns that reference the
1237    variable.  */
1238
1239 bool
1240 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
1241                                   int *debug_uses)
1242 {
1243   basic_block *body, bb;
1244   unsigned i;
1245   int count_ref = 0;
1246   rtx_insn *insn;
1247
1248   body = get_loop_body (loop);
1249   for (i = 0; i < loop->num_nodes; i++)
1250     {
1251       bb = body[i];
1252
1253       FOR_BB_INSNS (bb, insn)
1254         if (!rtx_referenced_p (reg, insn))
1255           continue;
1256         else if (DEBUG_INSN_P (insn))
1257           ++*debug_uses;
1258         else if (++count_ref > 1)
1259           break;
1260     }
1261   free (body);
1262   return (count_ref  == 1);
1263 }
1264
1265 /* Reset the DEBUG_USES debug insns in LOOP that reference REG.  */
1266
1267 static void
1268 reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
1269 {
1270   basic_block *body, bb;
1271   unsigned i;
1272   rtx_insn *insn;
1273
1274   body = get_loop_body (loop);
1275   for (i = 0; debug_uses && i < loop->num_nodes; i++)
1276     {
1277       bb = body[i];
1278
1279       FOR_BB_INSNS (bb, insn)
1280         if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
1281           continue;
1282         else
1283           {
1284             validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
1285                              gen_rtx_UNKNOWN_VAR_LOC (), 0);
1286             if (!--debug_uses)
1287               break;
1288           }
1289     }
1290   free (body);
1291 }
1292
1293 /* Determine whether INSN contains an accumulator
1294    which can be expanded into separate copies,
1295    one for each copy of the LOOP body.
1296
1297    for (i = 0 ; i < n; i++)
1298      sum += a[i];
1299
1300    ==>
1301
1302    sum += a[i]
1303    ....
1304    i = i+1;
1305    sum1 += a[i]
1306    ....
1307    i = i+1
1308    sum2 += a[i];
1309    ....
1310
1311    Return NULL if INSN contains no opportunity for expansion of accumulator.
1312    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1313    information and return a pointer to it.
1314 */
1315
1316 static struct var_to_expand *
1317 analyze_insn_to_expand_var (struct loop *loop, rtx_insn *insn)
1318 {
1319   rtx set, dest, src;
1320   struct var_to_expand *ves;
1321   unsigned accum_pos;
1322   enum rtx_code code;
1323   int debug_uses = 0;
1324
1325   set = single_set (insn);
1326   if (!set)
1327     return NULL;
1328
1329   dest = SET_DEST (set);
1330   src = SET_SRC (set);
1331   code = GET_CODE (src);
1332
1333   if (code != PLUS && code != MINUS && code != MULT && code != FMA)
1334     return NULL;
1335
1336   if (FLOAT_MODE_P (GET_MODE (dest)))
1337     {
1338       if (!flag_associative_math)
1339         return NULL;
1340       /* In the case of FMA, we're also changing the rounding.  */
1341       if (code == FMA && !flag_unsafe_math_optimizations)
1342         return NULL;
1343     }
1344
1345   /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
1346      in MD.  But if there is no optab to generate the insn, we can not
1347      perform the variable expansion.  This can happen if an MD provides
1348      an insn but not a named pattern to generate it, for example to avoid
1349      producing code that needs additional mode switches like for x87/mmx.
1350
1351      So we check have_insn_for which looks for an optab for the operation
1352      in SRC.  If it doesn't exist, we can't perform the expansion even
1353      though INSN is valid.  */
1354   if (!have_insn_for (code, GET_MODE (src)))
1355     return NULL;
1356
1357   if (!REG_P (dest)
1358       && !(GET_CODE (dest) == SUBREG
1359            && REG_P (SUBREG_REG (dest))))
1360     return NULL;
1361
1362   /* Find the accumulator use within the operation.  */
1363   if (code == FMA)
1364     {
1365       /* We only support accumulation via FMA in the ADD position.  */
1366       if (!rtx_equal_p  (dest, XEXP (src, 2)))
1367         return NULL;
1368       accum_pos = 2;
1369     }
1370   else if (rtx_equal_p (dest, XEXP (src, 0)))
1371     accum_pos = 0;
1372   else if (rtx_equal_p (dest, XEXP (src, 1)))
1373     {
1374       /* The method of expansion that we are using; which includes the
1375          initialization of the expansions with zero and the summation of
1376          the expansions at the end of the computation will yield wrong
1377          results for (x = something - x) thus avoid using it in that case.  */
1378       if (code == MINUS)
1379         return NULL;
1380       accum_pos = 1;
1381     }
1382   else
1383     return NULL;
1384
1385   /* It must not otherwise be used.  */
1386   if (code == FMA)
1387     {
1388       if (rtx_referenced_p (dest, XEXP (src, 0))
1389           || rtx_referenced_p (dest, XEXP (src, 1)))
1390         return NULL;
1391     }
1392   else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
1393     return NULL;
1394
1395   /* It must be used in exactly one insn.  */
1396   if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
1397     return NULL;
1398
1399   if (dump_file)
1400     {
1401       fprintf (dump_file, "\n;; Expanding Accumulator ");
1402       print_rtl (dump_file, dest);
1403       fprintf (dump_file, "\n");
1404     }
1405
1406   if (debug_uses)
1407     /* Instead of resetting the debug insns, we could replace each
1408        debug use in the loop with the sum or product of all expanded
1409        accummulators.  Since we'll only know of all expansions at the
1410        end, we'd have to keep track of which vars_to_expand a debug
1411        insn in the loop references, take note of each copy of the
1412        debug insn during unrolling, and when it's all done, compute
1413        the sum or product of each variable and adjust the original
1414        debug insn and each copy thereof.  What a pain!  */
1415     reset_debug_uses_in_loop (loop, dest, debug_uses);
1416
1417   /* Record the accumulator to expand.  */
1418   ves = XNEW (struct var_to_expand);
1419   ves->insn = insn;
1420   ves->reg = copy_rtx (dest);
1421   ves->var_expansions.create (1);
1422   ves->next = NULL;
1423   ves->op = GET_CODE (src);
1424   ves->expansion_count = 0;
1425   ves->reuse_expansion = 0;
1426   return ves;
1427 }
1428
1429 /* Determine whether there is an induction variable in INSN that
1430    we would like to split during unrolling.
1431
1432    I.e. replace
1433
1434    i = i + 1;
1435    ...
1436    i = i + 1;
1437    ...
1438    i = i + 1;
1439    ...
1440
1441    type chains by
1442
1443    i0 = i + 1
1444    ...
1445    i = i0 + 1
1446    ...
1447    i = i0 + 2
1448    ...
1449
1450    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1451    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1452    pointer to it.  */
1453
1454 static struct iv_to_split *
1455 analyze_iv_to_split_insn (rtx_insn *insn)
1456 {
1457   rtx set, dest;
1458   struct rtx_iv iv;
1459   struct iv_to_split *ivts;
1460   bool ok;
1461
1462   /* For now we just split the basic induction variables.  Later this may be
1463      extended for example by selecting also addresses of memory references.  */
1464   set = single_set (insn);
1465   if (!set)
1466     return NULL;
1467
1468   dest = SET_DEST (set);
1469   if (!REG_P (dest))
1470     return NULL;
1471
1472   if (!biv_p (insn, dest))
1473     return NULL;
1474
1475   ok = iv_analyze_result (insn, dest, &iv);
1476
1477   /* This used to be an assert under the assumption that if biv_p returns
1478      true that iv_analyze_result must also return true.  However, that
1479      assumption is not strictly correct as evidenced by pr25569.
1480
1481      Returning NULL when iv_analyze_result returns false is safe and
1482      avoids the problems in pr25569 until the iv_analyze_* routines
1483      can be fixed, which is apparently hard and time consuming
1484      according to their author.  */
1485   if (! ok)
1486     return NULL;
1487
1488   if (iv.step == const0_rtx
1489       || iv.mode != iv.extend_mode)
1490     return NULL;
1491
1492   /* Record the insn to split.  */
1493   ivts = XNEW (struct iv_to_split);
1494   ivts->insn = insn;
1495   ivts->orig_var = dest;
1496   ivts->base_var = NULL_RTX;
1497   ivts->step = iv.step;
1498   ivts->next = NULL;
1499
1500   return ivts;
1501 }
1502
1503 /* Determines which of insns in LOOP can be optimized.
1504    Return a OPT_INFO struct with the relevant hash tables filled
1505    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1506    is undefined for the return value.  */
1507
1508 static struct opt_info *
1509 analyze_insns_in_loop (struct loop *loop)
1510 {
1511   basic_block *body, bb;
1512   unsigned i;
1513   struct opt_info *opt_info = XCNEW (struct opt_info);
1514   rtx_insn *insn;
1515   struct iv_to_split *ivts = NULL;
1516   struct var_to_expand *ves = NULL;
1517   iv_to_split **slot1;
1518   var_to_expand **slot2;
1519   vec<edge> edges = get_loop_exit_edges (loop);
1520   edge exit;
1521   bool can_apply = false;
1522
1523   iv_analysis_loop_init (loop);
1524
1525   body = get_loop_body (loop);
1526
1527   if (flag_split_ivs_in_unroller)
1528     {
1529       opt_info->insns_to_split
1530         = new hash_table<iv_split_hasher> (5 * loop->num_nodes);
1531       opt_info->iv_to_split_head = NULL;
1532       opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
1533     }
1534
1535   /* Record the loop exit bb and loop preheader before the unrolling.  */
1536   opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1537
1538   if (edges.length () == 1)
1539     {
1540       exit = edges[0];
1541       if (!(exit->flags & EDGE_COMPLEX))
1542         {
1543           opt_info->loop_exit = split_edge (exit);
1544           can_apply = true;
1545         }
1546     }
1547
1548   if (flag_variable_expansion_in_unroller
1549       && can_apply)
1550     {
1551       opt_info->insns_with_var_to_expand
1552         = new hash_table<var_expand_hasher> (5 * loop->num_nodes);
1553       opt_info->var_to_expand_head = NULL;
1554       opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
1555     }
1556
1557   for (i = 0; i < loop->num_nodes; i++)
1558     {
1559       bb = body[i];
1560       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1561         continue;
1562
1563       FOR_BB_INSNS (bb, insn)
1564       {
1565         if (!INSN_P (insn))
1566           continue;
1567
1568         if (opt_info->insns_to_split)
1569           ivts = analyze_iv_to_split_insn (insn);
1570
1571         if (ivts)
1572           {
1573             slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
1574             gcc_assert (*slot1 == NULL);
1575             *slot1 = ivts;
1576             *opt_info->iv_to_split_tail = ivts;
1577             opt_info->iv_to_split_tail = &ivts->next;
1578             continue;
1579           }
1580
1581         if (opt_info->insns_with_var_to_expand)
1582           ves = analyze_insn_to_expand_var (loop, insn);
1583
1584         if (ves)
1585           {
1586             slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
1587             gcc_assert (*slot2 == NULL);
1588             *slot2 = ves;
1589             *opt_info->var_to_expand_tail = ves;
1590             opt_info->var_to_expand_tail = &ves->next;
1591           }
1592       }
1593     }
1594
1595   edges.release ();
1596   free (body);
1597   return opt_info;
1598 }
1599
1600 /* Called just before loop duplication.  Records start of duplicated area
1601    to OPT_INFO.  */
1602
1603 static void
1604 opt_info_start_duplication (struct opt_info *opt_info)
1605 {
1606   if (opt_info)
1607     opt_info->first_new_block = last_basic_block_for_fn (cfun);
1608 }
1609
1610 /* Determine the number of iterations between initialization of the base
1611    variable and the current copy (N_COPY).  N_COPIES is the total number
1612    of newly created copies.  UNROLLING is true if we are unrolling
1613    (not peeling) the loop.  */
1614
1615 static unsigned
1616 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1617 {
1618   if (unrolling)
1619     {
1620       /* If we are unrolling, initialization is done in the original loop
1621          body (number 0).  */
1622       return n_copy;
1623     }
1624   else
1625     {
1626       /* If we are peeling, the copy in that the initialization occurs has
1627          number 1.  The original loop (number 0) is the last.  */
1628       if (n_copy)
1629         return n_copy - 1;
1630       else
1631         return n_copies;
1632     }
1633 }
1634
1635 /* Allocate basic variable for the induction variable chain.  */
1636
1637 static void
1638 allocate_basic_variable (struct iv_to_split *ivts)
1639 {
1640   rtx expr = SET_SRC (single_set (ivts->insn));
1641
1642   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1643 }
1644
1645 /* Insert initialization of basic variable of IVTS before INSN, taking
1646    the initial value from INSN.  */
1647
1648 static void
1649 insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
1650 {
1651   rtx expr = copy_rtx (SET_SRC (single_set (insn)));
1652   rtx_insn *seq;
1653
1654   start_sequence ();
1655   expr = force_operand (expr, ivts->base_var);
1656   if (expr != ivts->base_var)
1657     emit_move_insn (ivts->base_var, expr);
1658   seq = get_insns ();
1659   end_sequence ();
1660
1661   emit_insn_before (seq, insn);
1662 }
1663
1664 /* Replace the use of induction variable described in IVTS in INSN
1665    by base variable + DELTA * step.  */
1666
1667 static void
1668 split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
1669 {
1670   rtx expr, *loc, incr, var;
1671   rtx_insn *seq;
1672   enum machine_mode mode = GET_MODE (ivts->base_var);
1673   rtx src, dest, set;
1674
1675   /* Construct base + DELTA * step.  */
1676   if (!delta)
1677     expr = ivts->base_var;
1678   else
1679     {
1680       incr = simplify_gen_binary (MULT, mode,
1681                                   ivts->step, gen_int_mode (delta, mode));
1682       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1683                                   ivts->base_var, incr);
1684     }
1685
1686   /* Figure out where to do the replacement.  */
1687   loc = &SET_SRC (single_set (insn));
1688
1689   /* If we can make the replacement right away, we're done.  */
1690   if (validate_change (insn, loc, expr, 0))
1691     return;
1692
1693   /* Otherwise, force EXPR into a register and try again.  */
1694   start_sequence ();
1695   var = gen_reg_rtx (mode);
1696   expr = force_operand (expr, var);
1697   if (expr != var)
1698     emit_move_insn (var, expr);
1699   seq = get_insns ();
1700   end_sequence ();
1701   emit_insn_before (seq, insn);
1702
1703   if (validate_change (insn, loc, var, 0))
1704     return;
1705
1706   /* The last chance.  Try recreating the assignment in insn
1707      completely from scratch.  */
1708   set = single_set (insn);
1709   gcc_assert (set);
1710
1711   start_sequence ();
1712   *loc = var;
1713   src = copy_rtx (SET_SRC (set));
1714   dest = copy_rtx (SET_DEST (set));
1715   src = force_operand (src, dest);
1716   if (src != dest)
1717     emit_move_insn (dest, src);
1718   seq = get_insns ();
1719   end_sequence ();
1720
1721   emit_insn_before (seq, insn);
1722   delete_insn (insn);
1723 }
1724
1725
1726 /* Return one expansion of the accumulator recorded in struct VE.  */
1727
1728 static rtx
1729 get_expansion (struct var_to_expand *ve)
1730 {
1731   rtx reg;
1732
1733   if (ve->reuse_expansion == 0)
1734     reg = ve->reg;
1735   else
1736     reg = ve->var_expansions[ve->reuse_expansion - 1];
1737
1738   if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
1739     ve->reuse_expansion = 0;
1740   else
1741     ve->reuse_expansion++;
1742
1743   return reg;
1744 }
1745
1746
1747 /* Given INSN replace the uses of the accumulator recorded in VE
1748    with a new register.  */
1749
1750 static void
1751 expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
1752 {
1753   rtx new_reg, set;
1754   bool really_new_expansion = false;
1755
1756   set = single_set (insn);
1757   gcc_assert (set);
1758
1759   /* Generate a new register only if the expansion limit has not been
1760      reached.  Else reuse an already existing expansion.  */
1761   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1762     {
1763       really_new_expansion = true;
1764       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1765     }
1766   else
1767     new_reg = get_expansion (ve);
1768
1769   validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
1770   if (apply_change_group ())
1771     if (really_new_expansion)
1772       {
1773         ve->var_expansions.safe_push (new_reg);
1774         ve->expansion_count++;
1775       }
1776 }
1777
1778 /* Initialize the variable expansions in loop preheader.  PLACE is the
1779    loop-preheader basic block where the initialization of the
1780    expansions should take place.  The expansions are initialized with
1781    (-0) when the operation is plus or minus to honor sign zero.  This
1782    way we can prevent cases where the sign of the final result is
1783    effected by the sign of the expansion.  Here is an example to
1784    demonstrate this:
1785
1786    for (i = 0 ; i < n; i++)
1787      sum += something;
1788
1789    ==>
1790
1791    sum += something
1792    ....
1793    i = i+1;
1794    sum1 += something
1795    ....
1796    i = i+1
1797    sum2 += something;
1798    ....
1799
1800    When SUM is initialized with -zero and SOMETHING is also -zero; the
1801    final result of sum should be -zero thus the expansions sum1 and sum2
1802    should be initialized with -zero as well (otherwise we will get +zero
1803    as the final result).  */
1804
1805 static void
1806 insert_var_expansion_initialization (struct var_to_expand *ve,
1807                                      basic_block place)
1808 {
1809   rtx_insn *seq;
1810   rtx var, zero_init;
1811   unsigned i;
1812   enum machine_mode mode = GET_MODE (ve->reg);
1813   bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
1814
1815   if (ve->var_expansions.length () == 0)
1816     return;
1817
1818   start_sequence ();
1819   switch (ve->op)
1820     {
1821     case FMA:
1822       /* Note that we only accumulate FMA via the ADD operand.  */
1823     case PLUS:
1824     case MINUS:
1825       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1826         {
1827           if (honor_signed_zero_p)
1828             zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
1829           else
1830             zero_init = CONST0_RTX (mode);
1831           emit_move_insn (var, zero_init);
1832         }
1833       break;
1834
1835     case MULT:
1836       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1837         {
1838           zero_init = CONST1_RTX (GET_MODE (var));
1839           emit_move_insn (var, zero_init);
1840         }
1841       break;
1842
1843     default:
1844       gcc_unreachable ();
1845     }
1846
1847   seq = get_insns ();
1848   end_sequence ();
1849
1850   emit_insn_after (seq, BB_END (place));
1851 }
1852
1853 /* Combine the variable expansions at the loop exit.  PLACE is the
1854    loop exit basic block where the summation of the expansions should
1855    take place.  */
1856
1857 static void
1858 combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
1859 {
1860   rtx sum = ve->reg;
1861   rtx expr, var;
1862   rtx_insn *seq, *insn;
1863   unsigned i;
1864
1865   if (ve->var_expansions.length () == 0)
1866     return;
1867
1868   start_sequence ();
1869   switch (ve->op)
1870     {
1871     case FMA:
1872       /* Note that we only accumulate FMA via the ADD operand.  */
1873     case PLUS:
1874     case MINUS:
1875       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1876         sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
1877       break;
1878
1879     case MULT:
1880       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1881         sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
1882       break;
1883
1884     default:
1885       gcc_unreachable ();
1886     }
1887
1888   expr = force_operand (sum, ve->reg);
1889   if (expr != ve->reg)
1890     emit_move_insn (ve->reg, expr);
1891   seq = get_insns ();
1892   end_sequence ();
1893
1894   insn = BB_HEAD (place);
1895   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
1896     insn = NEXT_INSN (insn);
1897
1898   emit_insn_after (seq, insn);
1899 }
1900
1901 /* Strip away REG_EQUAL notes for IVs we're splitting.
1902
1903    Updating REG_EQUAL notes for IVs we split is tricky: We
1904    cannot tell until after unrolling, DF-rescanning, and liveness
1905    updating, whether an EQ_USE is reached by the split IV while
1906    the IV reg is still live.  See PR55006.
1907
1908    ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
1909    because RTL loop-iv requires us to defer rescanning insns and
1910    any notes attached to them.  So resort to old techniques...  */
1911
1912 static void
1913 maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
1914 {
1915   struct iv_to_split *ivts;
1916   rtx note = find_reg_equal_equiv_note (insn);
1917   if (! note)
1918     return;
1919   for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
1920     if (reg_mentioned_p (ivts->orig_var, note))
1921       {
1922         remove_note (insn, note);
1923         return;
1924       }
1925 }
1926
1927 /* Apply loop optimizations in loop copies using the
1928    data which gathered during the unrolling.  Structure
1929    OPT_INFO record that data.
1930
1931    UNROLLING is true if we unrolled (not peeled) the loop.
1932    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
1933    the loop (as it should happen in complete unrolling, but not in ordinary
1934    peeling of the loop).  */
1935
1936 static void
1937 apply_opt_in_copies (struct opt_info *opt_info,
1938                      unsigned n_copies, bool unrolling,
1939                      bool rewrite_original_loop)
1940 {
1941   unsigned i, delta;
1942   basic_block bb, orig_bb;
1943   rtx_insn *insn, *orig_insn, *next;
1944   struct iv_to_split ivts_templ, *ivts;
1945   struct var_to_expand ve_templ, *ves;
1946
1947   /* Sanity check -- we need to put initialization in the original loop
1948      body.  */
1949   gcc_assert (!unrolling || rewrite_original_loop);
1950
1951   /* Allocate the basic variables (i0).  */
1952   if (opt_info->insns_to_split)
1953     for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
1954       allocate_basic_variable (ivts);
1955
1956   for (i = opt_info->first_new_block;
1957        i < (unsigned) last_basic_block_for_fn (cfun);
1958        i++)
1959     {
1960       bb = BASIC_BLOCK_FOR_FN (cfun, i);
1961       orig_bb = get_bb_original (bb);
1962
1963       /* bb->aux holds position in copy sequence initialized by
1964          duplicate_loop_to_header_edge.  */
1965       delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
1966                                         unrolling);
1967       bb->aux = 0;
1968       orig_insn = BB_HEAD (orig_bb);
1969       FOR_BB_INSNS_SAFE (bb, insn, next)
1970         {
1971           if (!INSN_P (insn)
1972               || (DEBUG_INSN_P (insn)
1973                   && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
1974             continue;
1975
1976           while (!INSN_P (orig_insn)
1977                  || (DEBUG_INSN_P (orig_insn)
1978                      && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
1979                          == LABEL_DECL)))
1980             orig_insn = NEXT_INSN (orig_insn);
1981
1982           ivts_templ.insn = orig_insn;
1983           ve_templ.insn = orig_insn;
1984
1985           /* Apply splitting iv optimization.  */
1986           if (opt_info->insns_to_split)
1987             {
1988               maybe_strip_eq_note_for_split_iv (opt_info, insn);
1989
1990               ivts = opt_info->insns_to_split->find (&ivts_templ);
1991
1992               if (ivts)
1993                 {
1994                   gcc_assert (GET_CODE (PATTERN (insn))
1995                               == GET_CODE (PATTERN (orig_insn)));
1996
1997                   if (!delta)
1998                     insert_base_initialization (ivts, insn);
1999                   split_iv (ivts, insn, delta);
2000                 }
2001             }
2002           /* Apply variable expansion optimization.  */
2003           if (unrolling && opt_info->insns_with_var_to_expand)
2004             {
2005               ves = (struct var_to_expand *)
2006                 opt_info->insns_with_var_to_expand->find (&ve_templ);
2007               if (ves)
2008                 {
2009                   gcc_assert (GET_CODE (PATTERN (insn))
2010                               == GET_CODE (PATTERN (orig_insn)));
2011                   expand_var_during_unrolling (ves, insn);
2012                 }
2013             }
2014           orig_insn = NEXT_INSN (orig_insn);
2015         }
2016     }
2017
2018   if (!rewrite_original_loop)
2019     return;
2020
2021   /* Initialize the variable expansions in the loop preheader
2022      and take care of combining them at the loop exit.  */
2023   if (opt_info->insns_with_var_to_expand)
2024     {
2025       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2026         insert_var_expansion_initialization (ves, opt_info->loop_preheader);
2027       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2028         combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
2029     }
2030
2031   /* Rewrite also the original loop body.  Find them as originals of the blocks
2032      in the last copied iteration, i.e. those that have
2033      get_bb_copy (get_bb_original (bb)) == bb.  */
2034   for (i = opt_info->first_new_block;
2035        i < (unsigned) last_basic_block_for_fn (cfun);
2036        i++)
2037     {
2038       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2039       orig_bb = get_bb_original (bb);
2040       if (get_bb_copy (orig_bb) != bb)
2041         continue;
2042
2043       delta = determine_split_iv_delta (0, n_copies, unrolling);
2044       for (orig_insn = BB_HEAD (orig_bb);
2045            orig_insn != NEXT_INSN (BB_END (bb));
2046            orig_insn = next)
2047         {
2048           next = NEXT_INSN (orig_insn);
2049
2050           if (!INSN_P (orig_insn))
2051             continue;
2052
2053           ivts_templ.insn = orig_insn;
2054           if (opt_info->insns_to_split)
2055             {
2056               maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);
2057
2058               ivts = (struct iv_to_split *)
2059                 opt_info->insns_to_split->find (&ivts_templ);
2060               if (ivts)
2061                 {
2062                   if (!delta)
2063                     insert_base_initialization (ivts, orig_insn);
2064                   split_iv (ivts, orig_insn, delta);
2065                   continue;
2066                 }
2067             }
2068
2069         }
2070     }
2071 }
2072
2073 /* Release OPT_INFO.  */
2074
2075 static void
2076 free_opt_info (struct opt_info *opt_info)
2077 {
2078   delete opt_info->insns_to_split;
2079   opt_info->insns_to_split = NULL;
2080   if (opt_info->insns_with_var_to_expand)
2081     {
2082       struct var_to_expand *ves;
2083
2084       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2085         ves->var_expansions.release ();
2086       delete opt_info->insns_with_var_to_expand;
2087       opt_info->insns_with_var_to_expand = NULL;
2088     }
2089   free (opt_info);
2090 }